Merge tag 'trace-v6.9-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 18 Mar 2024 22:11:44 +0000 (15:11 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 18 Mar 2024 22:11:44 +0000 (15:11 -0700)
Pull tracing updates from Steven Rostedt:
 "Main user visible change:

   - User events can now have "multi formats"

     Currently, a user event has a single format. If the same event is
     created again with a different format, the creation fails. That
     is, once an event name is used, it cannot be used again with a
     different format. This can cause issues if a library using an
     event updates its format: an application using the older format
     will prevent an application using the new library from
     registering its event.

     A task could also DoS another application if it knows the event
     names and creates events with different formats.

     Multi-format events live in a different namespace from the
     single-format events. The event name together with its format
     makes up the unique identifier. This allows two different
     applications to use the same user event name but with different
     payloads (see the registration sketch below).
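
     As an illustration, here is a minimal userspace sketch of
     registering the same event name twice with different payloads.
     It assumes the v6.9 uapi header <linux/user_events.h> (struct
     user_reg, DIAG_IOCSREG and the new USER_EVENT_REG_MULTI_FORMAT
     flag); the event names and formats are made up:

      #include <fcntl.h>
      #include <stdint.h>
      #include <stdio.h>
      #include <sys/ioctl.h>
      #include <unistd.h>
      #include <linux/user_events.h>

      static uint32_t enable_a, enable_b; /* kernel sets the enable bit here */

      static int reg(int fd, const char *name_args, uint32_t *enabled)
      {
              struct user_reg r = {0};

              r.size = sizeof(r);
              /* opt in to the multi-format namespace (new in v6.9) */
              r.flags = USER_EVENT_REG_MULTI_FORMAT;
              r.enable_bit = 0;
              r.enable_size = sizeof(*enabled);
              r.enable_addr = (uint64_t)(uintptr_t)enabled;
              r.name_args = (uint64_t)(uintptr_t)name_args;

              return ioctl(fd, DIAG_IOCSREG, &r);
      }

      int main(void)
      {
              int fd = open("/sys/kernel/tracing/user_events_data", O_RDWR);

              if (fd < 0)
                      return 1;

              /* Same name, different formats: both registrations now
               * succeed, as two distinct events. */
              if (reg(fd, "mylib.event u32 id", &enable_a) < 0)
                      perror("reg a");
              if (reg(fd, "mylib.event u32 id; char[20] msg", &enable_b) < 0)
                      perror("reg b");

              close(fd);
              return 0;
      }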

   - Added support to have ftrace_dump_on_oops dump out instances and
     not just the main top level tracing buffer.

  Other changes:

   - Add eventfs_root_inode

     Only the root inode has a dentry that is static (it never goes
     away), and it is stored at creation. There's no reason for the
     thousands of other eventfs inodes to carry a dentry pointer that
     never gets set in their descriptor. Create an eventfs_root_inode
     descriptor that holds an eventfs_inode descriptor and a dentry
     pointer, and have only the root inode use it.
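
     A conceptual sketch of the layout (field and helper names here
     are illustrative, not the exact kernel definitions):

      struct eventfs_root_inode {
              struct eventfs_inode ei;
              struct dentry *events_dir; /* static, never goes away */
      };

      static inline struct eventfs_root_inode *
      get_root_inode(struct eventfs_inode *ei)
      {
              return container_of(ei, struct eventfs_root_inode, ei);
      }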

   - Added WARN_ON()s in eventfs

     There are some conditionals remaining in eventfs that should
     never be hit. Instead of removing them, wrap them in WARN_ON()
     so it is obvious if they are ever hit.
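
     The pattern being applied looks like this (the specific check is
     made up for illustration):

      /* "Can't happen" -- but scream rather than fail silently */
      if (WARN_ON(!ei))
              return NULL;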

   - Have saved_cmdlines allocation also include the map_cmdline_to_pid
     array

     The saved_cmdlines structure allocates a large amount of data to
     hold its mappings. Within it are three arrays. Two are already
     part of the allocation: map_pid_to_cmdline[] and
     saved_cmdlines[]. More memory can be saved by including the
     map_cmdline_to_pid[] array in it as well.
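
     A sketch of the idea, with illustrative names: size a single
     allocation to cover the structure and all three arrays, then
     carve map_cmdline_to_pid[] out of that block instead of
     allocating it separately:

      struct saved_cmdlines_buffer {
              unsigned map_pid_to_cmdline[PID_MAX_DEFAULT + 1];
              unsigned *map_cmdline_to_pid; /* carved from same block */
              int cmdline_num;
              char saved_cmdlines[];        /* val * TASK_COMM_LEN bytes */
      };

      static struct saved_cmdlines_buffer *alloc_cmdlines(int val)
      {
              struct saved_cmdlines_buffer *s;
              size_t sz = sizeof(*s) + val * TASK_COMM_LEN +
                          val * sizeof(*s->map_cmdline_to_pid);

              s = kzalloc(sz, GFP_KERNEL);
              if (s)
                      s->map_cmdline_to_pid = (unsigned *)
                              &s->saved_cmdlines[val * TASK_COMM_LEN];
              return s;
      }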

   - Restructure __string() and __assign_str() macros used in
     TRACE_EVENT()

     Dynamic strings in TRACE_EVENT() are declared with:

         __string(name, source)

     And assigned with:

         __assign_str(name, source)

     In the tracepoint callback of the event, __string() is used to
     get the size needed to allocate on the ring buffer, and
     __assign_str() is used to copy the string into the ring buffer.
     A helper structure, created in the TRACE_EVENT() macro logic and
     filled in by __string(), holds the string's length and its
     position in the ring buffer.
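
     The generated helper has roughly this shape (the names are
     illustrative, not the exact generated ones); the rework described
     next also stashes the source pointer in it:

      struct trace_event_data_offsets_myevent {
              u32        name;       /* offset/length, set by __string() */
              const char *name_ptr_; /* source saved for __assign_str() */
      };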

     Several trace events have a function create the string to save.
     That function is executed twice: once for __string() and again
     for __assign_str(). There's no reason for this. The helper
     structure can also save the string used in __string(), and
     __assign_str() can simply copy it (the length is already saved
     there too).

     Using the structure to store the source string for the
     assignment means that the second argument to __assign_str() is
     no longer needed.

     It will be removed in the next merge window, but for now a
     warning is added if the source string given to __string() is
     different from the source string given to __assign_str(), as the
     source to __assign_str() isn't even used and will be going away
     (example below).
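
     A hedged example of a transitional call site (the event and its
     source function are made up):

      TRACE_EVENT(myevent,
              TP_PROTO(struct device *dev),
              TP_ARGS(dev),
              TP_STRUCT__entry(
                      /* sizes the record and now saves the pointer */
                      __string(name, dev_name(dev))
              ),
              TP_fast_assign(
                      /* copies the saved string; WARN_ON()s if this
                       * source differs from what __string() was given */
                      __assign_str(name, dev_name(dev));
              ),
              TP_printk("dev=%s", __get_str(name))
      );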

   - Added checks to make sure that the source of __string() is also the
     source of __assign_str() so that it can be safely removed in the
     next merge window.

     Included fixes that the above check found.

   - Other minor clean ups and fixes"

* tag 'trace-v6.9-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (34 commits)
  tracing: Add __string_src() helper to help compilers not to get confused
  tracing: Use strcmp() in __assign_str() WARN_ON() check
  tracepoints: Use WARN() and not WARN_ON() for warnings
  tracing: Use div64_u64() instead of do_div()
  tracing: Support to dump instance traces by ftrace_dump_on_oops
  tracing: Remove second parameter to __assign_rel_str()
  tracing: Add warning if string in __assign_str() does not match __string()
  tracing: Add __string_len() example
  tracing: Remove __assign_str_len()
  ftrace: Fix most kernel-doc warnings
  tracing: Decrement the snapshot if the snapshot trigger fails to register
  tracing: Fix snapshot counter going between two tracers that use it
  tracing: Use EVENT_NULL_STR macro instead of open coding "(null)"
  tracing: Use ? : shortcut in trace macros
  tracing: Do not calculate strlen() twice for __string() fields
  tracing: Rework __assign_str() and __string() to not duplicate getting the string
  cxl/trace: Properly initialize cxl_poison region name
  net: hns3: tracing: fix hclgevf trace event strings
  drm/i915: Add missing ; to __assign_str() macros in tracepoint code
  NFSD: Fix nfsd_clid_class use of __string_len() macro
  ...

2379 files changed:
CREDITS
Documentation/ABI/testing/debugfs-cxl
Documentation/ABI/testing/debugfs-driver-qat
Documentation/ABI/testing/debugfs-hisi-hpre
Documentation/ABI/testing/debugfs-hisi-sec
Documentation/ABI/testing/debugfs-hisi-zip
Documentation/ABI/testing/sysfs-bus-cxl
Documentation/ABI/testing/sysfs-bus-dax [new file with mode: 0644]
Documentation/ABI/testing/sysfs-driver-qat
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/ABI/testing/sysfs-fs-virtiofs [new file with mode: 0644]
Documentation/ABI/testing/sysfs-kernel-mm-cma
Documentation/ABI/testing/sysfs-kernel-mm-damon
Documentation/ABI/testing/sysfs-kernel-mm-mempolicy [new file with mode: 0644]
Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave [new file with mode: 0644]
Documentation/admin-guide/kdump/vmcoreinfo.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/media/visl.rst
Documentation/admin-guide/media/vivid.rst
Documentation/admin-guide/mm/damon/reclaim.rst
Documentation/admin-guide/mm/damon/usage.rst
Documentation/admin-guide/mm/numa_memory_policy.rst
Documentation/admin-guide/sysctl/kernel.rst
Documentation/dev-tools/kasan.rst
Documentation/devicetree/bindings/Makefile
Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt [deleted file]
Documentation/devicetree/bindings/arm/mediatek/mediatek,pciesys.txt [deleted file]
Documentation/devicetree/bindings/arm/mediatek/mediatek,ssusbsys.txt [deleted file]
Documentation/devicetree/bindings/arm/syna.txt
Documentation/devicetree/bindings/clock/mediatek,mt2701-hifsys.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/mediatek,mt7622-pciesys.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/mediatek,mt7622-ssusbsys.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/mobileye,eyeq5-clk.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/clock/qcom,gpucc.yaml
Documentation/devicetree/bindings/clock/qcom,q6sstopcc.yaml
Documentation/devicetree/bindings/clock/qcom,sc7180-mss.yaml [deleted file]
Documentation/devicetree/bindings/clock/samsung,exynos850-clock.yaml
Documentation/devicetree/bindings/clock/tesla,fsd-clock.yaml
Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml
Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml
Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml
Documentation/devicetree/bindings/crypto/qcom,inline-crypto-engine.yaml
Documentation/devicetree/bindings/crypto/qcom-qce.yaml
Documentation/devicetree/bindings/display/atmel/atmel,hlcdc-display-controller.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt [deleted file]
Documentation/devicetree/bindings/display/fsl,lcdif.yaml
Documentation/devicetree/bindings/display/panel/visionox,r66451.yaml
Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
Documentation/devicetree/bindings/dma/fsl,edma.yaml
Documentation/devicetree/bindings/dma/fsl,imx-sdma.yaml
Documentation/devicetree/bindings/dma/marvell,mmp-dma.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/dma/mediatek,mt7622-hsdma.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/dma/mmp-dma.txt [deleted file]
Documentation/devicetree/bindings/dma/mtk-hsdma.txt [deleted file]
Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
Documentation/devicetree/bindings/fpga/fpga-region.txt [deleted file]
Documentation/devicetree/bindings/fpga/fpga-region.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/gpio/gateworks,pld-gpio.txt
Documentation/devicetree/bindings/gpio/mrvl-gpio.yaml
Documentation/devicetree/bindings/i2c/atmel,at91sam-i2c.yaml
Documentation/devicetree/bindings/i2c/i2c-demux-pinctrl.yaml
Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml
Documentation/devicetree/bindings/i2c/i2c-mpc.yaml
Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml
Documentation/devicetree/bindings/i2c/i2c-pxa.yaml
Documentation/devicetree/bindings/i2c/i2c.txt [deleted file]
Documentation/devicetree/bindings/i2c/nvidia,tegra186-bpmp-i2c.yaml
Documentation/devicetree/bindings/i2c/renesas,rcar-i2c.yaml
Documentation/devicetree/bindings/i3c/aspeed,ast2600-i3c.yaml
Documentation/devicetree/bindings/i3c/cdns,i3c-master.yaml
Documentation/devicetree/bindings/i3c/i3c.yaml
Documentation/devicetree/bindings/i3c/mipi-i3c-hci.yaml
Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml
Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml
Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
Documentation/devicetree/bindings/input/atmel,captouch.txt [deleted file]
Documentation/devicetree/bindings/input/atmel,captouch.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/samsung,s3c6410-keypad.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/samsung-keypad.txt [deleted file]
Documentation/devicetree/bindings/input/touchscreen/fsl,imx6ul-tsc.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/touchscreen/goodix,gt9916.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/touchscreen/goodix.yaml
Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml
Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt [deleted file]
Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml
Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml
Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt [deleted file]
Documentation/devicetree/bindings/interrupt-controller/atmel,aic.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/interrupt-controller/fsl,intmux.yaml
Documentation/devicetree/bindings/interrupt-controller/mediatek,mt6577-sysirq.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt [deleted file]
Documentation/devicetree/bindings/interrupt-controller/renesas,rzg2l-irqc.yaml
Documentation/devicetree/bindings/media/i2c/techwell,tw9900.yaml
Documentation/devicetree/bindings/media/rockchip-isp1.yaml
Documentation/devicetree/bindings/media/st,stm32mp25-video-codec.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/mips/cpus.yaml
Documentation/devicetree/bindings/mips/mobileye.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml
Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt [deleted file]
Documentation/devicetree/bindings/misc/xlnx,sd-fec.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/mtd/atmel-nand.txt
Documentation/devicetree/bindings/mtd/brcm,brcmnand.yaml
Documentation/devicetree/bindings/mtd/davinci-nand.txt
Documentation/devicetree/bindings/mtd/flctl-nand.txt
Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt
Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
Documentation/devicetree/bindings/mtd/gpmi-nand.yaml
Documentation/devicetree/bindings/mtd/hisi504-nand.txt
Documentation/devicetree/bindings/mtd/jedec,spi-nor.yaml
Documentation/devicetree/bindings/mtd/mtd.yaml
Documentation/devicetree/bindings/mtd/nvidia-tegra20-nand.txt
Documentation/devicetree/bindings/mtd/orion-nand.txt
Documentation/devicetree/bindings/mtd/samsung-s3c2410.txt
Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
Documentation/devicetree/bindings/mux/mux-controller.yaml
Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml
Documentation/devicetree/bindings/phy/mediatek,mt8365-csi-rx.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
Documentation/devicetree/bindings/phy/qcom,msm8998-qmp-usb3-phy.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml
Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml
Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml
Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/pinctrl/fsl,imx6ul-pinctrl.txt [deleted file]
Documentation/devicetree/bindings/pinctrl/fsl,imx6ul-pinctrl.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/power/wakeup-source.txt
Documentation/devicetree/bindings/reset/mobileye,eyeq5-reset.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/rng/atmel,at91-trng.yaml
Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml
Documentation/devicetree/bindings/soc/imx/fsl,imx-anatop.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/soc/imx/fsl,imx-iomuxc-gpr.yaml
Documentation/devicetree/bindings/sound/cs4341.txt
Documentation/devicetree/bindings/submitting-patches.rst
Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt [deleted file]
Documentation/devicetree/bindings/timer/mediatek,timer.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml
Documentation/devicetree/bindings/trivial-devices.yaml
Documentation/devicetree/bindings/usb/cypress,hx3.yaml
Documentation/devicetree/bindings/vendor-prefixes.yaml
Documentation/devicetree/bindings/watchdog/arm,sp805.yaml
Documentation/devicetree/bindings/watchdog/atmel,sama5d4-wdt.yaml
Documentation/devicetree/bindings/watchdog/brcm,bcm2835-pm-wdog.txt [deleted file]
Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml
Documentation/devicetree/bindings/watchdog/renesas,wdt.yaml
Documentation/devicetree/bindings/watchdog/sprd,sp9860-wdt.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/sprd-wdt.txt [deleted file]
Documentation/devicetree/bindings/watchdog/starfive,jh7100-wdt.yaml
Documentation/devicetree/bindings/writing-schema.rst
Documentation/driver-api/media/drivers/ccs/ccs.rst
Documentation/driver-api/media/v4l2-subdev.rst
Documentation/driver-api/soundwire/stream.rst
Documentation/filesystems/bcachefs/errorcodes.rst [new file with mode: 0644]
Documentation/filesystems/f2fs.rst
Documentation/firmware-guide/acpi/apei/einj.rst
Documentation/i2c/writing-clients.rst
Documentation/misc-devices/xilinx_sdfec.rst
Documentation/mm/damon/design.rst
Documentation/mm/damon/maintainer-profile.rst
Documentation/mm/page_owner.rst
Documentation/process/changes.rst
Documentation/translations/zh_CN/admin-guide/mm/damon/usage.rst
Documentation/translations/zh_CN/dev-tools/kasan.rst
Documentation/translations/zh_TW/admin-guide/mm/damon/usage.rst
Documentation/translations/zh_TW/dev-tools/kasan.rst
Documentation/userspace-api/media/drivers/ccs.rst
Documentation/userspace-api/media/dvb/legacy_dvb_apis.rst
Documentation/userspace-api/media/dvb/legacy_dvb_audio.rst [new file with mode: 0644]
Documentation/userspace-api/media/dvb/legacy_dvb_decoder_api.rst [new file with mode: 0644]
Documentation/userspace-api/media/dvb/legacy_dvb_osd.rst [new file with mode: 0644]
Documentation/userspace-api/media/dvb/legacy_dvb_video.rst [new file with mode: 0644]
Documentation/userspace-api/media/mediactl/media-types.rst
Documentation/userspace-api/media/v4l/vidioc-subdev-g-client-cap.rst
Documentation/virt/kvm/api.rst
MAINTAINERS
Makefile
arch/arc/Kconfig
arch/arc/include/asm/cachetype.h [new file with mode: 0644]
arch/arm/Kconfig
arch/arm/Kconfig.debug
arch/arm/boot/dts/broadcom/bcm47622.dtsi
arch/arm/boot/dts/broadcom/bcm63138.dtsi
arch/arm/boot/dts/broadcom/bcm63148.dtsi
arch/arm/boot/dts/broadcom/bcm63178.dtsi
arch/arm/boot/dts/broadcom/bcm6756.dtsi
arch/arm/boot/dts/broadcom/bcm6846.dtsi
arch/arm/boot/dts/broadcom/bcm6855.dtsi
arch/arm/boot/dts/broadcom/bcm6878.dtsi
arch/arm/boot/dts/broadcom/bcm947622.dts
arch/arm/boot/dts/broadcom/bcm963138.dts
arch/arm/boot/dts/broadcom/bcm963138dvt.dts
arch/arm/boot/dts/broadcom/bcm963148.dts
arch/arm/boot/dts/broadcom/bcm963178.dts
arch/arm/boot/dts/broadcom/bcm96756.dts
arch/arm/boot/dts/broadcom/bcm96846.dts
arch/arm/boot/dts/broadcom/bcm96855.dts
arch/arm/boot/dts/broadcom/bcm96878.dts
arch/arm/configs/aspeed_g4_defconfig
arch/arm/configs/aspeed_g5_defconfig
arch/arm/crypto/sha256_glue.c
arch/arm/crypto/sha512-glue.c
arch/arm/include/asm/cachetype.h
arch/arm/include/asm/current.h
arch/arm/include/asm/pgtable-2level.h
arch/arm/include/asm/pgtable-3level.h
arch/arm/include/asm/pgtable.h
arch/arm/include/asm/ptdump.h
arch/arm/kernel/Makefile
arch/arm/kernel/machine_kexec.c
arch/arm/kernel/setup.c
arch/arm/kernel/vmcore_info.c [new file with mode: 0644]
arch/arm/mm/dump.c
arch/arm/mm/init.c
arch/arm/mm/mmu.c
arch/arm64/Kconfig
arch/arm64/boot/dts/broadcom/bcmbca/bcm4906-netgear-r8000p.dts
arch/arm64/boot/dts/broadcom/bcmbca/bcm4906-tplink-archer-c2300-v1.dts
arch/arm64/boot/dts/broadcom/bcmbca/bcm4908-asus-gt-ac5300.dts
arch/arm64/boot/dts/broadcom/bcmbca/bcm4908.dtsi
arch/arm64/boot/dts/broadcom/bcmbca/bcm4912.dtsi
arch/arm64/boot/dts/broadcom/bcmbca/bcm63146.dtsi
arch/arm64/boot/dts/broadcom/bcmbca/bcm63158.dtsi
arch/arm64/boot/dts/broadcom/bcmbca/bcm6813.dtsi
arch/arm64/boot/dts/broadcom/bcmbca/bcm6856.dtsi
arch/arm64/boot/dts/broadcom/bcmbca/bcm6858.dtsi
arch/arm64/boot/dts/broadcom/bcmbca/bcm94908.dts
arch/arm64/boot/dts/broadcom/bcmbca/bcm94912.dts
arch/arm64/boot/dts/broadcom/bcmbca/bcm963146.dts
arch/arm64/boot/dts/broadcom/bcmbca/bcm963158.dts
arch/arm64/boot/dts/broadcom/bcmbca/bcm96813.dts
arch/arm64/boot/dts/broadcom/bcmbca/bcm96856.dts
arch/arm64/boot/dts/broadcom/bcmbca/bcm96858.dts
arch/arm64/crypto/Kconfig
arch/arm64/crypto/aes-ce-ccm-core.S
arch/arm64/crypto/aes-ce-ccm-glue.c
arch/arm64/crypto/aes-glue.c
arch/arm64/include/asm/cpu.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/crash_reserve.h [moved from arch/arm64/include/asm/crash_core.h with 81% similarity]
arch/arm64/include/asm/kexec.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_hyp.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/kvm_nested.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/ptdump.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/tlbflush.h
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/kernel/Makefile
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/kernel/efi.c
arch/arm64/kernel/head.S
arch/arm64/kernel/machine_kexec.c
arch/arm64/kernel/machine_kexec_file.c
arch/arm64/kernel/mte.c
arch/arm64/kernel/vmcore_info.c [moved from arch/arm64/kernel/crash_core.c with 92% similarity]
arch/arm64/kvm/Kconfig
arch/arm64/kvm/arch_timer.c
arch/arm64/kvm/arm.c
arch/arm64/kvm/debug.c
arch/arm64/kvm/emulate-nested.c
arch/arm64/kvm/fpsimd.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/hyp/aarch32.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
arch/arm64/kvm/hyp/nvhe/debug-sr.c
arch/arm64/kvm/hyp/nvhe/host.S
arch/arm64/kvm/hyp/nvhe/mm.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/vhe/sysreg-sr.c
arch/arm64/kvm/inject_fault.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/nested.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/sys_regs.h
arch/arm64/kvm/vgic/vgic-debug.c
arch/arm64/kvm/vgic/vgic-init.c
arch/arm64/kvm/vgic/vgic-its.c
arch/arm64/kvm/vgic/vgic-v3.c
arch/arm64/kvm/vgic/vgic.c
arch/arm64/kvm/vgic/vgic.h
arch/arm64/mm/Makefile
arch/arm64/mm/contpte.c [new file with mode: 0644]
arch/arm64/mm/fault.c
arch/arm64/mm/fixmap.c
arch/arm64/mm/hugetlbpage.c
arch/arm64/mm/init.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/mmu.c
arch/arm64/mm/pageattr.c
arch/arm64/mm/ptdump.c
arch/arm64/mm/trans_pgd.c
arch/arm64/tools/cpucaps
arch/arm64/tools/sysreg
arch/csky/Kconfig
arch/csky/include/asm/cachetype.h [new file with mode: 0644]
arch/loongarch/Kconfig
arch/loongarch/include/uapi/asm/kvm.h
arch/loongarch/kernel/setup.c
arch/loongarch/kvm/Kconfig
arch/loongarch/kvm/mmu.c
arch/loongarch/kvm/switch.S
arch/loongarch/kvm/timer.c
arch/loongarch/kvm/vcpu.c
arch/m68k/Kconfig
arch/m68k/include/asm/cachetype.h [new file with mode: 0644]
arch/mips/Kbuild
arch/mips/Kbuild.platforms
arch/mips/Kconfig
arch/mips/Makefile
arch/mips/alchemy/common/clock.c
arch/mips/boot/compressed/uart-16550.c
arch/mips/boot/compressed/uart-alchemy.c
arch/mips/boot/compressed/uart-prom.c
arch/mips/boot/dts/Makefile
arch/mips/boot/dts/mobileye/Makefile [new file with mode: 0644]
arch/mips/boot/dts/mobileye/eyeq5-epm5.dts [new file with mode: 0644]
arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi [new file with mode: 0644]
arch/mips/boot/dts/mobileye/eyeq5.dtsi [new file with mode: 0644]
arch/mips/boot/dts/ralink/mt7621.dtsi
arch/mips/configs/eyeq5_defconfig [new file with mode: 0644]
arch/mips/generic/Makefile
arch/mips/include/asm/addrspace.h
arch/mips/include/asm/asmmacro.h
arch/mips/include/asm/cachetype.h [new file with mode: 0644]
arch/mips/include/asm/cdmm.h
arch/mips/include/asm/mach-generic/spaces.h
arch/mips/include/asm/mips-cm.h
arch/mips/include/asm/mips_mt.h
arch/mips/include/asm/mipsmtregs.h
arch/mips/include/asm/mipsregs.h
arch/mips/include/asm/regdef.h
arch/mips/include/asm/smp-cps.h
arch/mips/include/uapi/asm/kvm.h
arch/mips/kernel/cps-vec.S
arch/mips/kernel/mips-cm.c
arch/mips/kernel/mips-mt.c
arch/mips/kernel/pm-cps.c
arch/mips/kernel/rtlx-mt.c
arch/mips/kernel/setup.c
arch/mips/kernel/smp-cps.c
arch/mips/kernel/traps.c
arch/mips/kernel/vpe-mt.c
arch/mips/kvm/Kconfig
arch/mips/kvm/entry.c
arch/mips/mm/page.c
arch/mips/mm/tlbex.c
arch/mips/mobileye/Makefile [new file with mode: 0644]
arch/mips/mobileye/Platform [new file with mode: 0644]
arch/mips/mobileye/board-epm5.its.S [new file with mode: 0644]
arch/mips/mobileye/vmlinux.its.S [new file with mode: 0644]
arch/mips/pci/fixup-ath79.c
arch/mips/pci/fixup-lantiq.c
arch/mips/pci/ops-tx4927.c
arch/mips/ralink/timer.c
arch/mips/sgi-ip22/ip22-gio.c
arch/mips/sibyte/common/sb_tbprof.c
arch/mips/txx9/generic/setup.c
arch/nios2/Kconfig
arch/nios2/include/asm/cachetype.h [new file with mode: 0644]
arch/nios2/include/asm/pgtable.h
arch/parisc/Kconfig
arch/parisc/include/asm/assembly.h
arch/parisc/include/asm/cachetype.h [new file with mode: 0644]
arch/parisc/include/asm/checksum.h
arch/parisc/include/asm/parisc-device.h
arch/parisc/kernel/drivers.c
arch/parisc/kernel/irq.c
arch/parisc/kernel/unaligned.c
arch/parisc/math-emu/dfsqrt.c
arch/parisc/math-emu/fcnvff.c
arch/parisc/math-emu/fcnvfu.c
arch/parisc/math-emu/fcnvfut.c
arch/parisc/math-emu/fcnvfx.c
arch/parisc/math-emu/fcnvfxt.c
arch/parisc/math-emu/fcnvuf.c
arch/parisc/math-emu/fcnvxf.c
arch/parisc/math-emu/frnd.c
arch/parisc/math-emu/sfsqrt.c
arch/powerpc/Kconfig
arch/powerpc/Makefile
arch/powerpc/boot/simple_alloc.c
arch/powerpc/configs/40x.config [new file with mode: 0644]
arch/powerpc/configs/44x.config [new file with mode: 0644]
arch/powerpc/configs/85xx-32bit.config
arch/powerpc/configs/8xx.config [new file with mode: 0644]
arch/powerpc/configs/ps3_defconfig
arch/powerpc/crypto/Kconfig
arch/powerpc/crypto/Makefile
arch/powerpc/crypto/aes.c [moved from drivers/crypto/vmx/aes.c with 100% similarity]
arch/powerpc/crypto/aes_cbc.c [moved from drivers/crypto/vmx/aes_cbc.c with 100% similarity]
arch/powerpc/crypto/aes_ctr.c [moved from drivers/crypto/vmx/aes_ctr.c with 100% similarity]
arch/powerpc/crypto/aes_xts.c [moved from drivers/crypto/vmx/aes_xts.c with 100% similarity]
arch/powerpc/crypto/aesp8-ppc.h [moved from drivers/crypto/vmx/aesp8-ppc.h with 100% similarity]
arch/powerpc/crypto/aesp8-ppc.pl [moved from drivers/crypto/vmx/aesp8-ppc.pl with 100% similarity]
arch/powerpc/crypto/ghash.c [moved from drivers/crypto/vmx/ghash.c with 100% similarity]
arch/powerpc/crypto/ghashp8-ppc.pl [moved from drivers/crypto/vmx/ghashp8-ppc.pl with 100% similarity]
arch/powerpc/crypto/vmx.c [moved from drivers/crypto/vmx/vmx.c with 100% similarity]
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/book3s/64/pgtable-64k.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/book3s/64/radix.h
arch/powerpc/include/asm/cputable.h
arch/powerpc/include/asm/ibmebus.h
arch/powerpc/include/asm/interrupt.h
arch/powerpc/include/asm/machdep.h
arch/powerpc/include/asm/macio.h
arch/powerpc/include/asm/mmu.h
arch/powerpc/include/asm/mpic.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/pgtable.h
arch/powerpc/include/asm/ppc_asm.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/reg_fsl_emb.h
arch/powerpc/include/asm/set_memory.h
arch/powerpc/include/asm/smp.h
arch/powerpc/include/asm/switch_to.h
arch/powerpc/include/asm/tlb.h
arch/powerpc/include/asm/trace.h
arch/powerpc/include/asm/vio.h
arch/powerpc/include/asm/vmalloc.h
arch/powerpc/include/uapi/asm/kvm.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/cpu_specs_book3s_64.h
arch/powerpc/kernel/dt_cpu_ftrs.c
arch/powerpc/kernel/irq.c
arch/powerpc/kernel/kprobes.c
arch/powerpc/kernel/prom.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/secure_boot.c
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/syscall.c
arch/powerpc/kernel/traps.c
arch/powerpc/kexec/Makefile
arch/powerpc/kexec/core.c
arch/powerpc/kexec/ranges.c
arch/powerpc/kexec/relocate_32.S
arch/powerpc/kexec/vmcore_info.c [new file with mode: 0644]
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/lib/Makefile
arch/powerpc/lib/copypage_power7.S
arch/powerpc/lib/copyuser_power7.S
arch/powerpc/lib/memcpy_power7.S
arch/powerpc/lib/sstep.c
arch/powerpc/mm/book3s64/hash_hugepage.c
arch/powerpc/mm/book3s64/hash_utils.c
arch/powerpc/mm/book3s64/pgtable.c
arch/powerpc/mm/book3s64/radix_pgtable.c
arch/powerpc/mm/drmem.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/mm/mmu_decl.h
arch/powerpc/mm/nohash/kaslr_booke.c
arch/powerpc/mm/numa.c
arch/powerpc/mm/pageattr.c
arch/powerpc/mm/pgtable.c
arch/powerpc/mm/pgtable_32.c
arch/powerpc/mm/pgtable_64.c
arch/powerpc/mm/ptdump/ptdump.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/hv-gpci.c
arch/powerpc/perf/internal.h
arch/powerpc/perf/power10-pmu.c
arch/powerpc/platforms/40x/ppc40x_simple.c
arch/powerpc/platforms/44x/warp.c
arch/powerpc/platforms/512x/mpc512x_generic.c
arch/powerpc/platforms/52xx/efika.c
arch/powerpc/platforms/52xx/lite5200.c
arch/powerpc/platforms/52xx/mpc5200_simple.c
arch/powerpc/platforms/83xx/mpc830x_rdb.c
arch/powerpc/platforms/83xx/mpc831x_rdb.c
arch/powerpc/platforms/83xx/mpc837x_rdb.c
arch/powerpc/platforms/83xx/suspend.c
arch/powerpc/platforms/85xx/bsc913x_qds.c
arch/powerpc/platforms/85xx/bsc913x_rdb.c
arch/powerpc/platforms/85xx/corenet_generic.c
arch/powerpc/platforms/85xx/ge_imp3a.c
arch/powerpc/platforms/85xx/sgy_cts1000.c
arch/powerpc/platforms/85xx/tqm85xx.c
arch/powerpc/platforms/amigaone/setup.c
arch/powerpc/platforms/embedded6xx/linkstation.c
arch/powerpc/platforms/embedded6xx/mpc10x.h
arch/powerpc/platforms/pasemi/gpio_mdio.c
arch/powerpc/platforms/pasemi/pci.c
arch/powerpc/platforms/powermac/Kconfig
arch/powerpc/platforms/powermac/feature.c
arch/powerpc/platforms/powernv/opal-core.c
arch/powerpc/platforms/powernv/opal-prd.c
arch/powerpc/platforms/ps3/hvcall.S
arch/powerpc/platforms/pseries/ibmebus.c
arch/powerpc/platforms/pseries/lparcfg.c
arch/powerpc/platforms/pseries/msi.c
arch/powerpc/platforms/pseries/papr_platform_attributes.c
arch/powerpc/platforms/pseries/papr_scm.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/pseries/vio.c
arch/powerpc/sysdev/fsl_msi.c
arch/powerpc/sysdev/mpic.c
arch/powerpc/sysdev/pmi.c
arch/powerpc/xmon/xmon.c
arch/riscv/Kconfig
arch/riscv/include/asm/crash_reserve.h [moved from arch/riscv/include/asm/crash_core.h with 78% similarity]
arch/riscv/include/asm/ftrace.h
arch/riscv/include/asm/pgtable-64.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/ptdump.h [deleted file]
arch/riscv/include/uapi/asm/kvm.h
arch/riscv/kernel/Makefile
arch/riscv/kernel/elf_kexec.c
arch/riscv/kernel/mcount.S
arch/riscv/kernel/vmcore_info.c [moved from arch/riscv/kernel/crash_core.c with 88% similarity]
arch/riscv/kvm/Kconfig
arch/riscv/kvm/vcpu_insn.c
arch/riscv/kvm/vcpu_onereg.c
arch/riscv/mm/init.c
arch/riscv/mm/ptdump.c
arch/s390/Kconfig
arch/s390/boot/vmem.c
arch/s390/include/asm/ftrace.h
arch/s390/include/asm/pgalloc.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/ptdump.h [deleted file]
arch/s390/include/asm/tlb.h
arch/s390/include/uapi/asm/kvm.h
arch/s390/kernel/Makefile
arch/s390/kernel/kexec_elf.c
arch/s390/kernel/kexec_image.c
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/machine_kexec_file.c
arch/s390/kernel/vmcore_info.c [new file with mode: 0644]
arch/s390/kvm/Kconfig
arch/s390/kvm/diag.c
arch/s390/kvm/gaccess.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/priv.c
arch/s390/kvm/sigp.c
arch/s390/mm/dump_pagetables.c
arch/s390/mm/gmap.c
arch/s390/mm/hugetlbpage.c
arch/s390/mm/init.c
arch/s390/mm/pageattr.c
arch/s390/mm/pgalloc.c
arch/s390/mm/pgtable.c
arch/s390/mm/vmem.c
arch/sh/Kconfig
arch/sh/include/asm/cachetype.h [new file with mode: 0644]
arch/sh/kernel/Makefile
arch/sh/kernel/machine_kexec.c
arch/sh/kernel/setup.c
arch/sh/kernel/vmcore_info.c [new file with mode: 0644]
arch/sparc/Kconfig
arch/sparc/Kconfig.debug
arch/sparc/include/asm/cachetype.h [new file with mode: 0644]
arch/sparc/include/asm/hypervisor.h
arch/sparc/include/asm/ldc.h
arch/sparc/include/asm/mmu_context_64.h
arch/sparc/include/asm/parport.h
arch/sparc/include/asm/parport_64.h [new file with mode: 0644]
arch/sparc/include/asm/pgtable_64.h
arch/sparc/include/asm/switch_to_64.h
arch/sparc/kernel/btext.c
arch/sparc/kernel/chmc.c
arch/sparc/kernel/ds.c
arch/sparc/kernel/irq_32.c
arch/sparc/kernel/irq_64.c
arch/sparc/kernel/kernel.h
arch/sparc/kernel/kgdb_32.c
arch/sparc/kernel/kprobes.c
arch/sparc/kernel/ldc.c
arch/sparc/kernel/leon_pci_grpci1.c
arch/sparc/kernel/leon_pci_grpci2.c
arch/sparc/kernel/leon_smp.c
arch/sparc/kernel/nmi.c
arch/sparc/kernel/of_device_64.c
arch/sparc/kernel/pci.c
arch/sparc/kernel/pci_impl.h
arch/sparc/kernel/pci_schizo.c
arch/sparc/kernel/perf_event.c
arch/sparc/kernel/prom_irqtrans.c
arch/sparc/kernel/psycho_common.c
arch/sparc/kernel/setup_32.c
arch/sparc/kernel/signal_32.c
arch/sparc/kernel/signal_64.c
arch/sparc/kernel/vio.c
arch/sparc/lib/Makefile
arch/sparc/lib/cmpdi2.c [deleted file]
arch/sparc/lib/ucmpdi2.c [deleted file]
arch/sparc/mm/init_64.c
arch/sparc/mm/srmmu.c
arch/sparc/mm/tsb.c
arch/sparc/net/bpf_jit_comp_32.c
arch/sparc/vdso/vma.c
arch/um/kernel/dtb.c
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/boot/compressed/ident_map_64.c
arch/x86/events/amd/core.c
arch/x86/events/amd/lbr.c
arch/x86/include/asm/crash_reserve.h [moved from arch/x86/include/asm/crash_core.h with 92% similarity]
arch/x86/include/asm/hardirq.h
arch/x86/include/asm/idtentry.h
arch/x86/include/asm/irq.h
arch/x86/include/asm/irq_vectors.h
arch/x86/include/asm/kvm-x86-ops.h
arch/x86/include/asm/kvm-x86-pmu-ops.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/mmu.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/vmxfeatures.h
arch/x86/include/uapi/asm/kvm.h
arch/x86/include/uapi/asm/kvm_para.h
arch/x86/kernel/Makefile
arch/x86/kernel/alternative.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/feat_ctl.c
arch/x86/kernel/cpu/mshyperv.c
arch/x86/kernel/devicetree.c
arch/x86/kernel/idt.c
arch/x86/kernel/irq.c
arch/x86/kernel/kexec-bzimage64.c
arch/x86/kernel/kvm.c
arch/x86/kernel/machine_kexec_64.c
arch/x86/kernel/reboot.c
arch/x86/kernel/setup.c
arch/x86/kernel/smp.c
arch/x86/kernel/vmcore_info_32.c [moved from arch/x86/kernel/crash_core_32.c with 90% similarity]
arch/x86/kernel/vmcore_info_64.c [moved from arch/x86/kernel/crash_core_64.c with 94% similarity]
arch/x86/kvm/Kconfig
arch/x86/kvm/debugfs.c
arch/x86/kvm/emulate.c
arch/x86/kvm/kvm_emulate.h
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/page_track.c
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/mmu/tdp_mmu.h
arch/x86/kvm/pmu.c
arch/x86/kvm/pmu.h
arch/x86/kvm/smm.c
arch/x86/kvm/svm/pmu.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/trace.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/kvm/xen.c
arch/x86/kvm/xen.h
arch/x86/mm/dump_pagetables.c
arch/x86/mm/fault.c
arch/x86/mm/ident_map.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/kasan_init_64.c
arch/x86/mm/mem_encrypt_identity.c
arch/x86/mm/pat/set_memory.c
arch/x86/mm/pgtable.c
arch/x86/mm/pti.c
arch/x86/mm/tlb.c
arch/x86/power/Makefile
arch/x86/power/hibernate.c
arch/x86/xen/enlighten_hvm.c
arch/x86/xen/mmu_pv.c
arch/xtensa/Kconfig
arch/xtensa/include/asm/cachetype.h [new file with mode: 0644]
block/bdev.c
block/blk-lib.c
block/blk-settings.c
block/blk.h
block/mq-deadline.c
crypto/Kconfig
crypto/ahash.c
crypto/asymmetric_keys/verify_pefile.c
crypto/blake2b_generic.c
crypto/dh.c
crypto/pcbc.c
crypto/rsa.c
crypto/scompress.c
crypto/tcrypt.c
crypto/testmgr.c
drivers/acpi/apei/Kconfig
drivers/acpi/apei/Makefile
drivers/acpi/apei/apei-internal.h
drivers/acpi/apei/einj-core.c [moved from drivers/acpi/apei/einj.c with 90% similarity]
drivers/acpi/apei/einj-cxl.c [new file with mode: 0644]
drivers/acpi/numa/hmat.c
drivers/acpi/numa/srat.c
drivers/acpi/tables.c
drivers/android/binder.c
drivers/ata/pata_macio.c
drivers/base/cacheinfo.c
drivers/base/cpu.c
drivers/base/memory.c
drivers/base/node.c
drivers/block/sunvdc.c
drivers/block/zram/zcomp.c
drivers/block/zram/zcomp.h
drivers/block/zram/zram_drv.c
drivers/bus/bt1-apb.c
drivers/bus/mips_cdmm.c
drivers/char/hw_random/hisi-rng.c
drivers/char/hw_random/n2-drv.c
drivers/char/tpm/st33zp24/i2c.c
drivers/char/tpm/st33zp24/spi.c
drivers/char/tpm/st33zp24/st33zp24.c
drivers/char/tpm/tpm-interface.c
drivers/char/tpm/tpm_atmel.c
drivers/char/tpm/tpm_i2c_nuvoton.c
drivers/char/tpm/tpm_nsc.c
drivers/char/tpm/tpm_tis.c
drivers/char/tpm/tpm_tis_core.c
drivers/char/tpm/tpm_vtpm_proxy.c
drivers/clk/clk-ast2600.c
drivers/clk/clk-cdce925.c
drivers/clk/clk-devres.c
drivers/clk/clk-fixed-factor.c
drivers/clk/clk-fractional-divider.c
drivers/clk/clk.c
drivers/clk/clkdev.c
drivers/clk/hisilicon/clk-hi3519.c
drivers/clk/hisilicon/clk-hi3559a.c
drivers/clk/imx/clk-composite-8m.c
drivers/clk/imx/clk-imx8-acm.c
drivers/clk/imx/clk-imx8mp-audiomix.c
drivers/clk/imx/clk-scu.c
drivers/clk/keystone/sci-clk.c
drivers/clk/mediatek/clk-mt7622-apmixedsys.c
drivers/clk/mediatek/clk-mt7981-topckgen.c
drivers/clk/mediatek/clk-mt7988-infracfg.c
drivers/clk/mediatek/clk-mt8135-apmixedsys.c
drivers/clk/mediatek/clk-mt8173-apmixedsys.c
drivers/clk/mediatek/clk-mt8183.c
drivers/clk/meson/axg.c
drivers/clk/microchip/clk-mpfs.c
drivers/clk/qcom/Kconfig
drivers/clk/qcom/Makefile
drivers/clk/qcom/camcc-sc7180.c
drivers/clk/qcom/camcc-sc7280.c
drivers/clk/qcom/camcc-sc8280xp.c
drivers/clk/qcom/camcc-sdm845.c
drivers/clk/qcom/camcc-sm6350.c
drivers/clk/qcom/camcc-sm8550.c
drivers/clk/qcom/camcc-x1e80100.c [new file with mode: 0644]
drivers/clk/qcom/clk-alpha-pll.c
drivers/clk/qcom/clk-alpha-pll.h
drivers/clk/qcom/clk-branch.h
drivers/clk/qcom/dispcc-qcm2290.c
drivers/clk/qcom/dispcc-sc7180.c
drivers/clk/qcom/dispcc-sc7280.c
drivers/clk/qcom/dispcc-sc8280xp.c
drivers/clk/qcom/dispcc-sdm845.c
drivers/clk/qcom/dispcc-sm6115.c
drivers/clk/qcom/dispcc-sm6125.c
drivers/clk/qcom/dispcc-sm6350.c
drivers/clk/qcom/dispcc-sm6375.c
drivers/clk/qcom/dispcc-sm8250.c
drivers/clk/qcom/dispcc-sm8450.c
drivers/clk/qcom/dispcc-sm8550.c
drivers/clk/qcom/dispcc-sm8650.c
drivers/clk/qcom/dispcc-x1e80100.c [new file with mode: 0644]
drivers/clk/qcom/gcc-ipq5018.c
drivers/clk/qcom/gcc-ipq6018.c
drivers/clk/qcom/gcc-ipq8074.c
drivers/clk/qcom/gcc-ipq9574.c
drivers/clk/qcom/gcc-msm8953.c
drivers/clk/qcom/gcc-sa8775p.c
drivers/clk/qcom/gcc-sc7180.c
drivers/clk/qcom/gcc-sc7280.c
drivers/clk/qcom/gcc-sc8180x.c
drivers/clk/qcom/gcc-sc8280xp.c
drivers/clk/qcom/gcc-sdm845.c
drivers/clk/qcom/gcc-sdx55.c
drivers/clk/qcom/gcc-sdx65.c
drivers/clk/qcom/gcc-sdx75.c
drivers/clk/qcom/gcc-sm4450.c
drivers/clk/qcom/gcc-sm6375.c
drivers/clk/qcom/gcc-sm7150.c
drivers/clk/qcom/gcc-sm8150.c
drivers/clk/qcom/gcc-sm8250.c
drivers/clk/qcom/gcc-sm8350.c
drivers/clk/qcom/gcc-sm8450.c
drivers/clk/qcom/gcc-sm8550.c
drivers/clk/qcom/gcc-sm8650.c
drivers/clk/qcom/gcc-x1e80100.c
drivers/clk/qcom/gdsc.c
drivers/clk/qcom/gpucc-sa8775p.c
drivers/clk/qcom/gpucc-sc7180.c
drivers/clk/qcom/gpucc-sc7280.c
drivers/clk/qcom/gpucc-sc8280xp.c
drivers/clk/qcom/gpucc-sdm845.c
drivers/clk/qcom/gpucc-sm8150.c
drivers/clk/qcom/gpucc-sm8250.c
drivers/clk/qcom/gpucc-sm8350.c
drivers/clk/qcom/gpucc-sm8550.c
drivers/clk/qcom/gpucc-x1e80100.c [new file with mode: 0644]
drivers/clk/qcom/lpasscorecc-sc7180.c
drivers/clk/qcom/mmcc-apq8084.c
drivers/clk/qcom/mmcc-msm8974.c
drivers/clk/qcom/mss-sc7180.c [deleted file]
drivers/clk/qcom/reset.c
drivers/clk/qcom/reset.h
drivers/clk/qcom/tcsrcc-x1e80100.c [new file with mode: 0644]
drivers/clk/qcom/videocc-sc7180.c
drivers/clk/qcom/videocc-sc7280.c
drivers/clk/qcom/videocc-sdm845.c
drivers/clk/qcom/videocc-sm8150.c
drivers/clk/qcom/videocc-sm8250.c
drivers/clk/qcom/videocc-sm8350.c
drivers/clk/qcom/videocc-sm8450.c
drivers/clk/qcom/videocc-sm8550.c
drivers/clk/renesas/Kconfig
drivers/clk/renesas/Makefile
drivers/clk/renesas/clk-mstp.c
drivers/clk/renesas/r8a779f0-cpg-mssr.c
drivers/clk/renesas/r8a779g0-cpg-mssr.c
drivers/clk/renesas/r8a779h0-cpg-mssr.c [new file with mode: 0644]
drivers/clk/renesas/r9a07g043-cpg.c
drivers/clk/renesas/r9a07g044-cpg.c
drivers/clk/renesas/r9a08g045-cpg.c
drivers/clk/renesas/rcar-gen4-cpg.c
drivers/clk/renesas/renesas-cpg-mssr.c
drivers/clk/renesas/renesas-cpg-mssr.h
drivers/clk/rockchip/clk-rk3399.c
drivers/clk/rockchip/clk-rk3568.c
drivers/clk/rockchip/clk-rk3588.c
drivers/clk/samsung/clk-cpu.c
drivers/clk/samsung/clk-cpu.h
drivers/clk/samsung/clk-exynos3250.c
drivers/clk/samsung/clk-exynos4.c
drivers/clk/samsung/clk-exynos5250.c
drivers/clk/samsung/clk-exynos5420.c
drivers/clk/samsung/clk-exynos5433.c
drivers/clk/samsung/clk-exynos850.c
drivers/clk/samsung/clk-gs101.c
drivers/clk/samsung/clk.h
drivers/clk/starfive/clk-starfive-jh7110-isp.c
drivers/clk/starfive/clk-starfive-jh7110-vout.c
drivers/clk/sunxi/clk-a20-gmac.c
drivers/clk/sunxi/clk-sun9i-cpus.c
drivers/clk/sunxi/clk-usb.c
drivers/clk/ti/dpll3xxx.c
drivers/clk/xilinx/clk-xlnx-clock-wizard.c
drivers/clk/zynq/clkc.c
drivers/cpuidle/cpuidle.c
drivers/crypto/Kconfig
drivers/crypto/Makefile
drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
drivers/crypto/ccp/platform-access.c
drivers/crypto/ccp/psp-dev.c
drivers/crypto/hisilicon/debugfs.c
drivers/crypto/hisilicon/hpre/hpre_main.c
drivers/crypto/hisilicon/qm.c
drivers/crypto/hisilicon/sec2/sec_crypto.c
drivers/crypto/hisilicon/sec2/sec_main.c
drivers/crypto/hisilicon/zip/zip_crypto.c
drivers/crypto/hisilicon/zip/zip_main.c
drivers/crypto/intel/iaa/iaa_crypto.h
drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c
drivers/crypto/intel/iaa/iaa_crypto_main.c
drivers/crypto/intel/iaa/iaa_crypto_stats.c
drivers/crypto/intel/iaa/iaa_crypto_stats.h
drivers/crypto/intel/qat/Kconfig
drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
drivers/crypto/intel/qat/qat_common/Makefile
drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
drivers/crypto/intel/qat/qat_common/adf_aer.c
drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h
drivers/crypto/intel/qat/qat_common/adf_clock.c
drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c
drivers/crypto/intel/qat/qat_common/adf_common_drv.h
drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c
drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c
drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c
drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c [new file with mode: 0644]
drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c
drivers/crypto/intel/qat/qat_common/adf_init.c
drivers/crypto/intel/qat/qat_common/adf_isr.c
drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h
drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c
drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h
drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c
drivers/crypto/intel/qat/qat_common/adf_rl.c
drivers/crypto/intel/qat/qat_common/adf_sriov.c
drivers/crypto/intel/qat/qat_common/adf_sysfs.c
drivers/crypto/intel/qat/qat_common/adf_vf_isr.c
drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
drivers/crypto/intel/qat/qat_common/qat_crypto.c
drivers/crypto/n2_core.c
drivers/crypto/rockchip/rk3288_crypto.c
drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
drivers/crypto/virtio/virtio_crypto_core.c
drivers/crypto/vmx/Kconfig [deleted file]
drivers/crypto/vmx/Makefile [deleted file]
drivers/crypto/vmx/ppc-xlate.pl [deleted file]
drivers/crypto/xilinx/zynqmp-aes-gcm.c
drivers/cxl/acpi.c
drivers/cxl/core/cdat.c
drivers/cxl/core/core.h
drivers/cxl/core/pci.c
drivers/cxl/core/port.c
drivers/cxl/core/region.c
drivers/cxl/cxl.h
drivers/cxl/cxlpci.h
drivers/dax/bus.c
drivers/dax/super.c
drivers/dma/Kconfig
drivers/dma/amba-pl08x.c
drivers/dma/bestcomm/sram.c
drivers/dma/fsl-edma-common.c
drivers/dma/fsl-edma-common.h
drivers/dma/fsl-edma-main.c
drivers/dma/idxd/bus.c
drivers/dma/idxd/cdev.c
drivers/dma/idxd/idxd.h
drivers/dma/idxd/sysfs.c
drivers/dma/mcf-edma-main.c
drivers/dma/of-dma.c
drivers/dma/pl330.c
drivers/dma/ti/k3-psil-j721s2.c
drivers/dma/ti/k3-udma-glue.c
drivers/dma/xilinx/xilinx_dma.c
drivers/firewire/core-device.c
drivers/firmware/efi/libstub/Makefile
drivers/firmware/efi/libstub/x86-stub.c
drivers/firmware/qemu_fw_cfg.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
drivers/hwmon/dell-smm-hwmon.c
drivers/hwmon/ultra45_env.c
drivers/i2c/busses/Kconfig
drivers/i2c/busses/i2c-cadence.c
drivers/i2c/busses/i2c-designware-common.c
drivers/i2c/busses/i2c-designware-core.h
drivers/i2c/busses/i2c-designware-master.c
drivers/i2c/busses/i2c-designware-pcidrv.c
drivers/i2c/busses/i2c-designware-platdrv.c
drivers/i2c/busses/i2c-hisi.c
drivers/i2c/busses/i2c-i801.c
drivers/i2c/busses/i2c-imx-lpi2c.c
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-mpc.c
drivers/i2c/busses/i2c-npcm7xx.c
drivers/i2c/busses/i2c-sh_mobile.c
drivers/i2c/busses/i2c-sprd.c
drivers/i2c/i2c-core-base.c
drivers/i2c/i2c-smbus.c
drivers/i2c/muxes/i2c-mux-mlxcpld.c
drivers/i2c/muxes/i2c-mux-pca954x.c
drivers/i3c/internals.h
drivers/i3c/master.c
drivers/i3c/master/dw-i3c-master.c
drivers/input/gameport/gameport.c
drivers/input/input-leds.c
drivers/input/input.c
drivers/input/joystick/xpad.c
drivers/input/keyboard/bcm-keypad.c
drivers/input/keyboard/matrix_keypad.c
drivers/input/misc/88pm80x_onkey.c
drivers/input/misc/iqs7222.c
drivers/input/mouse/Kconfig
drivers/input/mouse/Makefile
drivers/input/mouse/navpoint.c [deleted file]
drivers/input/rmi4/rmi_bus.c
drivers/input/rmi4/rmi_bus.h
drivers/input/rmi4/rmi_driver.c
drivers/input/serio/serio.c
drivers/input/serio/xilinx_ps2.c
drivers/input/touchscreen/Kconfig
drivers/input/touchscreen/Makefile
drivers/input/touchscreen/goodix_berlin.h [new file with mode: 0644]
drivers/input/touchscreen/goodix_berlin_core.c [new file with mode: 0644]
drivers/input/touchscreen/goodix_berlin_i2c.c [new file with mode: 0644]
drivers/input/touchscreen/goodix_berlin_spi.c [new file with mode: 0644]
drivers/input/touchscreen/imagis.c
drivers/input/touchscreen/ti_am335x_tsc.c
drivers/irqchip/irq-riscv-intc.c
drivers/leds/leds-sunfire.c
drivers/macintosh/adb.c
drivers/macintosh/macio_asic.c
drivers/macintosh/rack-meter.c
drivers/macintosh/therm_windtunnel.c
drivers/macintosh/windfarm_pm112.c
drivers/macintosh/windfarm_pm121.c
drivers/macintosh/windfarm_pm72.c
drivers/macintosh/windfarm_pm81.c
drivers/macintosh/windfarm_pm91.c
drivers/macintosh/windfarm_rm31.c
drivers/md/bcache/sysfs.c
drivers/md/dm.c
drivers/media/cec/core/cec-adap.c
drivers/media/cec/core/cec-core.c
drivers/media/cec/platform/cros-ec/cros-ec-cec.c
drivers/media/common/siano/smscoreapi.c
drivers/media/common/siano/smsdvb-main.c
drivers/media/common/v4l2-tpg/v4l2-tpg-core.c
drivers/media/dvb-core/dvb_frontend.c
drivers/media/dvb-core/dvbdev.c
drivers/media/dvb-frontends/bcm3510.c
drivers/media/dvb-frontends/bcm3510_priv.h
drivers/media/dvb-frontends/cx24110.c
drivers/media/dvb-frontends/cx24110.h
drivers/media/dvb-frontends/cx24117.c
drivers/media/dvb-frontends/dvb-pll.c
drivers/media/dvb-frontends/stv0367.c
drivers/media/dvb-frontends/stv6110x_priv.h
drivers/media/dvb-frontends/tda8083.h
drivers/media/dvb-frontends/zl10036.c
drivers/media/dvb-frontends/zl10036.h
drivers/media/i2c/Kconfig
drivers/media/i2c/adv7180.c
drivers/media/i2c/adv7343.c
drivers/media/i2c/adv748x/adv748x.h
drivers/media/i2c/adv7604.c
drivers/media/i2c/alvium-csi2.c
drivers/media/i2c/alvium-csi2.h
drivers/media/i2c/ar0521.c
drivers/media/i2c/ccs/ccs-quirk.h
drivers/media/i2c/dw9714.c
drivers/media/i2c/imx214.c
drivers/media/i2c/imx274.c
drivers/media/i2c/imx290.c
drivers/media/i2c/imx319.c
drivers/media/i2c/imx334.c
drivers/media/i2c/imx335.c
drivers/media/i2c/imx355.c
drivers/media/i2c/imx415.c
drivers/media/i2c/isl7998x.c
drivers/media/i2c/max2175.c
drivers/media/i2c/msp3400-driver.c
drivers/media/i2c/msp3400-driver.h
drivers/media/i2c/mt9p031.c
drivers/media/i2c/mt9v032.c
drivers/media/i2c/ov08x40.c
drivers/media/i2c/ov2659.c
drivers/media/i2c/ov5645.c
drivers/media/i2c/ov5647.c
drivers/media/i2c/s5c73m3/s5c73m3-core.c
drivers/media/i2c/s5k5baf.c
drivers/media/i2c/st-vgxy61.c
drivers/media/i2c/tc358743.c
drivers/media/i2c/tc358746.c
drivers/media/i2c/tda1997x.c
drivers/media/i2c/tvp514x.c
drivers/media/i2c/tvp5150.c
drivers/media/i2c/tvp7002.c
drivers/media/mc/mc-devnode.c
drivers/media/mc/mc-entity.c
drivers/media/pci/bt8xx/bttv-gpio.c
drivers/media/pci/bt8xx/bttvp.h
drivers/media/pci/cx23885/cx23885-video.c
drivers/media/pci/dt3155/dt3155.h
drivers/media/pci/intel/ipu-bridge.c
drivers/media/pci/intel/ipu3/ipu3-cio2.c
drivers/media/pci/intel/ivsc/mei_csi.c
drivers/media/pci/sta2x11/sta2x11_vip.c
drivers/media/pci/ttpci/budget-av.c
drivers/media/platform/amphion/vdec.c
drivers/media/platform/atmel/atmel-isi.c
drivers/media/platform/cadence/cdns-csi2rx.c
drivers/media/platform/chips-media/wave5/wave5-hw.c
drivers/media/platform/chips-media/wave5/wave5-vpu-enc.c
drivers/media/platform/chips-media/wave5/wave5-vpu.c
drivers/media/platform/intel/pxa_camera.c
drivers/media/platform/marvell/Kconfig
drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.h
drivers/media/platform/mediatek/mdp/mtk_mdp_vpu.c
drivers/media/platform/mediatek/mdp3/mdp_cfg_data.c
drivers/media/platform/mediatek/mdp3/mdp_reg_aal.h [new file with mode: 0644]
drivers/media/platform/mediatek/mdp3/mdp_reg_color.h [new file with mode: 0644]
drivers/media/platform/mediatek/mdp3/mdp_reg_fg.h [new file with mode: 0644]
drivers/media/platform/mediatek/mdp3/mdp_reg_hdr.h [new file with mode: 0644]
drivers/media/platform/mediatek/mdp3/mdp_reg_merge.h [new file with mode: 0644]
drivers/media/platform/mediatek/mdp3/mdp_reg_ovl.h [new file with mode: 0644]
drivers/media/platform/mediatek/mdp3/mdp_reg_pad.h [new file with mode: 0644]
drivers/media/platform/mediatek/mdp3/mdp_reg_rdma.h
drivers/media/platform/mediatek/mdp3/mdp_reg_rsz.h
drivers/media/platform/mediatek/mdp3/mdp_reg_tdshp.h [new file with mode: 0644]
drivers/media/platform/mediatek/mdp3/mdp_reg_wrot.h
drivers/media/platform/mediatek/mdp3/mdp_sm_mt8195.h [new file with mode: 0644]
drivers/media/platform/mediatek/mdp3/mtk-img-ipi.h
drivers/media/platform/mediatek/mdp3/mtk-mdp3-cfg.h
drivers/media/platform/mediatek/mdp3/mtk-mdp3-cmdq.c
drivers/media/platform/mediatek/mdp3/mtk-mdp3-cmdq.h
drivers/media/platform/mediatek/mdp3/mtk-mdp3-comp.c
drivers/media/platform/mediatek/mdp3/mtk-mdp3-comp.h
drivers/media/platform/mediatek/mdp3/mtk-mdp3-core.c
drivers/media/platform/mediatek/mdp3/mtk-mdp3-core.h
drivers/media/platform/mediatek/mdp3/mtk-mdp3-m2m.c
drivers/media/platform/mediatek/mdp3/mtk-mdp3-regs.c
drivers/media/platform/mediatek/mdp3/mtk-mdp3-regs.h
drivers/media/platform/mediatek/mdp3/mtk-mdp3-vpu.c
drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec.h
drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_stateless.c
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_req_if.c
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.h
drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc.h
drivers/media/platform/mediatek/vpu/mtk_vpu.c
drivers/media/platform/mediatek/vpu/mtk_vpu.h
drivers/media/platform/nuvoton/npcm-video.c
drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c
drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h
drivers/media/platform/nxp/imx8-isi/imx8-isi-core.c
drivers/media/platform/nxp/imx8-isi/imx8-isi-crossbar.c
drivers/media/platform/nxp/imx8-isi/imx8-isi-hw.c
drivers/media/platform/qcom/venus/core.h
drivers/media/platform/renesas/Kconfig
drivers/media/platform/renesas/Makefile
drivers/media/platform/renesas/rcar-csi2.c [moved from drivers/media/platform/renesas/rcar-vin/rcar-csi2.c with 100% similarity]
drivers/media/platform/renesas/rcar-isp.c
drivers/media/platform/renesas/rcar-vin/Kconfig
drivers/media/platform/renesas/rcar-vin/Makefile
drivers/media/platform/renesas/rzg2l-cru/rzg2l-cru.h
drivers/media/platform/renesas/rzg2l-cru/rzg2l-csi2.c
drivers/media/platform/renesas/rzg2l-cru/rzg2l-ip.c
drivers/media/platform/renesas/rzg2l-cru/rzg2l-video.c
drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
drivers/media/platform/rockchip/rkisp1/rkisp1-common.h
drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h
drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c
drivers/media/platform/samsung/exynos4-is/fimc-capture.c
drivers/media/platform/samsung/exynos4-is/fimc-core.c
drivers/media/platform/samsung/exynos4-is/fimc-core.h
drivers/media/platform/samsung/exynos4-is/fimc-is.c
drivers/media/platform/samsung/exynos4-is/fimc-isp-video.c
drivers/media/platform/samsung/exynos4-is/fimc-lite-reg.c
drivers/media/platform/samsung/exynos4-is/fimc-lite-reg.h
drivers/media/platform/samsung/exynos4-is/fimc-lite.c
drivers/media/platform/samsung/exynos4-is/fimc-lite.h
drivers/media/platform/samsung/exynos4-is/fimc-m2m.c
drivers/media/platform/samsung/exynos4-is/fimc-reg.c
drivers/media/platform/samsung/exynos4-is/fimc-reg.h
drivers/media/platform/samsung/exynos4-is/mipi-csis.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd.h
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v5.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v5.h
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v6.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_cmd_v6.h
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_common.h
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_ctrl.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_dec.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_dec.h
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_enc.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_enc.h
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_opr.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_opr_v5.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_opr_v5.h
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_opr_v6.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_opr_v6.h
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_pm.c
drivers/media/platform/samsung/s5p-mfc/s5p_mfc_pm.h
drivers/media/platform/st/stm32/stm32-dcmi.c
drivers/media/platform/st/stm32/stm32-dcmipp/dcmipp-core.c
drivers/media/platform/sunxi/sun8i-di/sun8i-di.c
drivers/media/platform/ti/davinci/vpif.c
drivers/media/platform/ti/j721e-csi2rx/j721e-csi2rx.c
drivers/media/platform/verisilicon/Kconfig
drivers/media/platform/verisilicon/Makefile
drivers/media/platform/verisilicon/hantro.h
drivers/media/platform/verisilicon/hantro_drv.c
drivers/media/platform/verisilicon/hantro_g1_h264_dec.c
drivers/media/platform/verisilicon/hantro_hw.h
drivers/media/platform/verisilicon/rockchip_vpu2_hw_h264_dec.c
drivers/media/platform/verisilicon/rockchip_vpu981_regs.h
drivers/media/platform/verisilicon/stm32mp25_vpu_hw.c [new file with mode: 0644]
drivers/media/platform/xilinx/Kconfig
drivers/media/test-drivers/vicodec/codec-fwht.c
drivers/media/test-drivers/vidtv/vidtv_bridge.c
drivers/media/test-drivers/visl/visl-core.c
drivers/media/test-drivers/visl/visl-dec.c
drivers/media/test-drivers/visl/visl.h
drivers/media/tuners/tda18271-fe.c
drivers/media/tuners/xc4000.c
drivers/media/usb/cx231xx/cx231xx-417.c
drivers/media/usb/dvb-usb/dvb-usb.h
drivers/media/usb/em28xx/em28xx-cards.c
drivers/media/usb/go7007/go7007-driver.c
drivers/media/usb/go7007/go7007-usb.c
drivers/media/usb/pvrusb2/pvrusb2-context.c
drivers/media/usb/pvrusb2/pvrusb2-dvb.c
drivers/media/usb/pvrusb2/pvrusb2-v4l2.c
drivers/media/usb/s2255/s2255drv.c
drivers/media/usb/siano/smsusb.c
drivers/media/usb/usbtv/usbtv-video.c
drivers/media/v4l2-core/v4l2-cci.c
drivers/media/v4l2-core/v4l2-common.c
drivers/media/v4l2-core/v4l2-ctrls-api.c
drivers/media/v4l2-core/v4l2-ctrls-core.c
drivers/media/v4l2-core/v4l2-ioctl.c
drivers/media/v4l2-core/v4l2-mc.c
drivers/media/v4l2-core/v4l2-mem2mem.c
drivers/message/fusion/mptfc.c
drivers/misc/sgi-gru/grufault.c
drivers/mtd/chips/cfi_cmdset_0002.c
drivers/mtd/maps/Kconfig
drivers/mtd/maps/Makefile
drivers/mtd/maps/intel_vr_nor.c [deleted file]
drivers/mtd/maps/physmap-core.c
drivers/mtd/maps/sun_uflash.c
drivers/mtd/nand/raw/atmel/nand-controller.c
drivers/mtd/nand/raw/brcmnand/Makefile
drivers/mtd/nand/raw/brcmnand/bcm63138_nand.c [deleted file]
drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c [new file with mode: 0644]
drivers/mtd/nand/raw/brcmnand/brcmnand.c
drivers/mtd/nand/raw/brcmnand/brcmnand.h
drivers/mtd/nand/raw/fsl_elbc_nand.c
drivers/mtd/nand/raw/lpc32xx_mlc.c
drivers/mtd/nand/raw/meson_nand.c
drivers/mtd/nand/raw/mtk_nand.c
drivers/mtd/nand/raw/nand_base.c
drivers/mtd/nand/raw/nand_bbt.c
drivers/mtd/nand/raw/nand_hynix.c
drivers/mtd/nand/raw/stm32_fmc2_nand.c
drivers/mtd/nand/spi/esmt.c
drivers/mtd/nand/spi/winbond.c
drivers/mtd/spi-nor/core.c
drivers/mtd/spi-nor/core.h
drivers/mtd/spi-nor/debugfs.c
drivers/mtd/spi-nor/sfdp.c
drivers/mtd/ssfdc.c
drivers/net/ethernet/apple/bmac.c
drivers/net/ethernet/apple/mace.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/sun/cassini.c
drivers/net/ethernet/sun/niu.c
drivers/net/ethernet/sun/sunhme.c
drivers/net/ethernet/sun/sunvnet.c
drivers/net/ethernet/sun/sunvnet_common.c
drivers/net/ppp/pptp.c
drivers/nvdimm/Kconfig
drivers/nvdimm/bus.c
drivers/nvdimm/pmem.c
drivers/of/.kunitconfig [new file with mode: 0644]
drivers/of/Kconfig
drivers/of/Makefile
drivers/of/base.c
drivers/of/empty_root.dts [new file with mode: 0644]
drivers/of/fdt.c
drivers/of/kexec.c
drivers/of/of_private.h
drivers/of/of_reserved_mem.c
drivers/of/of_test.c [new file with mode: 0644]
drivers/of/platform.c
drivers/of/property.c
drivers/of/unittest.c
drivers/parisc/led.c
drivers/pcmcia/cs.c
drivers/pcmcia/cs_internal.h
drivers/pcmcia/ds.c
drivers/phy/allwinner/phy-sun4i-usb.c
drivers/phy/amlogic/phy-meson-g12a-usb3-pcie.c
drivers/phy/broadcom/phy-bcm-sr-pcie.c
drivers/phy/broadcom/phy-bcm-sr-usb.c
drivers/phy/broadcom/phy-bcm63xx-usbh.c
drivers/phy/broadcom/phy-brcm-usb.c
drivers/phy/cadence/phy-cadence-torrent.c
drivers/phy/freescale/phy-fsl-imx8qm-lvds-phy.c
drivers/phy/freescale/phy-fsl-lynx-28g.c
drivers/phy/hisilicon/phy-histb-combphy.c
drivers/phy/intel/phy-intel-lgm-combo.c
drivers/phy/lantiq/phy-lantiq-vrx200-pcie.c
drivers/phy/marvell/phy-armada375-usb2.c
drivers/phy/marvell/phy-armada38x-comphy.c
drivers/phy/marvell/phy-berlin-sata.c
drivers/phy/marvell/phy-mvebu-a3700-comphy.c
drivers/phy/marvell/phy-mvebu-cp110-comphy.c
drivers/phy/mediatek/Kconfig
drivers/phy/mediatek/Makefile
drivers/phy/mediatek/phy-mtk-mipi-csi-0-5-rx-reg.h [new file with mode: 0644]
drivers/phy/mediatek/phy-mtk-mipi-csi-0-5.c [new file with mode: 0644]
drivers/phy/mediatek/phy-mtk-tphy.c
drivers/phy/mediatek/phy-mtk-xsphy.c
drivers/phy/microchip/lan966x_serdes.c
drivers/phy/microchip/sparx5_serdes.c
drivers/phy/mscc/phy-ocelot-serdes.c
drivers/phy/phy-core.c
drivers/phy/phy-xgene.c
drivers/phy/qualcomm/Makefile
drivers/phy/qualcomm/phy-qcom-edp.c
drivers/phy/qualcomm/phy-qcom-qmp-combo.c
drivers/phy/qualcomm/phy-qcom-qmp-common.h [new file with mode: 0644]
drivers/phy/qualcomm/phy-qcom-qmp-dp-com-v3.h [new file with mode: 0644]
drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v3.h [new file with mode: 0644]
drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v4.h [new file with mode: 0644]
drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h [new file with mode: 0644]
drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h [new file with mode: 0644]
drivers/phy/qualcomm/phy-qcom-qmp-dp-phy.h [new file with mode: 0644]
drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v6.h
drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v6_20.h
drivers/phy/qualcomm/phy-qcom-qmp-pcs-sgmii.h [new file with mode: 0644]
drivers/phy/qualcomm/phy-qcom-qmp-pcs-ufs-v6.h
drivers/phy/qualcomm/phy-qcom-qmp-pcs-v6_20.h
drivers/phy/qualcomm/phy-qcom-qmp-qserdes-com-v6.h
drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-ufs-v6.h
drivers/phy/qualcomm/phy-qcom-qmp-qserdes-txrx-v6_20.h
drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
drivers/phy/qualcomm/phy-qcom-qmp-usb-legacy.c
drivers/phy/qualcomm/phy-qcom-qmp-usb.c
drivers/phy/qualcomm/phy-qcom-qmp-usbc.c [new file with mode: 0644]
drivers/phy/qualcomm/phy-qcom-qmp.h
drivers/phy/qualcomm/phy-qcom-sgmii-eth.c
drivers/phy/ralink/phy-mt7621-pci.c
drivers/phy/renesas/phy-rcar-gen2.c
drivers/phy/renesas/phy-rcar-gen3-usb2.c
drivers/phy/renesas/r8a779f0-ether-serdes.c
drivers/phy/rockchip/Kconfig
drivers/phy/rockchip/Makefile
drivers/phy/rockchip/phy-rockchip-naneng-combphy.c
drivers/phy/rockchip/phy-rockchip-pcie.c
drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c [new file with mode: 0644]
drivers/phy/samsung/phy-exynos-mipi-video.c
drivers/phy/samsung/phy-exynos5-usbdrd.c
drivers/phy/samsung/phy-samsung-usb2.c
drivers/phy/socionext/phy-uniphier-usb2.c
drivers/phy/st/phy-miphy28lp.c
drivers/phy/st/phy-spear1310-miphy.c
drivers/phy/st/phy-spear1340-miphy.c
drivers/phy/st/phy-stm32-usbphyc.c
drivers/phy/tegra/xusb.c
drivers/phy/ti/phy-am654-serdes.c
drivers/phy/ti/phy-da8xx-usb.c
drivers/phy/ti/phy-gmii-sel.c
drivers/phy/ti/phy-tusb1210.c
drivers/phy/xilinx/phy-zynqmp.c
drivers/pinctrl/tegra/pinctrl-tegra-xusb.c
drivers/platform/x86/compal-laptop.c
drivers/platform/x86/intel/oaktrail.c
drivers/platform/x86/mlx-platform.c
drivers/regulator/Kconfig
drivers/s390/block/dcssblk.c
drivers/s390/char/sclp_cmd.c
drivers/s390/net/fsm.c
drivers/sbus/char/bbc_i2c.c
drivers/sbus/char/bbc_i2c.h
drivers/sbus/char/display7seg.c
drivers/sbus/char/envctrl.c
drivers/sbus/char/flash.c
drivers/sbus/char/openprom.c
drivers/sbus/char/uctrl.c
drivers/scsi/3w-9xxx.c
drivers/scsi/3w-sas.c
drivers/scsi/3w-xxxx.c
drivers/scsi/53c700.c
drivers/scsi/Kconfig
drivers/scsi/aacraid/aachba.c
drivers/scsi/bfa/bfa.h
drivers/scsi/bfa/bfa_core.c
drivers/scsi/bfa/bfa_cs.h
drivers/scsi/bfa/bfa_fcpim.c
drivers/scsi/bfa/bfa_fcpim.h
drivers/scsi/bfa/bfa_fcs.h
drivers/scsi/bfa/bfa_fcs_fcpim.c
drivers/scsi/bfa/bfa_fcs_lport.c
drivers/scsi/bfa/bfa_fcs_rport.c
drivers/scsi/bfa/bfa_ioc.c
drivers/scsi/bfa/bfa_ioc.h
drivers/scsi/bfa/bfa_svc.c
drivers/scsi/bfa/bfa_svc.h
drivers/scsi/bfa/bfad_bsg.c
drivers/scsi/bfa/bfad_drv.h
drivers/scsi/ch.c
drivers/scsi/csiostor/csio_defs.h
drivers/scsi/csiostor/csio_lnode.c
drivers/scsi/csiostor/csio_lnode.h
drivers/scsi/device_handler/scsi_dh_hp_sw.c
drivers/scsi/device_handler/scsi_dh_rdac.c
drivers/scsi/esp_scsi.c
drivers/scsi/fcoe/fcoe_sysfs.c
drivers/scsi/fnic/fnic_attrs.c
drivers/scsi/fnic/fnic_scsi.c
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
drivers/scsi/hosts.c
drivers/scsi/ibmvscsi/ibmvfc.c
drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
drivers/scsi/isci/init.c
drivers/scsi/jazz_esp.c
drivers/scsi/libfc/fc_encode.h
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_bsg.c
drivers/scsi/lpfc/lpfc_ct.c
drivers/scsi/lpfc/lpfc_debugfs.c
drivers/scsi/lpfc/lpfc_els.c
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_hw4.h
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_mbox.c
drivers/scsi/lpfc/lpfc_nportdisc.c
drivers/scsi/lpfc/lpfc_nvme.c
drivers/scsi/lpfc/lpfc_nvmet.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_version.h
drivers/scsi/lpfc/lpfc_vport.c
drivers/scsi/mac53c94.c
drivers/scsi/megaraid.c
drivers/scsi/mesh.c
drivers/scsi/mpi3mr/mpi3mr_os.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.h
drivers/scsi/mpt3sas/mpt3sas_ctl.c
drivers/scsi/mpt3sas/mpt3sas_ctl.h
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/pm8001/pm8001_ctl.c
drivers/scsi/qla1280.c
drivers/scsi/qlogicpti.c
drivers/scsi/scsi_debug.c
drivers/scsi/scsi_devinfo.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_lib_test.c [new file with mode: 0644]
drivers/scsi/scsi_priv.h
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/scsi_transport_iscsi.c
drivers/scsi/scsi_transport_spi.c
drivers/scsi/sd.c
drivers/scsi/ses.c
drivers/scsi/sr.c
drivers/scsi/sun3x_esp.c
drivers/scsi/sun_esp.c
drivers/soc/fsl/qbman/bman_ccsr.c
drivers/soc/fsl/qbman/dpaa_sys.c
drivers/soc/fsl/qbman/dpaa_sys.h
drivers/soc/fsl/qbman/qman_ccsr.c
drivers/soundwire/bus_type.c
drivers/soundwire/intel_auxdevice.c
drivers/soundwire/master.c
drivers/soundwire/slave.c
drivers/soundwire/stream.c
drivers/staging/media/atomisp/TODO
drivers/staging/media/atomisp/i2c/gc2235.h
drivers/staging/media/atomisp/pci/atomisp_cmd.c
drivers/staging/media/atomisp/pci/atomisp_compat_css20.c
drivers/staging/media/atomisp/pci/atomisp_drvfs.c
drivers/staging/media/atomisp/pci/atomisp_drvfs.h
drivers/staging/media/atomisp/pci/atomisp_internal.h
drivers/staging/media/atomisp/pci/atomisp_ioctl.c
drivers/staging/media/atomisp/pci/atomisp_v4l2.c
drivers/staging/media/atomisp/pci/base/circbuf/interface/ia_css_circbuf.h
drivers/staging/media/atomisp/pci/base/circbuf/src/circbuf.c
drivers/staging/media/atomisp/pci/ia_css_acc_types.h
drivers/staging/media/atomisp/pci/ia_css_control.h
drivers/staging/media/atomisp/pci/ia_css_firmware.h
drivers/staging/media/atomisp/pci/ia_css_irq.h
drivers/staging/media/atomisp/pci/isp/kernels/hdr/ia_css_hdr_types.h
drivers/staging/media/atomisp/pci/isp/kernels/macc/macc_1.0/ia_css_macc_table.host.c
drivers/staging/media/atomisp/pci/isp2400_input_system_global.h
drivers/staging/media/atomisp/pci/isp2400_input_system_public.h
drivers/staging/media/atomisp/pci/runtime/binary/src/binary.c
drivers/staging/media/atomisp/pci/runtime/pipeline/src/pipeline.c
drivers/staging/media/atomisp/pci/runtime/queue/src/queue.c
drivers/staging/media/atomisp/pci/runtime/rmgr/src/rmgr_vbuf.c
drivers/staging/media/atomisp/pci/sh_css.c
drivers/staging/media/atomisp/pci/sh_css_defs.h
drivers/staging/media/atomisp/pci/sh_css_mipi.c
drivers/staging/media/imx/imx-media-csc-scaler.c
drivers/staging/media/imx/imx-media-fim.c
drivers/staging/media/ipu3/include/uapi/intel-ipu3.h
drivers/staging/media/ipu3/ipu3-v4l2.c
drivers/staging/media/meson/vdec/vdec.h
drivers/staging/media/starfive/camss/stf-capture.c
drivers/staging/media/sunxi/cedrus/cedrus_h265.c
drivers/target/loopback/tcm_loop.c
drivers/tc/tc-driver.c
drivers/tty/mips_ejtag_fdc.c
drivers/tty/serial/pmac_zilog.c
drivers/ufs/core/ufs-mcq.c
drivers/ufs/core/ufs-sysfs.c
drivers/ufs/core/ufshcd.c
drivers/ufs/host/ufs-mediatek.c
drivers/ufs/host/ufs-mediatek.h
drivers/ufs/host/ufs-qcom.c
drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
drivers/vfio/mdev/mdev_driver.c
drivers/vfio/mdev/mdev_private.h
drivers/vfio/pci/Kconfig
drivers/vfio/pci/Makefile
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
drivers/vfio/pci/mlx5/cmd.c
drivers/vfio/pci/mlx5/cmd.h
drivers/vfio/pci/mlx5/main.c
drivers/vfio/pci/nvgrace-gpu/Kconfig [new file with mode: 0644]
drivers/vfio/pci/nvgrace-gpu/Makefile [new file with mode: 0644]
drivers/vfio/pci/nvgrace-gpu/main.c [new file with mode: 0644]
drivers/vfio/pci/pds/dirty.c
drivers/vfio/pci/pds/lm.c
drivers/vfio/pci/pds/lm.h
drivers/vfio/pci/pds/pci_drv.c
drivers/vfio/pci/pds/vfio_dev.c
drivers/vfio/pci/pds/vfio_dev.h
drivers/vfio/pci/vfio_pci_config.c
drivers/vfio/pci/vfio_pci_core.c
drivers/vfio/pci/vfio_pci_intrs.c
drivers/vfio/pci/vfio_pci_rdwr.c
drivers/vfio/pci/virtio/main.c
drivers/vfio/platform/vfio_amba.c
drivers/vfio/platform/vfio_platform.c
drivers/vfio/platform/vfio_platform_irq.c
drivers/vfio/vfio.h
drivers/vfio/vfio_iommu_type1.c
drivers/vfio/vfio_main.c
drivers/vfio/virqfd.c
drivers/video/fbdev/hgafb.c
drivers/watchdog/hpwdt.c
drivers/watchdog/intel-mid_wdt.c
drivers/watchdog/it87_wdt.c
drivers/watchdog/qcom-wdt.c
drivers/watchdog/sp805_wdt.c
drivers/watchdog/starfive-wdt.c
drivers/watchdog/stm32_iwdg.c
drivers/watchdog/watchdog_core.c
fs/9p/v9fs.h
fs/9p/v9fs_vfs.h
fs/9p/vfs_dir.c
fs/9p/vfs_inode.c
fs/9p/vfs_inode_dotl.c
fs/9p/vfs_super.c
fs/Kconfig
fs/afs/dir.c
fs/afs/rotate.c
fs/afs/validation.c
fs/bcachefs/Makefile
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/alloc_foreground.c
fs/bcachefs/backpointers.c
fs/bcachefs/bbpos_types.h
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/bkey.h
fs/bcachefs/bkey_types.h [new file with mode: 0644]
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_io.c
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_journal_iter.c
fs/bcachefs/btree_journal_iter.h
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_locking.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update.c
fs/bcachefs/btree_update.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_interior.h
fs/bcachefs/btree_write_buffer.c
fs/bcachefs/buckets.c
fs/bcachefs/chardev.c
fs/bcachefs/checksum.c
fs/bcachefs/compress.c
fs/bcachefs/debug.c
fs/bcachefs/dirent.c
fs/bcachefs/dirent.h
fs/bcachefs/ec.c
fs/bcachefs/errcode.c
fs/bcachefs/errcode.h
fs/bcachefs/error.c
fs/bcachefs/error.h
fs/bcachefs/extents.h
fs/bcachefs/fifo.h
fs/bcachefs/fs-common.c
fs/bcachefs/fs-io-buffered.c
fs/bcachefs/fs-io-pagecache.h
fs/bcachefs/fs.c
fs/bcachefs/fsck.c
fs/bcachefs/fsck.h
fs/bcachefs/inode.c
fs/bcachefs/inode.h
fs/bcachefs/io_read.c
fs/bcachefs/io_write.c
fs/bcachefs/journal.c
fs/bcachefs/journal.h
fs/bcachefs/journal_io.c
fs/bcachefs/journal_io.h
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_seq_blacklist.c
fs/bcachefs/journal_types.h
fs/bcachefs/lru.c
fs/bcachefs/mean_and_variance.c
fs/bcachefs/mean_and_variance.h
fs/bcachefs/mean_and_variance_test.c
fs/bcachefs/migrate.c
fs/bcachefs/opts.c
fs/bcachefs/opts.h
fs/bcachefs/rebalance.c
fs/bcachefs/recovery.c
fs/bcachefs/recovery_types.h
fs/bcachefs/sb-clean.c
fs/bcachefs/sb-downgrade.c
fs/bcachefs/sb-errors_types.h
fs/bcachefs/str_hash.h
fs/bcachefs/subvolume.c
fs/bcachefs/subvolume.h
fs/bcachefs/subvolume_format.h
fs/bcachefs/super-io.c
fs/bcachefs/super.c
fs/bcachefs/sysfs.c
fs/bcachefs/thread_with_file.c
fs/bcachefs/thread_with_file.h
fs/bcachefs/thread_with_file_types.h
fs/bcachefs/time_stats.c [new file with mode: 0644]
fs/bcachefs/time_stats.h [new file with mode: 0644]
fs/bcachefs/trace.h
fs/bcachefs/util.c
fs/bcachefs/util.h
fs/bcachefs/xattr.c
fs/ext4/extents.c
fs/ext4/inode.c
fs/ext4/mballoc-test.c
fs/ext4/mballoc.c
fs/ext4/resize.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/f2fs/checkpoint.c
fs/f2fs/compress.c
fs/f2fs/data.c
fs/f2fs/debug.c
fs/f2fs/dir.c
fs/f2fs/extent_cache.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/gc.h
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/node.h
fs/f2fs/recovery.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/sysfs.c
fs/f2fs/verity.c
fs/fat/nfs.c
fs/fuse/Kconfig
fs/fuse/Makefile
fs/fuse/control.c
fs/fuse/dev.c
fs/fuse/dir.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/fuse/iomode.c [new file with mode: 0644]
fs/fuse/passthrough.c [new file with mode: 0644]
fs/fuse/readdir.c
fs/fuse/virtio_fs.c
fs/inode.c
fs/netfs/fscache_io.c
fs/nfs/client.c
fs/nfs/delegation.c
fs/nfs/direct.c
fs/nfs/filelayout/filelayoutdev.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/fs_context.c
fs/nfs/fscache.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/netns.h
fs/nfs/nfs3client.c
fs/nfs/nfs42.h
fs/nfs/nfs4_fs.h
fs/nfs/nfs4client.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4super.c
fs/nfs/nfs4trace.c
fs/nfs/nfs4trace.h
fs/nfs/nfsroot.c
fs/nfs/pnfs.c
fs/nfs/pnfs_nfs.c
fs/nfs/read.c
fs/nfs/super.c
fs/nfs/write.c
fs/nilfs2/alloc.c
fs/nilfs2/bmap.c
fs/nilfs2/btree.c
fs/nilfs2/cpfile.c
fs/nilfs2/cpfile.h
fs/nilfs2/dat.c
fs/nilfs2/direct.c
fs/nilfs2/ifile.c
fs/nilfs2/ifile.h
fs/nilfs2/inode.c
fs/nilfs2/ioctl.c
fs/nilfs2/mdt.c
fs/nilfs2/nilfs.h
fs/nilfs2/page.c
fs/nilfs2/recovery.c
fs/nilfs2/segbuf.c
fs/nilfs2/segment.c
fs/nilfs2/sufile.c
fs/nilfs2/super.c
fs/nilfs2/the_nilfs.c
fs/ocfs2/dlmglue.c
fs/ocfs2/file.c
fs/ocfs2/super.c
fs/orangefs/orangefs-cache.c
fs/orangefs/orangefs-kernel.h
fs/orangefs/super.c
fs/overlayfs/copy_up.c
fs/proc/Kconfig
fs/proc/kcore.c
fs/proc/task_mmu.c
fs/super.c
fs/userfaultfd.c
include/asm-generic/tlb.h
include/asm-generic/vmlinux.lds.h
include/crypto/acompress.h
include/crypto/internal/hash.h
include/crypto/public_key.h
include/dt-bindings/clock/ast2600-clock.h
include/dt-bindings/clock/exynos850.h
include/dt-bindings/clock/microchip,mpfs-clock.h
include/dt-bindings/clock/mobileye,eyeq5-clk.h [new file with mode: 0644]
include/dt-bindings/clock/qcom,gcc-sm8150.h
include/dt-bindings/clock/r8a779g0-cpg-mssr.h
include/dt-bindings/power/amlogic,c3-pwrc.h
include/dt-bindings/reset/mediatek,mt7988-resets.h
include/kvm/arm_pmu.h
include/kvm/arm_vgic.h
include/linux/acpi.h
include/linux/bits.h
include/linux/blkdev.h
include/linux/buildid.h
include/linux/cacheinfo.h
include/linux/clk-provider.h
include/linux/clk.h
include/linux/cma.h
include/linux/compiler-clang.h
include/linux/crash_core.h
include/linux/crash_reserve.h [new file with mode: 0644]
include/linux/damon.h
include/linux/dax.h
include/linux/dma/k3-udma-glue.h
include/linux/efi.h
include/linux/einj-cxl.h [new file with mode: 0644]
include/linux/f2fs_fs.h
include/linux/flex_proportions.h
include/linux/fs.h
include/linux/fw_table.h
include/linux/generic-radix-tree.h
include/linux/gfp.h
include/linux/gfp_types.h
include/linux/highmem.h
include/linux/hisi_acc_qm.h
include/linux/huge_mm.h
include/linux/hugetlb.h
include/linux/i2c.h
include/linux/input.h
include/linux/input/navpoint.h [deleted file]
include/linux/kexec.h
include/linux/kvm_host.h
include/linux/kvm_types.h
include/linux/list.h
include/linux/list_lru.h
include/linux/memcontrol.h
include/linux/memory.h
include/linux/memory_hotplug.h
include/linux/mempool.h
include/linux/memremap.h
include/linux/min_heap.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/mmdebug.h
include/linux/mmu_context.h
include/linux/mmzone.h
include/linux/moduleloader.h
include/linux/mtd/flashchip.h
include/linux/mtd/lpc32xx_mlc.h
include/linux/mtd/lpc32xx_slc.h
include/linux/mtd/mtd.h
include/linux/mtd/spinand.h
include/linux/nfs_fs.h
include/linux/nfs_xdr.h
include/linux/nmi.h
include/linux/node.h
include/linux/of.h
include/linux/of_graph.h
include/linux/padata.h
include/linux/page-flags.h
include/linux/page_counter.h
include/linux/page_owner.h
include/linux/pagevec.h
include/linux/pgtable.h
include/linux/phy/phy.h
include/linux/ptdump.h
include/linux/sched.h
include/linux/sched/mm.h
include/linux/serio.h
include/linux/soundwire/sdw_type.h
include/linux/stackdepot.h
include/linux/start_kernel.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/xprt.h
include/linux/swap.h
include/linux/swapops.h
include/linux/tc.h
include/linux/userfaultfd_k.h
include/linux/vfio.h
include/linux/vfio_pci_core.h
include/linux/vmalloc.h
include/linux/vmcore_info.h [new file with mode: 0644]
include/linux/win_minmax.h
include/linux/writeback.h
include/linux/zswap.h
include/media/cec.h
include/media/media-entity.h
include/media/v4l2-common.h
include/media/videobuf2-core.h
include/scsi/scsi_device.h
include/scsi/scsi_host.h
include/trace/events/compaction.h
include/trace/events/kmem.h
include/trace/events/oom.h
include/trace/events/sunrpc.h
include/trace/misc/nfs.h
include/uapi/asm-generic/bitsperlong.h
include/uapi/linux/auxvec.h
include/uapi/linux/bits.h [new file with mode: 0644]
include/uapi/linux/fuse.h
include/uapi/linux/kvm.h
include/uapi/linux/mempolicy.h
include/uapi/linux/rkisp1-config.h
include/uapi/linux/videodev2.h
include/ufs/ufshcd.h
include/ufs/ufshci.h
init/initramfs.c
init/main.c
ipc/ipc_sysctl.c
ipc/mq_sysctl.c
kernel/Kconfig.kexec
kernel/Makefile
kernel/bounds.c
kernel/crash_core.c
kernel/crash_reserve.c [new file with mode: 0644]
kernel/dma/contiguous.c
kernel/elfcorehdr.c [moved from kernel/crash_dump.c with 100% similarity]
kernel/events/uprobes.c
kernel/hung_task.c
kernel/kallsyms_selftest.c
kernel/kexec.c
kernel/kexec_core.c
kernel/kexec_file.c
kernel/kexec_internal.h
kernel/ksysfs.c
kernel/module/main.c
kernel/padata.c
kernel/panic.c
kernel/printk/printk.c
kernel/ptrace.c
kernel/sched/fair.c
kernel/signal.c
kernel/time/timer_migration.c
kernel/user_namespace.c
kernel/vmcore_info.c [new file with mode: 0644]
kernel/watchdog.c
lib/Kconfig.debug
lib/Kconfig.kasan
lib/assoc_array.c
lib/buildid.c
lib/dhry_1.c
lib/dhry_run.c
lib/dynamic_debug.c
lib/flex_proportions.c
lib/fonts/Kconfig
lib/fw_table.c
lib/generic-radix-tree.c
lib/maple_tree.c
lib/math/div64.c
lib/raid6/Makefile
lib/sort.c
lib/stackdepot.c
lib/stackinit_kunit.c
lib/test_vmalloc.c
lib/test_xarray.c
mm/Kconfig
mm/cma.c
mm/cma.h
mm/cma_sysfs.c
mm/compaction.c
mm/damon/Kconfig
mm/damon/core.c
mm/damon/dbgfs.c
mm/damon/paddr.c
mm/damon/reclaim.c
mm/damon/sysfs-common.h
mm/damon/sysfs-schemes.c
mm/damon/sysfs.c
mm/debug.c
mm/filemap.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/kasan/common.c
mm/kasan/kasan_test.c
mm/kasan/kasan_test_module.c
mm/kasan/report.c
mm/kasan/shadow.c
mm/khugepaged.c
mm/kmsan/hooks.c
mm/list_lru.c
mm/madvise.c
mm/memcontrol.c
mm/memfd.c
mm/memory-tiers.c
mm/memory.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/mempool.c
mm/memtest.c
mm/migrate.c
mm/mlock.c
mm/mm_init.c
mm/mmap.c
mm/mmu_gather.c
mm/mprotect.c
mm/nommu.c
mm/oom_kill.c
mm/page-writeback.c
mm/page_alloc.c
mm/page_isolation.c
mm/page_owner.c
mm/ptdump.c
mm/readahead.c
mm/rmap.c
mm/shmem.c
mm/slab_common.c
mm/sparse.c
mm/swap.c
mm/swap_slots.c
mm/swap_state.c
mm/swapfile.c
mm/userfaultfd.c
mm/util.c
mm/vmalloc.c
mm/vmscan.c
mm/z3fold.c
mm/zsmalloc.c
mm/zswap.c
net/bridge/br_multicast.c
net/ipv4/gre_demux.c
net/ipv6/ip6_gre.c
net/iucv/iucv.c
net/mpls/mpls_gso.c
net/sunrpc/addr.c
net/sunrpc/clnt.c
net/sunrpc/xprt.c
net/sunrpc/xprtsock.c
samples/vfio-mdev/mbochs.c
samples/vfio-mdev/mdpy.c
scripts/check-sysctl-docs
scripts/const_structs.checkpatch
scripts/gdb/linux/constants.py.in
scripts/gdb/linux/interrupts.py
scripts/gdb/linux/vmalloc.py
scripts/min-tool-version.sh
scripts/recordmcount.pl
security/Kconfig
sound/aoa/soundbus/i2sbus/core.c
sound/core/.kunitconfig [new file with mode: 0644]
sound/core/timer.c
sound/pci/hda/patch_realtek.c
sound/usb/mixer.c
tools/arch/riscv/include/asm/csr.h [new file with mode: 0644]
tools/arch/riscv/include/asm/vdso/processor.h [new file with mode: 0644]
tools/arch/x86/include/asm/irq_vectors.h
tools/build/Makefile.feature
tools/build/feature/Makefile
tools/build/feature/test-all.c
tools/build/feature/test-libcapstone.c [new file with mode: 0644]
tools/crypto/ccp/test_dbc.py
tools/lib/perf/evlist.c
tools/lib/perf/include/internal/evlist.h
tools/lib/subcmd/run-command.c
tools/lib/subcmd/run-command.h
tools/mm/Makefile
tools/mm/thpmaps [new file with mode: 0644]
tools/objtool/noreturns.h
tools/perf/Documentation/perf-intel-pt.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-script-python.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-stat.txt
tools/perf/Documentation/perf-top.txt
tools/perf/Documentation/perf.txt
tools/perf/Documentation/tips.txt
tools/perf/Makefile.config
tools/perf/Makefile.perf
tools/perf/arch/arm/util/perf_regs.c
tools/perf/arch/arm/util/pmu.c
tools/perf/arch/arm64/Makefile
tools/perf/arch/arm64/util/machine.c
tools/perf/arch/arm64/util/mem-events.c
tools/perf/arch/arm64/util/mem-events.h [new file with mode: 0644]
tools/perf/arch/arm64/util/perf_regs.c
tools/perf/arch/csky/util/perf_regs.c
tools/perf/arch/loongarch/Makefile
tools/perf/arch/loongarch/util/perf_regs.c
tools/perf/arch/mips/Makefile
tools/perf/arch/mips/util/perf_regs.c
tools/perf/arch/powerpc/Makefile
tools/perf/arch/powerpc/util/Build
tools/perf/arch/powerpc/util/kvm-stat.c
tools/perf/arch/powerpc/util/mem-events.c
tools/perf/arch/powerpc/util/mem-events.h [new file with mode: 0644]
tools/perf/arch/powerpc/util/perf_regs.c
tools/perf/arch/powerpc/util/pmu.c [new file with mode: 0644]
tools/perf/arch/riscv/util/perf_regs.c
tools/perf/arch/s390/Makefile
tools/perf/arch/s390/util/perf_regs.c
tools/perf/arch/x86/Makefile
tools/perf/arch/x86/tests/dwarf-unwind.c
tools/perf/arch/x86/tests/hybrid.c
tools/perf/arch/x86/util/mem-events.c
tools/perf/arch/x86/util/mem-events.h [new file with mode: 0644]
tools/perf/arch/x86/util/perf_regs.c
tools/perf/arch/x86/util/pmu.c
tools/perf/arch/x86/util/tsc.c
tools/perf/builtin-c2c.c
tools/perf/builtin-list.c
tools/perf/builtin-mem.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-top.c
tools/perf/builtin-trace.c
tools/perf/builtin-version.c
tools/perf/pmu-events/arch/powerpc/mapfile.csv
tools/perf/pmu-events/arch/s390/cf_z16/extended.json
tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
tools/perf/pmu-events/arch/x86/alderlake/metricgroups.json
tools/perf/pmu-events/arch/x86/alderlake/other.json
tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
tools/perf/pmu-events/arch/x86/alderlaken/other.json
tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
tools/perf/pmu-events/arch/x86/amdzen4/cache.json
tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
tools/perf/pmu-events/arch/x86/broadwell/memory.json
tools/perf/pmu-events/arch/x86/broadwell/metricgroups.json
tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
tools/perf/pmu-events/arch/x86/broadwellde/metricgroups.json
tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json
tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
tools/perf/pmu-events/arch/x86/broadwellx/metricgroups.json
tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json
tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
tools/perf/pmu-events/arch/x86/cascadelakex/metricgroups.json
tools/perf/pmu-events/arch/x86/cascadelakex/uncore-power.json
tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json
tools/perf/pmu-events/arch/x86/grandridge/cache.json
tools/perf/pmu-events/arch/x86/grandridge/floating-point.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/grandridge/frontend.json
tools/perf/pmu-events/arch/x86/grandridge/memory.json
tools/perf/pmu-events/arch/x86/grandridge/other.json
tools/perf/pmu-events/arch/x86/grandridge/pipeline.json
tools/perf/pmu-events/arch/x86/grandridge/uncore-cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/grandridge/uncore-memory.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/grandridge/uncore-power.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/grandridge/virtual-memory.json
tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
tools/perf/pmu-events/arch/x86/haswell/memory.json
tools/perf/pmu-events/arch/x86/haswell/metricgroups.json
tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
tools/perf/pmu-events/arch/x86/haswellx/metricgroups.json
tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json
tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
tools/perf/pmu-events/arch/x86/icelake/memory.json
tools/perf/pmu-events/arch/x86/icelake/metricgroups.json
tools/perf/pmu-events/arch/x86/icelake/other.json
tools/perf/pmu-events/arch/x86/icelake/pipeline.json
tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
tools/perf/pmu-events/arch/x86/icelakex/metricgroups.json
tools/perf/pmu-events/arch/x86/icelakex/uncore-power.json
tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
tools/perf/pmu-events/arch/x86/ivybridge/metricgroups.json
tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
tools/perf/pmu-events/arch/x86/ivytown/metricgroups.json
tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
tools/perf/pmu-events/arch/x86/jaketown/metricgroups.json
tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
tools/perf/pmu-events/arch/x86/mapfile.csv
tools/perf/pmu-events/arch/x86/meteorlake/cache.json
tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
tools/perf/pmu-events/arch/x86/meteorlake/other.json
tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json
tools/perf/pmu-events/arch/x86/rocketlake/memory.json
tools/perf/pmu-events/arch/x86/rocketlake/metricgroups.json
tools/perf/pmu-events/arch/x86/rocketlake/other.json
tools/perf/pmu-events/arch/x86/rocketlake/pipeline.json
tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
tools/perf/pmu-events/arch/x86/sandybridge/metricgroups.json
tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
tools/perf/pmu-events/arch/x86/sapphirerapids/metricgroups.json
tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
tools/perf/pmu-events/arch/x86/sierraforest/cache.json
tools/perf/pmu-events/arch/x86/sierraforest/floating-point.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/sierraforest/frontend.json
tools/perf/pmu-events/arch/x86/sierraforest/memory.json
tools/perf/pmu-events/arch/x86/sierraforest/other.json
tools/perf/pmu-events/arch/x86/sierraforest/pipeline.json
tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/sierraforest/uncore-cxl.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/sierraforest/uncore-memory.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/sierraforest/uncore-power.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/sierraforest/virtual-memory.json
tools/perf/pmu-events/arch/x86/skylake/memory.json
tools/perf/pmu-events/arch/x86/skylake/metricgroups.json
tools/perf/pmu-events/arch/x86/skylake/pipeline.json
tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json
tools/perf/pmu-events/arch/x86/skylakex/metricgroups.json
tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
tools/perf/pmu-events/arch/x86/skylakex/uncore-power.json
tools/perf/pmu-events/arch/x86/snowridgex/uncore-power.json
tools/perf/pmu-events/arch/x86/tigerlake/metricgroups.json
tools/perf/pmu-events/arch/x86/tigerlake/other.json
tools/perf/pmu-events/arch/x86/tigerlake/pipeline.json
tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
tools/perf/pmu-events/arch/x86/tigerlake/uncore-interconnect.json
tools/perf/pmu-events/jevents.py
tools/perf/tests/Build
tools/perf/tests/builtin-test-list.c [deleted file]
tools/perf/tests/builtin-test-list.h [deleted file]
tools/perf/tests/builtin-test.c
tools/perf/tests/expand-cgroup.c
tools/perf/tests/make
tools/perf/tests/maps.c
tools/perf/tests/parse-events.c
tools/perf/tests/pmu-events.c
tools/perf/tests/shell/base_probe/settings.sh [new file with mode: 0644]
tools/perf/tests/shell/base_probe/test_adding_kernel.sh [new file with mode: 0755]
tools/perf/tests/shell/common/check_all_lines_matched.pl [new file with mode: 0755]
tools/perf/tests/shell/common/check_all_patterns_found.pl [new file with mode: 0755]
tools/perf/tests/shell/common/check_no_patterns_found.pl [new file with mode: 0755]
tools/perf/tests/shell/common/init.sh [new file with mode: 0644]
tools/perf/tests/shell/common/patterns.sh [new file with mode: 0644]
tools/perf/tests/shell/common/settings.sh [new file with mode: 0644]
tools/perf/tests/shell/lib/perf_has_symbol.sh
tools/perf/tests/shell/lib/perf_json_output_lint.py
tools/perf/tests/shell/lib/perf_metric_validation.py
tools/perf/tests/shell/lib/stat_output.sh
tools/perf/tests/shell/perftool-testsuite_probe.sh [new file with mode: 0755]
tools/perf/tests/shell/stat+csv_output.sh
tools/perf/tests/shell/stat+json_output.sh
tools/perf/tests/shell/stat+std_output.sh
tools/perf/tests/shell/stat_bpf_counters.sh
tools/perf/tests/shell/stat_metrics_values.sh
tools/perf/tests/shell/test_arm_callgraph_fp.sh
tools/perf/tests/symbols.c
tools/perf/tests/tests-scripts.c [new file with mode: 0644]
tools/perf/tests/tests-scripts.h [new file with mode: 0644]
tools/perf/tests/tests.h
tools/perf/tests/thread-maps-share.c
tools/perf/tests/vmlinux-kallsyms.c
tools/perf/ui/browsers/res_sample.c
tools/perf/ui/browsers/scripts.c
tools/perf/ui/gtk/annotate.c
tools/perf/util/Build
tools/perf/util/annotate-data.c
tools/perf/util/annotate-data.h
tools/perf/util/annotate.c
tools/perf/util/annotate.h
tools/perf/util/bpf-event.c
tools/perf/util/bpf_lock_contention.c
tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
tools/perf/util/bpf_skel/lock_contention.bpf.c
tools/perf/util/bpf_skel/lock_data.h
tools/perf/util/bpf_skel/vmlinux/vmlinux.h
tools/perf/util/callchain.c
tools/perf/util/cpumap.c
tools/perf/util/cpumap.h
tools/perf/util/data-convert-json.c
tools/perf/util/data.c
tools/perf/util/data.h
tools/perf/util/debug.c
tools/perf/util/debug.h
tools/perf/util/dwarf-aux.c
tools/perf/util/dwarf-aux.h
tools/perf/util/env.h
tools/perf/util/event.c
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/expr.c
tools/perf/util/expr.l
tools/perf/util/machine.c
tools/perf/util/machine.h
tools/perf/util/map.c
tools/perf/util/maps.c
tools/perf/util/maps.h
tools/perf/util/mem-events.c
tools/perf/util/mem-events.h
tools/perf/util/metricgroup.c
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-events.y
tools/perf/util/parse-regs-options.c
tools/perf/util/perf-regs-arch/perf_regs_aarch64.c
tools/perf/util/perf-regs-arch/perf_regs_arm.c
tools/perf/util/perf-regs-arch/perf_regs_csky.c
tools/perf/util/perf-regs-arch/perf_regs_loongarch.c
tools/perf/util/perf-regs-arch/perf_regs_mips.c
tools/perf/util/perf-regs-arch/perf_regs_powerpc.c
tools/perf/util/perf-regs-arch/perf_regs_riscv.c
tools/perf/util/perf-regs-arch/perf_regs_s390.c
tools/perf/util/perf-regs-arch/perf_regs_x86.c
tools/perf/util/perf_regs.c
tools/perf/util/perf_regs.h
tools/perf/util/pmu.c
tools/perf/util/pmu.h
tools/perf/util/pmus.c
tools/perf/util/pmus.h
tools/perf/util/print-events.c
tools/perf/util/print_insn.c [new file with mode: 0644]
tools/perf/util/print_insn.h [new file with mode: 0644]
tools/perf/util/probe-event.c
tools/perf/util/python-ext-sources
tools/perf/util/python.c
tools/perf/util/rb_resort.h
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/setup.py
tools/perf/util/sort.c
tools/perf/util/srcline.c
tools/perf/util/stat-display.c
tools/perf/util/stat-shadow.c
tools/perf/util/stat.h
tools/perf/util/symbol-elf.c
tools/perf/util/symbol.c
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/thread_map.c
tools/perf/util/threads.c [new file with mode: 0644]
tools/perf/util/threads.h [new file with mode: 0644]
tools/perf/util/trace-event-parse.c
tools/perf/util/trace-event.h
tools/perf/util/unwind-libdw.c
tools/perf/util/unwind-libunwind-local.c
tools/perf/util/unwind-libunwind.c
tools/perf/util/util.c
tools/perf/util/util.h
tools/scripts/Makefile.include
tools/testing/selftests/cgroup/test_zswap.c
tools/testing/selftests/damon/.gitignore
tools/testing/selftests/damon/Makefile
tools/testing/selftests/damon/_chk_dependency.sh
tools/testing/selftests/damon/_damon_sysfs.py
tools/testing/selftests/damon/_debugfs_common.sh
tools/testing/selftests/damon/damos_apply_interval.py [new file with mode: 0644]
tools/testing/selftests/damon/damos_quota.py [new file with mode: 0644]
tools/testing/selftests/damon/debugfs_empty_targets.sh
tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c [new file with mode: 0644]
tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh [new file with mode: 0644]
tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c [new file with mode: 0644]
tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh [new file with mode: 0644]
tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py
tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py
tools/testing/selftests/filesystems/eventfd/.gitignore [moved from drivers/crypto/vmx/.gitignore with 60% similarity]
tools/testing/selftests/filesystems/eventfd/Makefile [new file with mode: 0644]
tools/testing/selftests/filesystems/eventfd/eventfd_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/aarch64/arch_timer.c
tools/testing/selftests/kvm/aarch64/debug-exceptions.c
tools/testing/selftests/kvm/aarch64/hypercalls.c
tools/testing/selftests/kvm/aarch64/page_fault_test.c
tools/testing/selftests/kvm/aarch64/set_id_regs.c
tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
tools/testing/selftests/kvm/arch_timer.c [new file with mode: 0644]
tools/testing/selftests/kvm/guest_memfd_test.c
tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/aarch64/processor.h
tools/testing/selftests/kvm/include/kvm_test_harness.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/kvm_util_base.h
tools/testing/selftests/kvm/include/riscv/arch_timer.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/riscv/processor.h
tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/sparsebit.h
tools/testing/selftests/kvm/include/test_util.h
tools/testing/selftests/kvm/include/timer_test.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86_64/pmu.h [new file with mode: 0644]
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/include/x86_64/sev.h [new file with mode: 0644]
tools/testing/selftests/kvm/lib/aarch64/processor.c
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/riscv/handlers.S [new file with mode: 0644]
tools/testing/selftests/kvm/lib/riscv/processor.c
tools/testing/selftests/kvm/lib/s390x/processor.c
tools/testing/selftests/kvm/lib/sparsebit.c
tools/testing/selftests/kvm/lib/ucall_common.c
tools/testing/selftests/kvm/lib/x86_64/pmu.c [new file with mode: 0644]
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/lib/x86_64/sev.c [new file with mode: 0644]
tools/testing/selftests/kvm/riscv/arch_timer.c [new file with mode: 0644]
tools/testing/selftests/kvm/riscv/get-reg-list.c
tools/testing/selftests/kvm/s390x/memop.c
tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
tools/testing/selftests/kvm/x86_64/pmu_counters_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
tools/testing/selftests/kvm/x86_64/sev_smoke_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
tools/testing/selftests/kvm/x86_64/sync_regs_test.c
tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
tools/testing/selftests/memfd/memfd_test.c
tools/testing/selftests/mm/.gitignore
tools/testing/selftests/mm/Makefile
tools/testing/selftests/mm/charge_reserved_hugetlb.sh
tools/testing/selftests/mm/compaction_test.c
tools/testing/selftests/mm/hugetlb-madvise.c
tools/testing/selftests/mm/hugetlb_madv_vs_map.c [new file with mode: 0644]
tools/testing/selftests/mm/hugetlb_reparenting_test.sh
tools/testing/selftests/mm/ksm_functional_tests.c
tools/testing/selftests/mm/map_fixed_noreplace.c
tools/testing/selftests/mm/map_hugetlb.c
tools/testing/selftests/mm/map_populate.c
tools/testing/selftests/mm/mlock-random-test.c
tools/testing/selftests/mm/mlock2-tests.c
tools/testing/selftests/mm/mlock2.h
tools/testing/selftests/mm/mrelease_test.c
tools/testing/selftests/mm/mremap_dontunmap.c
tools/testing/selftests/mm/on-fault-limit.c
tools/testing/selftests/mm/protection_keys.c
tools/testing/selftests/mm/run_vmtests.sh
tools/testing/selftests/mm/split_huge_page_test.c
tools/testing/selftests/mm/thuge-gen.c
tools/testing/selftests/mm/transhuge-stress.c
tools/testing/selftests/mm/uffd-stress.c
tools/testing/selftests/mm/virtual_address_range.c
tools/testing/selftests/mm/vm_util.c
tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
virt/kvm/Kconfig
virt/kvm/async_pf.c
virt/kvm/kvm_main.c
virt/kvm/pfncache.c

diff --git a/CREDITS b/CREDITS
index 3c2bb55847c607f027ddc6c50c259545327c0fe8..c55c5a0ee4ff65e244eb3a9de9aeb35515bc2381 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -2960,6 +2960,11 @@ S: 2364 Old Trail Drive
 S: Reston, Virginia 20191
 S: USA
 
+N: Sekhar Nori
+E: nori.sekhar@gmail.com
+D: Maintainer of Texas Instruments DaVinci machine support, contributor
+D: to device drivers relevant to that SoC family.
+
 N: Fredrik Noring
 E: noring@nocrew.org
 W: http://www.lysator.liu.se/~noring/
index fe61d372e3fac9a8f565add77702b2821be6b3a6..c61f9b8139730cbe79466cfec81b82021ca26775 100644 (file)
@@ -33,3 +33,37 @@ Description:
                device cannot clear poison from the address, -ENXIO is returned.
                The clear_poison attribute is only visible for devices
                supporting the capability.
+
+What:          /sys/kernel/debug/cxl/einj_types
+Date:          January, 2024
+KernelVersion: v6.9
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) Prints the CXL protocol error types made available by
+               the platform in the format:
+
+                       0x<error number> <error type>
+
+               The possible error types are (as of ACPI v6.5):
+
+                       0x1000  CXL.cache Protocol Correctable
+                       0x2000  CXL.cache Protocol Uncorrectable non-fatal
+                       0x4000  CXL.cache Protocol Uncorrectable fatal
+                       0x8000  CXL.mem Protocol Correctable
+                       0x10000 CXL.mem Protocol Uncorrectable non-fatal
+                       0x20000 CXL.mem Protocol Uncorrectable fatal
+
+               The <error number> can be written to einj_inject to inject
+               <error type> into a chosen dport.
+
+What:          /sys/kernel/debug/cxl/$dport_dev/einj_inject
+Date:          January, 2024
+KernelVersion: v6.9
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (WO) Writing an integer to this file injects the corresponding
+               CXL protocol error into $dport_dev ($dport_dev will be a device
+               name from /sys/bus/pci/devices). The integer-to-type mapping for
+               injection can be found by reading from einj_types. If the dport
+               was enumerated in RCH mode, a CXL 1.1 error is injected, otherwise
+               a CXL 2.0 error is injected.
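
               Putting the two einj entries above together, an injection
               session might look like the following sketch; the dport name
               0000:0c:00.0 and the abridged einj_types output are
               illustrative only::

                       # cat /sys/kernel/debug/cxl/einj_types
                       0x8000  CXL.mem Protocol Correctable
                       # echo 0x8000 > /sys/kernel/debug/cxl/0000:0c:00.0/einj_inject
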
index b2db010d851eeb67febe0dda7f5ca0dc5e7d1ff9..bd6793760f29842b841217d4226f6d5538403a62 100644 (file)
@@ -81,3 +81,29 @@ Description: (RO) Read returns, for each Acceleration Engine (AE), the number
                        <N>: Number of Compress and Verify (CnV) errors and type
                             of the last CnV error detected by Acceleration
                             Engine N.
+
+What:          /sys/kernel/debug/qat_<device>_<BDF>/heartbeat/inject_error
+Date:          March 2024
+KernelVersion: 6.8
+Contact:       qat-linux@intel.com
+Description:   (WO) Write to inject an error that simulates a heartbeat
+               failure. This is to be used for testing purposes.
+
+               After writing this file, the driver stops arbitration on a
+               random engine and disables the fetching of heartbeat counters.
+               If a workload is running on the device, a job submitted to the
+               accelerator might not get a response and a read of the
+               `heartbeat/status` attribute might report -1, i.e. device
+               unresponsive.
+               The error is unrecoverable, thus the device must be restarted to
+               restore its functionality.
+
+               This attribute is available only when the kernel is built with
+               CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION=y.
+
+               A write of 1 enables error injection.
+
+               The following example shows how to enable error injection::
+
+                       # cd /sys/kernel/debug/qat_<device>_<BDF>
+                       # echo 1 > heartbeat/inject_error
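
               Once the error has been injected as shown above, the failure
               can be observed through the status attribute the description
               refers to; per that description, a read may report -1 once the
               device stops responding::

                       # cat heartbeat/status
                       -1
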
index 8e8de49c5cc6698704057de6ade9f339898873ce..d4e16ef9ac9a99ecae8ee442134daf1935e5056e 100644 (file)
@@ -111,6 +111,28 @@ Description:       QM debug registers(regs) read hardware register value. This
                node is used to show the change of the qm register values. This
 node can help users to check the change of register values.
 
+What:          /sys/kernel/debug/hisi_hpre/<bdf>/qm/qm_state
+Date:          Jan 2024
+Contact:       linux-crypto@vger.kernel.org
+Description:   Dump the state of the device.
+               0: busy, 1: idle.
+               Only available for PF; it has no other effect on HPRE.
+
+What:          /sys/kernel/debug/hisi_hpre/<bdf>/qm/dev_timeout
+Date:          Feb 2024
+Contact:       linux-crypto@vger.kernel.org
+Description:   Set the wait time used when stopping a queue fails. Available
+               for both PF and VF; it has no other effect on HPRE.
+               0: do not wait (default); any other value: wait dev_timeout * 20
+               microseconds.
+
+What:          /sys/kernel/debug/hisi_hpre/<bdf>/qm/dev_state
+Date:          Feb 2024
+Contact:       linux-crypto@vger.kernel.org
+Description:   Dump the stop queue status of the QM. The default value is 0.
+               If dev_timeout is set and stopping a queue fails, dev_state
+               returns a non-zero value. Available for both PF and VF; it has
+               no other effect on HPRE.
+
 What:          /sys/kernel/debug/hisi_hpre/<bdf>/hpre_dfx/diff_regs
 Date:          Mar 2022
 Contact:       linux-crypto@vger.kernel.org
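
Taken together, the three qm entries added above suggest a simple
diagnostic flow; a sketch, assuming an illustrative <bdf> of
0000:79:00.0::

	# cd /sys/kernel/debug/hisi_hpre/0000:79:00.0/qm
	# cat qm_state            # 0: busy, 1: idle (PF only)
	# echo 10 > dev_timeout   # wait 10 * 20 microseconds if a queue stop fails
	# cat dev_state           # non-zero means a queue failed to stop

The same qm entries are added for hisi_sec2 and hisi_zip below.
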
index deeefe2c735ed7822856c5bea84a8e8909a5ec4e..6c6c9a6e150acb8dc0725d3ffb10e274e08cefb2 100644 (file)
@@ -91,6 +91,28 @@ Description: QM debug registers(regs) read hardware register value. This
                node is used to show the change of the qm register values. This
 node can help users to check the change of register values.
 
+What:          /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_state
+Date:          Jan 2024
+Contact:       linux-crypto@vger.kernel.org
+Description:   Dump the state of the device.
+               0: busy, 1: idle.
+               Only available for PF; it has no other effect on SEC.
+
+What:          /sys/kernel/debug/hisi_sec2/<bdf>/qm/dev_timeout
+Date:          Feb 2024
+Contact:       linux-crypto@vger.kernel.org
+Description:   Set the wait time used when stopping a queue fails. Available
+               for both PF and VF; it has no other effect on SEC.
+               0: do not wait (default); any other value: wait dev_timeout * 20
+               microseconds.
+
+What:          /sys/kernel/debug/hisi_sec2/<bdf>/qm/dev_state
+Date:          Feb 2024
+Contact:       linux-crypto@vger.kernel.org
+Description:   Dump the stop queue status of the QM. The default value is 0.
+               If dev_timeout is set and stopping a queue fails, dev_state
+               returns a non-zero value. Available for both PF and VF; it has
+               no other effect on SEC.
+
 What:          /sys/kernel/debug/hisi_sec2/<bdf>/sec_dfx/diff_regs
 Date:          Mar 2022
 Contact:       linux-crypto@vger.kernel.org
index 593714afaed249327778ce82d8263893b8800884..a22dd6942219354675ba93ab2eb64363f6056b10 100644 (file)
@@ -104,6 +104,28 @@ Description:       QM debug registers(regs) read hardware register value. This
 node is used to show the change of the qm register values. This
 node can help users to check the change of register values.
 
+What:          /sys/kernel/debug/hisi_zip/<bdf>/qm/qm_state
+Date:          Jan 2024
+Contact:       linux-crypto@vger.kernel.org
+Description:   Dump the state of the device.
+               0: busy, 1: idle.
+               Only available for PF; it has no other effect on ZIP.
+
+What:          /sys/kernel/debug/hisi_zip/<bdf>/qm/dev_timeout
+Date:          Feb 2024
+Contact:       linux-crypto@vger.kernel.org
+Description:   Set the wait time used when stopping a queue fails. Available
+               for both PF and VF; it has no other effect on ZIP.
+               0: do not wait (default); any other value: wait dev_timeout * 20
+               microseconds.
+
+What:          /sys/kernel/debug/hisi_zip/<bdf>/qm/dev_state
+Date:          Feb 2024
+Contact:       linux-crypto@vger.kernel.org
+Description:   Dump the stop queue status of the QM. The default value is 0.
+               If dev_timeout is set and stopping a queue fails, dev_state
+               returns a non-zero value. Available for both PF and VF; it has
+               no other effect on ZIP.
+
 What:          /sys/kernel/debug/hisi_zip/<bdf>/zip_dfx/diff_regs
 Date:          Mar 2022
 Contact:       linux-crypto@vger.kernel.org
index fff2581b80335891247c5fe1a86cb410ea8a559b..3f5627a1210a16aca7c18d17131a56491048a0c2 100644 (file)
@@ -552,3 +552,37 @@ Description:
                attribute is only visible for devices supporting the
                capability. The retrieved errors are logged as kernel
                events when cxl_poison event tracing is enabled.
+
+
+What:          /sys/bus/cxl/devices/regionZ/accessY/read_bandwidth
+               /sys/bus/cxl/devices/regionZ/accessY/write_bandwidth
+Date:          Jan, 2024
+KernelVersion: v6.9
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) The aggregated read or write bandwidth of the region. The
+               number is the accumulated read or write bandwidth of all CXL
+               memory devices that contribute to the region, in MB/s. It is
+               the same data that appears in
+               /sys/devices/system/node/nodeX/accessY/initiators/read_bandwidth or
+               /sys/devices/system/node/nodeX/accessY/initiators/write_bandwidth.
+               See Documentation/ABI/stable/sysfs-devices-node. access0 provides
+               the number to the closest initiator and access1 provides the
+               number to the closest CPU.
+
+
+What:          /sys/bus/cxl/devices/regionZ/accessY/read_latency
+               /sys/bus/cxl/devices/regionZ/accessY/write_latency
+Date:          Jan, 2024
+KernelVersion: v6.9
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) The read or write latency of the region. The number is
+               the worst read or write latency of all CXL memory devices that
+               contribute to the region, in nanoseconds. It is the same data
+               that appears in
+               /sys/devices/system/node/nodeX/accessY/initiators/read_latency or
+               /sys/devices/system/node/nodeX/accessY/initiators/write_latency.
+               See Documentation/ABI/stable/sysfs-devices-node. access0 provides
+               the number to the closest initiator and access1 provides the
+               number to the closest CPU.
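
               Since these region attributes mirror the node-level values
               cited above, a quick cross-check is possible; a sketch, with
               the region, node, and bandwidth value all illustrative::

                       # cat /sys/bus/cxl/devices/region0/access0/read_bandwidth
                       30000
                       # cat /sys/devices/system/node/node1/access0/initiators/read_bandwidth
                       30000
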
diff --git a/Documentation/ABI/testing/sysfs-bus-dax b/Documentation/ABI/testing/sysfs-bus-dax
new file mode 100644 (file)
index 0000000..b34266b
--- /dev/null
@@ -0,0 +1,153 @@
+What:          /sys/bus/dax/devices/daxX.Y/align
+Date:          October, 2020
+KernelVersion: v5.10
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RW) Provides a way to specify an alignment for a dax device.
+               Values allowed are constrained by the physical address ranges
+               that back the dax device, and also by arch requirements.
+
+What:          /sys/bus/dax/devices/daxX.Y/mapping
+Date:          October, 2020
+KernelVersion: v5.10
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (WO) Provides a way to allocate a mapping range under a dax
+               device. Specified in the format <start>-<end>.
+
+What:          /sys/bus/dax/devices/daxX.Y/mapping[0..N]/start
+What:          /sys/bus/dax/devices/daxX.Y/mapping[0..N]/end
+What:          /sys/bus/dax/devices/daxX.Y/mapping[0..N]/page_offset
+Date:          October, 2020
+KernelVersion: v5.10
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RO) A dax device may have multiple constituent discontiguous
+               address ranges. These are represented by the different
+               'mappingX' subdirectories. The 'start' attribute indicates the
+               start physical address for the given range. The 'end' attribute
+               indicates the end physical address for the given range. The
+               'page_offset' attribute indicates the offset of the current
+               range in the dax device.
+
+What:          /sys/bus/dax/devices/daxX.Y/resource
+Date:          June, 2019
+KernelVersion: v5.3
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RO) The resource attribute indicates the starting physical
+               address of a dax device. In case of a device with multiple
+               constituent ranges, it indicates the starting address of the
+               first range.
+
+What:          /sys/bus/dax/devices/daxX.Y/size
+Date:          October, 2020
+KernelVersion: v5.10
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RW) The size attribute indicates the total size of a dax
+               device. For creating subdivided dax devices, or for resizing
+               an existing device, the new size can be written to this as
+               part of the reconfiguration process.
+
+What:          /sys/bus/dax/devices/daxX.Y/numa_node
+Date:          November, 2019
+KernelVersion: v5.5
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RO) If NUMA is enabled and the platform has affinitized the
+               backing device for this dax device, emit the CPU node
+               affinity for this device.
+
+What:          /sys/bus/dax/devices/daxX.Y/target_node
+Date:          February, 2019
+KernelVersion: v5.1
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RO) The target-node attribute is the Linux numa-node that a
+               device-dax instance may create when it is online. Prior to
+               being online the device's 'numa_node' property reflects the
+               closest online cpu node which is the typical expectation of a
+               device 'numa_node'. Once it is online it becomes its own
+               distinct numa node.
+
+What:          $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/available_size
+Date:          October, 2020
+KernelVersion: v5.10
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RO) The available_size attribute tracks available dax region
+               capacity. This only applies to volatile hmem devices, not pmem
+               devices, since pmem devices are defined by nvdimm namespace
+               boundaries.
+
+What:          $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/size
+Date:          July, 2017
+KernelVersion: v5.1
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RO) The size attribute indicates the size of a given dax region
+               in bytes.
+
+What:          $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/align
+Date:          October, 2020
+KernelVersion: v5.10
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RO) The align attribute indicates alignment of the dax region.
+               Changes to align may not always be valid, for example when
+               certain mappings were created with a 2M alignment and the
+               region is then switched to 1G. All existing ranges are
+               validated against the new value being attempted after resizing.
+
+What:          $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/seed
+Date:          October, 2020
+KernelVersion: v5.10
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RO) The seed device is a concept for dynamic dax regions to be
+               able to split the region amongst multiple sub-instances.  The
+               seed device, similar to libnvdimm seed devices, is a device
+               that starts with zero capacity allocated and unbound to a
+               driver.
+
+What:          $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/create
+Date:          October, 2020
+KernelVersion: v5.10
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RW) The create interface to the dax region provides a way to
+               create a new unconfigured dax device under the given region, which
+               can then be configured (with a size etc.) and then probed.
+
+What:          $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/delete
+Date:          October, 2020
+KernelVersion: v5.10
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (WO) The delete interface for a dax region provides for deletion
+               of any 0-sized and idle dax devices.
+
+What:          $(readlink -f /sys/bus/dax/devices/daxX.Y)/../dax_region/id
+Date:          July, 2017
+KernelVersion: v5.1
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RO) The id attribute indicates the region id of a dax region.
+
+What:          /sys/bus/dax/devices/daxX.Y/memmap_on_memory
+Date:          January, 2024
+KernelVersion: v6.8
+Contact:       nvdimm@lists.linux.dev
+Description:
+               (RW) Control the memmap_on_memory setting if the dax device
+               were to be hotplugged as system memory. This determines whether
+               the 'altmap' for the hotplugged memory will be placed on the
+               device being hotplugged (memmap_on_memory=1) or if it will be
+               placed on regular memory (memmap_on_memory=0). This attribute
+               must be set before the device is handed over to the 'kmem'
+               driver (i.e.  hotplugged into system-ram). Additionally, this
+               depends on CONFIG_MHP_MEMMAP_ON_MEMORY, and a globally enabled
+               memmap_on_memory parameter for memory_hotplug. This is
+               typically set on the kernel command line by setting
+               memory_hotplug.memmap_on_memory to 'true' or 'force'.
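
               The dax_region create, device size, and memmap_on_memory
               attributes above compose into a provisioning flow; a sketch,
               where the spawned device name dax0.1 and the 16 GiB size are
               assumptions::

                       # echo 1 > /sys/bus/dax/devices/dax0.0/../dax_region/create
                       # echo $((16 << 30)) > /sys/bus/dax/devices/dax0.1/size
                       # echo 1 > /sys/bus/dax/devices/dax0.1/memmap_on_memory

               As noted above, the memmap_on_memory write must happen before
               the device is handed over to the 'kmem' driver.
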
index bbf329cf0d67bc10c5e3b74fdf7a1e34317688ef..96020fb051c347e0f3c87a0775853be0300425a5 100644 (file)
@@ -141,3 +141,23 @@ Description:
                        64
 
                This attribute is only available for qat_4xxx devices.
+
+What:          /sys/bus/pci/devices/<BDF>/qat/auto_reset
+Date:          March 2024
+KernelVersion: 6.8
+Contact:       qat-linux@intel.com
+Description:   (RW) Reports the current state of the auto reset feature
+               for a QAT device.
+
+               Write to the attribute to enable or disable device auto reset.
+
+               Device auto reset is disabled by default.
+
+               The values are:
+
+               * 1/Yy/on: auto reset enabled. If the device encounters an
+                 unrecoverable error, it will be reset automatically.
+               * 0/Nn/off: auto reset disabled. If the device encounters an
+                 unrecoverable error, it will not be reset.
+
+               This attribute is only available for qat_4xxx devices.
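+
+               For example, to enable auto reset on a device with a
+               hypothetical BDF::
+
+                   # echo 1 > /sys/bus/pci/devices/0000:6b:00.0/qat/auto_reset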
index 99fa87a43926e6346aa16d63e03ab0321e4dc29b..1a4d83953379ec902c828af8f359003ff08955f0 100644 (file)
@@ -205,7 +205,7 @@ Description:        Controls the idle timing of system, if there is no FS operation
 What:          /sys/fs/f2fs/<disk>/discard_idle_interval
 Date:          September 2018
 Contact:       "Chao Yu" <yuchao0@huawei.com>
-Contact:       "Sahitya Tummala" <stummala@codeaurora.org>
+Contact:       "Sahitya Tummala" <quic_stummala@quicinc.com>
 Description:   Controls the idle timing of discard thread given
                this time interval.
                Default is 5 secs.
@@ -213,7 +213,7 @@ Description:        Controls the idle timing of discard thread given
 What:          /sys/fs/f2fs/<disk>/gc_idle_interval
 Date:          September 2018
 Contact:       "Chao Yu" <yuchao0@huawei.com>
-Contact:       "Sahitya Tummala" <stummala@codeaurora.org>
+Contact:       "Sahitya Tummala" <quic_stummala@quicinc.com>
 Description:    Controls the idle timing for gc path. Set to 5 seconds by default.
 
 What:          /sys/fs/f2fs/<disk>/iostat_enable
@@ -701,29 +701,31 @@ Description:      Support configuring fault injection type, should be
                enabled with fault_injection option, fault type value
                is shown below, it supports single or combined type.
 
-               ===================      ===========
-               Type_Name                Type_Value
-               ===================      ===========
-               FAULT_KMALLOC            0x000000001
-               FAULT_KVMALLOC           0x000000002
-               FAULT_PAGE_ALLOC         0x000000004
-               FAULT_PAGE_GET           0x000000008
-               FAULT_ALLOC_BIO          0x000000010 (obsolete)
-               FAULT_ALLOC_NID          0x000000020
-               FAULT_ORPHAN             0x000000040
-               FAULT_BLOCK              0x000000080
-               FAULT_DIR_DEPTH          0x000000100
-               FAULT_EVICT_INODE        0x000000200
-               FAULT_TRUNCATE           0x000000400
-               FAULT_READ_IO            0x000000800
-               FAULT_CHECKPOINT         0x000001000
-               FAULT_DISCARD            0x000002000
-               FAULT_WRITE_IO           0x000004000
-               FAULT_SLAB_ALLOC         0x000008000
-               FAULT_DQUOT_INIT         0x000010000
-               FAULT_LOCK_OP            0x000020000
-               FAULT_BLKADDR            0x000040000
-               ===================      ===========
+               ===========================      ===========
+               Type_Name                        Type_Value
+               ===========================      ===========
+               FAULT_KMALLOC                    0x000000001
+               FAULT_KVMALLOC                   0x000000002
+               FAULT_PAGE_ALLOC                 0x000000004
+               FAULT_PAGE_GET                   0x000000008
+               FAULT_ALLOC_BIO                  0x000000010 (obsolete)
+               FAULT_ALLOC_NID                  0x000000020
+               FAULT_ORPHAN                     0x000000040
+               FAULT_BLOCK                      0x000000080
+               FAULT_DIR_DEPTH                  0x000000100
+               FAULT_EVICT_INODE                0x000000200
+               FAULT_TRUNCATE                   0x000000400
+               FAULT_READ_IO                    0x000000800
+               FAULT_CHECKPOINT                 0x000001000
+               FAULT_DISCARD                    0x000002000
+               FAULT_WRITE_IO                   0x000004000
+               FAULT_SLAB_ALLOC                 0x000008000
+               FAULT_DQUOT_INIT                 0x000010000
+               FAULT_LOCK_OP                    0x000020000
+               FAULT_BLKADDR_VALIDITY           0x000040000
+               FAULT_BLKADDR_CONSISTENCE        0x000080000
+               FAULT_NO_SEGMENT                 0x000100000
+               ===========================      ===========
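+
+               For example, to inject a combination of FAULT_KMALLOC and
+               FAULT_KVMALLOC faults on a hypothetical disk::
+
+                   # echo 0x3 > /sys/fs/f2fs/sda/inject_type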
 
 What:          /sys/fs/f2fs/<disk>/discard_io_aware_gran
 Date:          January 2023
diff --git a/Documentation/ABI/testing/sysfs-fs-virtiofs b/Documentation/ABI/testing/sysfs-fs-virtiofs
new file mode 100644 (file)
index 0000000..4839dbc
--- /dev/null
@@ -0,0 +1,11 @@
+What:          /sys/fs/virtiofs/<n>/tag
+Date:          Feb 2024
+Contact:       virtio-fs@lists.linux.dev
+Description:
+               [RO] The mount "tag" that can be used to mount this filesystem.
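+
+               For example, a hypothetical first instance could be mounted
+               with::
+
+                   # mount -t virtiofs "$(cat /sys/fs/virtiofs/1/tag)" /mnt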
+
+What:          /sys/fs/virtiofs/<n>/device
+Date:          Feb 2024
+Contact:       virtio-fs@lists.linux.dev
+Description:
+               Symlink to the virtio device that exports this filesystem.
index 02b2bb60c2969c70ac47676d99847eac171d9ce0..dfd755201142f1c8f53416ed82010b316f75ee49 100644 (file)
@@ -23,3 +23,9 @@ Date:         Feb 2021
 Contact:       Minchan Kim <minchan@kernel.org>
 Description:
                the number of pages CMA API failed to allocate
+
+What:          /sys/kernel/mm/cma/<cma-heap-name>/release_pages_success
+Date:          Feb 2024
+Contact:       Anshuman Khandual <anshuman.khandual@arm.com>
+Description:
+               the number of pages the CMA API succeeded in releasing
index bfa5b8288d8d1161669961b999a2a89de3c0eb6d..dad4d5ffd78656bdd1840677c0a9ce89a08b732d 100644 (file)
@@ -34,7 +34,9 @@ Description:  Writing 'on' or 'off' to this file makes the kdamond starts or
                kdamond.  Writing 'update_schemes_tried_bytes' to the file
                updates only '.../tried_regions/total_bytes' files of this
                kdamond.  Writing 'clear_schemes_tried_regions' to the file
-               removes contents of the 'tried_regions' directory.
+               removes contents of the 'tried_regions' directory.  Writing
+               'update_schemes_effective_quotas' to the file updates
+               '.../quotas/effective_bytes' files of this kdamond.
 
 What:          /sys/kernel/mm/damon/admin/kdamonds/<K>/pid
 Date:          Mar 2022
@@ -208,6 +210,12 @@ Contact:   SeongJae Park <sj@kernel.org>
 Description:   Writing to and reading from this file sets and gets the size
                quota of the scheme in bytes.
 
+What:          /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/effective_bytes
+Date:          Feb 2024
+Contact:       SeongJae Park <sj@kernel.org>
+Description:   Reading from this file gets the effective size quota of the
+               scheme in bytes, which is adjusted for the time quota and goals.
+
 What:          /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/reset_interval_ms
 Date:          Mar 2022
 Contact:       SeongJae Park <sj@kernel.org>
@@ -221,6 +229,12 @@ Description:       Writing a number 'N' to this file creates the number of
                directories for setting automatic tuning of the scheme's
                aggressiveness named '0' to 'N-1' under the goals/ directory.
 
+What:          /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/target_metric
+Date:          Feb 2024
+Contact:       SeongJae Park <sj@kernel.org>
+Description:   Writing to and reading from this file sets and gets the quota
+               auto-tuning goal metric.
+
 What:          /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/goals/<G>/target_value
 Date:          Nov 2023
 Contact:       SeongJae Park <sj@kernel.org>
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy
new file mode 100644 (file)
index 0000000..8ac327f
--- /dev/null
@@ -0,0 +1,4 @@
+What:          /sys/kernel/mm/mempolicy/
+Date:          January 2024
+Contact:       Linux memory management mailing list <linux-mm@kvack.org>
+Description:   Interface for Mempolicy
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave b/Documentation/ABI/testing/sysfs-kernel-mm-mempolicy-weighted-interleave
new file mode 100644 (file)
index 0000000..0b7972d
--- /dev/null
@@ -0,0 +1,25 @@
+What:          /sys/kernel/mm/mempolicy/weighted_interleave/
+Date:          January 2024
+Contact:       Linux memory management mailing list <linux-mm@kvack.org>
+Description:   Configuration Interface for the Weighted Interleave policy
+
+What:          /sys/kernel/mm/mempolicy/weighted_interleave/nodeN
+Date:          January 2024
+Contact:       Linux memory management mailing list <linux-mm@kvack.org>
+Description:   Weight configuration interface for nodeN
+
+               The interleave weight for a memory node (N). These weights are
+               utilized by tasks which have set their mempolicy to
+               MPOL_WEIGHTED_INTERLEAVE.
+
+               These weights only affect new allocations, and changes at runtime
+               will not cause migrations on already allocated pages.
+
+               The minimum weight for a node is always 1.
+
+               Minimum weight: 1
+               Maximum weight: 255
+
+               Writing an empty string or `0` will reset the weight to the
+               system default. The system default may be set by the kernel
+               or drivers at boot or during hotplug events.
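+
+               A hypothetical example, assuming nodes 0 and 1 exist::
+
+                   # echo 5 > /sys/kernel/mm/mempolicy/weighted_interleave/node0
+                   # echo 2 > /sys/kernel/mm/mempolicy/weighted_interleave/node1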
index bced9e4b6e089912be3c2a5c1fcf80ea9f98d60f..0f714fc945acf4213c99dbc18166b1424c3a3700 100644 (file)
@@ -65,11 +65,11 @@ Defines the beginning of the text section. In general, _stext indicates
 the kernel start address. Used to convert a virtual address from the
 direct kernel map to a physical address.
 
-vmap_area_list
---------------
+VMALLOC_START
+-------------
 
-Stores the virtual area list. makedumpfile gets the vmalloc start value
-from this variable and its value is necessary for vmalloc translation.
+Stores the base address of the vmalloc area. makedumpfile gets this value
+since it is necessary for vmalloc translation.
 
 mem_map
 -------
index fa871d53641c6ffc3258a295d5ba225134ba1178..bb884c14b2f679dba3a36ba89755a1eca2fe6db2 100644 (file)
                        bit 4: print ftrace buffer
                        bit 5: print all printk messages in buffer
                        bit 6: print all CPUs backtrace (if available in the arch)
+                       bit 7: print only tasks in uninterruptible (blocked) state
                        *Be aware* that this option may print a _lot_ of lines,
                        so there are risks of losing older messages in the log.
                        Use this option carefully, maybe worth to setup a
index db1ef29438e16c019f73050be7a747be20643a2a..cd45145cde686acf711258d5c7030f8b76b077bb 100644 (file)
@@ -49,6 +49,10 @@ Module parameters
   visl_dprintk_frame_start, visl_dprintk_nframes, but controls the dumping of
   buffer data through debugfs instead.
 
+- tpg_verbose: Write extra information on each output frame to ease debugging
+  the API. When set to true, the output frames are not stable for a given input
+  as some information like pointers or queue status will be added to them.
+
 What is the default use case for this driver?
 ---------------------------------------------
 
@@ -57,8 +61,12 @@ This assumes that a working client is run against visl and that the ftrace and
 OUTPUT buffer data is subsequently used to debug a work-in-progress
 implementation.
 
-Information on reference frames, their timestamps, the status of the OUTPUT and
-CAPTURE queues and more can be read directly from the CAPTURE buffers.
+Even though no video decoding is actually done, the output frames can be
+compared against a reference for a given input, except when tpg_verbose is
+set to true.
+
+Depending on the tpg_verbose parameter value, information on reference frames,
+their timestamps, the status of the OUTPUT and CAPTURE queues and more can be
+read directly from the CAPTURE buffers.
 
 Supported codecs
 ----------------
index 58ac25b2c385c44054ac54b350fde3ffe1c9e6e1..b6f658c0997ec351b0c2cbc16c66ca3c35fe675b 100644 (file)
@@ -60,7 +60,7 @@ all configurable using the following module options:
 - node_types:
 
        which devices should each driver instance create. An array of
-       hexadecimal values, one for each instance. The default is 0x1d3d.
+       hexadecimal values, one for each instance. The default is 0xe1d3d.
        Each value is a bitmask with the following meaning:
 
                - bit 0: Video Capture node
index 343e25b252f430f687c6549437b3f15fec86d261..af05ae6170184f9e24f2daeac298dcf88e04e340 100644 (file)
@@ -117,6 +117,33 @@ milliseconds.
 
 1 second by default.
 
+quota_mem_pressure_us
+---------------------
+
+Desired level of memory pressure-stall time in microseconds.
+
+While keeping the caps that are set by other quotas, DAMON_RECLAIM
+automatically increases and decreases the effective level of the quota, aiming
+for this level of memory pressure to be incurred.  System-wide ``some`` memory
+PSI in microseconds per quota reset interval (``quota_reset_interval_ms``) is
+collected and compared to this value to see if the aim is satisfied.  A value
+of zero disables this auto-tuning feature.
+
+Disabled by default.
+
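+For example, the below hypothetical command aims for 10 milliseconds of
+``some`` memory PSI per reset interval, assuming the module parameters are
+exposed under the usual sysfs path::
+
+    # echo 10000 > /sys/module/damon_reclaim/parameters/quota_mem_pressure_us
+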
+quota_autotune_feedback
+-----------------------
+
+User-specifiable feedback for auto-tuning of the effective quota.
+
+While keeping the caps that are set by other quotas, DAMON_RECLAIM
+automatically increases and decreases the effective level of the quota, aiming
+to receive this feedback with a value of ``10,000`` from the user.
+DAMON_RECLAIM assumes the feedback value and the quota are positively
+proportional.  A value of zero disables this auto-tuning feature.
+
+Disabled by default.
+
 wmarks_interval
 ---------------
 
index 9d23144bf985013c693d7a27c4fa4d2db97cdf90..6fce035fdbf5c74d5c79160167130ad76020a48f 100644 (file)
@@ -83,10 +83,10 @@ comma (",").
     │ │ │ │ │ │ │ │ sz/min,max
     │ │ │ │ │ │ │ │ nr_accesses/min,max
     │ │ │ │ │ │ │ │ age/min,max
-    │ │ │ │ │ │ │ :ref:`quotas <sysfs_quotas>`/ms,bytes,reset_interval_ms
+    │ │ │ │ │ │ │ :ref:`quotas <sysfs_quotas>`/ms,bytes,reset_interval_ms,effective_bytes
     │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
     │ │ │ │ │ │ │ │ :ref:`goals <sysfs_schemes_quota_goals>`/nr_goals
-    │ │ │ │ │ │ │ │ │ 0/target_value,current_value
+    │ │ │ │ │ │ │ │ │ 0/target_metric,target_value,current_value
     │ │ │ │ │ │ │ :ref:`watermarks <sysfs_watermarks>`/metric,interval_us,high,mid,low
     │ │ │ │ │ │ │ :ref:`filters <sysfs_filters>`/nr_filters
     │ │ │ │ │ │ │ │ 0/type,matching,memcg_id
@@ -153,6 +153,9 @@ Users can write below commands for the kdamond to the ``state`` file.
 - ``clear_schemes_tried_regions``: Clear the DAMON-based operating scheme
   action tried regions directory for each DAMON-based operation scheme of the
   kdamond.
+- ``update_schemes_effective_quotas``: Update the contents of
+  ``effective_bytes`` files for each DAMON-based operation scheme of the
+  kdamond.  For more details, refer to :ref:`quotas directory <sysfs_quotas>`.
 
 If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
 
@@ -180,19 +183,14 @@ In each context directory, two files (``avail_operations`` and ``operations``)
 and three directories (``monitoring_attrs``, ``targets``, and ``schemes``)
 exist.
 
-DAMON supports multiple types of monitoring operations, including those for
-virtual address space and the physical address space.  You can get the list of
-available monitoring operations set on the currently running kernel by reading
+DAMON supports multiple types of :ref:`monitoring operations
+<damon_design_configurable_operations_set>`, including those for virtual address
+space and the physical address space.  You can get the list of available
+monitoring operations set on the currently running kernel by reading
 ``avail_operations`` file.  Based on the kernel configuration, the file will
-list some or all of below keywords.
-
- - vaddr: Monitor virtual address spaces of specific processes
- - fvaddr: Monitor fixed virtual address ranges
- - paddr: Monitor the physical address space of the system
-
-Please refer to :ref:`regions sysfs directory <sysfs_regions>` for detailed
-differences between the operations sets in terms of the monitoring target
-regions.
+list different available operation sets.  Please refer to the :ref:`design
+<damon_operations_set>` for the list of all available operation sets and their
+brief explanations.
 
 You can set and get what type of monitoring operations DAMON will use for the
 context by writing one of the keywords listed in ``avail_operations`` file and
@@ -247,17 +245,11 @@ process to the ``pid_target`` file.
 targets/<N>/regions
 -------------------
 
-When ``vaddr`` monitoring operations set is being used (``vaddr`` is written to
-the ``contexts/<N>/operations`` file), DAMON automatically sets and updates the
-monitoring target regions so that entire memory mappings of target processes
-can be covered.  However, users could want to set the initial monitoring region
-to specific address ranges.
-
-In contrast, DAMON do not automatically sets and updates the monitoring target
-regions when ``fvaddr`` or ``paddr`` monitoring operations sets are being used
-(``fvaddr`` or ``paddr`` have written to the ``contexts/<N>/operations``).
-Therefore, users should set the monitoring target regions by themselves in the
-cases.
+In case of ``fvaddr`` or ``paddr`` monitoring operations sets, users are
+required to set the monitoring target address ranges.  In case of ``vaddr``
+operations set, it is not mandatory, but users can optionally set the initial
+monitoring region to specific address ranges.  Please refer to the :ref:`design
+<damon_design_vaddr_target_regions_construction>` for more details.
 
 For such cases, users can explicitly set the initial monitoring target regions
 as they want, by writing proper values to the files under this directory.
@@ -302,27 +294,8 @@ In each scheme directory, five directories (``access_pattern``, ``quotas``,
 
 The ``action`` file is for setting and getting the scheme's :ref:`action
 <damon_design_damos_action>`.  The keywords that can be written to and read
-from the file and their meaning are as below.
-
-Note that support of each action depends on the running DAMON operations set
-:ref:`implementation <sysfs_context>`.
-
- - ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``.
-   Supported by ``vaddr`` and ``fvaddr`` operations set.
- - ``cold``: Call ``madvise()`` for the region with ``MADV_COLD``.
-   Supported by ``vaddr`` and ``fvaddr`` operations set.
- - ``pageout``: Call ``madvise()`` for the region with ``MADV_PAGEOUT``.
-   Supported by ``vaddr``, ``fvaddr`` and ``paddr`` operations set.
- - ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``.
-   Supported by ``vaddr`` and ``fvaddr`` operations set.
- - ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``.
-   Supported by ``vaddr`` and ``fvaddr`` operations set.
- - ``lru_prio``: Prioritize the region on its LRU lists.
-   Supported by ``paddr`` operations set.
- - ``lru_deprio``: Deprioritize the region on its LRU lists.
-   Supported by ``paddr`` operations set.
- - ``stat``: Do nothing but count the statistics.
-   Supported by all operations sets.
+from the file and their meaning are the same as those in the list in the
+:ref:`design doc <damon_design_damos_action>`.
 
 The ``apply_interval_us`` file is for setting and getting the scheme's
 :ref:`apply_interval <damon_design_damos>` in microseconds.
@@ -350,8 +323,9 @@ schemes/<N>/quotas/
 The directory for the :ref:`quotas <damon_design_damos_quotas>` of the given
 DAMON-based operation scheme.
 
-Under ``quotas`` directory, three files (``ms``, ``bytes``,
-``reset_interval_ms``) and two directores (``weights`` and ``goals``) exist.
+Under ``quotas`` directory, four files (``ms``, ``bytes``,
+``reset_interval_ms``, ``effective_bytes``) and two directories (``weights``
+and ``goals``) exist.
 
 You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
 ``reset interval`` in milliseconds by writing the values to the three files,
@@ -359,7 +333,17 @@ respectively.  Then, DAMON tries to use only up to ``time quota`` milliseconds
 for applying the ``action`` to memory regions of the ``access_pattern``, and to
 apply the action to only up to ``bytes`` bytes of memory regions within the
 ``reset_interval_ms``.  Setting both ``ms`` and ``bytes`` zero disables the
-quota limits.
+quota limits unless at least one :ref:`goal <sysfs_schemes_quota_goals>` is
+set.
+
+The time quota is internally transformed to a size quota.  Between the
+transformed size quota and the user-specified size quota, the smaller one is
+applied.  Based on the user-specified :ref:`goal <sysfs_schemes_quota_goals>`,
+the effective size quota is further adjusted.  Reading ``effective_bytes``
+returns the current effective size quota.  The file is not updated in real
+time, so users should ask the DAMON sysfs interface to update the content of
+the file by writing the special keyword ``update_schemes_effective_quotas`` to
+the relevant ``kdamonds/<N>/state`` file.
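+
+For example, assuming the DAMON sysfs root as the working directory, the below
+hypothetical commands update and then read the effective size quota::
+
+    # echo update_schemes_effective_quotas > kdamonds/0/state
+    # cat kdamonds/0/contexts/0/schemes/0/quotas/effective_bytes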
 
 Under ``weights`` directory, three files (``sz_permil``,
 ``nr_accesses_permil``, and ``age_permil``) exist.
@@ -382,11 +366,11 @@ number (``N``) to the file creates the number of child directories named ``0``
 to ``N-1``.  Each directory represents each goal and current achievement.
 Among the multiple feedback, the best one is used.
 
-Each goal directory contains two files, namely ``target_value`` and
-``current_value``.  Users can set and get any number to those files to set the
-feedback.  User space main workload's latency or throughput, system metrics
-like free memory ratio or memory pressure stall time (PSI) could be example
-metrics for the values.  Note that users should write
+Each goal directory contains three files, namely ``target_metric``,
+``target_value`` and ``current_value``.  Users can set and get the three
+parameters for the quota auto-tuning goals that are specified in the
+:ref:`design doc <damon_design_damos_quotas_auto_tuning>` by writing to and
+reading from each of the files.  Note that users should further write
 ``commit_schemes_quota_goals`` to the ``state`` file of the :ref:`kdamond
 directory <sysfs_kdamond>` to pass the feedback to DAMON.
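+
+For example, the below hypothetical commands, run from the DAMON sysfs root,
+set a goal of half a second of ``some`` memory PSI per reset interval and
+commit it (the ``some_mem_psi_us`` metric keyword is an assumption)::
+
+    # cd /sys/kernel/mm/damon/admin
+    # echo some_mem_psi_us > kdamonds/0/contexts/0/schemes/0/quotas/goals/0/target_metric
+    # echo 500000 > kdamonds/0/contexts/0/schemes/0/quotas/goals/0/target_value
+    # echo commit_schemes_quota_goals > kdamonds/0/state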
 
@@ -579,11 +563,11 @@ monitoring results recording.
 While the monitoring is turned on, you could record the tracepoint events and
 show results using tracepoint supporting tools like ``perf``.  For example::
 
-    # echo on > monitor_on
+    # echo on > kdamonds/0/state
     # perf record -e damon:damon_aggregated &
     # sleep 5
     # kill 9 $(pidof perf)
-    # echo off > monitor_on
+    # echo off > kdamonds/0/state
     # perf script
     kdamond.0 46568 [027] 79357.842179: damon:damon_aggregated: target_id=0 nr_regions=11 122509119488-135708762112: 0 864
     [...]
@@ -628,9 +612,17 @@ debugfs Interface (DEPRECATED!)
   move, please report your usecase to damon@lists.linux.dev and
   linux-mm@kvack.org.
 
-DAMON exports eight files, ``attrs``, ``target_ids``, ``init_regions``,
-``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` and
-``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``.
+DAMON exports nine files, ``DEPRECATED``, ``attrs``, ``target_ids``,
+``init_regions``, ``schemes``, ``monitor_on_DEPRECATED``, ``kdamond_pid``,
+``mk_contexts`` and ``rm_contexts`` under its debugfs directory,
+``<debugfs>/damon/``.
+
+
+``DEPRECATED`` is a read-only file for the DAMON debugfs interface deprecation
+notice.  Reading it returns the deprecation notice, as below::
+
+    # cat DEPRECATED
+    DAMON debugfs interface is deprecated, so users should move to DAMON_SYSFS. If you cannot, please report your usecase to damon@lists.linux.dev and linux-mm@kvack.org.
 
 
 Attributes
@@ -755,19 +747,17 @@ Action
 ~~~~~~
 
 The ``<action>`` is a predefined integer for memory management :ref:`actions
-<damon_design_damos_action>`.  The supported numbers and their meanings are as
-below.
-
- - 0: Call ``madvise()`` for the region with ``MADV_WILLNEED``.  Ignored if
-   ``target`` is ``paddr``.
- - 1: Call ``madvise()`` for the region with ``MADV_COLD``.  Ignored if
-   ``target`` is ``paddr``.
- - 2: Call ``madvise()`` for the region with ``MADV_PAGEOUT``.
- - 3: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``.  Ignored if
-   ``target`` is ``paddr``.
- - 4: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``.  Ignored if
-   ``target`` is ``paddr``.
- - 5: Do nothing but count the statistics
+<damon_design_damos_action>`.  The mapping between the ``<action>`` values and
+the memory management actions is as below.  For the detailed meaning of each
+action and the DAMON operations sets supporting it, please refer to the list
+in the :ref:`design doc <damon_design_damos_action>`.
+
+ - 0: ``willneed``
+ - 1: ``cold``
+ - 2: ``pageout``
+ - 3: ``hugepage``
+ - 4: ``nohugepage``
+ - 5: ``stat``
 
 Quota
 ~~~~~
@@ -848,16 +838,16 @@ Turning On/Off
 
 Setting the files as described above doesn't incur effect unless you explicitly
 start the monitoring.  You can start, stop, and check the current status of the
-monitoring by writing to and reading from the ``monitor_on`` file.  Writing
-``on`` to the file starts the monitoring of the targets with the attributes.
-Writing ``off`` to the file stops those.  DAMON also stops if every target
-process is terminated.  Below example commands turn on, off, and check the
-status of DAMON::
+monitoring by writing to and reading from the ``monitor_on_DEPRECATED`` file.
+Writing ``on`` to the file starts the monitoring of the targets with the
+attributes.  Writing ``off`` to the file stops those.  DAMON also stops if
+every target process is terminated.  Below example commands turn on, off, and
+check the status of DAMON::
 
     # cd <debugfs>/damon
-    # echo on > monitor_on
-    # echo off > monitor_on
-    # cat monitor_on
+    # echo on > monitor_on_DEPRECATED
+    # echo off > monitor_on_DEPRECATED
+    # cat monitor_on_DEPRECATED
     off
 
 Please note that you cannot write to the above-mentioned debugfs files while
@@ -873,11 +863,11 @@ can get the pid of the thread by reading the ``kdamond_pid`` file.  When the
 monitoring is turned off, reading the file returns ``none``. ::
 
     # cd <debugfs>/damon
-    # cat monitor_on
+    # cat monitor_on_DEPRECATED
     off
     # cat kdamond_pid
     none
-    # echo on > monitor_on
+    # echo on > monitor_on_DEPRECATED
     # cat kdamond_pid
     18594
 
@@ -907,5 +897,5 @@ directory by putting the name of the context to the ``rm_contexts`` file. ::
     # ls foo
     # ls: cannot access 'foo': No such file or directory
 
-Note that ``mk_contexts``, ``rm_contexts``, and ``monitor_on`` files are in the
-root directory only.
+Note that ``mk_contexts``, ``rm_contexts``, and ``monitor_on_DEPRECATED`` files
+are in the root directory only.
index eca38fa81e0f98521a2963cec536de98bc2297ef..a70f20ce1ffb4ffd0cdcb8e575dce5547c76e73a 100644 (file)
@@ -250,6 +250,15 @@ MPOL_PREFERRED_MANY
        can fall back to all existing numa nodes. This is effectively
        MPOL_PREFERRED allowed for a mask rather than a single node.
 
+MPOL_WEIGHTED_INTERLEAVE
+       This mode operates the same as MPOL_INTERLEAVE, except that
+       interleaving behavior is executed based on weights set in
+       /sys/kernel/mm/mempolicy/weighted_interleave/
+
+       Weighted interleave allocates pages on nodes according to a
+       weight.  For example, if nodes [0,1] are weighted [5,2], 5 pages
+       will be allocated on node0 for every 2 pages allocated on node1.
+
 NUMA memory policy supports the following optional mode flags:
 
 MPOL_F_STATIC_NODES
index ea8e5f152edcf5d142044ce632fe13747516185c..7fd43947832f8f62206f220880f78767ea9020f1 100644 (file)
@@ -612,6 +612,9 @@ default (``MSGMNB``).
 ``msgmni`` is the maximum number of IPC queues. 32000 by default
 (``MSGMNI``).
 
+All of these parameters are set per ipc namespace. The maximum number of bytes
+in POSIX message queues is limited by ``RLIMIT_MSGQUEUE``. This limit is
+respected hierarchically in each user namespace.
 
 msg_next_id, sem_next_id, and shm_next_id (System V IPC)
 ========================================================
@@ -868,6 +871,7 @@ bit 3  print locks info if ``CONFIG_LOCKDEP`` is on
 bit 4  print ftrace buffer
 bit 5  print all printk messages in buffer
 bit 6  print all CPUs backtrace (if available in the arch)
+bit 7  print only tasks in uninterruptible (blocked) state
 =====  ============================================
 
 So for example to print tasks and memory info on panic, user can::
@@ -1292,15 +1296,20 @@ are doing anyway :)
 shmall
 ======
 
-This parameter sets the total amount of shared memory pages that
-can be used system wide. Hence, ``shmall`` should always be at least
-``ceil(shmmax/PAGE_SIZE)``.
+This parameter sets the total amount of shared memory pages that can be used
+inside an ipc namespace. Shared memory pages are counted separately for each
+ipc namespace, and the count is not inherited. Hence, ``shmall`` should always
+be at least ``ceil(shmmax/PAGE_SIZE)``.
 
 If you are not sure what the default ``PAGE_SIZE`` is on your Linux
 system, you can run the following command::
 
        # getconf PAGE_SIZE
 
+To reduce or disable the ability to allocate shared memory, create a new ipc
+namespace, set this parameter to the required value, and prohibit the creation
+of new ipc namespaces in the current user namespace; alternatively, cgroups
+can be used.
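+
+A hypothetical example, entering a new ipc namespace with unshare(1) and
+disabling shared memory allocation inside it (``./workload`` is a
+placeholder)::
+
+       # unshare --ipc sh -c 'echo 0 > /proc/sys/kernel/shmall; ./workload'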
 
 shmmax
 ======
index d56f298a9d7cae155790a49ed446e6d9e0ad0902..d7de44f5339d43aee128091930ead29511060925 100644 (file)
@@ -169,7 +169,7 @@ Error reports
 A typical KASAN report looks like this::
 
     ==================================================================
-    BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [test_kasan]
+    BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [kasan_test]
     Write of size 1 at addr ffff8801f44ec37b by task insmod/2760
 
     CPU: 1 PID: 2760 Comm: insmod Not tainted 4.19.0-rc3+ #698
@@ -179,8 +179,8 @@ A typical KASAN report looks like this::
      print_address_description+0x73/0x280
      kasan_report+0x144/0x187
      __asan_report_store1_noabort+0x17/0x20
-     kmalloc_oob_right+0xa8/0xbc [test_kasan]
-     kmalloc_tests_init+0x16/0x700 [test_kasan]
+     kmalloc_oob_right+0xa8/0xbc [kasan_test]
+     kmalloc_tests_init+0x16/0x700 [kasan_test]
      do_one_initcall+0xa5/0x3ae
      do_init_module+0x1b6/0x547
      load_module+0x75df/0x8070
@@ -200,8 +200,8 @@ A typical KASAN report looks like this::
      save_stack+0x43/0xd0
      kasan_kmalloc+0xa7/0xd0
      kmem_cache_alloc_trace+0xe1/0x1b0
-     kmalloc_oob_right+0x56/0xbc [test_kasan]
-     kmalloc_tests_init+0x16/0x700 [test_kasan]
+     kmalloc_oob_right+0x56/0xbc [kasan_test]
+     kmalloc_tests_init+0x16/0x700 [kasan_test]
      do_one_initcall+0xa5/0x3ae
      do_init_module+0x1b6/0x547
      load_module+0x75df/0x8070
@@ -531,15 +531,15 @@ When a test passes::
 
 When a test fails due to a failed ``kmalloc``::
 
-        # kmalloc_large_oob_right: ASSERTION FAILED at lib/test_kasan.c:163
+        # kmalloc_large_oob_right: ASSERTION FAILED at mm/kasan/kasan_test.c:245
         Expected ptr is not null, but is
-        not ok 4 - kmalloc_large_oob_right
+        not ok 5 - kmalloc_large_oob_right
 
 When a test fails due to a missing KASAN report::
 
-        # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:974
+        # kmalloc_double_kzfree: EXPECTATION FAILED at mm/kasan/kasan_test.c:709
         KASAN failure expected in "kfree_sensitive(ptr)", but none occurred
-        not ok 44 - kmalloc_double_kzfree
+        not ok 28 - kmalloc_double_kzfree
 
 
 At the end the cumulative status of all KASAN tests is printed. On success::
@@ -555,7 +555,7 @@ There are a few ways to run KUnit-compatible KASAN tests.
 1. Loadable module
 
    With ``CONFIG_KUNIT`` enabled, KASAN-KUnit tests can be built as a loadable
-   module and run by loading ``test_kasan.ko`` with ``insmod`` or ``modprobe``.
+   module and run by loading ``kasan_test.ko`` with ``insmod`` or ``modprobe``.
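+
+   For example, assuming the test module is available for the running
+   kernel::
+
+       # modprobe kasan_test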
 
 2. Built-In
 
index 129cf698fa8a66fd2be5111074319da545f4cc98..5e08e3a6a97b108c8371668af468c395b3e41413 100644 (file)
@@ -64,9 +64,6 @@ override DTC_FLAGS := \
        -Wno-unique_unit_address \
        -Wunique_unit_address_if_enabled
 
-# Disable undocumented compatible checks until warning free
-override DT_CHECKER_FLAGS ?=
-
 $(obj)/processed-schema.json: $(DT_DOCS) $(src)/.yamllint check_dtschema_version FORCE
        $(call if_changed_rule,chkdt)
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt
deleted file mode 100644 (file)
index 323905a..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-Mediatek hifsys controller
-============================
-
-The Mediatek hifsys controller provides various clocks and reset
-outputs to the system.
-
-Required Properties:
-
-- compatible: Should be:
-       - "mediatek,mt2701-hifsys", "syscon"
-       - "mediatek,mt7622-hifsys", "syscon"
-       - "mediatek,mt7623-hifsys", "mediatek,mt2701-hifsys", "syscon"
-- #clock-cells: Must be 1
-
-The hifsys controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-hifsys: clock-controller@1a000000 {
-       compatible = "mediatek,mt2701-hifsys", "syscon";
-       reg = <0 0x1a000000 0 0x1000>;
-       #clock-cells = <1>;
-       #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pciesys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pciesys.txt
deleted file mode 100644 (file)
index d179a61..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-MediaTek PCIESYS controller
-============================
-
-The MediaTek PCIESYS controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
-       - "mediatek,mt7622-pciesys", "syscon"
-       - "mediatek,mt7629-pciesys", "syscon"
-- #clock-cells: Must be 1
-- #reset-cells: Must be 1
-
-The PCIESYS controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-pciesys: pciesys@1a100800 {
-       compatible = "mediatek,mt7622-pciesys", "syscon";
-       reg = <0 0x1a100800 0 0x1000>;
-       #clock-cells = <1>;
-       #reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,ssusbsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,ssusbsys.txt
deleted file mode 100644 (file)
index 7cb02c9..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-MediaTek SSUSBSYS controller
-============================
-
-The MediaTek SSUSBSYS controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
-       - "mediatek,mt7622-ssusbsys", "syscon"
-       - "mediatek,mt7629-ssusbsys", "syscon"
-- #clock-cells: Must be 1
-- #reset-cells: Must be 1
-
-The SSUSBSYS controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-ssusbsys: ssusbsys@1a000000 {
-       compatible = "mediatek,mt7622-ssusbsys", "syscon";
-       reg = <0 0x1a000000 0 0x1000>;
-       #clock-cells = <1>;
-       #reset-cells = <1>;
-};
index 851f48ead92710525977bad8aa0ca80d105f2195..f53c430f648c9728f81d202055cf9f432421f199 100644 (file)
@@ -6,18 +6,6 @@ berlin SoCs are now Synaptics' SoCs now.
 
 ---------------------------------------------------------------
 
-Work in progress statement:
-
-Device tree files and bindings applying to Marvell Berlin SoCs and boards are
-considered "unstable". Any Marvell Berlin device tree binding may change at any
-time. Be sure to use a device tree binary and a kernel image generated from the
-same source tree.
-
-Please refer to Documentation/devicetree/bindings/ABI.rst for a definition of a
-stable binding/ABI.
-
----------------------------------------------------------------
-
 Boards with a SoC of the Marvell Berlin family, e.g. Armada 1500
 shall have the following properties:
 
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt2701-hifsys.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt2701-hifsys.yaml
new file mode 100644 (file)
index 0000000..9e7c725
--- /dev/null
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt2701-hifsys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek HIFSYS clock and reset controller
+
+description:
+  The MediaTek HIFSYS controller provides various clocks and reset outputs to
+  the system.
+
+maintainers:
+  - Matthias Brugger <matthias.bgg@gmail.com>
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - mediatek,mt2701-hifsys
+          - mediatek,mt7622-hifsys
+      - items:
+          - enum:
+              - mediatek,mt7623-hifsys
+          - const: mediatek,mt2701-hifsys
+
+  reg:
+    maxItems: 1
+
+  "#clock-cells":
+    const: 1
+    description: The available clocks are defined in dt-bindings/clock/mt*-clk.h
+
+  "#reset-cells":
+    const: 1
+
+required:
+  - reg
+  - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    clock-controller@1a000000 {
+        compatible = "mediatek,mt2701-hifsys";
+        reg = <0x1a000000 0x1000>;
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt7622-pciesys.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt7622-pciesys.yaml
new file mode 100644 (file)
index 0000000..c77111d
--- /dev/null
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt7622-pciesys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek PCIESYS clock and reset controller
+
+description:
+  The MediaTek PCIESYS controller provides various clocks to the system.
+
+maintainers:
+  - Matthias Brugger <matthias.bgg@gmail.com>
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt7622-pciesys
+      - mediatek,mt7629-pciesys
+
+  reg:
+    maxItems: 1
+
+  "#clock-cells":
+    const: 1
+    description: The available clocks are defined in dt-bindings/clock/mt*-clk.h
+
+  "#reset-cells":
+    const: 1
+
+required:
+  - reg
+  - "#clock-cells"
+  - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    clock-controller@1a100800 {
+        compatible = "mediatek,mt7622-pciesys";
+        reg = <0x1a100800 0x1000>;
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/clock/mediatek,mt7622-ssusbsys.yaml b/Documentation/devicetree/bindings/clock/mediatek,mt7622-ssusbsys.yaml
new file mode 100644 (file)
index 0000000..da93ecc
--- /dev/null
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mediatek,mt7622-ssusbsys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek SSUSBSYS clock and reset controller
+
+description:
+  The MediaTek SSUSBSYS controller provides various clocks to the system.
+
+maintainers:
+  - Matthias Brugger <matthias.bgg@gmail.com>
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt7622-ssusbsys
+      - mediatek,mt7629-ssusbsys
+
+  reg:
+    maxItems: 1
+
+  "#clock-cells":
+    const: 1
+    description: The available clocks are defined in dt-bindings/clock/mt*-clk.h
+
+  "#reset-cells":
+    const: 1
+
+required:
+  - reg
+  - "#clock-cells"
+  - "#reset-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    clock-controller@1a000000 {
+        compatible = "mediatek,mt7622-ssusbsys";
+        reg = <0x1a000000 0x1000>;
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+    };
diff --git a/Documentation/devicetree/bindings/clock/mobileye,eyeq5-clk.yaml b/Documentation/devicetree/bindings/clock/mobileye,eyeq5-clk.yaml
new file mode 100644 (file)
index 0000000..2d4f2cd
--- /dev/null
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/clock/mobileye,eyeq5-clk.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mobileye EyeQ5 clock controller
+
+description:
+  The EyeQ5 clock controller handles 10 read-only PLLs derived from the main
+  crystal clock. It also exposes one divider clock, a child of one of the PLLs.
+  Its registers live in a shared region called OLB.
+
+maintainers:
+  - Grégory Clement <gregory.clement@bootlin.com>
+  - Théo Lebrun <theo.lebrun@bootlin.com>
+  - Vladimir Kondratiev <vladimir.kondratiev@mobileye.com>
+
+properties:
+  compatible:
+    const: mobileye,eyeq5-clk
+
+  reg:
+    maxItems: 2
+
+  reg-names:
+    items:
+      - const: plls
+      - const: ospi
+
+  "#clock-cells":
+    const: 1
+
+  clocks:
+    maxItems: 1
+    description:
+      Input parent clock to all PLLs. Expected to be the main crystal.
+
+  clock-names:
+    items:
+      - const: ref
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - "#clock-cells"
+  - clocks
+  - clock-names
+
+additionalProperties: false
index f369fa34e00cf8d5a6bfe5313d2719c1c08d8ab9..f57aceddac6bed80406da4c87f225d0b52da0d27 100644 (file)
@@ -53,6 +53,9 @@ properties:
   power-domains:
     maxItems: 1
 
+  vdd-gfx-supply:
+    description: Regulator supply for the VDD_GFX pads
+
   '#clock-cells':
     const: 1
 
@@ -74,6 +77,12 @@ required:
   - '#reset-cells'
   - '#power-domain-cells'
 
+# Require that power-domains and vdd-gfx-supply are not both present
+not:
+  required:
+    - power-domains
+    - vdd-gfx-supply
+
 additionalProperties: false
 
 examples:
index 03fa30fe9253932535be68154b807ad43fe61495..e0f4d692728c920a31bb88e8bc77aca79111da56 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Q6SSTOP clock Controller
 
 maintainers:
-  - Govind Singh <govinds@codeaurora.org>
+  - Bjorn Andersson <andersson@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/clock/qcom,sc7180-mss.yaml b/Documentation/devicetree/bindings/clock/qcom,sc7180-mss.yaml
deleted file mode 100644 (file)
index 873a2f9..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/clock/qcom,sc7180-mss.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Qualcomm Modem Clock Controller on SC7180
-
-maintainers:
-  - Taniya Das <quic_tdas@quicinc.com>
-
-description: |
-  Qualcomm modem clock control module provides the clocks on SC7180.
-
-  See also:: include/dt-bindings/clock/qcom,mss-sc7180.h
-
-properties:
-  compatible:
-    const: qcom,sc7180-mss
-
-  clocks:
-    items:
-      - description: gcc_mss_mfab_axi clock from GCC
-      - description: gcc_mss_nav_axi clock from GCC
-      - description: gcc_mss_cfg_ahb clock from GCC
-
-  clock-names:
-    items:
-      - const: gcc_mss_mfab_axis
-      - const: gcc_mss_nav_axi
-      - const: cfg_ahb
-
-  '#clock-cells':
-    const: 1
-
-  reg:
-    maxItems: 1
-
-required:
-  - compatible
-  - reg
-  - clocks
-  - '#clock-cells'
-
-additionalProperties: false
-
-examples:
-  - |
-    #include <dt-bindings/clock/qcom,gcc-sc7180.h>
-    clock-controller@41a8000 {
-      compatible = "qcom,sc7180-mss";
-      reg = <0x041a8000 0x8000>;
-      clocks = <&gcc GCC_MSS_MFAB_AXIS_CLK>,
-               <&gcc GCC_MSS_NAV_AXI_CLK>,
-               <&gcc GCC_MSS_CFG_AHB_CLK>;
-      clock-names = "gcc_mss_mfab_axis",
-                    "gcc_mss_nav_axi",
-                    "cfg_ahb";
-      #clock-cells = <1>;
-    };
-...
index c752c8985a536e9868492223746170300aa5c8ad..cdc5ded59fe5e0b18e4a80387de4e7318f3d6fc2 100644 (file)
@@ -36,6 +36,8 @@ properties:
       - samsung,exynos850-cmu-aud
       - samsung,exynos850-cmu-cmgp
       - samsung,exynos850-cmu-core
+      - samsung,exynos850-cmu-cpucl0
+      - samsung,exynos850-cmu-cpucl1
       - samsung,exynos850-cmu-dpu
       - samsung,exynos850-cmu-g3d
       - samsung,exynos850-cmu-hsi
@@ -152,6 +154,46 @@ allOf:
             - const: dout_core_mmc_embd
             - const: dout_core_sss
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: samsung,exynos850-cmu-cpucl0
+
+    then:
+      properties:
+        clocks:
+          items:
+            - description: External reference clock (26 MHz)
+            - description: CPUCL0 switch clock (from CMU_TOP)
+            - description: CPUCL0 debug clock (from CMU_TOP)
+
+        clock-names:
+          items:
+            - const: oscclk
+            - const: dout_cpucl0_switch
+            - const: dout_cpucl0_dbg
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: samsung,exynos850-cmu-cpucl1
+
+    then:
+      properties:
+        clocks:
+          items:
+            - description: External reference clock (26 MHz)
+            - description: CPUCL1 switch clock (from CMU_TOP)
+            - description: CPUCL1 debug clock (from CMU_TOP)
+
+        clock-names:
+          items:
+            - const: oscclk
+            - const: dout_cpucl1_switch
+            - const: dout_cpucl1_dbg
+
   - if:
       properties:
         compatible:
index dc808e2f83272a6eedd46ccec21f4c4e6e45ce96..b370a10a23a64cffda2e277e01363abca416ba3f 100644 (file)
@@ -12,7 +12,7 @@ maintainers:
 
 description: |
   FSD clock controller consist of several clock management unit
-  (CMU), which generates clocks for various inteernal SoC blocks.
+  (CMU), which generates clocks for various internal SoC blocks.
   The root clock comes from external OSC clock (24 MHz).
 
   All available clocks are defined as preprocessor macros in
index 0b7383b3106b51aa937bb25f569d78a5bea474d7..7dc0748444fde4767e2635802e0a1ff47c88f0a7 100644 (file)
@@ -12,7 +12,11 @@ maintainers:
 
 properties:
   compatible:
-    const: atmel,at91sam9g46-aes
+    oneOf:
+      - const: atmel,at91sam9g46-aes
+      - items:
+          - const: microchip,sam9x7-aes
+          - const: atmel,at91sam9g46-aes
 
   reg:
     maxItems: 1
index ee2ffb0343251f5a2ce7c4d94286ae898db43baa..d378c53314dd06d4d8f60c223452eae645f21d10 100644 (file)
@@ -12,7 +12,11 @@ maintainers:
 
 properties:
   compatible:
-    const: atmel,at91sam9g46-sha
+    oneOf:
+      - const: atmel,at91sam9g46-sha
+      - items:
+          - const: microchip,sam9x7-sha
+          - const: atmel,at91sam9g46-sha
 
   reg:
     maxItems: 1
index 3d6ed24b1b006480d9e4d86793078b9a5422eb72..6a441f79efea52be4d45bd5afd856039f01743d6 100644 (file)
@@ -12,7 +12,11 @@ maintainers:
 
 properties:
   compatible:
-    const: atmel,at91sam9g46-tdes
+    oneOf:
+      - const: atmel,at91sam9g46-tdes
+      - items:
+          - const: microchip,sam9x7-tdes
+          - const: atmel,at91sam9g46-tdes
 
   reg:
     maxItems: 1
index 09e43157cc71fe343a05020ebe9b7c2b2f0fc5d8..e91bc7dc6ad3d74efe3f1e736ff2a956314619ba 100644 (file)
@@ -14,6 +14,7 @@ properties:
     items:
       - enum:
           - qcom,sa8775p-inline-crypto-engine
+          - qcom,sc7180-inline-crypto-engine
           - qcom,sm8450-inline-crypto-engine
           - qcom,sm8550-inline-crypto-engine
           - qcom,sm8650-inline-crypto-engine
index a48bd381063aaf8475b2919ba097494456f54f67..e285e382d4ecce479554fc865545353d7d5f250e 100644 (file)
@@ -45,6 +45,7 @@ properties:
       - items:
           - enum:
               - qcom,sc7280-qce
+              - qcom,sm6350-qce
               - qcom,sm8250-qce
               - qcom,sm8350-qce
               - qcom,sm8450-qce
diff --git a/Documentation/devicetree/bindings/display/atmel/atmel,hlcdc-display-controller.yaml b/Documentation/devicetree/bindings/display/atmel/atmel,hlcdc-display-controller.yaml
new file mode 100644 (file)
index 0000000..29ed424
--- /dev/null
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/atmel/atmel,hlcdc-display-controller.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel's High LCD Controller (HLCDC)
+
+maintainers:
+  - Nicolas Ferre <nicolas.ferre@microchip.com>
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+  - Claudiu Beznea <claudiu.beznea@tuxon.dev>
+
+description:
+  The LCD Controller (LCDC) consists of logic for transferring LCD image
+  data from an external display buffer to a TFT LCD panel. The LCDC has one
+  display input buffer per layer that fetches pixels through the single bus
+  host interface and a look-up table to allow palettized display
+  configurations.
+
+properties:
+  compatible:
+    const: atmel,hlcdc-display-controller
+
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 0
+
+  port@0:
+    $ref: /schemas/graph.yaml#/$defs/port-base
+    unevaluatedProperties: false
+    description:
+      Output endpoint of the controller, connecting the LCD panel signals.
+
+    properties:
+      '#address-cells':
+        const: 1
+
+      '#size-cells':
+        const: 0
+
+      reg:
+        maxItems: 1
+
+      endpoint:
+        $ref: /schemas/media/video-interfaces.yaml#
+        unevaluatedProperties: false
+        description:
+          Endpoint connecting the LCD panel signals.
+
+        properties:
+          bus-width:
+            enum: [ 12, 16, 18, 24 ]
+
+required:
+  - '#address-cells'
+  - '#size-cells'
+  - compatible
+  - port@0
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt b/Documentation/devicetree/bindings/display/atmel/hlcdc-dc.txt
deleted file mode 100644 (file)
index 923aea2..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-Device-Tree bindings for Atmel's HLCDC (High LCD Controller) DRM driver
-
-The Atmel HLCDC Display Controller is subdevice of the HLCDC MFD device.
-See ../../mfd/atmel-hlcdc.txt for more details.
-
-Required properties:
- - compatible: value should be "atmel,hlcdc-display-controller"
- - pinctrl-names: the pin control state names. Should contain "default".
- - pinctrl-0: should contain the default pinctrl states.
- - #address-cells: should be set to 1.
- - #size-cells: should be set to 0.
-
-Required children nodes:
- Children nodes are encoding available output ports and their connections
- to external devices using the OF graph representation (see ../graph.txt).
- At least one port node is required.
-
-Optional properties in grandchild nodes:
- Any endpoint grandchild node may specify a desired video interface
- according to ../../media/video-interfaces.txt, specifically
- - bus-width: recognized values are <12>, <16>, <18> and <24>, and
-   override any output mode selection heuristic, forcing "rgb444",
-   "rgb565", "rgb666" and "rgb888" respectively.
-
-Example:
-
-       hlcdc: hlcdc@f0030000 {
-               compatible = "atmel,sama5d3-hlcdc";
-               reg = <0xf0030000 0x2000>;
-               interrupts = <36 IRQ_TYPE_LEVEL_HIGH 0>;
-               clocks = <&lcdc_clk>, <&lcdck>, <&clk32k>;
-               clock-names = "periph_clk","sys_clk", "slow_clk";
-
-               hlcdc-display-controller {
-                       compatible = "atmel,hlcdc-display-controller";
-                       pinctrl-names = "default";
-                       pinctrl-0 = <&pinctrl_lcd_base &pinctrl_lcd_rgb888>;
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-
-                       port@0 {
-                               #address-cells = <1>;
-                               #size-cells = <0>;
-                               reg = <0>;
-
-                               hlcdc_panel_output: endpoint@0 {
-                                       reg = <0>;
-                                       remote-endpoint = <&panel_input>;
-                               };
-                       };
-               };
-
-               hlcdc_pwm: hlcdc-pwm {
-                       compatible = "atmel,hlcdc-pwm";
-                       pinctrl-names = "default";
-                       pinctrl-0 = <&pinctrl_lcd_pwm>;
-                       #pwm-cells = <3>;
-               };
-       };
-
-Example 2: With a video interface override to force rgb565; as above
-but with these changes/additions:
-
-       &hlcdc {
-               hlcdc-display-controller {
-                       pinctrl-names = "default";
-                       pinctrl-0 = <&pinctrl_lcd_base &pinctrl_lcd_rgb565>;
-
-                       port@0 {
-                               hlcdc_panel_output: endpoint@0 {
-                                       bus-width = <16>;
-                               };
-                       };
-               };
-       };
index 1c2be8d6f6334052058b203ea79ef89cbf50a049..0681fc49aa1b08b6e0ee6324aaedc67bca7090b2 100644 (file)
@@ -120,13 +120,19 @@ allOf:
           maxItems: 1
         clock-names:
           maxItems: 1
+  - if:
+      properties:
+        compatible:
+          const: fsl,imx6sx-lcdif
+    then:
+      required:
+        - power-domains
   - if:
       properties:
         compatible:
           contains:
             enum:
               - fsl,imx6sl-lcdif
-              - fsl,imx6sx-lcdif
               - fsl,imx8mm-lcdif
               - fsl,imx8mn-lcdif
               - fsl,imx8mp-lcdif
index 6ba3236839215adaaa14e74a293ef9a4500c9507..187840bb76c7a50705ca97b82e31a53f7e805816 100644 (file)
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
 %YAML 1.2
 ---
 $id: http://devicetree.org/schemas/display/panel/visionox,r66451.yaml#
index ec2d7a789ffe25cff58b5778c38568f127983145..0f2501f72ccace37c5d4f046b41cab597b367699 100644 (file)
@@ -28,6 +28,9 @@ properties:
       - items:
           - const: allwinner,sun8i-r40-dma
           - const: allwinner,sun50i-a64-dma
+      - items:
+          - const: allwinner,sun50i-h616-dma
+          - const: allwinner,sun50i-a100-dma
 
   reg:
     maxItems: 1
@@ -59,10 +62,11 @@ required:
 if:
   properties:
     compatible:
-      enum:
-        - allwinner,sun20i-d1-dma
-        - allwinner,sun50i-a100-dma
-        - allwinner,sun50i-h6-dma
+      contains:
+        enum:
+          - allwinner,sun20i-d1-dma
+          - allwinner,sun50i-a100-dma
+          - allwinner,sun50i-h6-dma
 
 then:
   properties:
index 437db0c62339facea75e27a057af158c8a01f041..aa51d278cb67b41e6730a207aa0f45021e954fe9 100644 (file)
@@ -25,6 +25,7 @@ properties:
           - fsl,imx8qm-edma
           - fsl,imx93-edma3
           - fsl,imx93-edma4
+          - fsl,imx95-edma5
       - items:
           - const: fsl,ls1028a-edma
           - const: fsl,vf610-edma
@@ -83,6 +84,7 @@ allOf:
               - fsl,imx8qm-edma
               - fsl,imx93-edma3
               - fsl,imx93-edma4
+              - fsl,imx95-edma5
     then:
       properties:
         "#dma-cells":
index b95dd8db5a30a06a89745a4016eaf79448e2316b..37135fa024f9b822e3e451b0a6e9d0b5d4717bca 100644 (file)
@@ -92,7 +92,8 @@ properties:
               description: needs firmware more than ver 2
           - Shared ASRC: 23
           - SAI: 24
-          - HDMI Audio: 25
+          - Multi SAI: 25
+          - HDMI Audio: 26
 
        The third cell: transfer priority ID
          enum:
diff --git a/Documentation/devicetree/bindings/dma/marvell,mmp-dma.yaml b/Documentation/devicetree/bindings/dma/marvell,mmp-dma.yaml
new file mode 100644 (file)
index 0000000..d447d52
--- /dev/null
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/marvell,mmp-dma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell MMP DMA controller
+
+maintainers:
+  - Duje Mihanović <duje.mihanovic@skole.hr>
+
+description:
+  Marvell MMP SoCs may have two types of DMA controllers: peripheral and audio.
+
+properties:
+  compatible:
+    enum:
+      - marvell,pdma-1.0
+      - marvell,adma-1.0
+      - marvell,pxa910-squ
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    description:
+      Interrupt lines for the controller; either one line per DMA channel or a single shared line
+    minItems: 1
+
+  asram:
+    description:
+      A phandle to the SRAM pool
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+  '#dma-channels':
+    deprecated: true
+
+  '#dma-requests':
+    deprecated: true
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - '#dma-cells'
+
+allOf:
+  - $ref: dma-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - marvell,pdma-1.0
+    then:
+      properties:
+        asram: false
+    else:
+      required:
+        - asram
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    dma-controller@d4000000 {
+        compatible = "marvell,pdma-1.0";
+        reg = <0xd4000000 0x10000>;
+        interrupts = <47>;
+        #dma-cells = <2>;
+        dma-channels = <16>;
+    };
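
For completeness, a hedged sketch of a client of the controller above,
assuming the controller node carries a pdma label; the meaning and values
of the two specifier cells are placeholders matching #dma-cells = <2>:

        uart@d4017000 {
                dmas = <&pdma 4 0>, <&pdma 5 0>;
                dma-names = "rx", "tx";
        };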
diff --git a/Documentation/devicetree/bindings/dma/mediatek,mt7622-hsdma.yaml b/Documentation/devicetree/bindings/dma/mediatek,mt7622-hsdma.yaml
new file mode 100644 (file)
index 0000000..3f1e120
--- /dev/null
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/mediatek,mt7622-hsdma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek High-Speed DMA Controller
+
+maintainers:
+  - Sean Wang <sean.wang@mediatek.com>
+
+allOf:
+  - $ref: dma-controller.yaml#
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt7622-hsdma
+      - mediatek,mt7623-hsdma
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    const: hsdma
+
+  power-domains:
+    maxItems: 1
+
+  "#dma-cells":
+    description: Channel number
+    const: 1
+
+required:
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - power-domains
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/mt2701-clk.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/mt2701-power.h>
+
+    dma-controller@1b007000 {
+        compatible = "mediatek,mt7623-hsdma";
+        reg = <0x1b007000 0x1000>;
+        interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_LOW>;
+        clocks = <&ethsys CLK_ETHSYS_HSDMA>;
+        clock-names = "hsdma";
+        power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
+        #dma-cells = <1>;
+    };
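
With #dma-cells = <1> the single specifier cell is the channel number, so a
minimal client would look like the sketch below; the node and channel are
assumed, as is a hsdma label on the controller:

        ethernet@1b100000 {
                dmas = <&hsdma 0>;      /* one cell: channel number */
                dma-names = "rx";
        };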
diff --git a/Documentation/devicetree/bindings/dma/mmp-dma.txt b/Documentation/devicetree/bindings/dma/mmp-dma.txt
deleted file mode 100644 (file)
index ec18bf0..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-* MARVELL MMP DMA controller
-
-Marvell Peripheral DMA Controller
-Used platforms: pxa688, pxa910, pxa3xx, etc
-
-Required properties:
-- compatible: Should be "marvell,pdma-1.0"
-- reg: Should contain DMA registers location and length.
-- interrupts: Either contain all of the per-channel DMA interrupts
-               or one irq for pdma device
-
-Optional properties:
-- dma-channels: Number of DMA channels supported by the controller (defaults
-  to 32 when not specified)
-- #dma-channels: deprecated
-- dma-requests: Number of DMA requestor lines supported by the controller
-  (defaults to 32 when not specified)
-- #dma-requests: deprecated
-
-"marvell,pdma-1.0"
-Used platforms: pxa25x, pxa27x, pxa3xx, pxa93x, pxa168, pxa910, pxa688.
-
-Examples:
-
-/*
- * Each channel has specific irq
- * ICU parse out irq channel from ICU register,
- * while DMA controller may not able to distinguish the irq channel
- * Using this method, interrupt-parent is required as demuxer
- * For example, pxa688 icu register 0x128, bit 0~15 is PDMA channel irq,
- * 18~21 is ADMA irq
- */
-pdma: dma-controller@d4000000 {
-             compatible = "marvell,pdma-1.0";
-             reg = <0xd4000000 0x10000>;
-             interrupts = <0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15>;
-             interrupt-parent = <&intcmux32>;
-             dma-channels = <16>;
-      };
-
-/*
- * One irq for all channels
- * Dmaengine driver (DMA controller) distinguish irq channel via
- * parsing internal register
- */
-pdma: dma-controller@d4000000 {
-             compatible = "marvell,pdma-1.0";
-             reg = <0xd4000000 0x10000>;
-             interrupts = <47>;
-             dma-channels = <16>;
-      };
-
-
-Marvell Two Channel DMA Controller used specifically for audio
-Used platforms: pxa688, pxa910
-
-Required properties:
-- compatible: Should be "marvell,adma-1.0" or "marvell,pxa910-squ"
-- reg: Should contain DMA registers location and length.
-- interrupts: Either contain all of the per-channel DMA interrupts
-               or one irq for dma device
-
-"marvell,adma-1.0" used on pxa688
-"marvell,pxa910-squ" used on pxa910
-
-Examples:
-
-/* each channel has specific irq */
-adma0: dma-controller@d42a0800 {
-             compatible = "marvell,adma-1.0";
-             reg = <0xd42a0800 0x100>;
-             interrupts = <18 19>;
-             interrupt-parent = <&intcmux32>;
-      };
-
-/* One irq for all channels */
-squ: dma-controller@d42a0800 {
-             compatible = "marvell,pxa910-squ";
-             reg = <0xd42a0800 0x100>;
-             interrupts = <46>;
-      };
diff --git a/Documentation/devicetree/bindings/dma/mtk-hsdma.txt b/Documentation/devicetree/bindings/dma/mtk-hsdma.txt
deleted file mode 100644 (file)
index 4bb3173..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-MediaTek High-Speed DMA Controller
-==================================
-
-This device follows the generic DMA bindings defined in dma/dma.txt.
-
-Required properties:
-
-- compatible:  Must be one of
-                 "mediatek,mt7622-hsdma": for MT7622 SoC
-                 "mediatek,mt7623-hsdma": for MT7623 SoC
-- reg:         Should contain the register's base address and length.
-- interrupts:  Should contain a reference to the interrupt used by this
-               device.
-- clocks:      Should be the clock specifiers corresponding to the entry in
-               clock-names property.
-- clock-names: Should contain "hsdma" entries.
-- power-domains: Phandle to the power domain that the device is part of
-- #dma-cells:  The length of the DMA specifier, must be <1>. This one cell
-               in dmas property of a client device represents the channel
-               number.
-Example:
-
-        hsdma: dma-controller@1b007000 {
-               compatible = "mediatek,mt7623-hsdma";
-               reg = <0 0x1b007000 0 0x1000>;
-               interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_LOW>;
-               clocks = <&ethsys CLK_ETHSYS_HSDMA>;
-               clock-names = "hsdma";
-               power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
-               #dma-cells = <1>;
-       };
-
-DMA clients must use the format described in dma/dma.txt file.
index 03aa067b1229f676cca5c54519ec5224413f6de0..04fc4a99a7cb539a9c8d29f10a38cc7a65066bc8 100644 (file)
@@ -46,6 +46,7 @@ properties:
               - renesas,dmac-r8a779a0     # R-Car V3U
               - renesas,dmac-r8a779f0     # R-Car S4-8
               - renesas,dmac-r8a779g0     # R-Car V4H
+              - renesas,dmac-r8a779h0     # R-Car V4M
           - const: renesas,rcar-gen4-dmac # R-Car Gen4
 
   reg: true
diff --git a/Documentation/devicetree/bindings/fpga/fpga-region.txt b/Documentation/devicetree/bindings/fpga/fpga-region.txt
deleted file mode 100644 (file)
index 528df8a..0000000
+++ /dev/null
@@ -1,479 +0,0 @@
-FPGA Region Device Tree Binding
-
-Alan Tull 2016
-
- CONTENTS
- - Introduction
- - Terminology
- - Sequence
- - FPGA Region
- - Supported Use Models
- - Device Tree Examples
- - Constraints
-
-
-Introduction
-============
-
-FPGA Regions represent FPGA's and partial reconfiguration regions of FPGA's in
-the Device Tree.  FPGA Regions provide a way to program FPGAs under device tree
-control.
-
-This device tree binding document hits some of the high points of FPGA usage and
-attempts to include terminology used by both major FPGA manufacturers.  This
-document isn't a replacement for any manufacturers specifications for FPGA
-usage.
-
-
-Terminology
-===========
-
-Full Reconfiguration
- * The entire FPGA is programmed.
-
-Partial Reconfiguration (PR)
- * A section of an FPGA is reprogrammed while the rest of the FPGA is not
-   affected.
- * Not all FPGA's support PR.
-
-Partial Reconfiguration Region (PRR)
- * Also called a "reconfigurable partition"
- * A PRR is a specific section of an FPGA reserved for reconfiguration.
- * A base (or static) FPGA image may create a set of PRR's that later may
-   be independently reprogrammed many times.
- * The size and specific location of each PRR is fixed.
- * The connections at the edge of each PRR are fixed.  The image that is loaded
-   into a PRR must fit and must use a subset of the region's connections.
- * The busses within the FPGA are split such that each region gets its own
-   branch that may be gated independently.
-
-Persona
- * Also called a "partial bit stream"
- * An FPGA image that is designed to be loaded into a PRR.  There may be
-   any number of personas designed to fit into a PRR, but only one at at time
-   may be loaded.
- * A persona may create more regions.
-
-FPGA Bridge
- * FPGA Bridges gate bus signals between a host and FPGA.
- * FPGA Bridges should be disabled while the FPGA is being programmed to
-   prevent spurious signals on the cpu bus and to the soft logic.
- * FPGA bridges may be actual hardware or soft logic on an FPGA.
- * During Full Reconfiguration, hardware bridges between the host and FPGA
-   will be disabled.
- * During Partial Reconfiguration of a specific region, that region's bridge
-   will be used to gate the busses.  Traffic to other regions is not affected.
- * In some implementations, the FPGA Manager transparently handles gating the
-   buses, eliminating the need to show the hardware FPGA bridges in the
-   device tree.
- * An FPGA image may create a set of reprogrammable regions, each having its
-   own bridge and its own split of the busses in the FPGA.
-
-FPGA Manager
- * An FPGA Manager is a hardware block that programs an FPGA under the control
-   of a host processor.
-
-Base Image
- * Also called the "static image"
- * An FPGA image that is designed to do full reconfiguration of the FPGA.
- * A base image may set up a set of partial reconfiguration regions that may
-   later be reprogrammed.
-
-    ----------------       ----------------------------------
-    |  Host CPU    |       |             FPGA               |
-    |              |       |                                |
-    |          ----|       |       -----------    --------  |
-    |          | H |       |   |==>| Bridge0 |<==>| PRR0 |  |
-    |          | W |       |   |   -----------    --------  |
-    |          |   |       |   |                            |
-    |          | B |<=====>|<==|   -----------    --------  |
-    |          | R |       |   |==>| Bridge1 |<==>| PRR1 |  |
-    |          | I |       |   |   -----------    --------  |
-    |          | D |       |   |                            |
-    |          | G |       |   |   -----------    --------  |
-    |          | E |       |   |==>| Bridge2 |<==>| PRR2 |  |
-    |          ----|       |       -----------    --------  |
-    |              |       |                                |
-    ----------------       ----------------------------------
-
-Figure 1: An FPGA set up with a base image that created three regions.  Each
-region (PRR0-2) gets its own split of the busses that is independently gated by
-a soft logic bridge (Bridge0-2) in the FPGA.  The contents of each PRR can be
-reprogrammed independently while the rest of the system continues to function.
-
-
-Sequence
-========
-
-When a DT overlay that targets an FPGA Region is applied, the FPGA Region will
-do the following:
-
- 1. Disable appropriate FPGA bridges.
- 2. Program the FPGA using the FPGA manager.
- 3. Enable the FPGA bridges.
- 4. The Device Tree overlay is accepted into the live tree.
- 5. Child devices are populated.
-
-When the overlay is removed, the child nodes will be removed and the FPGA Region
-will disable the bridges.
-
-
-FPGA Region
-===========
-
-FPGA Regions represent FPGA's and FPGA PR regions in the device tree.  An FPGA
-Region brings together the elements needed to program on a running system and
-add the child devices:
-
- * FPGA Manager
- * FPGA Bridges
- * image-specific information needed to to the programming.
- * child nodes
-
-The intended use is that a Device Tree overlay (DTO) can be used to reprogram an
-FPGA while an operating system is running.
-
-An FPGA Region that exists in the live Device Tree reflects the current state.
-If the live tree shows a "firmware-name" property or child nodes under an FPGA
-Region, the FPGA already has been programmed.  A DTO that targets an FPGA Region
-and adds the "firmware-name" property is taken as a request to reprogram the
-FPGA.  After reprogramming is successful, the overlay is accepted into the live
-tree.
-
-The base FPGA Region in the device tree represents the FPGA and supports full
-reconfiguration.  It must include a phandle to an FPGA Manager.  The base
-FPGA region will be the child of one of the hardware bridges (the bridge that
-allows register access) between the cpu and the FPGA.  If there are more than
-one bridge to control during FPGA programming, the region will also contain a
-list of phandles to the additional hardware FPGA Bridges.
-
-For partial reconfiguration (PR), each PR region will have an FPGA Region.
-These FPGA regions are children of FPGA bridges which are then children of the
-base FPGA region.  The "Full Reconfiguration to add PRR's" example below shows
-this.
-
-If an FPGA Region does not specify an FPGA Manager, it will inherit the FPGA
-Manager specified by its ancestor FPGA Region.  This supports both the case
-where the same FPGA Manager is used for all of an FPGA as well the case where
-a different FPGA Manager is used for each region.
-
-FPGA Regions do not inherit their ancestor FPGA regions' bridges.  This prevents
-shutting down bridges that are upstream from the other active regions while one
-region is getting reconfigured (see Figure 1 above).  During PR, the FPGA's
-hardware bridges remain enabled.  The PR regions' bridges will be FPGA bridges
-within the static image of the FPGA.
-
-Required properties:
-- compatible : should contain "fpga-region"
-- fpga-mgr : should contain a phandle to an FPGA Manager.  Child FPGA Regions
-       inherit this property from their ancestor regions.  An fpga-mgr property
-       in a region will override any inherited FPGA manager.
-- #address-cells, #size-cells, ranges : must be present to handle address space
-       mapping for child nodes.
-
-Optional properties:
-- firmware-name : should contain the name of an FPGA image file located on the
-       firmware search path.  If this property shows up in a live device tree
-       it indicates that the FPGA has already been programmed with this image.
-       If this property is in an overlay targeting an FPGA region, it is a
-       request to program the FPGA with that image.
-- fpga-bridges : should contain a list of phandles to FPGA Bridges that must be
-       controlled during FPGA programming along with the parent FPGA bridge.
-       This property is optional if the FPGA Manager handles the bridges.
-        If the fpga-region is  the child of an fpga-bridge, the list should not
-        contain the parent bridge.
-- partial-fpga-config : boolean, set if partial reconfiguration is to be done,
-       otherwise full reconfiguration is done.
-- external-fpga-config : boolean, set if the FPGA has already been configured
-       prior to OS boot up.
-- encrypted-fpga-config : boolean, set if the bitstream is encrypted
-- region-unfreeze-timeout-us : The maximum time in microseconds to wait for
-       bridges to successfully become enabled after the region has been
-       programmed.
-- region-freeze-timeout-us : The maximum time in microseconds to wait for
-       bridges to successfully become disabled before the region has been
-       programmed.
-- config-complete-timeout-us : The maximum time in microseconds time for the
-       FPGA to go to operating mode after the region has been programmed.
-- child nodes : devices in the FPGA after programming.
-
-In the example below, when an overlay is applied targeting fpga-region0,
-fpga_mgr is used to program the FPGA.  Two bridges are controlled during
-programming: the parent fpga_bridge0 and fpga_bridge1.  Because the region is
-the child of fpga_bridge0, only fpga_bridge1 needs to be specified in the
-fpga-bridges property.  During programming, these bridges are disabled, the
-firmware specified in the overlay is loaded to the FPGA using the FPGA manager
-specified in the region.  If FPGA programming succeeds, the bridges are
-reenabled and the overlay makes it into the live device tree.  The child devices
-are then populated.  If FPGA programming fails, the bridges are left disabled
-and the overlay is rejected.  The overlay's ranges property maps the lwhps
-bridge's region (0xff200000) and the hps bridge's region (0xc0000000) for use by
-the two child devices.
-
-Example:
-Base tree contains:
-
-       fpga_mgr: fpga-mgr@ff706000 {
-               compatible = "altr,socfpga-fpga-mgr";
-               reg = <0xff706000 0x1000
-                      0xffb90000 0x20>;
-               interrupts = <0 175 4>;
-       };
-
-       fpga_bridge0: fpga-bridge@ff400000 {
-               compatible = "altr,socfpga-lwhps2fpga-bridge";
-               reg = <0xff400000 0x100000>;
-               resets = <&rst LWHPS2FPGA_RESET>;
-               clocks = <&l4_main_clk>;
-
-               #address-cells = <1>;
-               #size-cells = <1>;
-               ranges;
-
-               fpga_region0: fpga-region0 {
-                       compatible = "fpga-region";
-                       fpga-mgr = <&fpga_mgr>;
-               };
-       };
-
-       fpga_bridge1: fpga-bridge@ff500000 {
-               compatible = "altr,socfpga-hps2fpga-bridge";
-               reg = <0xff500000 0x10000>;
-               resets = <&rst HPS2FPGA_RESET>;
-               clocks = <&l4_main_clk>;
-       };
-
-Overlay contains:
-
-/dts-v1/;
-/plugin/;
-
-&fpga_region0 {
-       #address-cells = <1>;
-       #size-cells = <1>;
-
-       firmware-name = "soc_system.rbf";
-       fpga-bridges = <&fpga_bridge1>;
-       ranges = <0x20000 0xff200000 0x100000>,
-                <0x0 0xc0000000 0x20000000>;
-
-       gpio@10040 {
-               compatible = "altr,pio-1.0";
-               reg = <0x10040 0x20>;
-               altr,ngpio = <4>;
-               #gpio-cells = <2>;
-               clocks = <2>;
-               gpio-controller;
-       };
-
-       onchip-memory {
-               device_type = "memory";
-               compatible = "altr,onchipmem-15.1";
-               reg = <0x0 0x10000>;
-       };
-};
-
-
-Supported Use Models
-====================
-
-In all cases the live DT must have the FPGA Manager, FPGA Bridges (if any), and
-a FPGA Region.  The target of the Device Tree Overlay is the FPGA Region.  Some
-uses are specific to an FPGA device.
-
- * No FPGA Bridges
-   In this case, the FPGA Manager which programs the FPGA also handles the
-   bridges behind the scenes.  No FPGA Bridge devices are needed for full
-   reconfiguration.
-
- * Full reconfiguration with hardware bridges
-   In this case, there are hardware bridges between the processor and FPGA that
-   need to be controlled during full reconfiguration.  Before the overlay is
-   applied, the live DT must include the FPGA Manager, FPGA Bridges, and a
-   FPGA Region.  The FPGA Region is the child of the bridge that allows
-   register access to the FPGA.  Additional bridges may be listed in a
-   fpga-bridges property in the FPGA region or in the device tree overlay.
-
- * Partial reconfiguration with bridges in the FPGA
-   In this case, the FPGA will have one or more PRR's that may be programmed
-   separately while the rest of the FPGA can remain active.  To manage this,
-   bridges need to exist in the FPGA that can gate the buses going to each FPGA
-   region while the buses are enabled for other sections.  Before any partial
-   reconfiguration can be done, a base FPGA image must be loaded which includes
-   PRR's with FPGA bridges.  The device tree should have an FPGA region for each
-   PRR.
-
-Device Tree Examples
-====================
-
-The intention of this section is to give some simple examples, focusing on
-the placement of the elements detailed above, especially:
- * FPGA Manager
- * FPGA Bridges
- * FPGA Region
- * ranges
- * target-path or target
-
-For the purposes of this section, I'm dividing the Device Tree into two parts,
-each with its own requirements.  The two parts are:
- * The live DT prior to the overlay being added
- * The DT overlay
-
-The live Device Tree must contain an FPGA Region, an FPGA Manager, and any FPGA
-Bridges.  The FPGA Region's "fpga-mgr" property specifies the manager by phandle
-to handle programming the FPGA.  If the FPGA Region is the child of another FPGA
-Region, the parent's FPGA Manager is used.  If FPGA Bridges need to be involved,
-they are specified in the FPGA Region by the "fpga-bridges" property.  During
-FPGA programming, the FPGA Region will disable the bridges that are in its
-"fpga-bridges" list and will re-enable them after FPGA programming has
-succeeded.
-
-The Device Tree Overlay will contain:
- * "target-path" or "target"
-   The insertion point where the contents of the overlay will go into the
-   live tree.  target-path is a full path, while target is a phandle.
- * "ranges"
-    The address space mapping from processor to FPGA bus(ses).
- * "firmware-name"
-   Specifies the name of the FPGA image file on the firmware search
-   path.  The search path is described in the firmware class documentation.
- * "partial-fpga-config"
-   This binding is a boolean and should be present if partial reconfiguration
-   is to be done.
- * child nodes corresponding to hardware that will be loaded in this region of
-   the FPGA.
-
-Device Tree Example: Full Reconfiguration without Bridges
-=========================================================
-
-Live Device Tree contains:
-       fpga_mgr0: fpga-mgr@f8007000 {
-               compatible = "xlnx,zynq-devcfg-1.0";
-               reg = <0xf8007000 0x100>;
-               interrupt-parent = <&intc>;
-               interrupts = <0 8 4>;
-               clocks = <&clkc 12>;
-               clock-names = "ref_clk";
-               syscon = <&slcr>;
-       };
-
-       fpga_region0: fpga-region0 {
-               compatible = "fpga-region";
-               fpga-mgr = <&fpga_mgr0>;
-               #address-cells = <0x1>;
-               #size-cells = <0x1>;
-               ranges;
-       };
-
-DT Overlay contains:
-
-/dts-v1/;
-/plugin/;
-
-&fpga_region0 {
-       #address-cells = <1>;
-       #size-cells = <1>;
-
-       firmware-name = "zynq-gpio.bin";
-
-       gpio1: gpio@40000000 {
-               compatible = "xlnx,xps-gpio-1.00.a";
-               reg = <0x40000000 0x10000>;
-               gpio-controller;
-               #gpio-cells = <0x2>;
-               xlnx,gpio-width= <0x6>;
-       };
-};
-
-Device Tree Example: Full Reconfiguration to add PRR's
-======================================================
-
-The base FPGA Region is specified similar to the first example above.
-
-This example programs the FPGA to have two regions that can later be partially
-configured.  Each region has its own bridge in the FPGA fabric.
-
-DT Overlay contains:
-
-/dts-v1/;
-/plugin/;
-
-&fpga_region0 {
-       #address-cells = <1>;
-       #size-cells = <1>;
-
-       firmware-name = "base.rbf";
-
-       fpga-bridge@4400 {
-               compatible = "altr,freeze-bridge-controller";
-               reg = <0x4400 0x10>;
-
-               fpga_region1: fpga-region1 {
-                       compatible = "fpga-region";
-                       #address-cells = <0x1>;
-                       #size-cells = <0x1>;
-                       ranges;
-               };
-       };
-
-       fpga-bridge@4420 {
-               compatible = "altr,freeze-bridge-controller";
-               reg = <0x4420 0x10>;
-
-               fpga_region2: fpga-region2 {
-                       compatible = "fpga-region";
-                       #address-cells = <0x1>;
-                       #size-cells = <0x1>;
-                       ranges;
-               };
-       };
-};
-
-Device Tree Example: Partial Reconfiguration
-============================================
-
-This example reprograms one of the PRR's set up in the previous example.
-
-The sequence that occurs when this overlay is similar to the above, the only
-differences are that the FPGA is partially reconfigured due to the
-"partial-fpga-config" boolean and the only bridge that is controlled during
-programming is the FPGA based bridge of fpga_region1.
-
-/dts-v1/;
-/plugin/;
-
-&fpga_region1 {
-       #address-cells = <1>;
-       #size-cells = <1>;
-
-       firmware-name = "soc_image2.rbf";
-       partial-fpga-config;
-
-       gpio@10040 {
-               compatible = "altr,pio-1.0";
-               reg = <0x10040 0x20>;
-               clocks = <0x2>;
-               altr,ngpio = <0x4>;
-               #gpio-cells = <0x2>;
-               gpio-controller;
-       };
-};
-
-Constraints
-===========
-
-It is beyond the scope of this document to fully describe all the FPGA design
-constraints required to make partial reconfiguration work[1] [2] [3], but a few
-deserve quick mention.
-
-A persona must have boundary connections that line up with those of the partition
-or region it is designed to go into.
-
-During programming, transactions through those connections must be stopped and
-the connections must be held at a fixed logic level.  This can be achieved by
-FPGA Bridges that exist on the FPGA fabric prior to the partial reconfiguration.
-
---
-[1] www.altera.com/content/dam/altera-www/global/en_US/pdfs/literature/ug/ug_partrecon.pdf
-[2] tspace.library.utoronto.ca/bitstream/1807/67932/1/Byma_Stuart_A_201411_MAS_thesis.pdf
-[3] https://www.xilinx.com/support/documentation/sw_manuals/xilinx14_1/ug702.pdf
diff --git a/Documentation/devicetree/bindings/fpga/fpga-region.yaml b/Documentation/devicetree/bindings/fpga/fpga-region.yaml
new file mode 100644 (file)
index 0000000..7755488
--- /dev/null
@@ -0,0 +1,358 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/fpga/fpga-region.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: FPGA Region
+
+maintainers:
+  - Michal Simek <michal.simek@amd.com>
+
+description: |
+  CONTENTS
+   - Introduction
+   - Terminology
+   - Sequence
+   - FPGA Region
+   - Supported Use Models
+   - Constraints
+
+
+  Introduction
+  ============
+
+  FPGA Regions represent FPGAs and partial reconfiguration regions of FPGAs in
+  the Device Tree.  FPGA Regions provide a way to program FPGAs under device tree
+  control.
+
+  The documentation hits some of the high points of FPGA usage and
+  attempts to include terminology used by both major FPGA manufacturers.  This
+  document isn't a replacement for any manufacturer's specifications for FPGA
+  usage.
+
+
+  Terminology
+  ===========
+
+  Full Reconfiguration
+   * The entire FPGA is programmed.
+
+  Partial Reconfiguration (PR)
+   * A section of an FPGA is reprogrammed while the rest of the FPGA is not
+     affected.
+   * Not all FPGAs support PR.
+
+  Partial Reconfiguration Region (PRR)
+   * Also called a "reconfigurable partition"
+   * A PRR is a specific section of an FPGA reserved for reconfiguration.
+   * A base (or static) FPGA image may create a set of PRRs that later may
+     be independently reprogrammed many times.
+   * The size and specific location of each PRR is fixed.
+   * The connections at the edge of each PRR are fixed.  The image that is loaded
+     into a PRR must fit and must use a subset of the region's connections.
+   * The busses within the FPGA are split such that each region gets its own
+     branch that may be gated independently.
+
+  Persona
+   * Also called a "partial bit stream"
+   * An FPGA image that is designed to be loaded into a PRR.  There may be
+     any number of personas designed to fit into a PRR, but only one at a time
+     may be loaded.
+   * A persona may create more regions.
+
+  FPGA Bridge
+   * FPGA Bridges gate bus signals between a host and FPGA.
+   * FPGA Bridges should be disabled while the FPGA is being programmed to
+     prevent spurious signals on the cpu bus and to the soft logic.
+   * FPGA bridges may be actual hardware or soft logic on an FPGA.
+   * During Full Reconfiguration, hardware bridges between the host and FPGA
+     will be disabled.
+   * During Partial Reconfiguration of a specific region, that region's bridge
+     will be used to gate the busses.  Traffic to other regions is not affected.
+   * In some implementations, the FPGA Manager transparently handles gating the
+     buses, eliminating the need to show the hardware FPGA bridges in the
+     device tree.
+   * An FPGA image may create a set of reprogrammable regions, each having its
+     own bridge and its own split of the busses in the FPGA.
+
+  FPGA Manager
+   * An FPGA Manager is a hardware block that programs an FPGA under the control
+     of a host processor.
+
+  Base Image
+   * Also called the "static image"
+   * An FPGA image that is designed to do full reconfiguration of the FPGA.
+   * A base image may set up a set of partial reconfiguration regions that may
+     later be reprogrammed.
+
+      ----------------       ----------------------------------
+      |  Host CPU    |       |             FPGA               |
+      |              |       |                                |
+      |          ----|       |       -----------    --------  |
+      |          | H |       |   |==>| Bridge0 |<==>| PRR0 |  |
+      |          | W |       |   |   -----------    --------  |
+      |          |   |       |   |                            |
+      |          | B |<=====>|<==|   -----------    --------  |
+      |          | R |       |   |==>| Bridge1 |<==>| PRR1 |  |
+      |          | I |       |   |   -----------    --------  |
+      |          | D |       |   |                            |
+      |          | G |       |   |   -----------    --------  |
+      |          | E |       |   |==>| Bridge2 |<==>| PRR2 |  |
+      |          ----|       |       -----------    --------  |
+      |              |       |                                |
+      ----------------       ----------------------------------
+
+  Figure 1: An FPGA set up with a base image that created three regions.  Each
+  region (PRR0-2) gets its own split of the busses that is independently gated by
+  a soft logic bridge (Bridge0-2) in the FPGA.  The contents of each PRR can be
+  reprogrammed independently while the rest of the system continues to function.
+
+
+  Sequence
+  ========
+
+  When a DT overlay that targets an FPGA Region is applied, the FPGA Region will
+  do the following:
+
+   1. Disable appropriate FPGA bridges.
+   2. Program the FPGA using the FPGA manager.
+   3. Enable the FPGA bridges.
+   4. The Device Tree overlay is accepted into the live tree.
+   5. Child devices are populated.
+
+  When the overlay is removed, the child nodes will be removed and the FPGA Region
+  will disable the bridges.
+
+
+  FPGA Region
+  ===========
+
+  FPGA Regions represent FPGAs and FPGA PR regions in the device tree.  An FPGA
+  Region brings together the elements needed to program on a running system and
+  add the child devices:
+
+   * FPGA Manager
+   * FPGA Bridges
+   * image-specific information needed to do the programming.
+   * child nodes
+
+  The intended use is that a Device Tree overlay (DTO) can be used to reprogram an
+  FPGA while an operating system is running.
+
+  An FPGA Region that exists in the live Device Tree reflects the current state.
+  If the live tree shows a "firmware-name" property or child nodes under an FPGA
+  Region, the FPGA already has been programmed.  A DTO that targets an FPGA Region
+  and adds the "firmware-name" property is taken as a request to reprogram the
+  FPGA.  After reprogramming is successful, the overlay is accepted into the live
+  tree.
+
+  The base FPGA Region in the device tree represents the FPGA and supports full
+  reconfiguration.  It must include a phandle to an FPGA Manager.  The base
+  FPGA region will be the child of one of the hardware bridges (the bridge that
+  allows register access) between the cpu and the FPGA.  If there are more than
+  one bridge to control during FPGA programming, the region will also contain a
+  list of phandles to the additional hardware FPGA Bridges.
+
+  For partial reconfiguration (PR), each PR region will have an FPGA Region.
+  These FPGA regions are children of FPGA bridges which are then children of the
+  base FPGA region.  The "Full Reconfiguration to add PRR's" example below shows
+  this.
+
+  If an FPGA Region does not specify an FPGA Manager, it will inherit the FPGA
+  Manager specified by its ancestor FPGA Region.  This supports both the case
+  where the same FPGA Manager is used for all of an FPGA as well as the case where
+  a different FPGA Manager is used for each region.
+
+  FPGA Regions do not inherit their ancestor FPGA regions' bridges.  This prevents
+  shutting down bridges that are upstream from the other active regions while one
+  region is getting reconfigured (see Figure 1 above).  During PR, the FPGA's
+  hardware bridges remain enabled.  The PR regions' bridges will be FPGA bridges
+  within the static image of the FPGA.
+
+
+  Supported Use Models
+  ====================
+
+  In all cases the live DT must have the FPGA Manager, FPGA Bridges (if any), and
+  an FPGA Region.  The target of the Device Tree Overlay is the FPGA Region.  Some
+  uses are specific to an FPGA device.
+
+   * No FPGA Bridges
+     In this case, the FPGA Manager which programs the FPGA also handles the
+     bridges behind the scenes.  No FPGA Bridge devices are needed for full
+     reconfiguration.
+
+   * Full reconfiguration with hardware bridges
+     In this case, there are hardware bridges between the processor and FPGA that
+     need to be controlled during full reconfiguration.  Before the overlay is
+     applied, the live DT must include the FPGA Manager, FPGA Bridges, and an
+     FPGA Region.  The FPGA Region is the child of the bridge that allows
+     register access to the FPGA.  Additional bridges may be listed in a
+     fpga-bridges property in the FPGA region or in the device tree overlay.
+
+   * Partial reconfiguration with bridges in the FPGA
+     In this case, the FPGA will have one or more PRRs that may be programmed
+     separately while the rest of the FPGA can remain active.  To manage this,
+     bridges need to exist in the FPGA that can gate the buses going to each FPGA
+     region while the buses are enabled for other sections.  Before any partial
+     reconfiguration can be done, a base FPGA image must be loaded which includes
+     PRRs with FPGA bridges.  The device tree should have an FPGA region for each
+     PRR.
+
+  Constraints
+  ===========
+
+  It is beyond the scope of this document to fully describe all the FPGA design
+  constraints required to make partial reconfiguration work[1] [2] [3], but a few
+  deserve quick mention.
+
+  A persona must have boundary connections that line up with those of the partition
+  or region it is designed to go into.
+
+  During programming, transactions through those connections must be stopped and
+  the connections must be held at a fixed logic level.  This can be achieved by
+  FPGA Bridges that exist on the FPGA fabric prior to the partial reconfiguration.
+
+  --
+  [1] www.altera.com/content/dam/altera-www/global/en_US/pdfs/literature/ug/ug_partrecon.pdf
+  [2] tspace.library.utoronto.ca/bitstream/1807/67932/1/Byma_Stuart_A_201411_MAS_thesis.pdf
+  [3] https://www.xilinx.com/support/documentation/sw_manuals/xilinx14_1/ug702.pdf
+
+properties:
+  $nodename:
+    pattern: "^fpga-region(@.*|-([0-9]|[1-9][0-9]+))?$"
+
+  compatible:
+    const: fpga-region
+
+  reg:
+    maxItems: 1
+
+  ranges: true
+  "#address-cells": true
+  "#size-cells": true
+
+  config-complete-timeout-us:
+    description:
+      The maximum time in microseconds for the FPGA to go to operating
+      mode after the region has been programmed.
+
+  encrypted-fpga-config:
+    type: boolean
+    description:
+      Set if the bitstream is encrypted.
+
+  external-fpga-config:
+    type: boolean
+    description:
+      Set if the FPGA has already been configured prior to OS boot up.
+
+  firmware-name:
+    maxItems: 1
+    description:
+      Should contain the name of an FPGA image file located on the firmware
+      search path. If this property shows up in a live device tree it indicates
+      that the FPGA has already been programmed with this image.
+      If this property is in an overlay targeting an FPGA region, it is
+      a request to program the FPGA with that image.
+
+  fpga-bridges:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    description:
+      Should contain a list of phandles to FPGA Bridges that must be
+      controlled during FPGA programming along with the parent FPGA bridge.
+      This property is optional if the FPGA Manager handles the bridges.
+      If the fpga-region is the child of an fpga-bridge, the list should not
+      contain the parent bridge.
+
+  fpga-mgr:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Should contain a phandle to an FPGA Manager.  Child FPGA Regions
+      inherit this property from their ancestor regions.  An fpga-mgr property
+      in a region will override any inherited FPGA manager.
+
+  partial-fpga-config:
+    type: boolean
+    description:
+      Set if partial reconfiguration is to be done, otherwise full
+      reconfiguration is done.
+
+  region-freeze-timeout-us:
+    description:
+      The maximum time in microseconds to wait for bridges to successfully
+      become disabled before the region has been programmed.
+
+  region-unfreeze-timeout-us:
+    description:
+      The maximum time in microseconds to wait for bridges to successfully
+      become enabled after the region has been programmed.
+
+required:
+  - compatible
+  - fpga-mgr
+
+additionalProperties:
+  type: object
+
+examples:
+  - |
+    /*
+     * Full Reconfiguration without Bridges (via DT overlay)
+     */
+    fpga_region0: fpga-region@0 {
+      compatible = "fpga-region";
+      reg = <0 0>;
+      #address-cells = <1>;
+      #size-cells = <1>;
+      fpga-mgr = <&fpga_mgr0>;
+      ranges = <0x10000000 0x20000000 0x10000000>;
+
+      /* DT Overlay contains: &fpga_region0 */
+      firmware-name = "zynq-gpio.bin";
+      gpio@40000000 {
+        compatible = "xlnx,xps-gpio-1.00.a";
+        reg = <0x40000000 0x10000>;
+        gpio-controller;
+        #gpio-cells = <2>;
+      };
+    };
+
+  - |
+    /*
+     * Partial reconfiguration with bridge
+     */
+    fpga_region1: fpga-region@0 {
+      compatible = "fpga-region";
+      reg = <0 0>;
+      ranges;
+      #address-cells = <1>;
+      #size-cells = <1>;
+      fpga-mgr = <&fpga_mgr1>;
+      fpga-bridges = <&fpga_bridge1>;
+      partial-fpga-config;
+
+      /* DT Overlay contains: &fpga_region1 */
+      firmware-name = "zynq-gpio-partial.bin";
+      clk: clock {
+        compatible = "fixed-factor-clock";
+        clocks = <&parentclk>;
+        #clock-cells = <0>;
+        clock-div = <2>;
+        clock-mult = <1>;
+      };
+      axi {
+        compatible = "simple-bus";
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+        gpio@40000000 {
+          compatible = "xlnx,xps-gpio-1.00.a";
+          reg = <0x40000000 0x10000>;
+          #gpio-cells = <2>;
+          gpio-controller;
+          clocks = <&clk>;
+        };
+      };
+    };
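
The overlay side of the partial-reconfiguration flow, shown in the deleted
text but not carried into the new examples, still reads the same way; a
sketch modeled on the removed example, with the firmware name and child
device purely illustrative:

        /dts-v1/;
        /plugin/;

        &fpga_region1 {
                firmware-name = "soc_image2.rbf";
                partial-fpga-config;

                gpio@10040 {
                        compatible = "altr,pio-1.0";
                        reg = <0x10040 0x20>;
                        #gpio-cells = <2>;
                        gpio-controller;
                };
        };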
index 6e81f8b755c596dc321d31be0d8a48c076d3a03c..d543fd1b8b23e866a6a43aa93e5c5d2f860a5c54 100644 (file)
@@ -1,7 +1,6 @@
 Gateworks PLD GPIO controller bindings
 
-The GPIO controller should be a child node on an I2C bus,
-see: i2c/i2c.txt for details.
+The GPIO controller should be a child node on an I2C bus.
 
 Required properties:
 - compatible: Should be "gateworks,pld-gpio"
index 9cf6137dd52413f956ed8c46e6c024efde5e27be..65155bb701a9fb3dc719c4d45b24f3c9075a5aa5 100644 (file)
@@ -9,7 +9,7 @@ title: Marvell PXA GPIO controller
 maintainers:
   - Linus Walleij <linus.walleij@linaro.org>
   - Bartosz Golaszewski <bgolaszewski@baylibre.com>
-  - Rob Herring <robh+dt@kernel.org>
+  - Rob Herring <robh@kernel.org>
 
 allOf:
   - if:
index 6adedd3ec399b90310a93e68d82eaa63325a772b..b1c13bab24722ff5ccdbb99e4bf9ecd7dc81f16d 100644 (file)
@@ -25,7 +25,9 @@ properties:
               - atmel,sama5d2-i2c
               - microchip,sam9x60-i2c
       - items:
-          - const: microchip,sama7g5-i2c
+          - enum:
+              - microchip,sama7g5-i2c
+              - microchip,sam9x7-i2c
           - const: microchip,sam9x60-i2c
 
   reg:
index 2c08f2a7cf1ee28c2862d146593919ff8f1ad119..b813f6d4810c9add9cb711614b9825f914892a0a 100644 (file)
@@ -32,7 +32,6 @@ description: |
       +-------------------------------+
 
 allOf:
-  - $ref: i2c-mux.yaml
   - $ref: /schemas/i2c/i2c-controller.yaml#
 
 properties:
@@ -41,6 +40,8 @@ properties:
 
   i2c-parent:
     $ref: /schemas/types.yaml#/definitions/phandle-array
+    items:
+      maxItems: 1
     description:
       List of phandles of I2C masters available for selection.  The first one
       will be used as default.
index 4656f5112b84e97d7642e8bec4eb2fe26c5ac8da..54d500be6aaac3bc925d5fcaa99d9bdb6b0d04c5 100644 (file)
@@ -24,6 +24,7 @@ properties:
               - fsl,imx8qm-lpi2c
               - fsl,imx8ulp-lpi2c
               - fsl,imx93-lpi2c
+              - fsl,imx95-lpi2c
           - const: fsl,imx7ulp-lpi2c
 
   reg:
index 70fb69b923c46da3441cde4bac2ae57e955496c8..b1d7d14c0be40af317b23b7fcb94342decb7f99e 100644 (file)
@@ -96,6 +96,6 @@ examples:
         interrupts = <43 2>;
         interrupt-parent = <&mpic>;
         clock-frequency = <400000>;
-        i2c-scl-clk-low-timeout-us = <10000>;
+        i2c-transfer-timeout-us = <10000>;
     };
 ...
index 2d7bb998b0e9d2651daf0efae9dd312f2ddf5ae9..9aa0585200c9cd24615d171b299c19ade27d861b 100644 (file)
@@ -71,6 +71,23 @@ properties:
     description: A voltage regulator supplying power to the chip. On PCA9846
       the regulator supplies power to VDD2 (core logic) and optionally to VDD1.
 
+  maxim,isolate-stuck-channel:
+    type: boolean
+    description: Allows the use of non-faulty channels while a stuck channel is
+      isolated from the upstream bus. If not set, all channels are isolated from
+      the upstream bus until the fault is cleared.
+
+  maxim,send-flush-out-sequence:
+    type: boolean
+    description: Send a flush-out sequence to stuck auxiliary buses
+      automatically after a stuck channel is detected.
+
+  maxim,preconnection-wiggle-test-enable:
+    type: boolean
+    description: Send a STOP condition to the auxiliary buses when the switch
+      register activates a channel to detect a stuck-high fault. On fault, the
+      channel is isolated from the upstream bus.
+
 required:
   - compatible
   - reg
@@ -95,6 +112,19 @@ allOf:
         "#interrupt-cells": false
         interrupt-controller: false
 
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              enum:
+                - maxim,max7357
+    then:
+      properties:
+        maxim,isolate-stuck-channel: false
+        maxim,send-flush-out-sequence: false
+        maxim,preconnection-wiggle-test-enable: false
+
 unevaluatedProperties: false
 
 examples:
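
A hedged sketch of how the three new Maxim flags might sit on a MAX7357
node; the bus address and the parent bus are assumptions:

        i2c-mux@70 {
                compatible = "maxim,max7357";
                reg = <0x70>;
                #address-cells = <1>;
                #size-cells = <0>;
                maxim,isolate-stuck-channel;
                maxim,send-flush-out-sequence;
                maxim,preconnection-wiggle-test-enable;
        };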
index 31386a8d7684547261e95bb3d864e77869fd0b0e..e89ee361741e8fb78bcd35cad7510eb5330164c6 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Marvell MMP I2C controller
 
 maintainers:
-  - Rob Herring <robh+dt@kernel.org>
+  - Rob Herring <robh@kernel.org>
 
 allOf:
   - $ref: /schemas/i2c/i2c-controller.yaml#
diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt
deleted file mode 100644 (file)
index fc3dd7e..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-Generic device tree bindings for I2C busses
-===========================================
-
-This document describes generic bindings which can be used to describe I2C
-busses and their child devices in a device tree.
-
-Required properties (per bus)
------------------------------
-
-- #address-cells  - should be <1>. Read more about addresses below.
-- #size-cells     - should be <0>.
-- compatible      - name of I2C bus controller
-
-For other required properties e.g. to describe register sets,
-clocks, etc. check the binding documentation of the specific driver.
-
-The cells properties above define that an address of children of an I2C bus
-are described by a single value.
-
-Optional properties (per bus)
------------------------------
-
-These properties may not be supported by all drivers. However, if a driver
-wants to support one of the below features, it should adapt these bindings.
-
-- clock-frequency
-       frequency of bus clock in Hz.
-
-- i2c-bus
-       For I2C adapters that have child nodes that are a mixture of both I2C
-       devices and non-I2C devices, the 'i2c-bus' subnode can be used for
-       populating I2C devices. If the 'i2c-bus' subnode is present, only
-       subnodes of this will be considered as I2C slaves. The properties,
-       '#address-cells' and '#size-cells' must be defined under this subnode
-       if present.
-
-- i2c-scl-falling-time-ns
-       Number of nanoseconds the SCL signal takes to fall; t(f) in the I2C
-       specification.
-
-- i2c-scl-internal-delay-ns
-       Number of nanoseconds the IP core additionally needs to setup SCL.
-
-- i2c-scl-rising-time-ns
-       Number of nanoseconds the SCL signal takes to rise; t(r) in the I2C
-       specification.
-
-- i2c-sda-falling-time-ns
-       Number of nanoseconds the SDA signal takes to fall; t(f) in the I2C
-       specification.
-
-- i2c-analog-filter
-       Enable analog filter for i2c lines.
-
-- i2c-digital-filter
-       Enable digital filter for i2c lines.
-
-- i2c-digital-filter-width-ns
-       Width of spikes which can be filtered by digital filter
-       (i2c-digital-filter). This width is specified in nanoseconds.
-
-- i2c-analog-filter-cutoff-frequency
-       Frequency that the analog filter (i2c-analog-filter) uses to distinguish
-       which signal to filter. Signal with higher frequency than specified will
-       be filtered out. Only lower frequency will pass (this is applicable to
-       a low-pass analog filter). Typical value should be above the normal
-       i2c bus clock frequency (clock-frequency).
-       Specified in Hz.
-
-- multi-master
-       states that there is another master active on this bus. The OS can use
-       this information to adapt power management to keep the arbitration awake
-       all the time, for example. Can not be combined with 'single-master'.
-
-- pinctrl
-       add extra pinctrl to configure SCL/SDA pins to GPIO function for bus
-       recovery, call it "gpio" or "recovery" (deprecated) state
-
-- scl-gpios
-       specify the gpio related to SCL pin. Used for GPIO bus recovery.
-
-- sda-gpios
-       specify the gpio related to SDA pin. Optional for GPIO bus recovery.
-
-- single-master
-       states that there is no other master active on this bus. The OS can use
-       this information to detect a stalled bus more reliably, for example.
-       Can not be combined with 'multi-master'.
-
-- smbus
-       states that additional SMBus restrictions and features apply to this bus.
-       An example of feature is SMBusHostNotify. Examples of restrictions are
-       more reserved addresses and timeout definitions.
-
-- smbus-alert
-       states that the optional SMBus-Alert feature apply to this bus.
-
-- mctp-controller
-       indicates that the system is accessible via this bus as an endpoint for
-       MCTP over I2C transport.
-
-Required properties (per child device)
---------------------------------------
-
-- compatible
-       name of I2C slave device
-
-- reg
-       One or many I2C slave addresses. These are usually a 7 bit addresses.
-       However, flags can be attached to an address. I2C_TEN_BIT_ADDRESS is
-       used to mark a 10 bit address. It is needed to avoid the ambiguity
-       between e.g. a 7 bit address of 0x50 and a 10 bit address of 0x050
-       which, in theory, can be on the same bus.
-       Another flag is I2C_OWN_SLAVE_ADDRESS to mark addresses on which we
-       listen to be devices ourselves.
-
-Optional properties (per child device)
---------------------------------------
-
-These properties may not be supported by all drivers. However, if a driver
-wants to support one of the below features, it should adapt these bindings.
-
-- host-notify
-       device uses SMBus host notify protocol instead of interrupt line.
-
-- interrupts
-       interrupts used by the device.
-
-- interrupt-names
-       "irq", "wakeup" and "smbus_alert" names are recognized by I2C core,
-       other names are left to individual drivers.
-
-- reg-names
-       Names of map programmable addresses.
-       It can contain any map needing another address than default one.
-
-- wakeup-source
-       device can be used as a wakeup source.
-
-Binding may contain optional "interrupts" property, describing interrupts
-used by the device. I2C core will assign "irq" interrupt (or the very first
-interrupt if not using interrupt names) as primary interrupt for the slave.
-
-Alternatively, devices supporting SMBus Host Notify, and connected to
-adapters that support this feature, may use "host-notify" property. I2C
-core will create a virtual interrupt for Host Notify and assign it as
-primary interrupt for the slave.
-
-Also, if device is marked as a wakeup source, I2C core will set up "wakeup"
-interrupt for the device. If "wakeup" interrupt name is not present in the
-binding, then primary interrupt will be used as wakeup interrupt.
index b8319dcf3d8aece2f81372f33926e977d1fe9b8a..8676335e9e94bf43ffd45126f10e695046770ca1 100644 (file)
@@ -21,8 +21,7 @@ description: |
   See ../firmware/nvidia,tegra186-bpmp.yaml for details of the BPMP
   binding.
 
-  This node represents an I2C controller. See ../i2c/i2c.txt for details
-  of the core I2C binding.
+  This node represents an I2C controller.
 
 properties:
   compatible:
index c4ace5585e1e22d32b17abc5a859f5c3b298b258..51b220da461b068d3af8614c8fc8d14071169de9 100644 (file)
@@ -53,6 +53,7 @@ properties:
               - renesas,i2c-r8a779a0     # R-Car V3U
               - renesas,i2c-r8a779f0     # R-Car S4-8
               - renesas,i2c-r8a779g0     # R-Car V4H
+              - renesas,i2c-r8a779h0     # R-Car V4M
           - const: renesas,rcar-gen4-i2c # R-Car Gen4
 
   reg:
index fcc3dbff9c9a555848dded95a5c84d7234bb693b..47be5d9a32d4257e1f9dbc5590806cf956cd5e44 100644 (file)
@@ -57,7 +57,7 @@ examples:
   - |
     #include <dt-bindings/interrupt-controller/arm-gic.h>
 
-    i3c-master@2000 {
+    i3c@2000 {
         compatible = "aspeed,ast2600-i3c";
         reg = <0x2000 0x1000>;
         #address-cells = <3>;
index cc40d25358ecfb6e7d95cc6983575f42dfe57263..cad6d53d0e2e35ddaaad35215ec93dd182f28319 100644 (file)
@@ -41,7 +41,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    i3c-master@d040000 {
+    i3c@d040000 {
         compatible = "cdns,i3c-master";
         clocks = <&coreclock>, <&i3csysclock>;
         clock-names = "pclk", "sysclk";
index c816e295d5651f372c4884751c9954a953d8838a..113957ebe9f1df4a12928d66c04bbb1cf3defc78 100644 (file)
@@ -17,7 +17,7 @@ description: |
 
 properties:
   $nodename:
-    pattern: "^i3c-master@[0-9a-f]+$"
+    pattern: "^i3c@[0-9a-f]+$"
 
   "#address-cells":
     const: 3
@@ -71,7 +71,7 @@ patternProperties:
     description: |
       I2C child, should be named: <device-type>@<i2c-address>
 
-      All properties described in Documentation/devicetree/bindings/i2c/i2c.txt
+      All properties described in dtschema schemas/i2c/i2c-controller.yaml
       are valid here, except the reg property whose content is changed.
 
     properties:
@@ -153,7 +153,7 @@ additionalProperties: true
 
 examples:
   - |
-    i3c-master@d040000 {
+    i3c@d040000 {
         compatible = "cdns,i3c-master";
         clocks = <&coreclock>, <&i3csysclock>;
         clock-names = "pclk", "sysclk";
index 5dda8cb44cdbc026d8ffeb28711bd8506e1c3cab..39bb1a1784c9bc8b260dda3293ad38c3b4cc7d7c 100644 (file)
@@ -43,7 +43,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    i3c-master@a0000000 {
+    i3c@a0000000 {
       compatible = "mipi-i3c-hci";
       reg = <0xa0000000 0x2000>;
       interrupts = <89>;
index 133855f11b4f5d34ccd8d3c7fb32ef4cff5aa29c..c56ff77677f1706fa9bddff4bb7be6a2e5637d54 100644 (file)
@@ -48,7 +48,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    i3c-master@a0000000 {
+    i3c@a0000000 {
         compatible = "silvaco,i3c-master-v1";
         clocks = <&zynqmp_clk 71>, <&fclk>, <&sclk>;
         clock-names = "pclk", "fast_clk", "slow_clk";
index 7a76fd32962ac77e5cecfcd5cdd3ef996f1c0559..c0e805e531be231817bac96206b4e0624b4881bf 100644 (file)
@@ -35,7 +35,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    i3c-master@2000 {
+    i3c@2000 {
         compatible = "snps,dw-i3c-master-1.00a";
         #address-cells = <3>;
         #size-cells = <0>;
index 5efceb313879172c390f6e656efc1929c4c2c82e..c384bf0bb25dab599608735d0163196c2fcd7e84 100644 (file)
@@ -49,7 +49,6 @@ patternProperties:
     $ref: input.yaml#
     properties:
       label:
-        $ref: /schemas/types.yaml#/definitions/string
         description: Descriptive name of the key
 
       linux,code: true
diff --git a/Documentation/devicetree/bindings/input/atmel,captouch.txt b/Documentation/devicetree/bindings/input/atmel,captouch.txt
deleted file mode 100644 (file)
index fe9ee5c..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-Device tree bindings for Atmel capacitive touch device, typically
-an Atmel touch sensor connected to AtmegaXX MCU running firmware
-based on Qtouch library.
-
-The node for this device must be a child of a I2C controller node, as the
-device communicates via I2C.
-
-Required properties:
-
-       compatible:     Must be "atmel,captouch".
-       reg:            The I2C slave address of the device.
-       interrupts:     Property describing the interrupt line the device
-                       is connected to. The device only has one interrupt
-                       source.
-       linux,keycodes: Specifies an array of numeric keycode values to
-                       be used for reporting button presses. The array can
-                       contain up to 8 entries.
-
-Optional properties:
-
-       autorepeat:     Enables the Linux input system's autorepeat
-                       feature on the input device.
-
-Example:
-
-       atmel-captouch@51 {
-               compatible = "atmel,captouch";
-               reg = <0x51>;
-               interrupt-parent = <&tlmm>;
-               interrupts = <67 IRQ_TYPE_EDGE_FALLING>;
-               linux,keycodes = <BTN_0>, <BTN_1>,
-                       <BTN_2>, <BTN_3>,
-                       <BTN_4>, <BTN_5>,
-                       <BTN_6>, <BTN_7>;
-               autorepeat;
-       };
diff --git a/Documentation/devicetree/bindings/input/atmel,captouch.yaml b/Documentation/devicetree/bindings/input/atmel,captouch.yaml
new file mode 100644 (file)
index 0000000..f747709
--- /dev/null
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/atmel,captouch.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel capacitive touch device
+
+maintainers:
+  - Dharma balasubiramani <dharma.b@microchip.com>
+
+description:
+  Atmel capacitive touch device, typically an Atmel touch sensor connected to
+  AtmegaXX MCU running firmware based on Qtouch library.
+
+allOf:
+  - $ref: input.yaml#
+
+properties:
+  compatible:
+    const: atmel,captouch
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  linux,keycodes:
+    minItems: 1
+    maxItems: 8
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - linux,keycodes
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/input/linux-event-codes.h>
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      touch@51 {
+        compatible = "atmel,captouch";
+        reg = <0x51>;
+        interrupt-parent = <&tlmm>;
+        interrupts = <67 IRQ_TYPE_EDGE_FALLING>;
+        linux,keycodes = <BTN_0>, <BTN_1>,
+                         <BTN_2>, <BTN_3>,
+                         <BTN_4>, <BTN_5>,
+                         <BTN_6>, <BTN_7>;
+        autorepeat;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/input/samsung,s3c6410-keypad.yaml b/Documentation/devicetree/bindings/input/samsung,s3c6410-keypad.yaml
new file mode 100644 (file)
index 0000000..a53569a
--- /dev/null
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/samsung,s3c6410-keypad.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung SoC series Keypad Controller
+
+description:
+  Samsung SoC Keypad controller is used to interface a SoC with a matrix-type
+  keypad device. The keypad controller supports multiple row and column lines.
+  A key can be placed at each intersection of a unique row and a unique column.
+  The keypad controller can sense a key-press and key-release and report the
+  event using an interrupt to the CPU.
+
+maintainers:
+  - Krzysztof Kozlowski <krzk@kernel.org>
+
+properties:
+  compatible:
+    enum:
+      - samsung,s3c6410-keypad
+      - samsung,s5pv210-keypad
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: keypad
+
+  interrupts:
+    maxItems: 1
+
+  wakeup-source: true
+
+  linux,input-no-autorepeat:
+    type: boolean
+    description:
+      Do not enable the autorepeat feature.
+
+  linux,input-wakeup:
+    type: boolean
+    deprecated: true
+
+  samsung,keypad-num-columns:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Number of column lines connected to the keypad controller.
+
+  samsung,keypad-num-rows:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Number of row lines connected to the keypad controller.
+
+patternProperties:
+  '^key-[0-9a-z]+$':
+    type: object
+    $ref: input.yaml#
+    additionalProperties: false
+    description:
+      Each key connected to the keypad controller is represented as a child
+      node to the keypad controller device node.
+
+    properties:
+      keypad,column:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description: The column number to which the key is connected.
+
+      keypad,row:
+        $ref: /schemas/types.yaml#/definitions/uint32
+        description: The row number to which the key is connected.
+
+      linux,code: true
+
+    required:
+      - keypad,column
+      - keypad,row
+      - linux,code
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - samsung,keypad-num-columns
+  - samsung,keypad-num-rows
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/exynos4.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    keypad@100a0000 {
+        compatible = "samsung,s5pv210-keypad";
+        reg = <0x100a0000 0x100>;
+        interrupts = <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&clock CLK_KEYIF>;
+        clock-names = "keypad";
+
+        samsung,keypad-num-rows = <2>;
+        samsung,keypad-num-columns = <8>;
+        linux,input-no-autorepeat;
+        wakeup-source;
+
+        key-1 {
+            keypad,row = <0>;
+            keypad,column = <3>;
+            linux,code = <2>;
+        };
+
+        key-2 {
+            keypad,row = <0>;
+            keypad,column = <4>;
+            linux,code = <3>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/input/samsung-keypad.txt b/Documentation/devicetree/bindings/input/samsung-keypad.txt
deleted file mode 100644 (file)
index 4c5c0a8..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-* Samsung's Keypad Controller device tree bindings
-
-Samsung's Keypad controller is used to interface a SoC with a matrix-type
-keypad device. The keypad controller supports multiple row and column lines.
-A key can be placed at each intersection of a unique row and a unique column.
-The keypad controller can sense a key-press and key-release and report the
-event using a interrupt to the cpu.
-
-Required SoC Specific Properties:
-- compatible: should be one of the following
-  - "samsung,s3c6410-keypad": For controllers compatible with s3c6410 keypad
-    controller.
-  - "samsung,s5pv210-keypad": For controllers compatible with s5pv210 keypad
-    controller.
-
-- reg: physical base address of the controller and length of memory mapped
-  region.
-
-- interrupts: The interrupt number to the cpu.
-
-Required Board Specific Properties:
-- samsung,keypad-num-rows: Number of row lines connected to the keypad
-  controller.
-
-- samsung,keypad-num-columns: Number of column lines connected to the
-  keypad controller.
-
-- Keys represented as child nodes: Each key connected to the keypad
-  controller is represented as a child node to the keypad controller
-  device node and should include the following properties.
-  - keypad,row: the row number to which the key is connected.
-  - keypad,column: the column number to which the key is connected.
-  - linux,code: the key-code to be reported when the key is pressed
-    and released.
-
-- pinctrl-0: Should specify pin control groups used for this controller.
-- pinctrl-names: Should contain only one value - "default".
-
-Optional Properties:
-- wakeup-source: use any event on keypad as wakeup event.
-                (Legacy property supported: "linux,input-wakeup")
-
-Optional Properties specific to linux:
-- linux,keypad-no-autorepeat: do no enable autorepeat feature.
-
-
-Example:
-       keypad@100a0000 {
-               compatible = "samsung,s5pv210-keypad";
-               reg = <0x100A0000 0x100>;
-               interrupts = <173>;
-               samsung,keypad-num-rows = <2>;
-               samsung,keypad-num-columns = <8>;
-               linux,input-no-autorepeat;
-               wakeup-source;
-
-               pinctrl-names = "default";
-               pinctrl-0 = <&keypad_rows &keypad_columns>;
-
-               key_1 {
-                       keypad,row = <0>;
-                       keypad,column = <3>;
-                       linux,code = <2>;
-               };
-
-               key_2 {
-                       keypad,row = <0>;
-                       keypad,column = <4>;
-                       linux,code = <3>;
-               };
-
-               key_3 {
-                       keypad,row = <0>;
-                       keypad,column = <5>;
-                       linux,code = <4>;
-               };
-       };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/fsl,imx6ul-tsc.yaml b/Documentation/devicetree/bindings/input/touchscreen/fsl,imx6ul-tsc.yaml
new file mode 100644 (file)
index 0000000..678756a
--- /dev/null
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/fsl,imx6ul-tsc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX6UL Touch Controller
+
+maintainers:
+  - Haibo Chen <haibo.chen@nxp.com>
+  - Shawn Guo <shawnguo@kernel.org>
+  - Sascha Hauer <s.hauer@pengutronix.de>
+
+properties:
+  compatible:
+    const: fsl,imx6ul-tsc
+
+  reg:
+    items:
+      - description: touch controller address
+      - description: ADC2 address
+
+  interrupts:
+    items:
+      - description: touch controller interrupt
+      - description: ADC2 interrupt
+
+  clocks:
+    maxItems: 2
+
+  clock-names:
+    items:
+      - const: tsc
+      - const: adc
+
+  xnur-gpios:
+    maxItems: 1
+    description:
+      The X- GPIO this controller connects to. This xnur-gpio returns to
+      low once the finger leaves the touch screen (the last touch event
+      the touch controller captures).
+
+  measure-delay-time:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      The value of the measure delay time. Before an X-axis or Y-axis
+      measurement, the screen needs some time for the even potential
+      distribution to become ready.
+    default: 0xffff
+    minimum: 0
+    maximum: 0xffffff
+
+  pre-charge-time:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      The touch screen needs some time to precharge.
+    default: 0xfff
+    minimum: 0
+    maximum: 0xffffffff
+
+  touchscreen-average-samples:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Number of data samples which are averaged for each read.
+    enum: [ 1, 4, 8, 16, 32 ]
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - xnur-gpios
+
+allOf:
+  - $ref: touchscreen.yaml#
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/imx6ul-clock.h>
+    #include <dt-bindings/gpio/gpio.h>
+    touchscreen@2040000 {
+        compatible = "fsl,imx6ul-tsc";
+        reg = <0x02040000 0x4000>, <0x0219c000 0x4000>;
+        interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&clks IMX6UL_CLK_IPG>,
+                 <&clks IMX6UL_CLK_ADC2>;
+        clock-names = "tsc", "adc";
+        pinctrl-names = "default";
+        pinctrl-0 = <&pinctrl_tsc>;
+        xnur-gpios = <&gpio1 3 GPIO_ACTIVE_LOW>;
+        measure-delay-time = <0xfff>;
+        pre-charge-time = <0xffff>;
+        touchscreen-average-samples = <32>;
+    };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix,gt9916.yaml b/Documentation/devicetree/bindings/input/touchscreen/goodix,gt9916.yaml
new file mode 100644 (file)
index 0000000..d90f045
--- /dev/null
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/goodix,gt9916.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Goodix Berlin series touchscreen controller
+
+description: The Goodix Berlin series of touchscreen controllers
+  can be connected to either I2C or SPI buses.
+
+maintainers:
+  - Neil Armstrong <neil.armstrong@linaro.org>
+
+allOf:
+  - $ref: touchscreen.yaml#
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+properties:
+  compatible:
+    enum:
+      - goodix,gt9916
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  reset-gpios:
+    maxItems: 1
+
+  avdd-supply:
+    description: Analog power supply regulator on AVDD pin
+
+  vddio-supply:
+    description: Power supply regulator on VDDIO pin
+
+  spi-max-frequency: true
+  touchscreen-inverted-x: true
+  touchscreen-inverted-y: true
+  touchscreen-size-x: true
+  touchscreen-size-y: true
+  touchscreen-swapped-x-y: true
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - avdd-supply
+  - touchscreen-size-x
+  - touchscreen-size-y
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/gpio/gpio.h>
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      touchscreen@5d {
+        compatible = "goodix,gt9916";
+        reg = <0x5d>;
+        interrupt-parent = <&gpio>;
+        interrupts = <25 IRQ_TYPE_LEVEL_LOW>;
+        reset-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
+        avdd-supply = <&ts_avdd>;
+        touchscreen-size-x = <1024>;
+        touchscreen-size-y = <768>;
+      };
+    };
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/gpio/gpio.h>
+    spi {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      num-cs = <1>;
+      cs-gpios = <&gpio 2 GPIO_ACTIVE_HIGH>;
+      touchscreen@0 {
+        compatible = "goodix,gt9916";
+        reg = <0>;
+        interrupt-parent = <&gpio>;
+        interrupts = <25 IRQ_TYPE_LEVEL_LOW>;
+        reset-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>;
+        avdd-supply = <&ts_avdd>;
+        spi-max-frequency = <1000000>;
+        touchscreen-size-x = <1024>;
+        touchscreen-size-y = <768>;
+      };
+    };
+
+...
index 3d016b87c8df8684aaee0779e11f8aa930a158a9..2a2d86cfd1048781a66d71ec8a8d60cc298e4f5b 100644 (file)
@@ -37,8 +37,9 @@ properties:
     maxItems: 1
 
   irq-gpios:
-    description: GPIO pin used for IRQ. The driver uses the interrupt gpio pin
-      as output to reset the device.
+    description: GPIO pin used for IRQ input. Additionally, this line is
+      sampled by the device on reset deassertion to select the I2C client
+      address, thus it can be driven by the host during the reset sequence.
     maxItems: 1
 
   reset-gpios:
index 0d6b033fd5fbc2508206ac7dd0bcf0201704c9c2..77ba280b3bdcc5891e13fddd87545328f47ec5c7 100644 (file)
@@ -9,15 +9,14 @@ title: Imagis IST30XXC family touchscreen controller
 maintainers:
   - Markuss Broks <markuss.broks@gmail.com>
 
-allOf:
-  - $ref: touchscreen.yaml#
-
 properties:
   $nodename:
     pattern: "^touchscreen@[0-9a-f]+$"
 
   compatible:
     enum:
+      - imagis,ist3032c
+      - imagis,ist3038b
       - imagis,ist3038c
 
   reg:
@@ -32,6 +31,10 @@ properties:
   vddio-supply:
     description: Power supply regulator for the I2C bus
 
+  linux,keycodes:
+    description: Keycodes for the touch keys
+    maxItems: 5
+
   touchscreen-size-x: true
   touchscreen-size-y: true
   touchscreen-fuzz-x: true
@@ -42,6 +45,18 @@ properties:
 
 additionalProperties: false
 
+allOf:
+  - $ref: touchscreen.yaml#
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              const: imagis,ist3032c
+    then:
+      properties:
+        linux,keycodes: false
+
 required:
   - compatible
   - reg
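
A minimal sketch of a node using the new imagis,ist3032c compatible and its
touch-key keycodes might look as follows (the I2C address, interrupt wiring,
regulator phandle and chosen keycodes are illustrative assumptions, not taken
from the binding):

	#include <dt-bindings/input/linux-event-codes.h>
	#include <dt-bindings/interrupt-controller/irq.h>

	touchscreen@50 {
		compatible = "imagis,ist3032c";
		reg = <0x50>;                           /* assumed I2C address */
		interrupt-parent = <&gpio>;             /* assumed wiring */
		interrupts = <13 IRQ_TYPE_EDGE_FALLING>;
		vddio-supply = <&tsp_io_reg>;           /* hypothetical regulator */
		/* up to 5 entries; the schema rejects this property on
		 * ist3038b/ist3038c */
		linux,keycodes = <KEY_APPSELECT>, <KEY_BACK>;
	};
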
diff --git a/Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt b/Documentation/devicetree/bindings/input/touchscreen/imx6ul_tsc.txt
deleted file mode 100644 (file)
index 1649150..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-* Freescale i.MX6UL Touch Controller
-
-Required properties:
-- compatible: must be "fsl,imx6ul-tsc".
-- reg: this touch controller address and the ADC2 address.
-- interrupts: the interrupt of this touch controller and ADC2.
-- clocks: the root clock of touch controller and ADC2.
-- clock-names; must be "tsc" and "adc".
-- xnur-gpio: the X- gpio this controller connect to.
-  This xnur-gpio returns to low once the finger leave the touch screen (The
-  last touch event the touch controller capture).
-
-Optional properties:
-- measure-delay-time: the value of measure delay time.
-  Before X-axis or Y-axis measurement, the screen need some time before
-  even potential distribution ready.
-  This value depends on the touch screen.
-- pre-charge-time: the touch screen need some time to precharge.
-  This value depends on the touch screen.
-- touchscreen-average-samples: Number of data samples which are averaged for
-  each read. Valid values are 1, 4, 8, 16 and 32.
-
-Example:
-       tsc: tsc@2040000 {
-               compatible = "fsl,imx6ul-tsc";
-               reg = <0x02040000 0x4000>, <0x0219c000 0x4000>;
-               interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
-                            <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
-               clocks = <&clks IMX6UL_CLK_IPG>,
-                        <&clks IMX6UL_CLK_ADC2>;
-               clock-names = "tsc", "adc";
-               pinctrl-names = "default";
-               pinctrl-0 = <&pinctrl_tsc>;
-               xnur-gpio = <&gpio1 3 GPIO_ACTIVE_LOW>;
-               measure-delay-time = <0xfff>;
-               pre-charge-time = <0xffff>;
-               touchscreen-average-samples = <32>;
-       };
index 07f9dd6b1c9c44af761bb0cd4de9f3f201f940e0..90ebd4f8354c27b5a4fb83ea6f9ad470dba843a2 100644 (file)
@@ -17,13 +17,17 @@ properties:
     pattern: "^touchscreen(@.*)?$"
 
   compatible:
-    items:
+    oneOf:
       - enum:
           - melfas,mms114
           - melfas,mms134s
           - melfas,mms136
           - melfas,mms152
           - melfas,mms345l
+      - items:
+          - enum:
+              - melfas,mms252
+          - const: melfas,mms114
 
   reg:
     description: I2C address
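
The new melfas,mms252 entry is only valid together with the melfas,mms114
fallback, so a conforming node would look roughly like this (the I2C address
is an assumption):

	touchscreen@48 {
		/* MMS252 is driven through the MMS114 fallback compatible */
		compatible = "melfas,mms252", "melfas,mms114";
		reg = <0x48>;	/* assumed I2C address */
	};
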
index 95b554be25b4076ad4615f919e5b76b55c62fbde..5381a96f494998355452d034fb91ed60b158bcbb 100644 (file)
@@ -31,7 +31,7 @@ properties:
     maxItems: 1
 
   firmware-name:
-    $ref: /schemas/types.yaml#/definitions/string
+    maxItems: 1
     description: >
       File basename for board specific firmware
 
diff --git a/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt b/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt
deleted file mode 100644 (file)
index 7079d44..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-* Advanced Interrupt Controller (AIC)
-
-Required properties:
-- compatible: Should be:
-    - "atmel,<chip>-aic" where  <chip> can be "at91rm9200", "sama5d2",
-      "sama5d3" or "sama5d4"
-    - "microchip,<chip>-aic" where <chip> can be "sam9x60"
-
-- interrupt-controller: Identifies the node as an interrupt controller.
-- #interrupt-cells: The number of cells to define the interrupts. It should be 3.
-  The first cell is the IRQ number (aka "Peripheral IDentifier" on datasheet).
-  The second cell is used to specify flags:
-    bits[3:0] trigger type and level flags:
-      1 = low-to-high edge triggered.
-      2 = high-to-low edge triggered.
-      4 = active high level-sensitive.
-      8 = active low level-sensitive.
-      Valid combinations are 1, 2, 3, 4, 8.
-      Default flag for internal sources should be set to 4 (active high).
-  The third cell is used to specify the irq priority from 0 (lowest) to 7
-  (highest).
-- reg: Should contain AIC registers location and length
-- atmel,external-irqs: u32 array of external irqs.
-
-Examples:
-       /*
-        * AIC
-        */
-       aic: interrupt-controller@fffff000 {
-               compatible = "atmel,at91rm9200-aic";
-               interrupt-controller;
-               #interrupt-cells = <3>;
-               reg = <0xfffff000 0x200>;
-       };
-
-       /*
-        * An interrupt generating device that is wired to an AIC.
-        */
-       dma: dma-controller@ffffec00 {
-               compatible = "atmel,at91sam9g45-dma";
-               reg = <0xffffec00 0x200>;
-               interrupts = <21 4 5>;
-       };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.yaml
new file mode 100644 (file)
index 0000000..d4658fe
--- /dev/null
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/atmel,aic.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Advanced Interrupt Controller (AIC)
+
+maintainers:
+  - Nicolas Ferre <nicolas.ferre@microchip.com>
+  - Dharma balasubiramani <dharma.b@microchip.com>
+
+description:
+  The Advanced Interrupt Controller (AIC) is an 8-level priority, individually
+  maskable, vectored interrupt controller providing handling of up to one
+  hundred and twenty-eight interrupt sources.
+
+properties:
+  compatible:
+    enum:
+      - atmel,at91rm9200-aic
+      - atmel,sama5d2-aic
+      - atmel,sama5d3-aic
+      - atmel,sama5d4-aic
+      - microchip,sam9x60-aic
+
+  reg:
+    maxItems: 1
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 3
+    description: |
+      The 1st cell is the IRQ number (Peripheral IDentifier in the datasheet).
+      The 2nd cell specifies flags:
+        bits[3:0] trigger type and level flags:
+          1 = low-to-high edge triggered.
+          2 = high-to-low edge triggered.
+          4 = active high level-sensitive.
+          8 = active low level-sensitive.
+        Valid combinations: 1, 2, 3, 4, 8.
+        Default for internal sources: 4 (active high).
+      The 3rd cell specifies irq priority from 0 (lowest) to 7 (highest).
+
+  interrupts:
+    maxItems: 1
+
+  atmel,external-irqs:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    description: u32 array of external irqs.
+
+allOf:
+  - $ref: /schemas/interrupt-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: atmel,at91rm9200-aic
+    then:
+      properties:
+        atmel,external-irqs:
+          minItems: 1
+          maxItems: 7
+    else:
+      properties:
+        atmel,external-irqs:
+          minItems: 1
+          maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupt-controller
+  - "#interrupt-cells"
+  - atmel,external-irqs
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    interrupt-controller@fffff000 {
+      compatible = "atmel,at91rm9200-aic";
+      reg = <0xfffff000 0x200>;
+      interrupt-controller;
+      #interrupt-cells = <3>;
+      atmel,external-irqs = <31>;
+    };
+...
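
The removed text binding carried a consumer example that still illustrates the
three-cell interrupt format described above; restated as a sketch (the DMA
controller is simply the device used in the old example):

	/* IRQ 21, flags 4 (active-high level-sensitive), priority 5 */
	dma: dma-controller@ffffec00 {
		compatible = "atmel,at91sam9g45-dma";
		reg = <0xffffec00 0x200>;
		interrupts = <21 4 5>;
	};
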
index 985bfa4f6fda134252e749551688ee87be09a0c0..78baa0a571cf7f8f51eec3347783bb9d3560db04 100644 (file)
@@ -37,6 +37,9 @@ properties:
   clock-names:
     const: ipg
 
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/interrupt-controller/mediatek,mt6577-sysirq.yaml b/Documentation/devicetree/bindings/interrupt-controller/mediatek,mt6577-sysirq.yaml
new file mode 100644 (file)
index 0000000..e1a379c
--- /dev/null
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/mediatek,mt6577-sysirq.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek sysirq
+
+description:
+  The sysirq block in MediaTek SoCs provides a controllable IRQ inverter for
+  each GIC SPI interrupt.
+
+maintainers:
+  - Matthias Brugger <matthias.bgg@gmail.com>
+
+properties:
+  compatible:
+    oneOf:
+      - const: mediatek,mt6577-sysirq
+      - items:
+          - enum:
+              - mediatek,mt2701-sysirq
+              - mediatek,mt2712-sysirq
+              - mediatek,mt6580-sysirq
+              - mediatek,mt6582-sysirq
+              - mediatek,mt6589-sysirq
+              - mediatek,mt6592-sysirq
+              - mediatek,mt6755-sysirq
+              - mediatek,mt6765-sysirq
+              - mediatek,mt6779-sysirq
+              - mediatek,mt6795-sysirq
+              - mediatek,mt6797-sysirq
+              - mediatek,mt7622-sysirq
+              - mediatek,mt7623-sysirq
+              - mediatek,mt7629-sysirq
+              - mediatek,mt8127-sysirq
+              - mediatek,mt8135-sysirq
+              - mediatek,mt8173-sysirq
+              - mediatek,mt8183-sysirq
+              - mediatek,mt8365-sysirq
+              - mediatek,mt8516-sysirq
+          - const: mediatek,mt6577-sysirq
+
+  reg:
+    minItems: 1
+    maxItems: 2
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    $ref: "arm,gic.yaml#/properties/#interrupt-cells"
+
+required:
+  - reg
+  - interrupt-controller
+  - "#interrupt-cells"
+
+allOf:
+  - $ref: /schemas/interrupt-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: mediatek,mt6797-sysirq
+    then:
+      properties:
+        reg:
+          minItems: 2
+    else:
+      properties:
+        reg:
+          maxItems: 1
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    interrupt-controller@10200620 {
+        compatible = "mediatek,mt6797-sysirq", "mediatek,mt6577-sysirq";
+        reg = <0x10220620 0x20>,
+              <0x10220690 0x10>;
+        interrupt-parent = <&gic>;
+        interrupt-controller;
+        #interrupt-cells = <3>;
+    };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt b/Documentation/devicetree/bindings/interrupt-controller/mediatek,sysirq.txt
deleted file mode 100644 (file)
index 3ffc601..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-MediaTek sysirq
-
-MediaTek SOCs sysirq support controllable irq inverter for each GIC SPI
-interrupt.
-
-Required properties:
-- compatible: should be
-       "mediatek,mt8516-sysirq", "mediatek,mt6577-sysirq": for MT8516
-       "mediatek,mt8183-sysirq", "mediatek,mt6577-sysirq": for MT8183
-       "mediatek,mt8173-sysirq", "mediatek,mt6577-sysirq": for MT8173
-       "mediatek,mt8135-sysirq", "mediatek,mt6577-sysirq": for MT8135
-       "mediatek,mt8127-sysirq", "mediatek,mt6577-sysirq": for MT8127
-       "mediatek,mt7622-sysirq", "mediatek,mt6577-sysirq": for MT7622
-       "mediatek,mt7623-sysirq", "mediatek,mt6577-sysirq": for MT7623
-       "mediatek,mt7629-sysirq", "mediatek,mt6577-sysirq": for MT7629
-       "mediatek,mt6795-sysirq", "mediatek,mt6577-sysirq": for MT6795
-       "mediatek,mt6797-sysirq", "mediatek,mt6577-sysirq": for MT6797
-       "mediatek,mt6779-sysirq", "mediatek,mt6577-sysirq": for MT6779
-       "mediatek,mt6765-sysirq", "mediatek,mt6577-sysirq": for MT6765
-       "mediatek,mt6755-sysirq", "mediatek,mt6577-sysirq": for MT6755
-       "mediatek,mt6592-sysirq", "mediatek,mt6577-sysirq": for MT6592
-       "mediatek,mt6589-sysirq", "mediatek,mt6577-sysirq": for MT6589
-       "mediatek,mt6582-sysirq", "mediatek,mt6577-sysirq": for MT6582
-       "mediatek,mt6580-sysirq", "mediatek,mt6577-sysirq": for MT6580
-       "mediatek,mt6577-sysirq": for MT6577
-       "mediatek,mt2712-sysirq", "mediatek,mt6577-sysirq": for MT2712
-       "mediatek,mt2701-sysirq", "mediatek,mt6577-sysirq": for MT2701
-       "mediatek,mt8365-sysirq", "mediatek,mt6577-sysirq": for MT8365
-- interrupt-controller : Identifies the node as an interrupt controller
-- #interrupt-cells : Use the same format as specified by GIC in arm,gic.txt.
-- reg: Physical base address of the intpol registers and length of memory
-  mapped region. Could be multiple bases here. Ex: mt6797 needs 2 reg, others
-  need 1.
-
-Example:
-       sysirq: intpol-controller@10200620 {
-               compatible = "mediatek,mt6797-sysirq",
-                            "mediatek,mt6577-sysirq";
-               interrupt-controller;
-               #interrupt-cells = <3>;
-               interrupt-parent = <&gic>;
-               reg = <0 0x10220620 0 0x20>,
-                     <0 0x10220690 0 0x10>;
-       };
index d3b5aec0a3f74d83389ac7b94ad94a1dc9dabf09..daef4ee06f4ed6fadbbd04d618b385ac99080626 100644 (file)
@@ -44,7 +44,7 @@ properties:
     maxItems: 1
 
   interrupts:
-    minItems: 41
+    minItems: 45
     items:
       - description: NMI interrupt
       - description: IRQ0 interrupt
@@ -88,9 +88,15 @@ properties:
       - description: GPIO interrupt, TINT30
       - description: GPIO interrupt, TINT31
       - description: Bus error interrupt
+      - description: ECCRAM0 or combined ECCRAM0/1 1-bit error interrupt
+      - description: ECCRAM0 or combined ECCRAM0/1 2-bit error interrupt
+      - description: ECCRAM0 or combined ECCRAM0/1 error overflow interrupt
+      - description: ECCRAM1 1-bit error interrupt
+      - description: ECCRAM1 2-bit error interrupt
+      - description: ECCRAM1 error overflow interrupt
 
   interrupt-names:
-    minItems: 41
+    minItems: 45
     items:
       - const: nmi
       - const: irq0
@@ -134,6 +140,12 @@ properties:
       - const: tint30
       - const: tint31
       - const: bus-err
+      - const: ec7tie1-0
+      - const: ec7tie2-0
+      - const: ec7tiovf-0
+      - const: ec7tie1-1
+      - const: ec7tie2-1
+      - const: ec7tiovf-1
 
   clocks:
     maxItems: 2
@@ -156,6 +168,7 @@ required:
   - interrupt-controller
   - reg
   - interrupts
+  - interrupt-names
   - clocks
   - clock-names
   - power-domains
@@ -169,16 +182,19 @@ allOf:
         compatible:
           contains:
             enum:
-              - renesas,r9a07g043u-irqc
               - renesas,r9a08g045-irqc
     then:
       properties:
         interrupts:
-          minItems: 42
+          maxItems: 45
         interrupt-names:
-          minItems: 42
-      required:
-        - interrupt-names
+          maxItems: 45
+    else:
+      properties:
+        interrupts:
+          minItems: 48
+        interrupt-names:
+          minItems: 48
 
 unevaluatedProperties: false
 
@@ -233,7 +249,14 @@ examples:
                      <GIC_SPI 472 IRQ_TYPE_LEVEL_HIGH>,
                      <GIC_SPI 473 IRQ_TYPE_LEVEL_HIGH>,
                      <GIC_SPI 474 IRQ_TYPE_LEVEL_HIGH>,
-                     <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>;
+                     <GIC_SPI 475 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 25 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 34 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 35 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 36 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 37 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 39 IRQ_TYPE_EDGE_RISING>;
         interrupt-names = "nmi",
                           "irq0", "irq1", "irq2", "irq3",
                           "irq4", "irq5", "irq6", "irq7",
@@ -244,7 +267,10 @@ examples:
                           "tint16", "tint17", "tint18", "tint19",
                           "tint20", "tint21", "tint22", "tint23",
                           "tint24", "tint25", "tint26", "tint27",
-                          "tint28", "tint29", "tint30", "tint31";
+                          "tint28", "tint29", "tint30", "tint31",
+                          "bus-err", "ec7tie1-0", "ec7tie2-0",
+                          "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1",
+                          "ec7tiovf-1";
         clocks = <&cpg CPG_MOD R9A07G044_IA55_CLK>,
                  <&cpg CPG_MOD R9A07G044_IA55_PCLK>;
         clock-names = "clk", "pclk";
index e37317f810722ddf7662f310be7939e82161b749..c9673391afdbde00b3e53a74e6e5fb667d7aa01d 100644 (file)
@@ -36,7 +36,7 @@ properties:
 
     properties:
       port@0:
-        $ref: /schemas/graph.yaml#/$defs/port-base
+        $ref: /schemas/graph.yaml#/properties/port
         description: Analog input port
 
         properties:
index afcaa427d48b09ea331c60b00602eaeff45b74ef..6be00aca4181767379c104786e98cae7be62b38b 100644 (file)
@@ -16,6 +16,7 @@ description: |
 properties:
   compatible:
     enum:
+      - fsl,imx8mp-isp
       - rockchip,px30-cif-isp
       - rockchip,rk3399-cif-isp
 
@@ -36,9 +37,9 @@ properties:
     minItems: 3
     items:
       # isp0 and isp1
-      - description: ISP clock
-      - description: ISP AXI clock
-      - description: ISP AHB clock
+      - description: ISP clock (for imx8mp, clk)
+      - description: ISP AXI clock (for imx8mp, m_hclk)
+      - description: ISP AHB clock (for imx8mp, hclk)
       # only for isp1
       - description: ISP Pixel clock
 
@@ -52,6 +53,13 @@ properties:
       # only for isp1
       - const: pclk
 
+  fsl,blk-ctrl:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    maxItems: 1
+    description:
+      A phandle to the media block control for the ISP, followed by a cell
+      containing the index of the gasket.
+
   iommus:
     maxItems: 1
 
@@ -113,9 +121,6 @@ required:
   - interrupts
   - clocks
   - clock-names
-  - iommus
-  - phys
-  - phy-names
   - power-domains
   - ports
 
@@ -143,6 +148,26 @@ allOf:
       required:
         - interrupt-names
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: fsl,imx8mp-isp
+    then:
+      properties:
+        iommus: false
+        phys: false
+        phy-names: false
+      required:
+        - fsl,blk-ctrl
+    else:
+      properties:
+        fsl,blk-ctrl: false
+      required:
+        - iommus
+        - phys
+        - phy-names
+
 additionalProperties: false
 
 examples:
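
For the new fsl,imx8mp-isp case, the gasket selection described above would be
expressed roughly as below (the node address and the &media_blk_ctrl phandle
name are assumptions; the other required properties are elided):

	isp@32e10000 {
		compatible = "fsl,imx8mp-isp";
		/* media block control phandle plus the gasket index cell */
		fsl,blk-ctrl = <&media_blk_ctrl 0>;
		/* iommus, phys and phy-names must be absent on imx8mp */
	};
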
diff --git a/Documentation/devicetree/bindings/media/st,stm32mp25-video-codec.yaml b/Documentation/devicetree/bindings/media/st,stm32mp25-video-codec.yaml
new file mode 100644 (file)
index 0000000..b8611bc
--- /dev/null
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/st,stm32mp25-video-codec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STM32MP25 VDEC video decoder & VENC video encoder
+
+maintainers:
+  - Hugues Fruchet <hugues.fruchet@foss.st.com>
+
+description:
+  The STMicroelectronics STM32MP25 SoC embeds a VDEC video hardware
+  decoder peripheral based on the Verisilicon VC8000NanoD IP (formerly
+  Hantro G1) and a VENC video hardware encoder peripheral based on the
+  Verisilicon VC8000NanoE IP (formerly Hantro H1).
+
+properties:
+  compatible:
+    enum:
+      - st,stm32mp25-vdec
+      - st,stm32mp25-venc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    video-codec@580d0000 {
+        compatible = "st,stm32mp25-vdec";
+        reg = <0x580d0000 0x3c8>;
+        interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&ck_icn_p_vdec>;
+    };
index cf382dea3922c915df71d5b8fb8cfa2bcdc78d91..a85137add66894cad5a36d27afaa3e134c7b618d 100644 (file)
@@ -23,22 +23,23 @@ properties:
       - brcm,bmips4380
       - brcm,bmips5000
       - brcm,bmips5200
-      - ingenic,xburst-mxu1.0
+      - img,i6500
       - ingenic,xburst-fpu1.0-mxu1.1
       - ingenic,xburst-fpu2.0-mxu2.0
+      - ingenic,xburst-mxu1.0
       - ingenic,xburst2-fpu2.1-mxu2.1-smt
       - loongson,gs264
       - mips,m14Kc
-      - mips,mips4Kc
-      - mips,mips4KEc
-      - mips,mips24Kc
+      - mips,mips1004Kc
       - mips,mips24KEc
+      - mips,mips24Kc
+      - mips,mips4KEc
+      - mips,mips4Kc
       - mips,mips74Kc
-      - mips,mips1004Kc
       - mti,interaptiv
-      - mti,mips24KEc
       - mti,mips14KEc
       - mti,mips14Kc
+      - mti,mips24KEc
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/mips/mobileye.yaml b/Documentation/devicetree/bindings/mips/mobileye.yaml
new file mode 100644 (file)
index 0000000..831975f
--- /dev/null
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+# Copyright 2023 Mobileye Vision Technologies Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mips/mobileye.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mobileye SoC series
+
+maintainers:
+  - Vladimir Kondratiev <vladimir.kondratiev@intel.com>
+  - Gregory CLEMENT <gregory.clement@bootlin.com>
+  - Théo Lebrun <theo.lebrun@bootlin.com>
+
+description:
+  Boards with a Mobileye SoC shall have the following properties.
+
+properties:
+  $nodename:
+    const: '/'
+
+  compatible:
+    oneOf:
+      - description: Boards with Mobileye EyeQ5 SoC
+        items:
+          - enum:
+              - mobileye,eyeq5-epm5
+          - const: mobileye,eyeq5
+
+additionalProperties: true
+
+...
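
A board top-level node matching this schema would look roughly like this (the
model string is an assumption; the compatible list follows the enum/const pair
above):

	/ {
		model = "Mobileye EyeQ5 EPM5";
		compatible = "mobileye,eyeq5-epm5", "mobileye,eyeq5";
	};
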
index 2dc3e245fa5d342f4aa8ca550b29e979732dbb05..c27a8f33d8d76913084be32ed4886d678fca7aeb 100644 (file)
@@ -77,6 +77,8 @@ patternProperties:
       reg:
         maxItems: 1
 
+      dma-coherent: true
+
       iommus:
         minItems: 1
         maxItems: 3
diff --git a/Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt b/Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt
deleted file mode 100644 (file)
index e328963..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-* Xilinx SDFEC(16nm) IP *
-
-The Soft Decision Forward Error Correction (SDFEC) Engine is a Hard IP block
-which provides high-throughput LDPC and Turbo Code implementations.
-The LDPC decode & encode functionality is capable of covering a range of
-customer specified Quasi-cyclic (QC) codes. The Turbo decode functionality
-principally covers codes used by LTE. The FEC Engine offers significant
-power and area savings versus implementations done in the FPGA fabric.
-
-
-Required properties:
-- compatible: Must be "xlnx,sd-fec-1.1"
-- clock-names : List of input clock names from the following:
-    - "core_clk", Main processing clock for processing core (required)
-    - "s_axi_aclk", AXI4-Lite memory-mapped slave interface clock (required)
-    - "s_axis_din_aclk", DIN AXI4-Stream Slave interface clock (optional)
-    - "s_axis_din_words-aclk", DIN_WORDS AXI4-Stream Slave interface clock (optional)
-    - "s_axis_ctrl_aclk",  Control input AXI4-Stream Slave interface clock (optional)
-    - "m_axis_dout_aclk", DOUT AXI4-Stream Master interface clock (optional)
-    - "m_axis_dout_words_aclk", DOUT_WORDS AXI4-Stream Master interface clock (optional)
-    - "m_axis_status_aclk", Status output AXI4-Stream Master interface clock (optional)
-- clocks : Clock phandles (see clock_bindings.txt for details).
-- reg: Should contain Xilinx SDFEC 16nm Hardened IP block registers
-  location and length.
-- xlnx,sdfec-code : Should contain "ldpc" or "turbo" to describe the codes
-  being used.
-- xlnx,sdfec-din-words : A value 0 indicates that the DIN_WORDS interface is
-  driven with a fixed value and is not present on the device, a value of 1
-  configures the DIN_WORDS to be block based, while a value of 2 configures the
-  DIN_WORDS input to be supplied for each AXI transaction.
-- xlnx,sdfec-din-width : Configures the DIN AXI stream where a value of 1
-  configures a width of "1x128b", 2 a width of "2x128b" and 4 configures a width
-  of "4x128b".
-- xlnx,sdfec-dout-words : A value 0 indicates that the DOUT_WORDS interface is
-  driven with a fixed value and is not present on the device, a value of 1
-  configures the DOUT_WORDS to be block based, while a value of 2 configures the
-  DOUT_WORDS input to be supplied for each AXI transaction.
-- xlnx,sdfec-dout-width : Configures the DOUT AXI stream where a value of 1
-  configures a width of "1x128b", 2 a width of "2x128b" and 4 configures a width
-  of "4x128b".
-Optional properties:
-- interrupts: should contain SDFEC interrupt number
-
-Example
----------------------------------------
-       sd_fec_0: sd-fec@a0040000 {
-               compatible = "xlnx,sd-fec-1.1";
-               clock-names = "core_clk","s_axi_aclk","s_axis_ctrl_aclk","s_axis_din_aclk","m_axis_status_aclk","m_axis_dout_aclk";
-               clocks = <&misc_clk_2>,<&misc_clk_0>,<&misc_clk_1>,<&misc_clk_1>,<&misc_clk_1>, <&misc_clk_1>;
-               reg = <0x0 0xa0040000 0x0 0x40000>;
-               interrupt-parent = <&axi_intc>;
-               interrupts = <1 0>;
-               xlnx,sdfec-code = "ldpc";
-               xlnx,sdfec-din-words = <0>;
-               xlnx,sdfec-din-width = <2>;
-               xlnx,sdfec-dout-words = <0>;
-               xlnx,sdfec-dout-width = <1>;
-       };
diff --git a/Documentation/devicetree/bindings/misc/xlnx,sd-fec.yaml b/Documentation/devicetree/bindings/misc/xlnx,sd-fec.yaml
new file mode 100644 (file)
index 0000000..9bd2103
--- /dev/null
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/misc/xlnx,sd-fec.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx SDFEC(16nm) IP
+
+maintainers:
+  - Cvetic, Dragan <dragan.cvetic@amd.com>
+  - Erim, Salih <salih.erim@amd.com>
+
+description:
+  The Soft Decision Forward Error Correction (SDFEC) Engine is a Hard IP block
+  which provides high-throughput LDPC and Turbo Code implementations.
+  The LDPC decode & encode functionality is capable of covering a range of
+  customer-specified Quasi-cyclic (QC) codes. The Turbo decode functionality
+  principally covers codes used by LTE. The FEC Engine offers significant
+  power and area savings versus implementations done in the FPGA fabric.
+
+properties:
+  compatible:
+    const: xlnx,sd-fec-1.1
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    minItems: 2
+    maxItems: 8
+    additionalItems: true
+    items:
+      - description: Main processing clock for processing core
+      - description: AXI4-Lite memory-mapped slave interface clock
+      - description: Control input AXI4-Stream Slave interface clock
+      - description: DIN AXI4-Stream Slave interface clock
+      - description: Status output AXI4-Stream Master interface clock
+      - description: DOUT AXI4-Stream Master interface clock
+      - description: DIN_WORDS AXI4-Stream Slave interface clock
+      - description: DOUT_WORDS AXI4-Stream Master interface clock
+
+  clock-names:
+    allOf:
+      - minItems: 2
+        maxItems: 8
+        additionalItems: true
+        items:
+          - const: core_clk
+          - const: s_axi_aclk
+      - items:
+          enum:
+            - core_clk
+            - s_axi_aclk
+            - s_axis_ctrl_aclk
+            - s_axis_din_aclk
+            - m_axis_status_aclk
+            - m_axis_dout_aclk
+            - s_axis_din_words_aclk
+            - m_axis_dout_words_aclk
+
+  interrupts:
+    maxItems: 1
+
+  xlnx,sdfec-code:
+    description:
+      The SD-FEC integrated block supports Low Density Parity Check (LDPC)
+      decoding and encoding and Turbo code decoding. The LDPC codes used are
+      highly configurable, and the specific code used can be specified on
+      a codeword-by-codeword basis. The Turbo code decoding is required by the
+      LTE standard.
+    $ref: /schemas/types.yaml#/definitions/string
+    items:
+      enum: [ ldpc, turbo ]
+
+  xlnx,sdfec-din-width:
+    description:
+      Configures the DIN AXI stream where a value of 1 configures a width of
+      "1x128b", 2 a width of "2x128b" and 4 configures a width of "4x128b".
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 1, 2, 4 ]
+
+  xlnx,sdfec-din-words:
+    description:
+      A value 0 indicates that the DIN_WORDS interface is
+      driven with a fixed value and is not present on the device, a value of 1
+      configures the DIN_WORDS to be block based, while a value of 2 configures the
+      DIN_WORDS input to be supplied for each AXI transaction.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 0, 1, 2 ]
+
+  xlnx,sdfec-dout-width:
+    description:
+      Configures the DOUT AXI stream where a value of 1 configures a width of "1x128b",
+      2 a width of "2x128b" and 4 configures a width of "4x128b".
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 1, 2, 4 ]
+
+  xlnx,sdfec-dout-words:
+    description:
+      A value 0 indicates that the DOUT_WORDS interface is
+      driven with a fixed value and is not present on the device, a value of 1
+      configures the DOUT_WORDS to be block based, while a value of 2 configures the
+      DOUT_WORDS input to be supplied for each AXI transaction.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 0, 1, 2 ]
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - xlnx,sdfec-code
+  - xlnx,sdfec-din-width
+  - xlnx,sdfec-din-words
+  - xlnx,sdfec-dout-width
+  - xlnx,sdfec-dout-words
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    sd-fec@a0040000 {
+        compatible = "xlnx,sd-fec-1.1";
+        reg = <0xa0040000 0x40000>;
+        clocks = <&misc_clk_2>, <&misc_clk_0>, <&misc_clk_1>, <&misc_clk_1>,
+                 <&misc_clk_1>, <&misc_clk_1>;
+        clock-names = "core_clk", "s_axi_aclk", "s_axis_ctrl_aclk",
+                      "s_axis_din_aclk", "m_axis_status_aclk",
+                      "m_axis_dout_aclk";
+        interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
+        xlnx,sdfec-code = "ldpc";
+        xlnx,sdfec-din-width = <2>;
+        xlnx,sdfec-din-words = <0>;
+        xlnx,sdfec-dout-width = <1>;
+        xlnx,sdfec-dout-words = <0>;
+    };
+
index 50645828ac20aa1e6a9c023f1d4e81c570363616..4598930851d94d77d26a828a04aed29ee7327c5a 100644 (file)
@@ -56,6 +56,7 @@ Required properties:
        "atmel,sama5d4-pmecc"
        "atmel,sama5d2-pmecc"
        "microchip,sam9x60-pmecc"
+       "microchip,sam9x7-pmecc", "atmel,at91sam9g45-pmecc"
 - reg: should contain 2 register ranges. The first one is pointing to the PMECC
        block, and the second one to the PMECC_ERRLOC block.
 
index f57e96374e67184e7013f88d3121fef9a72c689f..064e840aeaa1159d17c4e5bf61a2f61c64e702eb 100644 (file)
@@ -9,6 +9,7 @@ title: Broadcom STB NAND Controller
 maintainers:
   - Brian Norris <computersforpeace@gmail.com>
   - Kamal Dasu <kdasu.kdev@gmail.com>
+  - William Zhang <william.zhang@broadcom.com>
 
 description: |
   The Broadcom Set-Top Box NAND controller supports low-level access to raw NAND
@@ -18,9 +19,10 @@ description: |
   supports basic PROGRAM and READ functions, among other features.
 
   This controller was originally designed for STB SoCs (BCM7xxx) but is now
-  available on a variety of Broadcom SoCs, including some BCM3xxx, BCM63xx, and
-  iProc/Cygnus. Its history includes several similar (but not fully register
-  compatible) versions.
+  available on a variety of Broadcom SoCs, including some BCM3xxx, MIPS based
+  Broadband SoC (BCM63xx), ARM based Broadband SoC (BCMBCA) and iProc/Cygnus.
+  Its history includes several similar (but not fully register compatible)
+  versions.
 
   -- Additional SoC-specific NAND controller properties --
 
@@ -53,7 +55,7 @@ properties:
               - brcm,brcmnand-v7.2
               - brcm,brcmnand-v7.3
           - const: brcm,brcmnand
-      - description: BCM63138 SoC-specific NAND controller
+      - description: BCMBCA SoC-specific NAND controller
         items:
           - const: brcm,nand-bcm63138
           - enum:
@@ -111,6 +113,13 @@ properties:
       earlier versions of this core that include WP
     type: boolean
 
+  brcm,wp-not-connected:
+    description:
+      Use this property when WP pin is not physically wired to the NAND chip.
+      Write protection feature cannot be used. By default, controller assumes
+      the pin is connected and feature is used.
+    $ref: /schemas/types.yaml#/definitions/flag
+
 patternProperties:
   "^nand@[a-f0-9]$":
     type: object
@@ -137,6 +146,15 @@ patternProperties:
           layout.
         $ref: /schemas/types.yaml#/definitions/uint32
 
+      brcm,nand-ecc-use-strap:
+        description:
+          This property requires the host system to get the ECC related
+          settings from the SoC NAND boot strap configuration instead of
+          the generic NAND ECC settings. This is a common hardware design
+          on BCMBCA based boards. This strap ECC option and generic NAND
+          ECC option can not be specified at the same time.
+        $ref: /schemas/types.yaml#/definitions/flag
+
     unevaluatedProperties: false
 
 allOf:
@@ -177,6 +195,8 @@ allOf:
             - const: iproc-idm
             - const: iproc-ext
   - if:
+      required:
+        - interrupts
       properties:
         interrupts:
           minItems: 2
@@ -184,12 +204,26 @@ allOf:
       required:
         - interrupt-names
 
+  - if:
+      patternProperties:
+        "^nand@[a-f0-9]$":
+          required:
+            - brcm,nand-ecc-use-strap
+    then:
+      patternProperties:
+        "^nand@[a-f0-9]$":
+          properties:
+            nand-ecc-strength: false
+            nand-ecc-step-size: false
+            nand-ecc-maximize: false
+            nand-ecc-algo: false
+            brcm,nand-oob-sector-size: false
+
 unevaluatedProperties: false
 
 required:
   - reg
   - reg-names
-  - interrupts
 
 examples:
   - |
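
A sketch of the new strap-based ECC option on a chip-select child node
(controller-level properties are elided; "brcm,nandcs" follows the existing
brcmnand chip-select convention, and a BCMBCA board strapped for ECC is
assumed):

	nand@0 {
		compatible = "brcm,nandcs";
		reg = <0>;
		/* ECC settings come from the SoC boot strap; the generic
		 * nand-ecc-* properties must be omitted per the schema above */
		brcm,nand-ecc-use-strap;
	};
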
index edebeae1f5b31eea4871881a532e89678f781088..eb8e2ff4dbd2901b3c396f2e66c1f590a32dcf67 100644 (file)
@@ -68,7 +68,7 @@ Deprecated properties:
                                false.
 
 Nand device bindings may contain additional sub-nodes describing partitions of
-the address space. See partition.txt for more detail. The NAND Flash timing
+the address space. See mtd.yaml for more detail. The NAND Flash timing
 values must be programmed in the chip select’s node of AEMIF
 memory-controller (see Documentation/devicetree/bindings/memory-controllers/
 davinci-aemif.txt).
index 427f46dc60add1340418951cd365ab04d4fa9a1d..51518399d7377909738a821fdc7c3e1016355a67 100644 (file)
@@ -15,7 +15,7 @@ The DMA fields are not used yet in the driver but are listed here for
 completing the bindings.
 
 The device tree may optionally contain sub-nodes describing partitions of the
-address space. See partition.txt for more detail.
+address space. See mtd.yaml for more detail.
 
 Example:
 
index 25f07c1f9e44d9a5701c5ae3050fb9fb88210b6e..530c017e014e29f2e1f6883ccea245826fd3fbf9 100644 (file)
@@ -22,7 +22,7 @@ Deprecated properties:
        (R/B# pins not connected).
 
 Each flash chip described may optionally contain additional sub-nodes
-describing partitions of the address space. See partition.txt for more
+describing partitions of the address space. See mtd.yaml for more
 detail.
 
 Examples:
index 486a17d533d7a33cce0ac8814a51979ebc7bc0ca..0edf55d47ea8ecd5b7d637d0a2b4705e04cf96ae 100644 (file)
@@ -26,7 +26,7 @@ Optional properties:
   read to ensure that the GPIO accesses have completed.
 
 The device tree may optionally contain sub-nodes describing partitions of the
-address space. See partition.txt for more detail.
+address space. See mtd.yaml for more detail.
 
 Examples:
 
index ba086c34626d101c2df94596de78f8d24add7cac..021c0da0b072ff6c4d71eb58a7319103d59597a8 100644 (file)
@@ -12,7 +12,7 @@ maintainers:
 description: |
   The GPMI nand controller provides an interface to control the NAND
   flash chips. The device tree may optionally contain sub-nodes
-  describing partitions of the address space. See partition.txt for
+  describing partitions of the address space. See mtd.yaml for
   more detail.
 
 properties:
index 8963983ae7cbac37585bef3301952448270897e1..362203e7d50e7584f5833788d832b929b687dfc8 100644 (file)
@@ -22,7 +22,7 @@ The following ECC strength and step size are currently supported:
  - nand-ecc-strength = <16>, nand-ecc-step-size = <1024>
 
 Flash chip may optionally contain additional sub-nodes describing partitions of
-the address space. See partition.txt for more detail.
+the address space. See mtd.yaml for more detail.
 
 Example:
 
index 58f0cea160ef54295b3a942d8cf7c7c46c04ec29..6e3afb42926ea6d22f0dd2ebf39752e115502615 100644 (file)
@@ -52,6 +52,9 @@ properties:
     minItems: 1
     maxItems: 2
 
+  interrupts:
+    maxItems: 1
+
   m25p,fast-read:
     type: boolean
     description:
index f322290ee5165f5de8b4168ceb9731cf7ac2a317..ee442ecb11cd9cbc9562777f7cb3c3225087c394 100644 (file)
@@ -10,6 +10,8 @@ maintainers:
   - Miquel Raynal <miquel.raynal@bootlin.com>
   - Richard Weinberger <richard@nod.at>
 
+select: false
+
 properties:
   $nodename:
     pattern: "^(flash|.*sram|nand)(@.*)?$"
index e737e5beb7bf48fad54cfaebc5aad780160ea74a..4a00ec2b2540c522874cf91b4f36f12edbb91fc7 100644 (file)
@@ -39,7 +39,7 @@ Optional children node properties:
 - wp-gpios: GPIO specifier for the write protect pin.
 
 Optional child node of NAND chip nodes:
-Partitions: see partition.txt
+Partitions: see mtd.yaml
 
   Example:
        nand-controller@70008000 {
index 2d6ab660e6032a3c4147d2e22ab2f33e417d1155..b9997b1f13ac70693c6dc5e627812f810d493e7e 100644 (file)
@@ -13,7 +13,7 @@ Optional properties:
                registers in usecs
 
 The device tree may optionally contain sub-nodes describing partitions of the
-address space. See partition.txt for more detail.
+address space. See mtd.yaml for more detail.
 
 Example:
 
index 09815c40fc8aeeed16a5bb5f8936f8642fd6951e..6354553506602d452acd02dbac48c854b8b59cbe 100644 (file)
@@ -19,7 +19,7 @@ Optional child properties:
 
 Each child device node may optionally contain a 'partitions' sub-node,
 which further contains sub-nodes describing the flash partition mapping.
-See partition.txt for more detail.
+See mtd.yaml for more detail.
 
 Example:
 
index e72cb5bacaf0a983033b734a4e84a5b3a226b58f..b8ef9ba88e92a5f91193f9d1286c882b10d2a42c 100644 (file)
@@ -14,10 +14,11 @@ properties:
     enum:
       - st,stm32mp15-fmc2
       - st,stm32mp1-fmc2-nfc
+      - st,stm32mp25-fmc2-nfc
 
   reg:
     minItems: 6
-    maxItems: 7
+    maxItems: 12
 
   interrupts:
     maxItems: 1
@@ -92,6 +93,28 @@ allOf:
             - description: Chip select 1 command
             - description: Chip select 1 address space
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: st,stm32mp25-fmc2-nfc
+    then:
+      properties:
+        reg:
+          items:
+            - description: Chip select 0 data
+            - description: Chip select 0 command
+            - description: Chip select 0 address space
+            - description: Chip select 1 data
+            - description: Chip select 1 command
+            - description: Chip select 1 address space
+            - description: Chip select 2 data
+            - description: Chip select 2 command
+            - description: Chip select 2 address space
+            - description: Chip select 3 data
+            - description: Chip select 3 command
+            - description: Chip select 3 address space
+
 required:
   - compatible
   - reg
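
On st,stm32mp25-fmc2-nfc the reg list grows to twelve ranges, three (data,
command, address space) per chip select; a sketch with illustrative, assumed
addresses:

	nand-controller@48030000 {
		compatible = "st,stm32mp25-fmc2-nfc";
		/* CS0..CS3, each with data, command and address-space ranges */
		reg = <0x48030000 0x1000>, <0x48031000 0x1000>, <0x48032000 0x1000>,
		      <0x48033000 0x1000>, <0x48034000 0x1000>, <0x48035000 0x1000>,
		      <0x48036000 0x1000>, <0x48037000 0x1000>, <0x48038000 0x1000>,
		      <0x48039000 0x1000>, <0x4803a000 0x1000>, <0x4803b000 0x1000>;
	};
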
index 8b943082a2416e6b168534cff7f42b18740f8288..571ad9e13ecfb61b6ee78da8cec78d2bb08f0fe3 100644 (file)
@@ -74,7 +74,7 @@ select:
 
 properties:
   $nodename:
-    pattern: '^mux-controller(@.*|-[0-9a-f]+)?$'
+    pattern: '^mux-controller(@.*|-([0-9]|[1-9][0-9]+))?$'
 
   '#mux-control-cells':
     enum: [ 0, 1 ]
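
The tightened pattern keeps unit-address forms and decimal instance suffixes
without leading zeros; for instance (the gpio-mux provider and GPIO line are
assumptions):

	mux-controller-10 {	/* matches; "mux-controller-0a" no longer would */
		compatible = "gpio-mux";
		#mux-control-cells = <0>;
		mux-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>;
	};
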
index eba2f3026ab0ac54b096598e5218ecc6f87eb873..528ef3572b621e75ee6cadfe7e8f82652f54476d 100644 (file)
@@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Qualcomm Bluetooth Chips
 
 maintainers:
-  - Balakrishna Godavarthi <bgodavar@codeaurora.org>
-  - Rocky Liao <rjliao@codeaurora.org>
+  - Balakrishna Godavarthi <quic_bgodavar@quicinc.com>
+  - Rocky Liao <quic_rjliao@quicinc.com>
 
 description:
   This binding describes Qualcomm UART-attached bluetooth chips.
index 4162469c3c08a5bedb69856407bbd02cee57679c..f197d9b516bb2a5448b7eb4af3671604244f97c0 100644 (file)
@@ -38,6 +38,9 @@ properties:
               - fsl,imx6ul-flexcan
               - fsl,imx6sx-flexcan
           - const: fsl,imx6q-flexcan
+      - items:
+          - const: fsl,imx95-flexcan
+          - const: fsl,imx93-flexcan
       - items:
           - enum:
               - fsl,ls1028ar1-flexcan
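
The new i.MX95 entry pairs with the i.MX93 fallback, so a node would be
declared roughly like this (the address is an assumption):

	can@43800000 {
		compatible = "fsl,imx95-flexcan", "fsl,imx93-flexcan";
		reg = <0x43800000 0x10000>;
	};
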
index 45aa3de7cf014afe5297448d0f0309afce91c7e3..01e4d4a54df6a065049413babf7313a140ee8a4e 100644 (file)
@@ -24,7 +24,9 @@ properties:
     maxItems: 1
 
   clocks:
-    maxItems: 1
+    items:
+      - description: AHB peripheral clock
+      - description: CAN bus clock
 
 required:
   - compatible
@@ -39,7 +41,7 @@ examples:
     can@2010c000 {
         compatible = "microchip,mpfs-can";
         reg = <0x2010c000 0x1000>;
-        clocks = <&clkcfg 17>;
+        clocks = <&clkcfg 17>, <&clkcfg 37>;
         interrupt-parent = <&plic>;
         interrupts = <56>;
     };
diff --git a/Documentation/devicetree/bindings/phy/mediatek,mt8365-csi-rx.yaml b/Documentation/devicetree/bindings/phy/mediatek,mt8365-csi-rx.yaml
new file mode 100644 (file)
index 0000000..2127a57
--- /dev/null
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2023 MediaTek, BayLibre
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/mediatek,mt8365-csi-rx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Sensor Interface MIPI CSI CD-PHY
+
+maintainers:
+  - Julien Stephan <jstephan@baylibre.com>
+  - Andy Hsieh <andy.hsieh@mediatek.com>
+
+description:
+  The SENINF CD-PHY is a set of CD-PHYs connected to the SENINF CSI-2
+  receivers. The number of PHYs depends on the SoC model.
+  Depending on the SoC model, each PHY can be either CD-PHY or D-PHY-only
+  capable.
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt8365-csi-rx
+
+  reg:
+    maxItems: 1
+
+  num-lanes:
+    enum: [2, 3, 4]
+
+  '#phy-cells':
+    enum: [0, 1]
+    description: |
+      If the PHY doesn't support mode selection then #phy-cells must be 0 and
+      PHY mode is described using phy-type property.
+      If the PHY supports mode selection, then #phy-cells must be 1 and mode
+      is set in the PHY cells. Supported modes are:
+        - PHY_TYPE_DPHY
+        - PHY_TYPE_CPHY
+      See include/dt-bindings/phy/phy.h for constants.
+
+  phy-type:
+    description:
+      If the PHY doesn't support mode selection then this property sets the operating mode.
+      See include/dt-bindings/phy/phy.h for constants.
+    const: 10
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+required:
+  - compatible
+  - reg
+  - num-lanes
+  - '#phy-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/phy/phy.h>
+    soc {
+      #address-cells = <2>;
+      #size-cells = <2>;
+
+      csi0_rx: phy@11c10000 {
+        compatible = "mediatek,mt8365-csi-rx";
+        reg = <0 0x11c10000 0 0x2000>;
+        num-lanes = <2>;
+        #phy-cells = <1>;
+      };
+
+      csi1_rx: phy@11c12000 {
+        compatible = "mediatek,mt8365-csi-rx";
+        reg = <0 0x11c12000 0 0x2000>;
+        phy-type = <PHY_TYPE_DPHY>;
+        num-lanes = <2>;
+        #phy-cells = <0>;
+      };
+    };
+...
index dfb31314face761bd300a12c0066901fdbca6035..15dc8efe6ffe74f0fdb258db54481cc4d75d5091 100644 (file)
@@ -20,6 +20,7 @@ properties:
   compatible:
     enum:
       - cdns,torrent-phy
+      - ti,j7200-serdes-10g
       - ti,j721e-serdes-10g
 
   '#address-cells':
@@ -35,14 +36,18 @@ properties:
     minItems: 1
     maxItems: 2
     description:
-      PHY reference clock for 1 item. Must contain an entry in clock-names.
-      Optional Parent to enable output reference clock.
+      PHY input reference clocks - refclk (for PLL0) and pll1_refclk (for PLL1).
+      pll1_refclk is optional and used in multi-protocol configurations that
+      require a separate reference clock for each protocol. If no separate
+      pll1_refclk is provided, the same refclk is used for both PLL0 and PLL1.
+      An optional parent clock (phy_en_refclk) enables a reference clock output
+      feature on some platforms, outputting a derived or received reference clock.
 
   clock-names:
     minItems: 1
     items:
       - const: refclk
-      - const: phy_en_refclk
+      - enum: [ pll1_refclk, phy_en_refclk ]
 
   reg:
     minItems: 1
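
As a sketch of the clock description above, a PHY node using separate PLL
reference clocks would carry entries like the following (the clock phandles
are hypothetical):

    clocks = <&refclk_ser>, <&pll1_refclk_ser>;
    clock-names = "refclk", "pll1_refclk";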
diff --git a/Documentation/devicetree/bindings/phy/qcom,msm8998-qmp-usb3-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8998-qmp-usb3-phy.yaml
new file mode 100644 (file)
index 0000000..f1f4e4f
--- /dev/null
@@ -0,0 +1,184 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/qcom,msm8998-qmp-usb3-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm QMP PHY controller (USB, MSM8998)
+
+maintainers:
+  - Vinod Koul <vkoul@kernel.org>
+
+description:
+  The QMP PHY controller supports physical layer functionality for USB-C on
+  several Qualcomm chipsets.
+
+properties:
+  compatible:
+    enum:
+      - qcom,msm8998-qmp-usb3-phy
+      - qcom,qcm2290-qmp-usb3-phy
+      - qcom,sdm660-qmp-usb3-phy
+      - qcom,sm6115-qmp-usb3-phy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 4
+
+  clock-names:
+    maxItems: 4
+
+  resets:
+    maxItems: 2
+
+  reset-names:
+    items:
+      - const: phy
+      - const: phy_phy
+
+  vdda-phy-supply: true
+
+  vdda-pll-supply: true
+
+  "#clock-cells":
+    const: 0
+
+  clock-output-names:
+    maxItems: 1
+
+  "#phy-cells":
+    const: 0
+
+  orientation-switch:
+    description:
+      Flag the PHY as a possible handler of USB Type-C orientation switching
+    type: boolean
+
+  qcom,tcsr-reg:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    items:
+      - items:
+          - description: phandle to TCSR hardware block
+          - description: offset of the VLS CLAMP register
+    description: Clamp register present in the TCSR
+
+  ports:
+    $ref: /schemas/graph.yaml#/properties/ports
+    properties:
+      port@0:
+        $ref: /schemas/graph.yaml#/properties/port
+        description: Output endpoint of the PHY
+
+      port@1:
+        $ref: /schemas/graph.yaml#/properties/port
+        description: Incoming endpoint from the USB controller
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - vdda-phy-supply
+  - vdda-pll-supply
+  - "#clock-cells"
+  - clock-output-names
+  - "#phy-cells"
+  - qcom,tcsr-reg
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,msm8998-qmp-usb3-phy
+              - qcom,sdm660-qmp-usb3-phy
+    then:
+      properties:
+        clocks:
+          maxItems: 4
+        clock-names:
+          items:
+            - const: aux
+            - const: ref
+            - const: cfg_ahb
+            - const: pipe
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,qcm2290-qmp-usb3-phy
+              - qcom,sm6115-qmp-usb3-phy
+    then:
+      properties:
+        clocks:
+          maxItems: 4
+        clock-names:
+          items:
+            - const: cfg_ahb
+            - const: ref
+            - const: com_aux
+            - const: pipe
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-msm8998.h>
+    #include <dt-bindings/clock/qcom,rpmh.h>
+
+    phy@c010000 {
+      compatible = "qcom,msm8998-qmp-usb3-phy";
+      reg = <0x0c010000 0x1000>;
+
+      clocks = <&gcc GCC_USB3_PHY_AUX_CLK>,
+               <&gcc GCC_USB3_CLKREF_CLK>,
+               <&gcc GCC_USB_PHY_CFG_AHB2PHY_CLK>,
+               <&gcc GCC_USB3_PHY_PIPE_CLK>;
+      clock-names = "aux",
+                    "ref",
+                    "cfg_ahb",
+                    "pipe";
+      clock-output-names = "usb3_phy_pipe_clk_src";
+      #clock-cells = <0>;
+      #phy-cells = <0>;
+
+      resets = <&gcc GCC_USB3_PHY_BCR>,
+               <&gcc GCC_USB3PHY_PHY_BCR>;
+      reset-names = "phy",
+                    "phy_phy";
+
+      vdda-phy-supply = <&vreg_l1a_0p875>;
+      vdda-pll-supply = <&vreg_l2a_1p2>;
+
+      orientation-switch;
+
+      qcom,tcsr-reg = <&tcsr_regs_1 0x6b244>;
+
+      ports {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        port@0 {
+          reg = <0>;
+
+          endpoint {
+            remote-endpoint = <&pmic_typec_mux_in>;
+          };
+        };
+
+        port@1 {
+          reg = <1>;
+
+          endpoint {
+            remote-endpoint = <&usb_dwc3_ss>;
+          };
+        };
+      };
+    };
index 6c03f2d5fca3cca6ad0cccc4ae3f8679e4c59026..ba966a78a1283f5249a7c015c45d605845b85e41 100644 (file)
@@ -38,6 +38,8 @@ properties:
       - qcom,sm8550-qmp-gen4x2-pcie-phy
       - qcom,sm8650-qmp-gen3x2-pcie-phy
       - qcom,sm8650-qmp-gen4x2-pcie-phy
+      - qcom,x1e80100-qmp-gen3x2-pcie-phy
+      - qcom,x1e80100-qmp-gen4x2-pcie-phy
 
   reg:
     minItems: 1
@@ -151,6 +153,8 @@ allOf:
               - qcom,sm8550-qmp-gen4x2-pcie-phy
               - qcom,sm8650-qmp-gen3x2-pcie-phy
               - qcom,sm8650-qmp-gen4x2-pcie-phy
+              - qcom,x1e80100-qmp-gen3x2-pcie-phy
+              - qcom,x1e80100-qmp-gen4x2-pcie-phy
     then:
       properties:
         clocks:
@@ -194,6 +198,8 @@ allOf:
             enum:
               - qcom,sm8550-qmp-gen4x2-pcie-phy
               - qcom,sm8650-qmp-gen4x2-pcie-phy
+              - qcom,x1e80100-qmp-gen3x2-pcie-phy
+              - qcom,x1e80100-qmp-gen4x2-pcie-phy
     then:
       properties:
         resets:
index 8474eef8d0ff5233a075bf5c17ca0abaa14fbd41..91a6cc38ff7ff5d8df2fd33997b540e4338e9ab2 100644 (file)
@@ -19,6 +19,7 @@ properties:
       - qcom,msm8996-qmp-ufs-phy
       - qcom,msm8998-qmp-ufs-phy
       - qcom,sa8775p-qmp-ufs-phy
+      - qcom,sc7180-qmp-ufs-phy
       - qcom,sc7280-qmp-ufs-phy
       - qcom,sc8180x-qmp-ufs-phy
       - qcom,sc8280xp-qmp-ufs-phy
@@ -38,15 +39,12 @@ properties:
     maxItems: 1
 
   clocks:
-    minItems: 1
+    minItems: 2
     maxItems: 3
 
   clock-names:
-    minItems: 1
-    items:
-      - const: ref
-      - const: ref_aux
-      - const: qref
+    minItems: 2
+    maxItems: 3
 
   power-domains:
     maxItems: 1
@@ -86,22 +84,9 @@ allOf:
         compatible:
           contains:
             enum:
+              - qcom,msm8998-qmp-ufs-phy
               - qcom,sa8775p-qmp-ufs-phy
               - qcom,sc7280-qmp-ufs-phy
-              - qcom,sm8450-qmp-ufs-phy
-    then:
-      properties:
-        clocks:
-          minItems: 3
-        clock-names:
-          minItems: 3
-
-  - if:
-      properties:
-        compatible:
-          contains:
-            enum:
-              - qcom,msm8998-qmp-ufs-phy
               - qcom,sc8180x-qmp-ufs-phy
               - qcom,sc8280xp-qmp-ufs-phy
               - qcom,sdm845-qmp-ufs-phy
@@ -112,14 +97,19 @@ allOf:
               - qcom,sm8150-qmp-ufs-phy
               - qcom,sm8250-qmp-ufs-phy
               - qcom,sm8350-qmp-ufs-phy
+              - qcom,sm8450-qmp-ufs-phy
               - qcom,sm8550-qmp-ufs-phy
               - qcom,sm8650-qmp-ufs-phy
     then:
       properties:
         clocks:
-          maxItems: 2
+          minItems: 3
+          maxItems: 3
         clock-names:
-          maxItems: 2
+          items:
+            - const: ref
+            - const: ref_aux
+            - const: qref
 
   - if:
       properties:
@@ -130,22 +120,28 @@ allOf:
     then:
       properties:
         clocks:
-          maxItems: 1
+          minItems: 2
+          maxItems: 2
         clock-names:
-          maxItems: 1
+          items:
+            - const: ref
+            - const: qref
 
 additionalProperties: false
 
 examples:
   - |
     #include <dt-bindings/clock/qcom,gcc-sc8280xp.h>
+    #include <dt-bindings/clock/qcom,rpmh.h>
 
     ufs_mem_phy: phy@1d87000 {
         compatible = "qcom,sc8280xp-qmp-ufs-phy";
         reg = <0x01d87000 0x1000>;
 
-        clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
-        clock-names = "ref", "ref_aux";
+        clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_UFS_PHY_PHY_AUX_CLK>,
+                 <&gcc GCC_UFS_REF_CLKREF_CLK>;
+
+        clock-names = "ref", "ref_aux", "qref";
 
         power-domains = <&gcc UFS_PHY_GDSC>;
 
index 15d82c67f157b6ceadc366540fca0c200201d920..1e2d4ddc5391f10e2d1c983a4058bbe6225d4f42 100644 (file)
@@ -20,15 +20,12 @@ properties:
       - qcom,ipq8074-qmp-usb3-phy
       - qcom,ipq9574-qmp-usb3-phy
       - qcom,msm8996-qmp-usb3-phy
-      - qcom,msm8998-qmp-usb3-phy
-      - qcom,qcm2290-qmp-usb3-phy
       - qcom,sa8775p-qmp-usb3-uni-phy
       - qcom,sc8280xp-qmp-usb3-uni-phy
       - qcom,sdm845-qmp-usb3-uni-phy
       - qcom,sdx55-qmp-usb3-uni-phy
       - qcom,sdx65-qmp-usb3-uni-phy
       - qcom,sdx75-qmp-usb3-uni-phy
-      - qcom,sm6115-qmp-usb3-phy
       - qcom,sm8150-qmp-usb3-uni-phy
       - qcom,sm8250-qmp-usb3-uni-phy
       - qcom,sm8350-qmp-usb3-uni-phy
@@ -93,7 +90,6 @@ allOf:
               - qcom,ipq8074-qmp-usb3-phy
               - qcom,ipq9574-qmp-usb3-phy
               - qcom,msm8996-qmp-usb3-phy
-              - qcom,msm8998-qmp-usb3-phy
               - qcom,sdx55-qmp-usb3-uni-phy
               - qcom,sdx65-qmp-usb3-uni-phy
               - qcom,sdx75-qmp-usb3-uni-phy
@@ -108,24 +104,6 @@ allOf:
             - const: cfg_ahb
             - const: pipe
 
-  - if:
-      properties:
-        compatible:
-          contains:
-            enum:
-              - qcom,qcm2290-qmp-usb3-phy
-              - qcom,sm6115-qmp-usb3-phy
-    then:
-      properties:
-        clocks:
-          maxItems: 4
-        clock-names:
-          items:
-            - const: cfg_ahb
-            - const: ref
-            - const: com_aux
-            - const: pipe
-
   - if:
       properties:
         compatible:
diff --git a/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml b/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml
new file mode 100644 (file)
index 0000000..54e822c
--- /dev/null
@@ -0,0 +1,91 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/rockchip,rk3588-hdptx-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip SoC HDMI/eDP Transmitter Combo PHY
+
+maintainers:
+  - Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
+
+properties:
+  compatible:
+    enum:
+      - rockchip,rk3588-hdptx-phy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Reference clock
+      - description: APB clock
+
+  clock-names:
+    items:
+      - const: ref
+      - const: apb
+
+  "#phy-cells":
+    const: 0
+
+  resets:
+    items:
+      - description: PHY reset line
+      - description: APB reset line
+      - description: INIT reset line
+      - description: CMN reset line
+      - description: LANE reset line
+      - description: ROPLL reset line
+      - description: LCPLL reset line
+
+  reset-names:
+    items:
+      - const: phy
+      - const: apb
+      - const: init
+      - const: cmn
+      - const: lane
+      - const: ropll
+      - const: lcpll
+
+  rockchip,grf:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: Some PHY-related data is accessed through GRF registers.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - "#phy-cells"
+  - resets
+  - reset-names
+  - rockchip,grf
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/rockchip,rk3588-cru.h>
+    #include <dt-bindings/reset/rockchip,rk3588-cru.h>
+
+    soc {
+      #address-cells = <2>;
+      #size-cells = <2>;
+
+      phy@fed60000 {
+        compatible = "rockchip,rk3588-hdptx-phy";
+        reg = <0x0 0xfed60000 0x0 0x2000>;
+        clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>, <&cru PCLK_HDPTX0>;
+        clock-names = "ref", "apb";
+        #phy-cells = <0>;
+        resets = <&cru SRST_HDPTX0>, <&cru SRST_P_HDPTX0>,
+                 <&cru SRST_HDPTX0_INIT>, <&cru SRST_HDPTX0_CMN>,
+                 <&cru SRST_HDPTX0_LANE>, <&cru SRST_HDPTX0_ROPLL>,
+                 <&cru SRST_HDPTX0_LCPLL>;
+        reset-names = "phy", "apb", "init", "cmn", "lane", "ropll", "lcpll";
+        rockchip,grf = <&hdptxphy_grf>;
+      };
+    };
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx6ul-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx6ul-pinctrl.txt
deleted file mode 100644 (file)
index 7ca4f61..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-* Freescale i.MX6 UltraLite IOMUX Controller
-
-Please refer to fsl,imx-pinctrl.txt in this directory for common binding part
-and usage.
-
-Required properties:
-- compatible: "fsl,imx6ul-iomuxc" for main IOMUX controller or
-  "fsl,imx6ull-iomuxc-snvs" for i.MX 6ULL's SNVS IOMUX controller.
-- fsl,pins: each entry consists of 6 integers and represents the mux and config
-  setting for one pin.  The first 5 integers <mux_reg conf_reg input_reg mux_val
-  input_val> are specified using a PIN_FUNC_ID macro, which can be found in
-  imx6ul-pinfunc.h under device tree source folder.  The last integer CONFIG is
-  the pad setting value like pull-up on this pin.  Please refer to i.MX6 UltraLite
-  Reference Manual for detailed CONFIG settings.
-
-CONFIG bits definition:
-PAD_CTL_HYS                     (1 << 16)
-PAD_CTL_PUS_100K_DOWN           (0 << 14)
-PAD_CTL_PUS_47K_UP              (1 << 14)
-PAD_CTL_PUS_100K_UP             (2 << 14)
-PAD_CTL_PUS_22K_UP              (3 << 14)
-PAD_CTL_PUE                     (1 << 13)
-PAD_CTL_PKE                     (1 << 12)
-PAD_CTL_ODE                     (1 << 11)
-PAD_CTL_SPEED_LOW               (0 << 6)
-PAD_CTL_SPEED_MED               (1 << 6)
-PAD_CTL_SPEED_HIGH              (3 << 6)
-PAD_CTL_DSE_DISABLE             (0 << 3)
-PAD_CTL_DSE_260ohm              (1 << 3)
-PAD_CTL_DSE_130ohm              (2 << 3)
-PAD_CTL_DSE_87ohm               (3 << 3)
-PAD_CTL_DSE_65ohm               (4 << 3)
-PAD_CTL_DSE_52ohm               (5 << 3)
-PAD_CTL_DSE_43ohm               (6 << 3)
-PAD_CTL_DSE_37ohm               (7 << 3)
-PAD_CTL_SRE_FAST                (1 << 0)
-PAD_CTL_SRE_SLOW                (0 << 0)
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx6ul-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/fsl,imx6ul-pinctrl.yaml
new file mode 100644 (file)
index 0000000..906b264
--- /dev/null
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/fsl,imx6ul-pinctrl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale IMX6UL IOMUX Controller
+
+maintainers:
+  - Dong Aisheng <aisheng.dong@nxp.com>
+
+description:
+  Please refer to fsl,imx-pinctrl.txt and pinctrl-bindings.txt in this directory
+  for the common binding parts and usage.
+
+allOf:
+  - $ref: pinctrl.yaml#
+
+properties:
+  compatible:
+    enum:
+      - fsl,imx6ul-iomuxc
+      - fsl,imx6ull-iomuxc-snvs
+
+  reg:
+    maxItems: 1
+
+# Client device subnode's properties
+patternProperties:
+  'grp$':
+    type: object
+    description:
+      The pinctrl node's client devices use subnodes for their desired pin
+      configuration. Client device subnodes use the standard properties below.
+
+    properties:
+      fsl,pins:
+        description:
+          Each entry consists of 6 integers and represents the mux and config
+          setting for one pin. The first 5 integers <mux_reg conf_reg input_reg
+          mux_val input_val> are specified using a PIN_FUNC_ID macro, which can
+          be found in <arch/arm/boot/dts/imx6ul-pinfunc.h>. The last integer
+          CONFIG is the pad setting value, such as a pull-up, for this pin.
+          Please refer to the i.MX6UL Reference Manual for detailed CONFIG settings.
+        $ref: /schemas/types.yaml#/definitions/uint32-matrix
+        items:
+          items:
+            - description: |
+                "mux_reg" indicates the offset of mux register.
+            - description: |
+                "conf_reg" indicates the offset of pad configuration register.
+            - description: |
+                "input_reg" indicates the offset of select input register.
+            - description: |
+                "mux_val" indicates the mux value to be applied.
+            - description: |
+                "input_val" indicates the select input value to be applied.
+            - description: |
+                "pad_setting" indicates the pad configuration value to be applied:
+                  PAD_CTL_HYS                     (1 << 16)
+                  PAD_CTL_PUS_100K_DOWN           (0 << 14)
+                  PAD_CTL_PUS_47K_UP              (1 << 14)
+                  PAD_CTL_PUS_100K_UP             (2 << 14)
+                  PAD_CTL_PUS_22K_UP              (3 << 14)
+                  PAD_CTL_PUE                     (1 << 13)
+                  PAD_CTL_PKE                     (1 << 12)
+                  PAD_CTL_ODE                     (1 << 11)
+                  PAD_CTL_SPEED_LOW               (0 << 6)
+                  PAD_CTL_SPEED_MED               (1 << 6)
+                  PAD_CTL_SPEED_HIGH              (3 << 6)
+                  PAD_CTL_DSE_DISABLE             (0 << 3)
+                  PAD_CTL_DSE_260ohm              (1 << 3)
+                  PAD_CTL_DSE_130ohm              (2 << 3)
+                  PAD_CTL_DSE_87ohm               (3 << 3)
+                  PAD_CTL_DSE_65ohm               (4 << 3)
+                  PAD_CTL_DSE_52ohm               (5 << 3)
+                  PAD_CTL_DSE_43ohm               (6 << 3)
+                  PAD_CTL_DSE_37ohm               (7 << 3)
+                  PAD_CTL_SRE_FAST                (1 << 0)
+                  PAD_CTL_SRE_SLOW                (0 << 0)
+
+    required:
+      - fsl,pins
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    iomuxc: pinctrl@20e0000 {
+      compatible = "fsl,imx6ul-iomuxc";
+      reg = <0x020e0000 0x4000>;
+
+      mux_uart: uartgrp {
+        fsl,pins = <
+          0x0084 0x0310 0x0000 0 0 0x1b0b1
+          0x0088 0x0314 0x0624 0 3 0x1b0b1
+        >;
+      };
+    };
+  - |
+    iomuxc_snvs: pinctrl@2290000 {
+      compatible = "fsl,imx6ull-iomuxc-snvs";
+      reg = <0x02290000 0x4000>;
+
+      pinctrl_snvs_usbc_det: snvsusbcdetgrp {
+        fsl,pins = <
+          0x0010 0x0054 0x0000 0x5 0x0 0x130b0
+        >;
+      };
+    };
index 75bc20b95688f6b7ab74579b8a859b8cdbc2a00c..a6c8978964aa163250f070aa1ead247792fffa50 100644 (file)
@@ -27,7 +27,7 @@ List of legacy properties and respective binding document
                                Documentation/devicetree/bindings/mfd/tc3589x.txt
                                Documentation/devicetree/bindings/input/touchscreen/ads7846.txt
 4. "linux,keypad-wakeup"       Documentation/devicetree/bindings/input/qcom,pm8xxx-keypad.txt
-5. "linux,input-wakeup"                Documentation/devicetree/bindings/input/samsung-keypad.txt
+5. "linux,input-wakeup"                Documentation/devicetree/bindings/input/samsung,s3c6410-keypad.yaml
 6. "nvidia,wakeup-source"      Documentation/devicetree/bindings/input/nvidia,tegra20-kbc.txt
 
 Examples
diff --git a/Documentation/devicetree/bindings/reset/mobileye,eyeq5-reset.yaml b/Documentation/devicetree/bindings/reset/mobileye,eyeq5-reset.yaml
new file mode 100644 (file)
index 0000000..062b451
--- /dev/null
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/reset/mobileye,eyeq5-reset.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mobileye EyeQ5 reset controller
+
+description:
+  The EyeQ5 reset driver handles three reset domains. Its registers live in a
+  shared region called OLB.
+
+maintainers:
+  - Grégory Clement <gregory.clement@bootlin.com>
+  - Théo Lebrun <theo.lebrun@bootlin.com>
+  - Vladimir Kondratiev <vladimir.kondratiev@mobileye.com>
+
+properties:
+  compatible:
+    const: mobileye,eyeq5-reset
+
+  reg:
+    maxItems: 3
+
+  reg-names:
+    items:
+      - const: d0
+      - const: d1
+      - const: d2
+
+  "#reset-cells":
+    const: 2
+    description:
+      The first cell is the domain (0 to 2 inclusive) and the second one is the
+      reset index inside that domain.
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - "#reset-cells"
+
+additionalProperties: false
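
The schema above carries no examples block; a minimal sketch of a provider
node and a consumer using the two-cell specifier (the register offsets and the
consumer node are hypothetical) might look like:

    reset: reset-controller@e00000 {
        compatible = "mobileye,eyeq5-reset";
        reg = <0xe00000 0x400>, <0xe00400 0x400>, <0xe00800 0x400>;
        reg-names = "d0", "d1", "d2";
        #reset-cells = <2>;
    };

    uart0: serial@800000 {
        resets = <&reset 0 10>;  /* domain 0, reset index 10 */
    };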
index 3ce45456d867e1361c899c4433b538c2a3d16315..b38f8252342eeb7e846d3699f0799c00d340cdcd 100644 (file)
@@ -21,6 +21,10 @@ properties:
           - enum:
               - microchip,sama7g5-trng
           - const: atmel,at91sam9g45-trng
+      - items:
+          - enum:
+              - microchip,sam9x7-trng
+          - const: microchip,sam9x60-trng
 
   clocks:
     maxItems: 1
index a16c355dcd11320c050804878ddcfcee9e8bd653..fcf52d2cac9ec04eb79c144a78fdebf50b7f9295 100644 (file)
@@ -12,7 +12,7 @@ allOf:
 maintainers:
   - Alessandro Zummo <a.zummo@towertech.it>
   - Alexandre Belloni <alexandre.belloni@bootlin.com>
-  - Rob Herring <robh+dt@kernel.org>
+  - Rob Herring <robh@kernel.org>
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/soc/imx/fsl,imx-anatop.yaml b/Documentation/devicetree/bindings/soc/imx/fsl,imx-anatop.yaml
new file mode 100644 (file)
index 0000000..c4ae4f2
--- /dev/null
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/soc/imx/fsl,imx-anatop.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ANATOP register
+
+maintainers:
+  - Shawn Guo <shawnguo@kernel.org>
+  - Sascha Hauer <s.hauer@pengutronix.de>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - fsl,imx6sl-anatop
+              - fsl,imx6sll-anatop
+              - fsl,imx6sx-anatop
+              - fsl,imx6ul-anatop
+              - fsl,imx7d-anatop
+          - const: fsl,imx6q-anatop
+          - const: syscon
+          - const: simple-mfd
+      - items:
+          - const: fsl,imx6q-anatop
+          - const: syscon
+          - const: simple-mfd
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: Temperature sensor event
+      - description: Brown-out event on either of the support regulators
+      - description: Brown-out event on either the core, gpu or soc regulators
+
+  tempmon:
+    type: object
+    unevaluatedProperties: false
+    $ref: /schemas/thermal/imx-thermal.yaml
+
+patternProperties:
+  "regulator-((1p1)|(2p5)|(3p0)|(vddcore)|(vddpu)|(vddsoc))$":
+    type: object
+    unevaluatedProperties: false
+    $ref: /schemas/regulator/anatop-regulator.yaml
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/imx6ul-clock.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    anatop: anatop@20c8000 {
+        compatible = "fsl,imx6ul-anatop", "fsl,imx6q-anatop",
+                     "syscon", "simple-mfd";
+        reg = <0x020c8000 0x1000>;
+        interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
+
+        reg_3p0: regulator-3p0 {
+            compatible = "fsl,anatop-regulator";
+            regulator-name = "vdd3p0";
+            regulator-min-microvolt = <2625000>;
+            regulator-max-microvolt = <3400000>;
+            anatop-reg-offset = <0x120>;
+            anatop-vol-bit-shift = <8>;
+            anatop-vol-bit-width = <5>;
+            anatop-min-bit-val = <0>;
+            anatop-min-voltage = <2625000>;
+            anatop-max-voltage = <3400000>;
+            anatop-enable-bit = <0>;
+        };
+
+        reg_arm: regulator-vddcore {
+            compatible = "fsl,anatop-regulator";
+            regulator-name = "cpu";
+            regulator-min-microvolt = <725000>;
+            regulator-max-microvolt = <1450000>;
+            regulator-always-on;
+            anatop-reg-offset = <0x140>;
+            anatop-vol-bit-shift = <0>;
+            anatop-vol-bit-width = <5>;
+            anatop-delay-reg-offset = <0x170>;
+            anatop-delay-bit-shift = <24>;
+            anatop-delay-bit-width = <2>;
+            anatop-min-bit-val = <1>;
+            anatop-min-voltage = <725000>;
+            anatop-max-voltage = <1450000>;
+        };
+
+        reg_soc: regulator-vddsoc {
+            compatible = "fsl,anatop-regulator";
+            regulator-name = "vddsoc";
+            regulator-min-microvolt = <725000>;
+            regulator-max-microvolt = <1450000>;
+            regulator-always-on;
+            anatop-reg-offset = <0x140>;
+            anatop-vol-bit-shift = <18>;
+            anatop-vol-bit-width = <5>;
+            anatop-delay-reg-offset = <0x170>;
+            anatop-delay-bit-shift = <28>;
+            anatop-delay-bit-width = <2>;
+            anatop-min-bit-val = <1>;
+            anatop-min-voltage = <725000>;
+            anatop-max-voltage = <1450000>;
+        };
+
+        tempmon: tempmon {
+            compatible = "fsl,imx6ul-tempmon", "fsl,imx6sx-tempmon";
+            interrupt-parent = <&gpc>;
+            interrupts = <GIC_SPI 49 IRQ_TYPE_LEVEL_HIGH>;
+            fsl,tempmon = <&anatop>;
+            nvmem-cells = <&tempmon_calib>, <&tempmon_temp_grade>;
+            nvmem-cell-names = "calib", "temp_grade";
+            clocks = <&clks IMX6UL_CLK_PLL3_USB_OTG>;
+            #thermal-sensor-cells = <0>;
+        };
+    };
index 1da1b758b4ae50adc22e25cdfc51d2b111493c57..8451cb4dd87c6a91b7c3e3e663f343a00d58d394 100644 (file)
@@ -17,7 +17,23 @@ properties:
   compatible:
     oneOf:
       - items:
-          - const: fsl,imx8mq-iomuxc-gpr
+          - enum:
+              - fsl,imx6q-iomuxc-gpr
+              - fsl,imx8mq-iomuxc-gpr
+          - const: syscon
+          - const: simple-mfd
+      - items:
+          - enum:
+              - fsl,imx6sl-iomuxc-gpr
+              - fsl,imx6sll-iomuxc-gpr
+              - fsl,imx6ul-iomuxc-gpr
+          - const: fsl,imx6q-iomuxc-gpr
+          - const: syscon
+      - items:
+          - enum:
+              - fsl,imx6sx-iomuxc-gpr
+              - fsl,imx7d-iomuxc-gpr
+          - const: fsl,imx6q-iomuxc-gpr
           - const: syscon
           - const: simple-mfd
       - items:
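
For one of the newly added entries, a hypothetical node would chain the
fallback compatibles like so (the register address follows the i.MX6UL layout
and is illustrative):

    gpr: iomuxc-gpr@20e4000 {
        compatible = "fsl,imx6ul-iomuxc-gpr", "fsl,imx6q-iomuxc-gpr", "syscon";
        reg = <0x020e4000 0x4000>;
    };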
index 12b4aa8ef0dbe18842f9166342fa7fc3bdeb2773..c1d5c8ad1a36a5c81c458f2e10033f0c95810318 100644 (file)
@@ -9,7 +9,7 @@ Required properties:
           number for SPI.
 
 For required properties on I2C-bus, please consult
-Documentation/devicetree/bindings/i2c/i2c.txt
+dtschema schemas/i2c/i2c-controller.yaml
 For required properties on SPI-bus, please consult
 Documentation/devicetree/bindings/spi/spi-bus.txt
 
index 36a17b250ccc75a2eb877f416c16744c6a01815d..a64f21a5f299a6493c334662b25fffacc2a63fb0 100644 (file)
@@ -15,6 +15,11 @@ I. For patch submitters
 
        "dt-bindings: <binding dir>: ..."
 
+     A few subsystems, like ASoC, media, regulators and SPI, expect the
+     prefixes in reverse order::
+
+       "<binding dir>: dt-bindings: ..."
+
      The 80 characters of the subject are precious. It is recommended to not
      use "Documentation" or "doc" because that is implied. All bindings are
      docs. Repeating "binding" again should also be avoided.
@@ -42,28 +47,18 @@ I. For patch submitters
      the code implementing the binding.
 
   6) Any compatible strings used in a chip or board DTS file must be
-     previously documented in the corresponding DT binding text file
+     previously documented in the corresponding DT binding file
      in Documentation/devicetree/bindings.  This rule applies even if
      the Linux device driver does not yet match on the compatible
      string.  [ checkpatch will emit warnings if this step is not
      followed as of commit bff5da4335256513497cc8c79f9a9d1665e09864
      ("checkpatch: add DT compatible string documentation checks"). ]
 
-  7) The wildcard "<chip>" may be used in compatible strings, as in
-     the following example:
-
-         - compatible: Must contain '"nvidia,<chip>-pcie",
-           "nvidia,tegra20-pcie"' where <chip> is tegra30, tegra132, ...
-
-     As in the above example, the known values of "<chip>" should be
-     documented if it is used.
-
-  8) If a documented compatible string is not yet matched by the
+  7) If a documented compatible string is not yet matched by the
      driver, the documentation should also include a compatible
-     string that is matched by the driver (as in the "nvidia,tegra20-pcie"
-     example above).
+     string that is matched by the driver.
 
-  9) Bindings are actively used by multiple projects other than the Linux
+  8) Bindings are actively used by multiple projects other than the Linux
      Kernel, extra care and consideration may need to be taken when making changes
      to existing bindings.
 
diff --git a/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
deleted file mode 100644 (file)
index b3e797e..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-MediaTek Timers
----------------
-
-MediaTek SoCs have different timers on different platforms,
-- CPUX (ARM/ARM64 System Timer)
-- GPT (General Purpose Timer)
-- SYST (System Timer)
-
-The proper timer will be selected automatically by driver.
-
-Required properties:
-- compatible should contain:
-       For those SoCs that use GPT
-       * "mediatek,mt2701-timer" for MT2701 compatible timers (GPT)
-       * "mediatek,mt6580-timer" for MT6580 compatible timers (GPT)
-       * "mediatek,mt6582-timer" for MT6582 compatible timers (GPT)
-       * "mediatek,mt6589-timer" for MT6589 compatible timers (GPT)
-       * "mediatek,mt7623-timer" for MT7623 compatible timers (GPT)
-       * "mediatek,mt8127-timer" for MT8127 compatible timers (GPT)
-       * "mediatek,mt8135-timer" for MT8135 compatible timers (GPT)
-       * "mediatek,mt8173-timer" for MT8173 compatible timers (GPT)
-       * "mediatek,mt8516-timer" for MT8516 compatible timers (GPT)
-       * "mediatek,mt6577-timer" for MT6577 and all above compatible timers (GPT)
-
-       For those SoCs that use SYST
-       * "mediatek,mt8183-timer" for MT8183 compatible timers (SYST)
-       * "mediatek,mt8186-timer" for MT8186 compatible timers (SYST)
-       * "mediatek,mt8188-timer" for MT8188 compatible timers (SYST)
-       * "mediatek,mt8192-timer" for MT8192 compatible timers (SYST)
-       * "mediatek,mt8195-timer" for MT8195 compatible timers (SYST)
-       * "mediatek,mt7629-timer" for MT7629 compatible timers (SYST)
-       * "mediatek,mt6765-timer" for MT6765 and all above compatible timers (SYST)
-
-       For those SoCs that use CPUX
-       * "mediatek,mt6795-systimer" for MT6795 compatible timers (CPUX)
-       * "mediatek,mt8365-systimer" for MT8365 compatible timers (CPUX)
-
-- reg: Should contain location and length for timer register.
-- clocks: Should contain system clock.
-
-Examples:
-
-       timer@10008000 {
-               compatible = "mediatek,mt6577-timer";
-               reg = <0x10008000 0x80>;
-               interrupts = <GIC_SPI 113 IRQ_TYPE_LEVEL_LOW>;
-               clocks = <&system_clk>;
-       };
diff --git a/Documentation/devicetree/bindings/timer/mediatek,timer.yaml b/Documentation/devicetree/bindings/timer/mediatek,timer.yaml
new file mode 100644 (file)
index 0000000..f68fc70
--- /dev/null
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/timer/mediatek,timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek SoC timers
+
+maintainers:
+  - Matthias Brugger <matthias.bgg@gmail.com>
+
+description:
+  MediaTek SoCs have different timers on different platforms:
+  CPUX (ARM/ARM64 System Timer), GPT (General Purpose Timer)
+  and SYST (System Timer).
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - mediatek,mt6577-timer
+              - mediatek,mt6765-timer
+              - mediatek,mt6795-systimer
+      # GPT Timers
+      - items:
+          - enum:
+              - mediatek,mt2701-timer
+              - mediatek,mt6580-timer
+              - mediatek,mt6582-timer
+              - mediatek,mt6589-timer
+              - mediatek,mt7623-timer
+              - mediatek,mt8127-timer
+              - mediatek,mt8135-timer
+              - mediatek,mt8173-timer
+              - mediatek,mt8516-timer
+          - const: mediatek,mt6577-timer
+      # SYST Timers
+      - items:
+          - enum:
+              - mediatek,mt7629-timer
+              - mediatek,mt8183-timer
+              - mediatek,mt8186-timer
+              - mediatek,mt8188-timer
+              - mediatek,mt8192-timer
+              - mediatek,mt8195-timer
+              - mediatek,mt8365-systimer
+          - const: mediatek,mt6765-timer
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 1
+    items:
+      - description: Timer clock
+      - description: RTC or bus clock
+
+  clock-names:
+    minItems: 1
+    maxItems: 2
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    timer@10008000 {
+      compatible = "mediatek,mt6577-timer";
+      reg = <0x10008000 0x80>;
+      interrupts = <GIC_SPI 113 IRQ_TYPE_LEVEL_LOW>;
+      clocks = <&system_clk>;
+    };
index 1ee4aab695d38440c85af71c994f44c0b126a97d..fe6bc4173789da478ef2e66993389f25e4c5fd34 100644 (file)
@@ -9,7 +9,7 @@ title: Marvell MMP Timer
 maintainers:
   - Daniel Lezcano <daniel.lezcano@linaro.org>
   - Thomas Gleixner <tglx@linutronix.de>
-  - Rob Herring <robh+dt@kernel.org>
+  - Rob Herring <robh@kernel.org>
 
 properties:
   $nodename:
index 2210964faaf69b4a1844dfcb971a536073d022b5..e07be7bf8395f7dd29f59e1028ce87311b90b19c 100644 (file)
@@ -28,6 +28,7 @@ properties:
 
   compatible:
     items:
+      # Entries are sorted alphanumerically by the compatible
       - enum:
             # Acbel fsg032 power supply
           - acbel,fsg032
@@ -51,12 +52,12 @@ properties:
           - asteralabs,pt5161l
             # i2c serial eeprom (24cxx)
           - at,24c08
+            # i2c h/w elliptic curve crypto module
+          - atmel,atecc508a
             # ATSHA204 - i2c h/w symmetric crypto module
           - atmel,atsha204
             # ATSHA204A - i2c h/w symmetric crypto module
           - atmel,atsha204a
-            # i2c h/w elliptic curve crypto module
-          - atmel,atecc508a
             # BPA-RS600: Power Supply
           - blutek,bpa-rs600
            # Bosch Sensortec pressure, temperature, humidity and VOC sensor
@@ -117,22 +118,6 @@ properties:
           - fsl,mpl3115
             # MPR121: Proximity Capacitive Touch Sensor Controller
           - fsl,mpr121
-            # Monolithic Power Systems Inc. multi-phase controller mp2856
-          - mps,mp2856
-            # Monolithic Power Systems Inc. multi-phase controller mp2857
-          - mps,mp2857
-            # Monolithic Power Systems Inc. multi-phase controller mp2888
-          - mps,mp2888
-            # Monolithic Power Systems Inc. multi-phase controller mp2971
-          - mps,mp2971
-            # Monolithic Power Systems Inc. multi-phase controller mp2973
-          - mps,mp2973
-            # Monolithic Power Systems Inc. multi-phase controller mp2975
-          - mps,mp2975
-            # Monolithic Power Systems Inc. multi-phase hot-swap controller mp5990
-          - mps,mp5990
-            # Monolithic Power Systems Inc. synchronous step-down converter mpq8785
-          - mps,mpq8785
             # Honeywell Humidicon HIH-6130 humidity/temperature sensor
           - honeywell,hi6130
             # IBM Common Form Factor Power Supply Versions (all versions)
@@ -141,6 +126,8 @@ properties:
           - ibm,cffps1
             # IBM Common Form Factor Power Supply Versions 2
           - ibm,cffps2
+            # Infineon barometric pressure and temperature sensor
+          - infineon,dps310
             # Infineon IR36021 digital POL buck controller
           - infineon,ir36021
             # Infineon IRPS5401 Voltage Regulator (PMIC)
@@ -191,6 +178,8 @@ properties:
           - maxim,max1237
             # Temperature Sensor, I2C interface
           - maxim,max1619
+            # 3-Channel Remote Temperature Sensor
+          - maxim,max31730
             # 10-bit 10 kOhm linear programmable voltage divider
           - maxim,max5481
             # 10-bit 50 kOhm linear programmable voltage divider
@@ -203,8 +192,6 @@ properties:
           - maxim,max6621
             # 9-Bit/12-Bit Temperature Sensors with I²C-Compatible Serial Interface
           - maxim,max6625
-            # 3-Channel Remote Temperature Sensor
-          - maxim,max31730
             # mCube 3-axis 8-bit digital accelerometer
           - mcube,mc3230
             # Measurement Specialities I2C temperature and humidity sensor
@@ -235,8 +222,6 @@ properties:
           - memsic,mxc6655
             # Menlo on-board CPLD trivial SPI device
           - menlo,m53cpld
-            # Micron SPI NOR Authenta
-          - micron,spi-authenta
             # Microchip differential I2C ADC, 1 Channel, 18 bit
           - microchip,mcp3421
             # Microchip differential I2C ADC, 2 Channel, 18 bit
@@ -253,40 +238,58 @@ properties:
           - microchip,mcp3427
             # Microchip differential I2C ADC, 4 Channel, 16 bit
           - microchip,mcp3428
-            # Microchip 7-bit Single I2C Digital POT (5k)
-          - microchip,mcp4017-502
             # Microchip 7-bit Single I2C Digital POT (10k)
           - microchip,mcp4017-103
-            # Microchip 7-bit Single I2C Digital POT (50k)
-          - microchip,mcp4017-503
             # Microchip 7-bit Single I2C Digital POT (100k)
           - microchip,mcp4017-104
             # Microchip 7-bit Single I2C Digital POT (5k)
-          - microchip,mcp4018-502
+          - microchip,mcp4017-502
+            # Microchip 7-bit Single I2C Digital POT (50k)
+          - microchip,mcp4017-503
             # Microchip 7-bit Single I2C Digital POT (10k)
           - microchip,mcp4018-103
-            # Microchip 7-bit Single I2C Digital POT (50k)
-          - microchip,mcp4018-503
             # Microchip 7-bit Single I2C Digital POT (100k)
           - microchip,mcp4018-104
             # Microchip 7-bit Single I2C Digital POT (5k)
-          - microchip,mcp4019-502
+          - microchip,mcp4018-502
+            # Microchip 7-bit Single I2C Digital POT (50k)
+          - microchip,mcp4018-503
             # Microchip 7-bit Single I2C Digital POT (10k)
           - microchip,mcp4019-103
-            # Microchip 7-bit Single I2C Digital POT (50k)
-          - microchip,mcp4019-503
             # Microchip 7-bit Single I2C Digital POT (100k)
           - microchip,mcp4019-104
+            # Microchip 7-bit Single I2C Digital POT (5k)
+          - microchip,mcp4019-502
+            # Microchip 7-bit Single I2C Digital POT (50k)
+          - microchip,mcp4019-503
             # PWM Fan Speed Controller With Fan Fault Detection
           - microchip,tc654
             # PWM Fan Speed Controller With Fan Fault Detection
           - microchip,tc655
+            # Micron SPI NOR Authenta
+          - micron,spi-authenta
             # MiraMEMS DA226 2-axis 14-bit digital accelerometer
           - miramems,da226
             # MiraMEMS DA280 3-axis 14-bit digital accelerometer
           - miramems,da280
             # MiraMEMS DA311 3-axis 12-bit digital accelerometer
           - miramems,da311
+            # Monolithic Power Systems Inc. multi-phase controller mp2856
+          - mps,mp2856
+            # Monolithic Power Systems Inc. multi-phase controller mp2857
+          - mps,mp2857
+            # Monolithic Power Systems Inc. multi-phase controller mp2888
+          - mps,mp2888
+            # Monolithic Power Systems Inc. multi-phase controller mp2971
+          - mps,mp2971
+            # Monolithic Power Systems Inc. multi-phase controller mp2973
+          - mps,mp2973
+            # Monolithic Power Systems Inc. multi-phase controller mp2975
+          - mps,mp2975
+            # Monolithic Power Systems Inc. multi-phase hot-swap controller mp5990
+          - mps,mp5990
+            # Monolithic Power Systems Inc. synchronous step-down converter mpq8785
+          - mps,mpq8785
             # Temperature sensor with integrated fan control
           - national,lm63
             # Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor
@@ -317,12 +320,12 @@ properties:
           - samsung,exynos-sataphy-i2c
             # Semtech sx1301 baseband processor
           - semtech,sx1301
-            # Sensirion low power multi-pixel gas sensor with I2C interface
-          - sensirion,sgpc3
             # Sensirion multi-pixel gas sensor with I2C interface
           - sensirion,sgp30
             # Sensirion gas sensor with I2C interface
           - sensirion,sgp40
+            # Sensirion low power multi-pixel gas sensor with I2C interface
+          - sensirion,sgpc3
             # Sensirion temperature & humidity sensor with I2C interface
           - sensirion,sht4x
             # Sensortek 3 axis accelerometer
@@ -368,8 +371,6 @@ properties:
           - ti,lm74
             # Temperature sensor with integrated fan control
           - ti,lm96000
-            # I2C Touch-Screen Controller
-          - ti,tsc2003
             # Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface
           - ti,tmp103
             # Thermometer with SPI interface
@@ -391,10 +392,12 @@ properties:
           - ti,tps544b25
           - ti,tps544c20
           - ti,tps544c25
-            # Winbond/Nuvoton H/W Monitor
-          - winbond,w83793
+            # I2C Touch-Screen Controller
+          - ti,tsc2003
             # Vicor Corporation Digital Supervisor
           - vicor,pli1209bc
+            # Winbond/Nuvoton H/W Monitor
+          - winbond,w83793
 
 required:
   - compatible
index 47add0d85fb891793c4d1c67f93fe16983309db7..28096619a882712d70e7522f3ea4b99438db27d2 100644 (file)
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
 %YAML 1.2
 ---
 $id: http://devicetree.org/schemas/usb/cypress,hx3.yaml#
index ace572665cf04167343371b25bd3ed376474c91c..04505cb0b640f1b996606ea85c3d84206d649db8 100644 (file)
@@ -238,6 +238,8 @@ patternProperties:
     description: ByteDance Ltd.
   "^calamp,.*":
     description: CalAmp Corp.
+  "^calao,.*":
+    description: CALAO Systems SAS
   "^calaosystems,.*":
     description: CALAO Systems SAS
   "^calxeda,.*":
@@ -486,6 +488,9 @@ patternProperties:
     description: EZchip Semiconductor
   "^facebook,.*":
     description: Facebook
+  "^fairchild,.*":
+    description: Fairchild Semiconductor (deprecated, use 'onnn')
+    deprecated: true
   "^fairphone,.*":
     description: Fairphone B.V.
   "^faraday,.*":
@@ -552,6 +557,8 @@ patternProperties:
     description: Giantec Semiconductor, Inc.
   "^giantplus,.*":
     description: Giantplus Technology Co., Ltd.
+  "^glinet,.*":
+    description: GL Intelligence, Inc.
   "^globalscale,.*":
     description: Globalscale Technologies, Inc.
   "^globaltop,.*":
@@ -611,6 +618,8 @@ patternProperties:
     description: Honestar Technologies Co., Ltd.
   "^honeywell,.*":
     description: Honeywell
+  "^hoperf,.*":
+    description: Shenzhen Hope Microelectronics Co., Ltd.
   "^hoperun,.*":
     description: Jiangsu HopeRun Software Co., Ltd.
   "^hp,.*":
@@ -641,12 +650,16 @@ patternProperties:
     description: Hyundai Technology
   "^i2se,.*":
     description: I2SE GmbH
+  "^IBM,.*":
+    description: International Business Machines (IBM)
   "^ibm,.*":
     description: International Business Machines (IBM)
   "^icplus,.*":
     description: IC Plus Corp.
   "^idt,.*":
     description: Integrated Device Technologies, Inc.
+  "^iei,.*":
+    description: IEI Integration Corp.
   "^ifi,.*":
     description: Ingenieurburo Fur Ic-Technologie (I/F/I)
   "^ilitek,.*":
@@ -833,6 +846,8 @@ patternProperties:
     description: LSI Corp. (LSI Logic)
   "^lunzn,.*":
     description: Shenzhen Lunzn Technology Co., Ltd.
+  "^luxul,.*":
+    description: Legrand | AV
   "^lwn,.*":
     description: Liebherr-Werk Nenzing GmbH
   "^lxa,.*":
@@ -911,6 +926,9 @@ patternProperties:
     description: Miniand Tech
   "^minix,.*":
     description: MINIX Technology Ltd.
+  "^mips,.*":
+    description: MIPS Technology (deprecated, use 'mti' or 'img')
+    deprecated: true
   "^miramems,.*":
     description: MiraMEMS Sensing Technology Co., Ltd.
   "^mitsubishi,.*":
@@ -923,6 +941,8 @@ patternProperties:
     description: Miyoo
   "^mntre,.*":
     description: MNT Research GmbH
+  "^mobileye,.*":
+    description: Mobileye Vision Technologies Ltd.
   "^modtronix,.*":
     description: Modtronix Engineering
   "^moortec,.*":
@@ -1005,6 +1025,9 @@ patternProperties:
     description: Novatek
   "^novtech,.*":
     description: NovTech, Inc.
+  "^numonyx,.*":
+    description: Numonyx (deprecated, use micron)
+    deprecated: true
   "^nutsboard,.*":
     description: NutsBoard
   "^nuvoton,.*":
@@ -1309,6 +1332,8 @@ patternProperties:
     description: Skyworks Solutions, Inc.
   "^smartlabs,.*":
     description: SmartLabs LLC
+  "^smartrg,.*":
+    description: SmartRG, Inc.
   "^smi,.*":
     description: Silicon Motion Technology Corporation
   "^smsc,.*":
@@ -1550,8 +1575,12 @@ patternProperties:
     description: Voipac Technologies s.r.o.
   "^vot,.*":
     description: Vision Optical Technology Co., Ltd.
+  "^vscom,.*":
+    description: VS Vision Systems GmbH
   "^vxt,.*":
     description: VXT Ltd
+  "^wacom,.*":
+    description: Wacom
   "^wanchanglong,.*":
     description: Wanchanglong Electronics Technology(SHENZHEN)Co.,Ltd.
   "^wand,.*":
index 7aea255b301be555d4721bfa83c9d99b7ae0e8e3..bd7c09ed1938550b031a64001cf5d498cb34b368 100644 (file)
@@ -50,6 +50,10 @@ properties:
       - const: wdog_clk
       - const: apb_pclk
 
+  resets:
+    maxItems: 1
+    description: WDOGRESn input reset signal for the sp805 module.
+
 required:
   - compatible
   - reg
@@ -67,4 +71,5 @@ examples:
         interrupts = <GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>;
         clocks = <&wdt_clk>, <&apb_pclk>;
         clock-names = "wdog_clk", "apb_pclk";
+        resets = <&wdt_rst>;
     };
index 816f85ee2c772220fb1f0ed36ea7c2f771a3f4df..cdf87db361837e1a9764e7a00b75523dec3a9964 100644 (file)
@@ -14,10 +14,14 @@ allOf:
 
 properties:
   compatible:
-    enum:
-      - atmel,sama5d4-wdt
-      - microchip,sam9x60-wdt
-      - microchip,sama7g5-wdt
+    oneOf:
+      - enum:
+          - atmel,sama5d4-wdt
+          - microchip,sam9x60-wdt
+          - microchip,sama7g5-wdt
+      - items:
+          - const: microchip,sam9x7-wdt
+          - const: microchip,sam9x60-wdt
 
   reg:
     maxItems: 1
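
Under the extended compatible list, a sam9x7 node would list the specific
string followed by the sam9x60 fallback; a hypothetical fragment (the register
address is illustrative):

    watchdog@ffffff80 {
        compatible = "microchip,sam9x7-wdt", "microchip,sam9x60-wdt";
        reg = <0xffffff80 0x24>;
    };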
diff --git a/Documentation/devicetree/bindings/watchdog/brcm,bcm2835-pm-wdog.txt b/Documentation/devicetree/bindings/watchdog/brcm,bcm2835-pm-wdog.txt
deleted file mode 100644 (file)
index f801d71..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-BCM2835 Watchdog timer
-
-Required properties:
-
-- compatible : should be "brcm,bcm2835-pm-wdt"
-- reg : Specifies base physical address and size of the registers.
-
-Optional properties:
-
-- timeout-sec   : Contains the watchdog timeout in seconds
-
-Example:
-
-watchdog {
-       compatible = "brcm,bcm2835-pm-wdt";
-       reg = <0x7e100000 0x28>;
-       timeout-sec = <10>;
-};
index a4f35c598cdb54aa4142bec2085e5963056fcbf6..47587971fb0b7c3aa0a3d91ce03f93a10f6389ee 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Qualcomm Krait Processor Sub-system (KPSS) Watchdog timer
 
 maintainers:
-  - Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
+  - Rajendra Nayak <quic_rjendra@quicinc.com>
 
 properties:
   $nodename:
index 951a7d54135a684939ba940e112c1736bbfbbcf3..ffb17add491af98b703ac67a2c45830bec9e962b 100644 (file)
@@ -71,6 +71,7 @@ properties:
               - renesas,r8a779a0-wdt     # R-Car V3U
               - renesas,r8a779f0-wdt     # R-Car S4-8
               - renesas,r8a779g0-wdt     # R-Car V4H
+              - renesas,r8a779h0-wdt     # R-Car V4M
           - const: renesas,rcar-gen4-wdt # R-Car Gen4
 
   reg:
diff --git a/Documentation/devicetree/bindings/watchdog/sprd,sp9860-wdt.yaml b/Documentation/devicetree/bindings/watchdog/sprd,sp9860-wdt.yaml
new file mode 100644 (file)
index 0000000..730d9a3
--- /dev/null
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/sprd,sp9860-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Spreadtrum SP9860 watchdog timer
+
+maintainers:
+  - Orson Zhai <orsonzhai@gmail.com>
+  - Baolin Wang <baolin.wang7@gmail.com>
+  - Chunyan Zhang <zhang.lyra@gmail.com>
+
+allOf:
+  - $ref: watchdog.yaml#
+
+properties:
+  compatible:
+    const: sprd,sp9860-wdt
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 2
+
+  clock-names:
+    items:
+      - const: enable
+      - const: rtc_enable
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - timeout-sec
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/sprd,sc9860-clk.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        watchdog@40310000 {
+            compatible = "sprd,sp9860-wdt";
+            reg = <0 0x40310000 0 0x1000>;
+            interrupts = <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>;
+            clocks = <&aon_gate CLK_APCPU_WDG_EB>, <&aon_gate CLK_AP_WDG_RTC_EB>;
+            clock-names = "enable", "rtc_enable";
+            timeout-sec = <12>;
+        };
+    };
+...
diff --git a/Documentation/devicetree/bindings/watchdog/sprd-wdt.txt b/Documentation/devicetree/bindings/watchdog/sprd-wdt.txt
deleted file mode 100644 (file)
index aeaf3e0..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-Spreadtrum SoCs Watchdog timer
-
-Required properties:
-- compatible : Should be "sprd,sp9860-wdt".
-- reg : Specifies base physical address and size of the registers.
-- interrupts : Exactly one interrupt specifier.
-- timeout-sec : Contain the default watchdog timeout in seconds.
-- clock-names : Contain the input clock names.
-- clocks : Phandles to input clocks.
-
-Example:
-       watchdog: watchdog@40310000 {
-               compatible = "sprd,sp9860-wdt";
-               reg = <0 0x40310000 0 0x1000>;
-               interrupts = <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>;
-               timeout-sec = <12>;
-               clock-names = "enable", "rtc_enable";
-               clocks = <&clk_aon_apb_gates1 8>, <&clk_aon_apb_rtc_gates 9>;
-       };
index 68f3f6fd08a628781752dc64b54e7f856d34157a..e21f807b0b69ff317ac978dd173b3f0cce4ada88 100644 (file)
@@ -19,14 +19,16 @@ description:
   isn't cleared, the watchdog will reset the system unless the watchdog
   reset is disabled.
 
-allOf:
-  - $ref: watchdog.yaml#
-
 properties:
   compatible:
-    enum:
-      - starfive,jh7100-wdt
-      - starfive,jh7110-wdt
+    oneOf:
+      - enum:
+          - starfive,jh7100-wdt
+          - starfive,jh7110-wdt
+      - items:
+          - enum:
+              - starfive,jh8100-wdt
+          - const: starfive,jh7110-wdt
 
   reg:
     maxItems: 1
@@ -45,9 +47,8 @@ properties:
       - const: core
 
   resets:
-    items:
-      - description: APB reset
-      - description: Core reset
+    minItems: 1
+    maxItems: 2
 
 required:
   - compatible
@@ -56,6 +57,27 @@ required:
   - clock-names
   - resets
 
+allOf:
+  - $ref: watchdog.yaml#
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - starfive,jh8100-wdt
+    then:
+      properties:
+        resets:
+          items:
+            - description: Core reset
+    else:
+      properties:
+        resets:
+          items:
+            - description: APB reset
+            - description: Core reset
+
 unevaluatedProperties: false
 
 examples:
index 0a6cf19a14599b818164426a28881f2a6f4be4f6..7e71cdd1d6ded6cb3b6b4ae76d6249e2504a1cd5 100644 (file)
@@ -31,7 +31,7 @@ $schema
   Indicates the meta-schema the schema file adheres to.
 
 title
-  A one-line description on the contents of the binding schema.
+  A one-line description of the hardware being described in the binding schema.
 
 maintainers
   A DT specific property. Contains a list of email address(es)
@@ -39,7 +39,7 @@ maintainers
 
 description
   Optional. A multi-line text block containing any detailed
-  information about this binding. It should contain things such as what the block
+  information about this hardware. It should contain things such as what the block
   or device does, standards the device conforms to, and links to datasheets for
   more information.
 
@@ -71,9 +71,31 @@ required
   A list of DT properties from the 'properties' section that
   must always be present.
 
+additionalProperties / unevaluatedProperties
+  Keywords controlling how the schema will validate properties not matched by
+  this schema's 'properties' or 'patternProperties'. Each schema is supposed
+  to have exactly one of these keywords at the top level, i.e. either
+  additionalProperties or unevaluatedProperties. Nested nodes, i.e. properties
+  that are objects, are supposed to have one as well.
+
+  * additionalProperties: false
+      The most common case, where no additional schema is referenced or where
+      this binding allows only a subset of properties from referenced schemas.
+
+  * unevaluatedProperties: false
+      Used when this binding references another schema whose properties should
+      all be allowed.
+
+  * additionalProperties: true
+      A rare case, used for schemas implementing a common set of properties.
+      Such schemas are supposed to be referenced by other schemas, which then
+      use 'unevaluatedProperties: false'. Typically bus or common-part schemas.
+
 examples
-  Optional. A list of one or more DTS hunks implementing the
-  binding. Note: YAML doesn't allow leading tabs, so spaces must be used instead.
+  Optional. A list of one or more DTS hunks implementing this binding only.
+  An example should not contain unrelated device nodes, e.g. consumer nodes in
+  a provider binding or other nodes referenced by phandle.
+  Note: YAML doesn't allow leading tabs, so spaces must be used instead.
 
 Unless noted otherwise, all properties are required.
 
index 776eec72bc80a1176663daf2b76502163e230604..5d4451339b7fbc4f5fd9af0978a0118a35b0f592 100644 (file)
@@ -2,59 +2,16 @@
 
 .. include:: <isonum.txt>
 
+.. _media-ccs-driver:
+
 MIPI CCS camera sensor driver
 =============================
 
 The MIPI CCS camera sensor driver is a generic driver for `MIPI CCS
 <https://www.mipi.org/specifications/camera-command-set>`_ compliant
-camera sensors. It exposes three sub-devices representing the pixel array,
-the binner and the scaler.
-
-As the capabilities of individual devices vary, the driver exposes
-interfaces based on the capabilities that exist in hardware.
-
-Pixel Array sub-device
-----------------------
-
-The pixel array sub-device represents the camera sensor's pixel matrix, as well
-as analogue crop functionality present in many compliant devices. The analogue
-crop is configured using the ``V4L2_SEL_TGT_CROP`` on the source pad (0) of the
-entity. The size of the pixel matrix can be obtained by getting the
-``V4L2_SEL_TGT_NATIVE_SIZE`` target.
-
-Binner
-------
-
-The binner sub-device represents the binning functionality on the sensor. For
-that purpose, selection target ``V4L2_SEL_TGT_COMPOSE`` is supported on the
-sink pad (0).
-
-Additionally, if a device has no scaler or digital crop functionality, the
-source pad (1) exposes another digital crop selection rectangle that can only
-crop at the end of the lines and frames.
-
-Scaler
-------
-
-The scaler sub-device represents the digital crop and scaling functionality of
-the sensor. The V4L2 selection target ``V4L2_SEL_TGT_CROP`` is used to
-configure the digital crop on the sink pad (0) when digital crop is supported.
-Scaling is configured using selection target ``V4L2_SEL_TGT_COMPOSE`` on the
-sink pad (0) as well.
-
-Additionally, if the scaler sub-device exists, its source pad (1) exposes
-another digital crop selection rectangle that can only crop at the end of the
-lines and frames.
-
-Digital and analogue crop
--------------------------
-
-Digital crop functionality is referred to as cropping that effectively works by
-dropping some data on the floor. Analogue crop, on the other hand, means that
-the cropped information is never retrieved. In case of camera sensors, the
-analogue data is never read from the pixel matrix that are outside the
-configured selection rectangle that designates crop. The difference has an
-effect in device timing and likely also in power consumption.
+camera sensors.
+
+Also see :ref:`the CCS driver UAPI documentation <media-ccs-uapi>`.
 
 CCS static data
 ---------------
index 1db2ba27c54c3de68c50ddf6861b9f5a40d3a214..13aec460e802f62a91d17bae78a9ac537bf97a81 100644 (file)
@@ -229,7 +229,7 @@ Asynchronous sub-device notifier for sub-devices
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 A driver that registers an asynchronous sub-device may also register an
-asynchronous notifier. This is called an asynchronous sub-device notifier andthe
+asynchronous notifier. This is called an asynchronous sub-device notifier and the
 process is similar to that of a bridge driver apart from that the notifier is
 initialised using :c:func:`v4l2_async_subdev_nf_init` instead. A sub-device
 notifier may complete only after the V4L2 device becomes available, i.e. there's
index b432a2de45d37b32001f9e2e41492a586a37bb8a..2a794484f62c970a95cd35fa688b98fc9bbf2a63 100644 (file)
@@ -324,12 +324,12 @@ framework, this stream state is linked to .hw_params() operation.
 
   int sdw_stream_add_master(struct sdw_bus * bus,
                struct sdw_stream_config * stream_config,
-               struct sdw_ports_config * ports_config,
+               const struct sdw_ports_config * ports_config,
                struct sdw_stream_runtime * stream);
 
   int sdw_stream_add_slave(struct sdw_slave * slave,
                struct sdw_stream_config * stream_config,
-               struct sdw_ports_config * ports_config,
+               const struct sdw_ports_config * ports_config,
                struct sdw_stream_runtime * stream);
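+
+As a hedged sketch of how a caller might use the now-const ports
+configuration (the struct field names and values below are illustrative
+assumptions, not taken from this document)::
+
+  struct sdw_port_config pconf = {
+          .num     = 1,           /* hypothetical data port number */
+          .ch_mask = 0x3,         /* hypothetical two-channel mask */
+  };
+
+  const struct sdw_ports_config ports_config = {
+          .num_ports   = 1,
+          .port_config = &pconf,
+  };
+
+  /* bus, stream_config and stream are set up as described above */
+  ret = sdw_stream_add_master(bus, &stream_config, &ports_config, stream);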
 
 
diff --git a/Documentation/filesystems/bcachefs/errorcodes.rst b/Documentation/filesystems/bcachefs/errorcodes.rst
new file mode 100644 (file)
index 0000000..2cccaa0
--- /dev/null
@@ -0,0 +1,30 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+bcachefs private error codes
+----------------------------
+
+In bcachefs, as a hard rule we do not throw or directly use standard error
+codes (-EINVAL, -EBUSY, etc.). Instead, we define private error codes as needed
+in fs/bcachefs/errcode.h.
+
+This gives us much better error messages and makes debugging much easier. Any
+direct uses of standard error codes you see in the source code are simply old
+code that has yet to be converted - feel free to clean it up!
+
+Private error codes may subtype another error code; this allows for grouping of
+related errors that should be handled similarly (e.g. transaction restart
+errors), as well as for specifying which standard error code should be returned
+at the bcachefs module boundary.
+
+At the module boundary, we use bch2_err_class() to convert to a standard error
+code; this also emits a trace event so that the original error code can be
+recovered even if it wasn't logged.
+
+Do not reuse error codes! Generally speaking, a private error code should only
+be thrown in one place. That means that when we see it in a log message we can
+see, unambiguously, exactly which file and line number it was returned from.
+
+Try to give error codes names that are as reasonably descriptive of the error
+as possible. Frequently, the error will be logged at a place far removed from
+where the error was generated; good names for error codes mean much more
+descriptive and useful error messages.
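+
+As a toy model of the mechanism (plain C, not the actual fs/bcachefs/errcode.h
+machinery; all names below are made up)::
+
+  #include <errno.h>
+  #include <stdio.h>
+
+  /* Private codes live outside the standard errno range. */
+  enum {
+          ERR_snapshot_too_small = 2048,  /* subtype of EINVAL */
+          ERR_need_snapshot_field,        /* subtype of EINVAL */
+          ERR_transaction_restart,        /* subtype of EINTR  */
+  };
+
+  /* Module-boundary conversion, akin to what bch2_err_class() does. */
+  static int err_class(int err)
+  {
+          switch (err) {
+          case -ERR_snapshot_too_small:
+          case -ERR_need_snapshot_field:
+                  return -EINVAL;
+          case -ERR_transaction_restart:
+                  return -EINTR;
+          default:
+                  return err;     /* already a standard code */
+          }
+  }
+
+  int main(void)
+  {
+          /* The private code pinpoints the throw site; the class is
+             what callers outside the module see. */
+          printf("%d\n", err_class(-ERR_snapshot_too_small));
+          return 0;
+  }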
index d32c6209685d64bae996ed110ea9cfe7543fa14c..efc3493fd6f84b92b986da63c44e8629bd042f02 100644 (file)
@@ -126,9 +126,7 @@ norecovery           Disable the roll-forward recovery routine, mounted read-
 discard/nodiscard       Enable/disable real-time discard in f2fs, if discard is
                         enabled, f2fs will issue discard/TRIM commands when a
                         segment is cleaned.
-no_heap                         Disable heap-style segment allocation which finds free
-                        segments for data from the beginning of main area, while
-                        for node from the end of main area.
+heap/no_heap            Deprecated.
 nouser_xattr            Disable Extended User Attributes. Note: xattr is enabled
                         by default if CONFIG_F2FS_FS_XATTR is selected.
 noacl                   Disable POSIX Access Control List. Note: acl is enabled
@@ -184,29 +182,31 @@ fault_type=%d              Support configuring fault injection type, should be
                         enabled with fault_injection option, fault type value
                         is shown below, it supports single or combined type.
 
-                        ===================      ===========
-                        Type_Name                Type_Value
-                        ===================      ===========
-                        FAULT_KMALLOC            0x000000001
-                        FAULT_KVMALLOC           0x000000002
-                        FAULT_PAGE_ALLOC         0x000000004
-                        FAULT_PAGE_GET           0x000000008
-                        FAULT_ALLOC_BIO          0x000000010 (obsolete)
-                        FAULT_ALLOC_NID          0x000000020
-                        FAULT_ORPHAN             0x000000040
-                        FAULT_BLOCK              0x000000080
-                        FAULT_DIR_DEPTH          0x000000100
-                        FAULT_EVICT_INODE        0x000000200
-                        FAULT_TRUNCATE           0x000000400
-                        FAULT_READ_IO            0x000000800
-                        FAULT_CHECKPOINT         0x000001000
-                        FAULT_DISCARD            0x000002000
-                        FAULT_WRITE_IO           0x000004000
-                        FAULT_SLAB_ALLOC         0x000008000
-                        FAULT_DQUOT_INIT         0x000010000
-                        FAULT_LOCK_OP            0x000020000
-                        FAULT_BLKADDR            0x000040000
-                        ===================      ===========
+                        ===========================      ===========
+                        Type_Name                        Type_Value
+                        ===========================      ===========
+                        FAULT_KMALLOC                    0x000000001
+                        FAULT_KVMALLOC                   0x000000002
+                        FAULT_PAGE_ALLOC                 0x000000004
+                        FAULT_PAGE_GET                   0x000000008
+                        FAULT_ALLOC_BIO                  0x000000010 (obsolete)
+                        FAULT_ALLOC_NID                  0x000000020
+                        FAULT_ORPHAN                     0x000000040
+                        FAULT_BLOCK                      0x000000080
+                        FAULT_DIR_DEPTH                  0x000000100
+                        FAULT_EVICT_INODE                0x000000200
+                        FAULT_TRUNCATE                   0x000000400
+                        FAULT_READ_IO                    0x000000800
+                        FAULT_CHECKPOINT                 0x000001000
+                        FAULT_DISCARD                    0x000002000
+                        FAULT_WRITE_IO                   0x000004000
+                        FAULT_SLAB_ALLOC                 0x000008000
+                        FAULT_DQUOT_INIT                 0x000010000
+                        FAULT_LOCK_OP                    0x000020000
+                        FAULT_BLKADDR_VALIDITY           0x000040000
+                        FAULT_BLKADDR_CONSISTENCE        0x000080000
+                        FAULT_NO_SEGMENT                 0x000100000
+                        ===========================      ===========
 mode=%s                         Control block allocation mode which supports "adaptive"
                         and "lfs". In "lfs" mode, there should be no random
                         writes towards main area.
@@ -228,8 +228,6 @@ mode=%s                      Control block allocation mode which supports "adaptive"
                         option for more randomness.
                         Please, use these options for your experiments and we strongly
                         recommend to re-format the filesystem after using these options.
-io_bits=%u              Set the bit size of write IO requests. It should be set
-                        with "mode=lfs".
 usrquota                Enable plain user disk quota accounting.
 grpquota                Enable plain group disk quota accounting.
 prjquota                Enable plain project quota accounting.
index d6b61d22f52584a66f5f8aed51b0a9ce0d93c2d6..c52b9da08fa9a2619ee45950b304a0838f735348 100644 (file)
@@ -32,6 +32,10 @@ configuration::
   CONFIG_ACPI_APEI
   CONFIG_ACPI_APEI_EINJ
 
+...and to (optionally) enable CXL protocol error injection set::
+
+  CONFIG_ACPI_APEI_EINJ_CXL
+
 The EINJ user interface is in <debugfs mount point>/apei/einj.
 
 The following files belong to it:
@@ -118,6 +122,24 @@ The following files belong to it:
   this actually works depends on what operations the BIOS actually
   includes in the trigger phase.
 
+CXL error types are supported from ACPI 6.5 onwards (given a CXL port
+is present). The EINJ user interface for CXL error types is at
+<debugfs mount point>/cxl. The following files belong to it:
+
+- einj_types:
+
+  Provides the same functionality as available_error_types above, but
+  for CXL error types.
+
+- $dport_dev/einj_inject:
+
+  Injects a CXL error type into the CXL port represented by $dport_dev,
+  where $dport_dev is the name of the CXL port (usually a PCIe device name).
+  Error injections targeting a CXL 2.0+ port can use the legacy interface
+  under <debugfs mount point>/apei/einj, while CXL 1.1/1.0 port injections
+  must use this file.
+
+
 BIOS versions based on the ACPI 4.0 specification have limited options
 in controlling where the errors are injected. Your BIOS may support an
 extension (enabled with the param_extension=1 module parameter, or boot
@@ -181,6 +203,18 @@ You should see something like this in dmesg::
   [22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0
   [22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 -  area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
 
+A CXL error injection example with $dport_dev=0000:e0:01.1::
+
+    # cd /sys/kernel/debug/cxl/
+    # ls
+    0000:e0:01.1 0000:0c:00.0
+    # cat einj_types                # See which errors can be injected
+       0x00008000  CXL.mem Protocol Correctable
+       0x00010000  CXL.mem Protocol Uncorrectable non-fatal
+       0x00020000  CXL.mem Protocol Uncorrectable fatal
+    # cd 0000:e0:01.1               # Navigate to dport to inject into
+    # echo 0x8000 > einj_inject     # Inject error
+
 Special notes for injection into SGX enclaves:
 
 There may be a separate BIOS setup option to enable SGX injection.
index 41ddc10f1ac7b5dd860a829a4723b25b47ae5c4d..0b8439ea954ccdbd8fbf1f8c6564617298cc540c 100644 (file)
@@ -48,10 +48,6 @@ driver model device node, and its I2C address.
        .id_table       = foo_idtable,
        .probe          = foo_probe,
        .remove         = foo_remove,
-       /* if device autodetection is needed: */
-       .class          = I2C_CLASS_SOMETHING,
-       .detect         = foo_detect,
-       .address_list   = normal_i2c,
 
        .shutdown       = foo_shutdown, /* optional */
        .command        = foo_command,  /* optional, deprecated */
@@ -203,27 +199,8 @@ reference for later use.
 Device Detection
 ----------------
 
-Sometimes you do not know in advance which I2C devices are connected to
-a given I2C bus.  This is for example the case of hardware monitoring
-devices on a PC's SMBus.  In that case, you may want to let your driver
-detect supported devices automatically.  This is how the legacy model
-was working, and is now available as an extension to the standard
-driver model.
-
-You simply have to define a detect callback which will attempt to
-identify supported devices (returning 0 for supported ones and -ENODEV
-for unsupported ones), a list of addresses to probe, and a device type
-(or class) so that only I2C buses which may have that type of device
-connected (and not otherwise enumerated) will be probed.  For example,
-a driver for a hardware monitoring chip for which auto-detection is
-needed would set its class to I2C_CLASS_HWMON, and only I2C adapters
-with a class including I2C_CLASS_HWMON would be probed by this driver.
-Note that the absence of matching classes does not prevent the use of
-a device of that type on the given I2C adapter.  All it prevents is
-auto-detection; explicit instantiation of devices is still possible.
-
-Note that this mechanism is purely optional and not suitable for all
-devices.  You need some reliable way to identify the supported devices
+The device detection mechanism comes with a number of disadvantages.
+You need some reliable way to identify the supported devices
 (typically using device-specific, dedicated identification registers),
 otherwise misdetections are likely to occur and things can go wrong
 quickly.  Keep in mind that the I2C protocol doesn't include any
@@ -231,9 +208,8 @@ standard way to detect the presence of a chip at a given address, let
 alone a standard way to identify devices.  Even worse is the lack of
 semantics associated to bus transfers, which means that the same
 transfer can be seen as a read operation by a chip and as a write
-operation by another chip.  For these reasons, explicit device
-instantiation should always be preferred to auto-detection where
-possible.
+operation by another chip.  For these reasons, device detection is
+considered a legacy mechanism and shouldn't be used in new code.
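+
+As a hedged sketch, the preferred alternative of explicitly instantiating a
+device at a known address can look like this (the device name, address and
+calling context are hypothetical)::
+
+	#include <linux/err.h>
+	#include <linux/i2c.h>
+
+	static struct i2c_client *foo_client;
+
+	static int foo_instantiate(struct i2c_adapter *adap)
+	{
+		/* Hypothetical device at a known, fixed address. */
+		struct i2c_board_info info = {
+			I2C_BOARD_INFO("foo", 0x2d),
+		};
+
+		foo_client = i2c_new_client_device(adap, &info);
+		return PTR_ERR_OR_ZERO(foo_client);
+	}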
 
 
 Device Deletion
index 8c8a289d69a3672260bcb711dd027460ec52dcdc..698e6630f3a7b3c3a66cadc85b26750c1478b33e 100644 (file)
@@ -29,7 +29,7 @@ follows:
   - Does not support shared LDPC code table wraparound
 
 The device tree entry is described in:
-`linux-xlnx/Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt <https://github.com/Xilinx/linux-xlnx/blob/master/Documentation/devicetree/bindings/misc/xlnx%2Csd-fec.txt>`_
+`linux-xlnx/Documentation/devicetree/bindings/misc/xlnx,sd-fec.yaml <https://github.com/Xilinx/linux-xlnx/blob/master/Documentation/devicetree/bindings/misc/xlnx%2Csd-fec.yaml>`_
 
 
 Modes of Operation
index 1bb69524a62ea6fc7efea49de8735558cdf3d57a..5620aab9b38507ab5bca91b797eea7d575a89641 100644 (file)
@@ -31,6 +31,8 @@ DAMON subsystem is configured with three layers including
   interfaces for the user space, on top of the core layer.
 
 
+.. _damon_design_configurable_operations_set:
+
 Configurable Operations Set
 ---------------------------
 
@@ -63,6 +65,8 @@ modules that built on top of the core layer using the API, which can be easily
 used by the user space end users.
 
 
+.. _damon_operations_set:
+
 Operations Set Layer
 ====================
 
@@ -71,16 +75,26 @@ The monitoring operations are defined in two parts:
 1. Identification of the monitoring target address range for the address space.
 2. Access check of specific address range in the target space.
 
-DAMON currently provides the implementations of the operations for the physical
-and virtual address spaces. Below two subsections describe how those work.
+DAMON currently provides the below three operations sets.  The below two
+subsections describe how those work.
+
+ - vaddr: Monitor virtual address spaces of specific processes
+ - fvaddr: Monitor fixed virtual address ranges
+ - paddr: Monitor the physical address space of the system
 
 
+.. _damon_design_vaddr_target_regions_construction:
+
 VMA-based Target Address Range Construction
 -------------------------------------------
 
-This is only for the virtual address space monitoring operations
-implementation.  That for the physical address space simply asks users to
-manually set the monitoring target address ranges.
+This is a mechanism of the ``vaddr`` DAMON operations set that automatically
+initializes and updates the monitoring target address regions, so that the
+entire memory mappings of the target processes can be covered.
+
+This mechanism is used only by the ``vaddr`` operations set.  In the cases of
+the ``fvaddr`` and ``paddr`` operations sets, users are asked to manually set
+the monitoring target address ranges.
 
 Only small parts in the super-huge virtual address space of the processes are
 mapped to the physical memory and accessed.  Thus, tracking the unmapped
@@ -294,9 +308,29 @@ not mandated to support all actions of the list.  Hence, the availability of
 specific DAMOS action depends on what operations set is selected to be used
 together.
 
-Applying an action to a region is considered as changing the region's
-characteristics.  Hence, DAMOS resets the age of regions when an action is
-applied to those.
+The list of the supported actions, their meaning, and the DAMON operations
+sets that support each action is as below.
+
+ - ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``.
+   Supported by ``vaddr`` and ``fvaddr`` operations set.
+ - ``cold``: Call ``madvise()`` for the region with ``MADV_COLD``.
+   Supported by ``vaddr`` and ``fvaddr`` operations set.
+ - ``pageout``: Reclaim the region.
+   Supported by ``vaddr``, ``fvaddr`` and ``paddr`` operations set.
+ - ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``.
+   Supported by ``vaddr`` and ``fvaddr`` operations set.
+ - ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``.
+   Supported by ``vaddr`` and ``fvaddr`` operations set.
+ - ``lru_prio``: Prioritize the region on its LRU lists.
+   Supported by ``paddr`` operations set.
+ - ``lru_deprio``: Deprioritize the region on its LRU lists.
+   Supported by ``paddr`` operations set.
+ - ``stat``: Do nothing but count the statistics.
+   Supported by all operations sets.
+
+Applying any action other than ``stat`` to a region is considered as changing
+the region's characteristics.  Hence, DAMOS resets the age of a region when
+such an action is applied to it.
 
 
 .. _damon_design_damos_access_pattern:
@@ -364,12 +398,28 @@ Aim-oriented Feedback-driven Auto-tuning
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Automatic feedback-driven quota tuning.  Instead of setting the absolute quota
-value, users can repeatedly provide numbers representing how much of their goal
-for the scheme is achieved as feedback.  DAMOS then automatically tunes the
+value, users can specify the metric of their interest, and what target value
+they want the metric value to be.  DAMOS then automatically tunes the
 aggressiveness (the quota) of the corresponding scheme.  For example, if DAMOS
 is under achieving the goal, DAMOS automatically increases the quota.  If DAMOS
 is over achieving the goal, it decreases the quota.
 
+The goal can be specified with three parameters, namely ``target_metric``,
+``target_value``, and ``current_value``.  The auto-tuning mechanism tries to
+make the ``current_value`` of ``target_metric`` the same as ``target_value``.
+Currently, two kinds of ``target_metric`` are provided; a toy model of the
+tuning loop is sketched after the list.
+
+- ``user_input``: A user-provided value.  Users could use any metric that
+  they have interest in for the value.  The user-space main workload's
+  latency or throughput, or system metrics like the free memory ratio or the
+  memory pressure stall time (PSI), could be examples.  Note that users
+  should explicitly set ``current_value`` on their own in this case; in
+  other words, users should repeatedly provide the feedback.
+- ``some_mem_psi_us``: The system-wide ``some`` memory pressure stall
+  information in microseconds, measured from the last quota reset to the
+  next quota reset.  DAMOS does the measurement on its own, so only
+  ``target_value`` needs to be set by users at the initial time.  In other
+  words, DAMOS does the feedback on its own.
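+
+The following toy C program models the described behavior (the ten-percent
+step and the sample values are made-up illustrations, not DAMON's actual
+tuning formula)::
+
+    #include <stdio.h>
+
+    /* Nudge the quota toward the point where the measured metric
+     * reaches the target, as described above. */
+    static unsigned long tune_quota(unsigned long quota,
+                                    double current, double target)
+    {
+            if (current < target)
+                    quota += quota / 10;    /* under-achieving: more aggressive */
+            else if (current > target)
+                    quota -= quota / 10;    /* over-achieving: back off */
+            return quota;
+    }
+
+    int main(void)
+    {
+            unsigned long quota = 1000;
+            double samples[] = { 40.0, 45.0, 60.0, 55.0 };  /* fake feedback */
+
+            for (int i = 0; i < 4; i++) {
+                    quota = tune_quota(quota, samples[i], 50.0);
+                    printf("feedback %.1f -> quota %lu\n", samples[i], quota);
+            }
+            return 0;
+    }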
+
 
 .. _damon_design_damos_watermarks:
 
index a84c14e5905307d50817733bc9e0ab3ef37b904f..5a306e4de22e50d78e4d76213690b274e44af66b 100644 (file)
@@ -21,8 +21,8 @@ be queued in mm-stable [3]_ , and finally pull-requested to the mainline by the
 memory management subsystem maintainer.
 
 Note again the patches for review should be made against the mm-unstable
-tree[1] whenever possible.  damon/next is only for preview of others' works in
-progress.
+tree [1]_ whenever possible.  damon/next is only for preview of others' works
+in progress.
 
 Submit checklist addendum
 -------------------------
@@ -41,8 +41,8 @@ Further doing below and putting the results will be helpful.
 Key cycle dates
 ---------------
 
-Patches can be sent anytime.  Key cycle dates of the mm-unstable[1] and
-mm-stable[3] trees depend on the memory management subsystem maintainer.
+Patches can be sent anytime.  Key cycle dates of the mm-unstable [1]_ and
+mm-stable [3]_ trees depend on the memory management subsystem maintainer.
 
 Review cadence
 --------------
index 62e3f7ab23cc18b131276cc778ed4a5e0ffdb093..0d0334cd51798b63af73cb86f891c07e1c7e587c 100644 (file)
@@ -24,6 +24,11 @@ fragmentation statistics can be obtained through gfp flag information of
 each page. It is already implemented and activated if page owner is
 enabled. Other usages are more than welcome.
 
+It can also be used to show all the stacks and their outstanding allocation
+counts, which gives us a quick overview of where the memory is going without
+the need to screen through all the pages and match allocation and free
+operations.
+
 page owner is disabled by default. So, if you'd like to use it, you need
 to add "page_owner=on" to your boot cmdline. If the kernel is built
 with page owner and page owner is disabled in runtime due to not enabling
@@ -68,6 +73,46 @@ Usage
 
 4) Analyze information from page owner::
 
+       cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
+       cat stacks.txt
+        prep_new_page+0xa9/0x120
+        get_page_from_freelist+0x7e6/0x2140
+        __alloc_pages+0x18a/0x370
+        new_slab+0xc8/0x580
+        ___slab_alloc+0x1f2/0xaf0
+        __slab_alloc.isra.86+0x22/0x40
+        kmem_cache_alloc+0x31b/0x350
+        __khugepaged_enter+0x39/0x100
+        dup_mmap+0x1c7/0x5ce
+        copy_process+0x1afe/0x1c90
+        kernel_clone+0x9a/0x3c0
+        __do_sys_clone+0x66/0x90
+        do_syscall_64+0x7f/0x160
+        entry_SYSCALL_64_after_hwframe+0x6c/0x74
+       stack_count: 234
+       ...
+       ...
+       echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
+       cat /sys/kernel/debug/page_owner_stacks/show_stacks> stacks_7000.txt
+       cat stacks_7000.txt
+        prep_new_page+0xa9/0x120
+        get_page_from_freelist+0x7e6/0x2140
+        __alloc_pages+0x18a/0x370
+        alloc_pages_mpol+0xdf/0x1e0
+        folio_alloc+0x14/0x50
+        filemap_alloc_folio+0xb0/0x100
+        page_cache_ra_unbounded+0x97/0x180
+        filemap_fault+0x4b4/0x1200
+        __do_fault+0x2d/0x110
+        do_pte_missing+0x4b0/0xa30
+        __handle_mm_fault+0x7fa/0xb70
+        handle_mm_fault+0x125/0x300
+        do_user_addr_fault+0x3c9/0x840
+        exc_page_fault+0x68/0x150
+        asm_exc_page_fault+0x22/0x30
+       stack_count: 8248
+       ...
+
        cat /sys/kernel/debug/page_owner > page_owner_full.txt
        ./page_owner_sort page_owner_full.txt sorted_page_owner.txt
 
index ca611c9c2d1e671549bfbf317d9fb0a52104bd2c..7ef8de58f7f89252fb4e9330542d622d969d6a4e 100644 (file)
@@ -30,7 +30,7 @@ you probably needn't concern yourself with pcmciautils.
         Program        Minimal version       Command to check the version
 ====================== ===============  ========================================
 GNU C                  5.1              gcc --version
-Clang/LLVM (optional)  11.0.0           clang --version
+Clang/LLVM (optional)  13.0.1           clang --version
 Rust (optional)        1.76.0           rustc --version
 bindgen (optional)     0.65.1           bindgen --version
 GNU make               3.82             make --version
index 17b9949d9b43578c16665541d6c5a951382927a0..da2745464ece453d90af0dc97d3cf57427e3518f 100644 (file)
@@ -344,7 +344,7 @@ debugfs接口
   :ref:`sysfs接口<sysfs_interface>`。
 
 DAMON导出了八个文件, ``attrs``, ``target_ids``, ``init_regions``,
-``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` 和
+``schemes``, ``monitor_on_DEPRECATED``, ``kdamond_pid``, ``mk_contexts`` 和
 ``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``.
 
 
@@ -521,15 +521,15 @@ DAMON导出了八个文件, ``attrs``, ``target_ids``, ``init_regions``,
 开关
 ----
 
-除非你明确地启动监测,否则如上所述的文件设置不会产生效果。你可以通过写入和读取 ``monitor_on``
+除非你明确地启动监测,否则如上所述的文件设置不会产生效果。你可以通过写入和读取 ``monitor_on_DEPRECATED``
 文件来启动、停止和检查监测的当前状态。写入 ``on`` 该文件可以启动对有属性的目标的监测。写入
 ``off`` 该文件则停止这些目标。如果每个目标进程被终止,DAMON也会停止。下面的示例命令开启、关
 闭和检查DAMON的状态::
 
     # cd <debugfs>/damon
-    # echo on > monitor_on
-    # echo off > monitor_on
-    # cat monitor_on
+    # echo on > monitor_on_DEPRECATED
+    # echo off > monitor_on_DEPRECATED
+    # cat monitor_on_DEPRECATED
     off
 
 请注意,当监测开启时,你不能写到上述的debugfs文件。如果你在DAMON运行时写到这些文件,将会返
@@ -543,11 +543,11 @@ DAMON通过一个叫做kdamond的内核线程来进行请求监测。你可以
 得该线程的 ``pid`` 。当监测被 ``关闭`` 时,读取该文件不会返回任何信息::
 
     # cd <debugfs>/damon
-    # cat monitor_on
+    # cat monitor_on_DEPRECATED
     off
     # cat kdamond_pid
     none
-    # echo on > monitor_on
+    # echo on > monitor_on_DEPRECATED
     # cat kdamond_pid
     18594
 
@@ -574,7 +574,7 @@ DAMON通过一个叫做kdamond的内核线程来进行请求监测。你可以
     # ls foo
     # ls: cannot access 'foo': No such file or directory
 
-注意, ``mk_contexts`` 、 ``rm_contexts`` 和 ``monitor_on`` 文件只在根目录下。
+注意, ``mk_contexts`` 、 ``rm_contexts`` 和 ``monitor_on_DEPRECATED`` 文件只在根目录下。
 
 
 监测结果的监测点
@@ -583,9 +583,9 @@ DAMON通过一个叫做kdamond的内核线程来进行请求监测。你可以
 DAMON通过一个tracepoint ``damon:damon_aggregated`` 提供监测结果.  当监测开启时,你可
 以记录追踪点事件,并使用追踪点支持工具如perf显示结果。比如说::
 
-    # echo on > monitor_on
+    # echo on > monitor_on_DEPRECATED
     # perf record -e damon:damon_aggregated &
     # sleep 5
     # kill -9 $(pidof perf)
-    # echo off > monitor_on
+    # echo off > monitor_on_DEPRECATED
     # perf script
index 8fdb20c9665b4b5e8a9afe12fb361f9a1d2315d7..2b1e8f74904b01f2b29d44f729e750843005ed72 100644 (file)
@@ -137,7 +137,7 @@ KASAN受到通用 ``panic_on_warn`` 命令行参数的影响。当它被启用
 典型的KASAN报告如下所示::
 
     ==================================================================
-    BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [test_kasan]
+    BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [kasan_test]
     Write of size 1 at addr ffff8801f44ec37b by task insmod/2760
 
     CPU: 1 PID: 2760 Comm: insmod Not tainted 4.19.0-rc3+ #698
@@ -147,8 +147,8 @@ KASAN受到通用 ``panic_on_warn`` 命令行参数的影响。当它被启用
      print_address_description+0x73/0x280
      kasan_report+0x144/0x187
      __asan_report_store1_noabort+0x17/0x20
-     kmalloc_oob_right+0xa8/0xbc [test_kasan]
-     kmalloc_tests_init+0x16/0x700 [test_kasan]
+     kmalloc_oob_right+0xa8/0xbc [kasan_test]
+     kmalloc_tests_init+0x16/0x700 [kasan_test]
      do_one_initcall+0xa5/0x3ae
      do_init_module+0x1b6/0x547
      load_module+0x75df/0x8070
@@ -168,8 +168,8 @@ KASAN受到通用 ``panic_on_warn`` 命令行参数的影响。当它被启用
      save_stack+0x43/0xd0
      kasan_kmalloc+0xa7/0xd0
      kmem_cache_alloc_trace+0xe1/0x1b0
-     kmalloc_oob_right+0x56/0xbc [test_kasan]
-     kmalloc_tests_init+0x16/0x700 [test_kasan]
+     kmalloc_oob_right+0x56/0xbc [kasan_test]
+     kmalloc_tests_init+0x16/0x700 [kasan_test]
      do_one_initcall+0xa5/0x3ae
      do_init_module+0x1b6/0x547
      load_module+0x75df/0x8070
@@ -421,15 +421,15 @@ KASAN连接到vmap基础架构以懒清理未使用的影子内存。
 
 当由于 ``kmalloc`` 失败而导致测试失败时::
 
-        # kmalloc_large_oob_right: ASSERTION FAILED at lib/test_kasan.c:163
+        # kmalloc_large_oob_right: ASSERTION FAILED at mm/kasan/kasan_test.c:245
         Expected ptr is not null, but is
-        not ok 4 - kmalloc_large_oob_right
+        not ok 5 - kmalloc_large_oob_right
 
 当由于缺少KASAN报告而导致测试失败时::
 
-        # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:974
+        # kmalloc_double_kzfree: EXPECTATION FAILED at mm/kasan/kasan_test.c:709
         KASAN failure expected in "kfree_sensitive(ptr)", but none occurred
-        not ok 44 - kmalloc_double_kzfree
+        not ok 28 - kmalloc_double_kzfree
 
 
 最后打印所有KASAN测试的累积状态。成功::
@@ -445,7 +445,7 @@ KASAN连接到vmap基础架构以懒清理未使用的影子内存。
 1. 可加载模块
 
    启用 ``CONFIG_KUNIT`` 后,KASAN-KUnit测试可以构建为可加载模块,并通过使用
-   ``insmod`` 或 ``modprobe`` 加载 ``test_kasan.ko`` 来运行。
+   ``insmod`` 或 ``modprobe`` 加载 ``kasan_test.ko`` 来运行。
 
 2. 内置
 
index 6dee719a32ea6158fbc52bb02d8b567b7d01d02b..7464279f9b7de024e0470985f0d4b7a53d7f1375 100644 (file)
@@ -344,7 +344,7 @@ debugfs接口
   :ref:`sysfs接口<sysfs_interface>`。
 
 DAMON導出了八個文件, ``attrs``, ``target_ids``, ``init_regions``,
-``schemes``, ``monitor_on``, ``kdamond_pid``, ``mk_contexts`` 和
+``schemes``, ``monitor_on_DEPRECATED``, ``kdamond_pid``, ``mk_contexts`` 和
 ``rm_contexts`` under its debugfs directory, ``<debugfs>/damon/``.
 
 
@@ -521,15 +521,15 @@ DAMON導出了八個文件, ``attrs``, ``target_ids``, ``init_regions``,
 開關
 ----
 
-除非你明確地啓動監測,否則如上所述的文件設置不會產生效果。你可以通過寫入和讀取 ``monitor_on``
+除非你明確地啓動監測,否則如上所述的文件設置不會產生效果。你可以通過寫入和讀取 ``monitor_on_DEPRECATED``
 文件來啓動、停止和檢查監測的當前狀態。寫入 ``on`` 該文件可以啓動對有屬性的目標的監測。寫入
 ``off`` 該文件則停止這些目標。如果每個目標進程被終止,DAMON也會停止。下面的示例命令開啓、關
 閉和檢查DAMON的狀態::
 
     # cd <debugfs>/damon
-    # echo on > monitor_on
-    # echo off > monitor_on
-    # cat monitor_on
+    # echo on > monitor_on_DEPRECATED
+    # echo off > monitor_on_DEPRECATED
+    # cat monitor_on_DEPRECATED
     off
 
 請注意,當監測開啓時,你不能寫到上述的debugfs文件。如果你在DAMON運行時寫到這些文件,將會返
@@ -543,11 +543,11 @@ DAMON通過一個叫做kdamond的內核線程來進行請求監測。你可以
 得該線程的 ``pid`` 。當監測被 ``關閉`` 時,讀取該文件不會返回任何信息::
 
     # cd <debugfs>/damon
-    # cat monitor_on
+    # cat monitor_on_DEPRECATED
     off
     # cat kdamond_pid
     none
-    # echo on > monitor_on
+    # echo on > monitor_on_DEPRECATED
     # cat kdamond_pid
     18594
 
@@ -574,7 +574,7 @@ DAMON通過一個叫做kdamond的內核線程來進行請求監測。你可以
     # ls foo
     # ls: cannot access 'foo': No such file or directory
 
-注意, ``mk_contexts`` 、 ``rm_contexts`` 和 ``monitor_on`` 文件只在根目錄下。
+注意, ``mk_contexts`` 、 ``rm_contexts`` 和 ``monitor_on_DEPRECATED`` 文件只在根目錄下。
 
 
 監測結果的監測點
@@ -583,10 +583,10 @@ DAMON通過一個叫做kdamond的內核線程來進行請求監測。你可以
 DAMON通過一個tracepoint ``damon:damon_aggregated`` 提供監測結果.  當監測開啓時,你可
 以記錄追蹤點事件,並使用追蹤點支持工具如perf顯示結果。比如說::
 
-    # echo on > monitor_on
+    # echo on > monitor_on_DEPRECATED
     # perf record -e damon:damon_aggregated &
     # sleep 5
     # kill -9 $(pidof perf)
-    # echo off > monitor_on
+    # echo off > monitor_on_DEPRECATED
     # perf script
 
index 979eb84bc58f1a351d6f9b3228a3fbd942f0882c..ed342e67d8ed0243c8ef6efa994e5e7deb9a0060 100644 (file)
@@ -137,7 +137,7 @@ KASAN受到通用 ``panic_on_warn`` 命令行參數的影響。當它被啓用
 典型的KASAN報告如下所示::
 
     ==================================================================
-    BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [test_kasan]
+    BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [kasan_test]
     Write of size 1 at addr ffff8801f44ec37b by task insmod/2760
 
     CPU: 1 PID: 2760 Comm: insmod Not tainted 4.19.0-rc3+ #698
@@ -147,8 +147,8 @@ KASAN受到通用 ``panic_on_warn`` 命令行參數的影響。當它被啓用
      print_address_description+0x73/0x280
      kasan_report+0x144/0x187
      __asan_report_store1_noabort+0x17/0x20
-     kmalloc_oob_right+0xa8/0xbc [test_kasan]
-     kmalloc_tests_init+0x16/0x700 [test_kasan]
+     kmalloc_oob_right+0xa8/0xbc [kasan_test]
+     kmalloc_tests_init+0x16/0x700 [kasan_test]
      do_one_initcall+0xa5/0x3ae
      do_init_module+0x1b6/0x547
      load_module+0x75df/0x8070
@@ -168,8 +168,8 @@ KASAN受到通用 ``panic_on_warn`` 命令行參數的影響。當它被啓用
      save_stack+0x43/0xd0
      kasan_kmalloc+0xa7/0xd0
      kmem_cache_alloc_trace+0xe1/0x1b0
-     kmalloc_oob_right+0x56/0xbc [test_kasan]
-     kmalloc_tests_init+0x16/0x700 [test_kasan]
+     kmalloc_oob_right+0x56/0xbc [kasan_test]
+     kmalloc_tests_init+0x16/0x700 [kasan_test]
      do_one_initcall+0xa5/0x3ae
      do_init_module+0x1b6/0x547
      load_module+0x75df/0x8070
@@ -421,15 +421,15 @@ KASAN連接到vmap基礎架構以懶清理未使用的影子內存。
 
 當由於 ``kmalloc`` 失敗而導致測試失敗時::
 
-        # kmalloc_large_oob_right: ASSERTION FAILED at lib/test_kasan.c:163
+        # kmalloc_large_oob_right: ASSERTION FAILED at mm/kasan/kasan_test.c:245
         Expected ptr is not null, but is
-        not ok 4 - kmalloc_large_oob_right
+        not ok 5 - kmalloc_large_oob_right
 
 當由於缺少KASAN報告而導致測試失敗時::
 
-        # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:974
+        # kmalloc_double_kzfree: EXPECTATION FAILED at mm/kasan/kasan_test.c:709
         KASAN failure expected in "kfree_sensitive(ptr)", but none occurred
-        not ok 44 - kmalloc_double_kzfree
+        not ok 28 - kmalloc_double_kzfree
 
 
 最後打印所有KASAN測試的累積狀態。成功::
@@ -445,7 +445,7 @@ KASAN連接到vmap基礎架構以懶清理未使用的影子內存。
 1. 可加載模塊
 
    啓用 ``CONFIG_KUNIT`` 後,KASAN-KUnit測試可以構建爲可加載模塊,並通過使用
-   ``insmod`` 或 ``modprobe`` 加載 ``test_kasan.ko`` 來運行。
+   ``insmod`` 或 ``modprobe`` 加載 ``kasan_test.ko`` 來運行。
 
 2. 內置
 
index 161cb65f4d98a64c5d951a8e91572f0517d5180c..03015b33d5abbea88ecebdf3ec760bd22ddaadf8 100644 (file)
@@ -2,6 +2,8 @@
 
 .. include:: <isonum.txt>
 
+.. _media-ccs-uapi:
+
 MIPI CCS camera sensor driver
 =============================
 
@@ -13,6 +15,8 @@ the binner and the scaler.
 As the capabilities of individual devices vary, the driver exposes
 interfaces based on the capabilities that exist in hardware.
 
+Also see :ref:`the CCS driver kernel documentation <media-ccs-driver>`.
+
 Pixel Array sub-device
 ----------------------
 
@@ -30,7 +34,7 @@ that purpose, selection target ``V4L2_SEL_TGT_COMPOSE`` is supported on the
 sink pad (0).
 
 Additionally, if a device has no scaler or digital crop functionality, the
-source pad (1) expses another digital crop selection rectangle that can only
+source pad (1) exposes another digital crop selection rectangle that can only
 crop at the end of the lines and frames.
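+
+As a hedged user-space sketch, the binning configuration described above
+could be set with the standard sub-device selection ioctl (the device node
+and rectangle are examples only)::
+
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <sys/ioctl.h>
+    #include <linux/v4l2-subdev.h>
+
+    int main(void)
+    {
+        struct v4l2_subdev_selection sel = {
+            .which  = V4L2_SUBDEV_FORMAT_ACTIVE,
+            .pad    = 0,                     /* binner sink pad */
+            .target = V4L2_SEL_TGT_COMPOSE,  /* binning configuration */
+            .r      = { 0, 0, 1920, 1080 },  /* left, top, width, height */
+        };
+        int fd = open("/dev/v4l-subdev1", O_RDWR);
+
+        if (fd < 0 || ioctl(fd, VIDIOC_SUBDEV_S_SELECTION, &sel) < 0)
+            perror("VIDIOC_SUBDEV_S_SELECTION");
+        return 0;
+    }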
 
 Scaler
index b97d56ee543cf51ecf54aea51e56f98874554616..ffe8325749e5f4145212f7c8359008b44a06c5e8 100644 (file)
@@ -23,3 +23,4 @@ DVB-S2, DVB-T2, ISDB, etc.
     :maxdepth: 1
 
     frontend_legacy_dvbv3_api
+    legacy_dvb_decoder_api
diff --git a/Documentation/userspace-api/media/dvb/legacy_dvb_audio.rst b/Documentation/userspace-api/media/dvb/legacy_dvb_audio.rst
new file mode 100644 (file)
index 0000000..b46fe2b
--- /dev/null
@@ -0,0 +1,1642 @@
+.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later OR GPL-2.0
+
+.. c:namespace:: dtv.legacy.audio
+
+.. _dvb_audio:
+
+================
+DVB Audio Device
+================
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+The DVB audio device controls the MPEG2 audio decoder of the DVB
+hardware. It can be accessed through ``/dev/dvb/adapter?/audio?``. Data
+types and ioctl definitions can be accessed by including
+``linux/dvb/audio.h`` in your application.
+
+Please note that most DVB cards don’t have their own MPEG decoder, which
+results in the omission of the audio and video device.
+
+These ioctls were also used by V4L2 to control MPEG decoders implemented
+in V4L2. The use of these ioctls for that purpose has been made obsolete
+and proper V4L2 ioctls or controls have been created to replace that
+functionality. Use :ref:`V4L2 ioctls<audio>` for new drivers!
+
+
+Audio Data Types
+================
+
+This section describes the structures, data types and defines used when
+talking to the audio device.
+
+
+-----
+
+
+audio_stream_source_t
+---------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:enum:: audio_stream_source_t
+
+.. code-block:: c
+
+    typedef enum {
+    AUDIO_SOURCE_DEMUX,
+    AUDIO_SOURCE_MEMORY
+    } audio_stream_source_t;
+
+Constants
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``AUDIO_SOURCE_DEMUX``
+
+       -  :cspan:`1` Selects the demultiplexer (fed either by the frontend
+          or the DVR device) as the source of the audio stream.
+
+    -  ..
+
+       -  ``AUDIO_SOURCE_MEMORY``
+
+       -  Selects the stream from the application that comes through
+          the `write()`_ system call.
+
+Description
+~~~~~~~~~~~
+
+The audio stream source is set through the `AUDIO_SELECT_SOURCE`_ call
+and can take the following values, depending on whether we are replaying
+from an internal (demux) or external (user write) source.
+
+The data fed to the decoder is also controlled by the PID-filter.
+Output selection: :c:type:`dmx_output` ``DMX_OUT_DECODER``.
+
+
+-----
+
+
+audio_play_state_t
+------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:enum:: audio_play_state_t
+
+.. code-block:: c
+
+    typedef enum {
+       AUDIO_STOPPED,
+       AUDIO_PLAYING,
+       AUDIO_PAUSED
+    } audio_play_state_t;
+
+Constants
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``AUDIO_STOPPED``
+
+       -  Audio is stopped.
+
+    -  ..
+
+       -  ``AUDIO_PLAYING``
+
+       -  Audio is currently playing.
+
+    -  ..
+
+       -  ``AUDIO_PAUSED``
+
+       -  Audio is frozen.
+
+Description
+~~~~~~~~~~~
+
+These values can be returned by the `AUDIO_GET_STATUS`_ call, representing
+the state of audio playback.
+
+
+-----
+
+
+audio_channel_select_t
+----------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:enum:: audio_channel_select_t
+
+.. code-block:: c
+
+    typedef enum {
+       AUDIO_STEREO,
+       AUDIO_MONO_LEFT,
+       AUDIO_MONO_RIGHT,
+       AUDIO_MONO,
+       AUDIO_STEREO_SWAPPED
+    } audio_channel_select_t;
+
+Constants
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``AUDIO_STEREO``
+
+       -  Stereo.
+
+    -  ..
+
+       -  ``AUDIO_MONO_LEFT``
+
+       -  Mono, select left stereo channel as source.
+
+    -  ..
+
+       -  ``AUDIO_MONO_RIGHT``
+
+       -  Mono, select right stereo channel as source.
+
+    -  ..
+
+       -  ``AUDIO_MONO``
+
+       -  Mono source only.
+
+    -  ..
+
+       -  ``AUDIO_STEREO_SWAPPED``
+
+       -  Stereo, swap L & R.
+
+Description
+~~~~~~~~~~~
+
+The audio channel selected via `AUDIO_CHANNEL_SELECT`_ is determined by
+these values.
+
+
+-----
+
+
+audio_mixer_t
+-------------
+
+Synopsis
+~~~~~~~~
+
+.. c:struct:: audio_mixer
+
+.. code-block:: c
+
+    typedef struct audio_mixer {
+       unsigned int volume_left;
+       unsigned int volume_right;
+    } audio_mixer_t;
+
+Variables
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``unsigned int volume_left``
+
+       -  Volume left channel.
+          Valid range: 0 ... 255
+
+    -  ..
+
+       -  ``unsigned int volume_right``
+
+       -  Volume right channel.
+          Valid range: 0 ... 255
+
+Description
+~~~~~~~~~~~
+
+This structure is used by the `AUDIO_SET_MIXER`_ call to set the
+audio volume.
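+
+A hedged sketch of setting the volume through this structure (the device
+path and values are examples; see `AUDIO_SET_MIXER`_ for the call itself):
+
+.. code-block:: c
+
+    #include <fcntl.h>
+    #include <unistd.h>
+    #include <sys/ioctl.h>
+    #include <linux/dvb/audio.h>
+
+    int main(void)
+    {
+        audio_mixer_t mix = {
+            .volume_left  = 128,    /* half volume, range 0 ... 255 */
+            .volume_right = 128,
+        };
+        int fd = open("/dev/dvb/adapter0/audio0", O_RDWR);
+
+        if (fd < 0 || ioctl(fd, AUDIO_SET_MIXER, &mix) < 0)
+            return 1;
+        close(fd);
+        return 0;
+    }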
+
+
+-----
+
+
+audio_status
+------------
+
+Synopsis
+~~~~~~~~
+
+.. c:struct:: audio_status
+
+.. code-block:: c
+
+    typedef struct audio_status {
+       int AV_sync_state;
+       int mute_state;
+       audio_play_state_t play_state;
+       audio_stream_source_t stream_source;
+       audio_channel_select_t channel_select;
+       int bypass_mode;
+       audio_mixer_t mixer_state;
+    } audio_status_t;
+
+Variables
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  :rspan:`2` ``int AV_sync_state``
+
+       -  :cspan:`1` Shows if A/V synchronization is ON or OFF.
+
+    -  ..
+
+       -  TRUE  ( != 0 )
+
+       -  AV-sync ON.
+
+    -  ..
+
+       -  FALSE ( == 0 )
+
+       -  AV-sync OFF.
+
+    -  ..
+
+       -  :rspan:`2` ``int mute_state``
+
+       -  :cspan:`1` Indicates if audio is muted or not.
+
+    -  ..
+
+       -  TRUE  ( != 0 )
+
+       -  mute audio
+
+    -  ..
+
+       -  FALSE ( == 0 )
+
+       -  unmute audio
+
+    -  ..
+
+       -  `audio_play_state_t`_ ``play_state``
+
+       -  Current playback state.
+
+    -  ..
+
+       -  `audio_stream_source_t`_ ``stream_source``
+
+       -  Current source of the data.
+
+    -  ..
+
+       -  :rspan:`2` ``int bypass_mode``
+
+       -  :cspan:`1` Shows whether the decoding of the current Audio
+          stream in the DVB subsystem is enabled or disabled.
+
+    -  ..
+
+       -  TRUE  ( != 0 )
+
+       -  Bypass disabled.
+
+    -  ..
+
+       -  FALSE ( == 0 )
+
+       -  Bypass enabled.
+
+    -  ..
+
+       -  `audio_mixer_t`_ ``mixer_state``
+
+       -  Current volume settings.
+
+Description
+~~~~~~~~~~~
+
+The `AUDIO_GET_STATUS`_ call returns this structure as information
+about various states of the playback operation.
+
+
+-----
+
+
+audio encodings
+---------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+     #define AUDIO_CAP_DTS    1
+     #define AUDIO_CAP_LPCM   2
+     #define AUDIO_CAP_MP1    4
+     #define AUDIO_CAP_MP2    8
+     #define AUDIO_CAP_MP3   16
+     #define AUDIO_CAP_AAC   32
+     #define AUDIO_CAP_OGG   64
+     #define AUDIO_CAP_SDDS 128
+     #define AUDIO_CAP_AC3  256
+
+Constants
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``AUDIO_CAP_DTS``
+
+       -  :cspan:`1` The hardware accepts DTS audio tracks.
+
+    -  ..
+
+       -  ``AUDIO_CAP_LPCM``
+
+       -   The hardware accepts uncompressed audio with
+           Linear Pulse-Code Modulation (LPCM)
+
+    -  ..
+
+       -  ``AUDIO_CAP_MP1``
+
+       -  The hardware accepts MPEG-1 Audio Layer 1.
+
+    -  ..
+
+       -  ``AUDIO_CAP_MP2``
+
+       -  The hardware accepts MPEG-1 Audio Layer 2.
+          Also known as MUSICAM.
+
+    -  ..
+
+       -  ``AUDIO_CAP_MP3``
+
+       -  The hardware accepts MPEG-1 Audio Layer III.
+          Commonly known as .mp3.
+
+    -  ..
+
+       -  ``AUDIO_CAP_AAC``
+
+       -  The hardware accepts AAC (Advanced Audio Coding).
+
+    -  ..
+
+       -  ``AUDIO_CAP_OGG``
+
+       -  The hardware accepts Vorbis audio tracks.
+
+    -  ..
+
+       -  ``AUDIO_CAP_SDDS``
+
+       -  The hardware accepts Sony Dynamic Digital Sound (SDDS).
+
+    -  ..
+
+       -  ``AUDIO_CAP_AC3``
+
+       -  The hardware accepts Dolby Digital ATSC A/52 audio.
+          Also known as AC-3.
+
+Description
+~~~~~~~~~~~
+
+A call to `AUDIO_GET_CAPABILITIES`_ returns an unsigned integer with the
+following bits set according to the hardware's capabilities.
+
+
+-----
+
+
+Audio Function Calls
+====================
+
+
+AUDIO_STOP
+----------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_STOP
+
+.. code-block:: c
+
+        int ioctl(int fd, int request = AUDIO_STOP)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  File descriptor returned by a previous call to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  :cspan:`1` Equals ``AUDIO_STOP`` for this command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Audio Device to stop playing the current
+stream.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_PLAY
+----------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_PLAY
+
+.. code-block:: c
+
+        int  ioctl(int fd, int request = AUDIO_PLAY)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  File descriptor returned by a previous call to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  :cspan:`1` Equals ``AUDIO_PLAY`` for this command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Audio Device to start playing an audio stream
+from the selected source.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_PAUSE
+-----------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_PAUSE
+
+.. code-block:: c
+
+        int  ioctl(int fd, int request = AUDIO_PAUSE)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_PAUSE`` for this command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call suspends the audio stream being played. Decoding and
+playing are paused. It is then possible to restart the decoding and playing
+of the audio stream using the `AUDIO_CONTINUE`_ command.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_CONTINUE
+--------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_CONTINUE
+
+.. code-block:: c
+
+        int  ioctl(int fd, int request = AUDIO_CONTINUE)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_CONTINUE`` for this command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl restarts the decoding and playing process previously paused
+with the `AUDIO_PAUSE`_ command.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_SELECT_SOURCE
+-------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_SELECT_SOURCE
+
+.. code-block:: c
+
+        int ioctl(int fd, int request = AUDIO_SELECT_SOURCE,
+        audio_stream_source_t source)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_SELECT_SOURCE`` for this command.
+
+    -  ..
+
+       -  `audio_stream_source_t`_ ``source``
+
+       -  Indicates the source that shall be used for the Audio stream.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call informs the audio device which source shall be used for
+the input data. The possible sources are demux or memory. If
+``AUDIO_SOURCE_MEMORY`` is selected, the data is fed to the Audio Device
+through the write command. If ``AUDIO_SOURCE_DEMUX`` is selected, the data
+is directly transferred from the onboard demux-device to the decoder.
+Note: This only supports DVB-devices with one demux and one decoder so far.
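+
+A hedged usage sketch combining this call with `AUDIO_PLAY`_ (the device
+path is an example; real applications need more error handling):
+
+.. code-block:: c
+
+    #include <fcntl.h>
+    #include <stdio.h>
+    #include <unistd.h>
+    #include <sys/ioctl.h>
+    #include <linux/dvb/audio.h>
+
+    int main(void)
+    {
+        int fd = open("/dev/dvb/adapter0/audio0", O_RDWR);
+
+        if (fd < 0)
+            return 1;
+        /* Feed the decoder from the demux, then start playback. */
+        if (ioctl(fd, AUDIO_SELECT_SOURCE, AUDIO_SOURCE_DEMUX) < 0 ||
+            ioctl(fd, AUDIO_PLAY) < 0)
+            perror("audio ioctl");
+        close(fd);
+        return 0;
+    }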
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_SET_MUTE
+--------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_SET_MUTE
+
+.. code-block:: c
+
+        int  ioctl(int fd, int request = AUDIO_SET_MUTE, int state)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  :cspan:`1` Equals ``AUDIO_SET_MUTE`` for this command.
+
+    -  ..
+
+       -  :rspan:`2` ``int state``
+
+       -  :cspan:`1` Indicates if the audio device shall mute or not.
+
+    -  ..
+
+       -  TRUE  ( != 0 )
+
+       -  mute audio
+
+    -  ..
+
+       -  FALSE ( == 0 )
+
+       -  unmute audio
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl is for DVB devices only. To control a V4L2 decoder use the
+V4L2 :ref:`VIDIOC_DECODER_CMD` with the
+``V4L2_DEC_CMD_START_MUTE_AUDIO`` flag instead.
+
+This ioctl call asks the audio device to mute the stream that is
+currently being played.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_SET_AV_SYNC
+-----------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_SET_AV_SYNC
+
+.. code-block:: c
+
+        int  ioctl(int fd, int request = AUDIO_SET_AV_SYNC, int state)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  :cspan:`1` Equals ``AUDIO_SET_AV_SYNC`` for this command.
+
+    -  ..
+
+       -  :rspan:`2` ``int state``
+
+       -  :cspan:`1` Tells the DVB subsystem if A/V synchronization
+          shall be ON or OFF.
+
+    -  ..
+
+       -  TRUE  ( != 0 )
+
+       -  AV-sync ON.
+
+    -  ..
+
+       -  FALSE ( == 0 )
+
+       -  AV-sync OFF.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Audio Device to turn ON or OFF A/V
+synchronization.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_SET_BYPASS_MODE
+---------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_SET_BYPASS_MODE
+
+.. code-block:: c
+
+        int ioctl(int fd, int request = AUDIO_SET_BYPASS_MODE, int mode)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  :cspan:`1` Equals ``AUDIO_SET_BYPASS_MODE`` for this command.
+
+    -  ..
+
+       -  :rspan:`2` ``int mode``
+
+       -  :cspan:`1` Enables or disables the decoding of the current
+          Audio stream in the DVB subsystem.
+
+    -  ..
+
+       -  TRUE  ( != 0 )
+
+       -  Disable bypass
+
+    -  ..
+
+       -  FALSE ( == 0 )
+
+       -  Enable bypass
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Audio Device to bypass the Audio decoder and
+forward the stream without decoding. This mode shall be used if streams
+that can’t be handled by the DVB system are to be decoded. Dolby
+Digital™ streams are automatically forwarded by the DVB subsystem if
+the hardware can handle them.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_CHANNEL_SELECT
+--------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_CHANNEL_SELECT
+
+.. code-block:: c
+
+        int ioctl(int fd, int request = AUDIO_CHANNEL_SELECT,
+        audio_channel_select_t)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_CHANNEL_SELECT`` for this command.
+
+    -  ..
+
+       -  `audio_channel_select_t`_ ``ch``
+
+       -  Select the output format of the audio (mono left/right, stereo).
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl is for DVB devices only. To control a V4L2 decoder use the
+V4L2 ``V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK`` control instead.
+
+This ioctl call asks the Audio Device to select the requested channel if
+possible.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_GET_STATUS
+----------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_GET_STATUS
+
+.. code-block:: c
+
+        int ioctl(int fd, int request = AUDIO_GET_STATUS,
+        struct audio_status *status)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_GET_STATUS`` for this command.
+
+    -  ..
+
+       -  ``struct`` `audio_status`_ ``*status``
+
+       -  Returns the current state of Audio Device.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Audio Device to return the current state of the
+Audio Device.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_GET_CAPABILITIES
+----------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_GET_CAPABILITIES
+
+.. code-block:: c
+
+        int ioctl(int fd, int request = AUDIO_GET_CAPABILITIES,
+        unsigned int *cap)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_GET_CAPABILITIES`` for this command.
+
+    -  ..
+
+       -  ``unsigned int *cap``
+
+       -  Returns a bit array of supported sound formats.
+          Bits are defined in `audio encodings`_.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Audio Device to tell us about the decoding
+capabilities of the audio hardware.
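+
+A short sketch of how an application might test one of these bits; the
+``AUDIO_CAP_AC3`` flag is defined in ``linux/dvb/audio.h``:
+
+.. code-block:: c
+
+        #include <stdio.h>
+        #include <sys/ioctl.h>
+        #include <linux/dvb/audio.h>
+
+        /* fd was returned by a previous call to open(). */
+        unsigned int cap;
+
+        if (ioctl(fd, AUDIO_GET_CAPABILITIES, &cap) == 0 &&
+            (cap & AUDIO_CAP_AC3))
+                printf("decoder can handle AC3\n");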
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_CLEAR_BUFFER
+------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_CLEAR_BUFFER
+
+.. code-block:: c
+
+        int  ioctl(int fd, int request = AUDIO_CLEAR_BUFFER)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_CLEAR_BUFFER`` for this command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Audio Device to clear all software and hardware
+buffers of the audio decoder device.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_SET_ID
+------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_SET_ID
+
+.. code-block:: c
+
+        int  ioctl(int fd, int request = AUDIO_SET_ID, int id)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_SET_ID`` for this command.
+
+    -  ..
+
+       -  ``int id``
+
+       -  Audio sub-stream id.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl selects which sub-stream is to be decoded if a program or
+system stream is sent to the video device.
+
+If no audio stream type is set the id has to be in range [0xC0,0xDF]
+for MPEG sound, in [0x80,0x87] for AC3 and in [0xA0,0xA7] for LPCM.
+See ITU-T H.222.0 | ISO/IEC 13818-1 for further description.
+
+If the stream type is set with `AUDIO_SET_STREAMTYPE`_, the id just
+specifies the sub-stream id of the audio stream and only the first 5 bits
+(& 0x1F) are recognized.
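+
+For illustration, a sketch selecting the first MPEG audio sub-stream
+(id 0xC0) when no stream type has been set:
+
+.. code-block:: c
+
+        #include <stdio.h>
+        #include <sys/ioctl.h>
+        #include <linux/dvb/audio.h>
+
+        /* fd was returned by a previous call to open(). */
+        if (ioctl(fd, AUDIO_SET_ID, 0xC0) < 0)
+                perror("AUDIO_SET_ID");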
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_SET_MIXER
+---------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_SET_MIXER
+
+.. code-block:: c
+
+        int ioctl(int fd, int request = AUDIO_SET_MIXER, audio_mixer_t *mix)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_SET_MIXER`` for this command.
+
+    -  ..
+
+       -  ``audio_mixer_t *mix``
+
+       -  Mixer settings.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl lets you adjust the mixer settings of the audio decoder.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+AUDIO_SET_STREAMTYPE
+--------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_SET_STREAMTYPE
+
+.. code-block:: c
+
+        int  ioctl(fd, int request = AUDIO_SET_STREAMTYPE, int type)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_SET_STREAMTYPE`` for this command.
+
+    -  ..
+
+       -  ``int type``
+
+       -  Stream type.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl tells the driver which kind of audio stream to expect. This
+is useful if the stream offers several audio sub-streams like LPCM and
+AC3.
+
+Stream types defined in ITU-T H.222.0 | ISO/IEC 13818-1 are used.
+
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``EINVAL``
+
+       -  Type is not a valid or supported stream type.
+
+
+-----
+
+
+AUDIO_BILINGUAL_CHANNEL_SELECT
+------------------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: AUDIO_BILINGUAL_CHANNEL_SELECT
+
+.. code-block:: c
+
+        int ioctl(int fd, int request = AUDIO_BILINGUAL_CHANNEL_SELECT,
+        audio_channel_select_t)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``AUDIO_BILINGUAL_CHANNEL_SELECT`` for this command.
+
+    -  ..
+
+       -  ``audio_channel_select_t ch``
+
+       -  Select the output format of the audio (mono left/right, stereo).
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl has been replaced by the V4L2
+``V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK`` control
+for MPEG decoders controlled through V4L2.
+
+This ioctl call asks the Audio Device to select the requested channel
+for bilingual streams if possible.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+open()
+------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    #include <fcntl.h>
+
+.. c:function:: int  open(const char *deviceName, int flags)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``const char *deviceName``
+
+       -  Name of specific audio device.
+
+    -  ..
+
+       -  :rspan:`3` ``int flags``
+
+       -  :cspan:`1` A bit-wise OR of the following flags:
+
+    -  ..
+
+       -  ``O_RDONLY``
+
+       -  read-only access
+
+    -  ..
+
+       -  ``O_RDWR``
+
+       -  read/write access
+
+    -  ..
+
+       -  ``O_NONBLOCK``
+       -  | Open in non-blocking mode
+          | (blocking mode is the default)
+
+Description
+~~~~~~~~~~~
+
+This system call opens a named audio device (e.g.
+``/dev/dvb/adapter0/audio0``) for subsequent use. When an open() call has
+succeeded, the device will be ready for use. The significance of
+blocking or non-blocking mode is described in the documentation for
+functions where there is a difference. It does not affect the semantics
+of the open() call itself. A device opened in blocking mode can later be
+put into non-blocking mode (and vice versa) using the F_SETFL command
+of the fcntl system call. This is a standard system call, documented in
+the Linux manual page for fcntl. Only one user can open the Audio Device
+in O_RDWR mode. All other attempts to open the device in this mode will
+fail, and an error code will be returned. If the Audio Device is opened
+in O_RDONLY mode, the only ioctl call that can be used is
+`AUDIO_GET_STATUS`_. All other calls will return with an error code.
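+
+A minimal sketch (error checks omitted); the device path is an example:
+
+.. code-block:: c
+
+        #include <fcntl.h>
+
+        int fd = open("/dev/dvb/adapter0/audio0", O_RDWR);
+
+        /* Switch the already open descriptor to non-blocking
+         * mode later on, as described above. */
+        fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);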
+
+Return Value
+~~~~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``ENODEV``
+
+       -  Device driver not loaded/available.
+
+    -  ..
+
+       -  ``EBUSY``
+
+       -  Device or resource busy.
+
+    -  ..
+
+       -  ``EINVAL``
+
+       -  Invalid argument.
+
+
+-----
+
+
+close()
+-------
+
+Synopsis
+~~~~~~~~
+
+.. c:function::        int close(int fd)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+Description
+~~~~~~~~~~~
+
+This system call closes a previously opened audio device.
+
+Return Value
+~~~~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``EBADF``
+
+       -  Fd is not a valid open file descriptor.
+
+-----
+
+
+write()
+-------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+        ssize_t write(int fd, const void *buf, size_t count)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``const void *buf``
+
+       -  Pointer to the buffer containing the PES data.
+
+    -  ..
+
+       -  ``size_t count``
+
+       -  Size of buf.
+
+Description
+~~~~~~~~~~~
+
+This system call can only be used if ``AUDIO_SOURCE_MEMORY`` is selected
+in the ioctl call `AUDIO_SELECT_SOURCE`_. The data provided shall be in
+PES format. If ``O_NONBLOCK`` is not specified the function will block
+until buffer space is available. The amount of data to be transferred is
+implied by count.
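+
+A sketch of a feeding loop that copes with partial writes in blocking
+mode; the helper name is made up for this example:
+
+.. code-block:: c
+
+        #include <unistd.h>
+
+        /* Write a complete PES buffer to the decoder. */
+        ssize_t feed_pes(int fd, const unsigned char *buf, size_t count)
+        {
+                size_t done = 0;
+
+                while (done < count) {
+                        ssize_t r = write(fd, buf + done, count - done);
+
+                        if (r < 0)
+                                return r; /* e.g. EPERM or ENOMEM */
+                        done += r;
+                }
+                return done;
+        }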
+
+Return Value
+~~~~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``EPERM``
+
+       -  :cspan:`1` Mode ``AUDIO_SOURCE_MEMORY`` not selected.
+
+    -  ..
+
+       -  ``ENOMEM``
+
+       -  Attempted to write more data than the internal buffer can hold.
+
+    -  ..
+
+       -  ``EBADF``
+
+       -  Fd is not a valid open file descriptor.
diff --git a/Documentation/userspace-api/media/dvb/legacy_dvb_decoder_api.rst b/Documentation/userspace-api/media/dvb/legacy_dvb_decoder_api.rst
new file mode 100644 (file)
index 0000000..f58985a
--- /dev/null
@@ -0,0 +1,61 @@
+.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later OR GPL-2.0
+
+.. _legacy_dvb_decoder_api:
+
+============================
+Legacy DVB MPEG Decoder APIs
+============================
+
+.. _legacy_dvb_decoder_notes:
+
+General Notes
+=============
+
+This API was originally designed for DVB only and is therefore limited to
+the :ref:`legacy_dvb_decoder_formats` used in such digital TV broadcast
+systems.
+
+To circumvent these limitations the more versatile :ref:`V4L2 <v4l2spec>`
+API has been designed, which replaces this part of the DVB API.
+
+Nevertheless there have been projects built around this API.
+To ensure compatibility this API is kept as it is.
+
+.. attention:: Do **not** use this API in new drivers!
+
+    For audio and video use the :ref:`V4L2 <v4l2spec>` and ALSA APIs.
+
+    Pipelines should be set up using the :ref:`Media Controller  API<media_controller>`.
+
+In practice the decoders tend to be treated differently: the application
+typically knows which decoder is in use, or it is specially written for
+one decoder type. Capabilities are rarely queried because they are
+already known.
+
+
+.. _legacy_dvb_decoder_formats:
+
+Data Formats
+============
+
+The API has been designed for DVB and compatible broadcast systems.
+Consequently, the only supported data formats are ISO/IEC 13818-1
+compatible MPEG streams. The supported payloads may vary depending on the
+decoder in use.
+
+Timestamps are always MPEG PTS as defined in ITU T-REC-H.222.0 /
+ISO/IEC 13818-1, if not otherwise noted.
+
+For storing recordings, TS streams are typically used and, to a lesser
+extent, PES. Both variants are commonly accepted for playback, but this
+may be driver dependent.
+
+
+Table of Contents
+=================
+
+.. toctree::
+    :maxdepth: 2
+
+    legacy_dvb_video
+    legacy_dvb_audio
+    legacy_dvb_osd
diff --git a/Documentation/userspace-api/media/dvb/legacy_dvb_osd.rst b/Documentation/userspace-api/media/dvb/legacy_dvb_osd.rst
new file mode 100644 (file)
index 0000000..179b66a
--- /dev/null
@@ -0,0 +1,883 @@
+.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later OR GPL-2.0
+
+.. c:namespace:: dtv.legacy.osd
+
+.. _dvb_osd:
+
+==============
+DVB OSD Device
+==============
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+The DVB OSD device controls the OnScreen-Display of the AV7110 based
+DVB-cards with hardware MPEG2 decoder. It can be accessed through
+``/dev/dvb/adapter?/osd0``.
+Data types and ioctl definitions can be accessed by including
+``linux/dvb/osd.h`` in your application.
+
+The OSD is not a frame-buffer like on many other cards.
+It is a kind of canvas one can draw on.
+The color-depth is limited depending on the memory size installed.
+An appropriate palette of colors has to be set up.
+The installed memory size can be identified with the `OSD_GET_CAPABILITY`_
+ioctl.
+
+OSD Data Types
+==============
+
+OSD_Command
+-----------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    typedef enum {
+       /* All functions return -2 on "not open" */
+       OSD_Close = 1,
+       OSD_Open,
+       OSD_Show,
+       OSD_Hide,
+       OSD_Clear,
+       OSD_Fill,
+       OSD_SetColor,
+       OSD_SetPalette,
+       OSD_SetTrans,
+       OSD_SetPixel,
+       OSD_GetPixel,
+       OSD_SetRow,
+       OSD_SetBlock,
+       OSD_FillRow,
+       OSD_FillBlock,
+       OSD_Line,
+       OSD_Query,
+       OSD_Test,
+       OSD_Text,
+       OSD_SetWindow,
+       OSD_MoveWindow,
+       OSD_OpenRaw,
+    } OSD_Command;
+
+Commands
+~~~~~~~~
+
+.. note::  All functions return -2 on "not open"
+
+.. flat-table::
+    :header-rows:  1
+    :stub-columns: 0
+
+    -  ..
+
+       -  Command
+
+       -  | Variables of ``struct`` `osd_cmd_t`_ used by the command.
+          | Usage{variable} denotes an alternative use of the variable.
+
+       -  :cspan:`2` Description
+
+
+
+    -  ..
+
+       -  ``OSD_Close``
+
+       -  -
+
+       -  | Disables OSD and releases the buffers.
+          | Returns 0 on success.
+
+    -  ..
+
+       -  ``OSD_Open``
+
+       -  | x0,y0,x1,y1,
+          | BitPerPixel[2/4/8]{color&0x0F},
+          | mix[0..15]{color&0xF0}
+
+       -  | Opens OSD with this size and bit depth
+          | Returns 0 on success,
+          | -1 on DRAM allocation error,
+          | -2 on "already open".
+
+    -  ..
+
+       -  ``OSD_Show``
+
+       -  -
+
+       -  | Enables OSD mode.
+          | Returns 0 on success.
+
+    -  ..
+
+       -  ``OSD_Hide``
+
+       -  -
+
+       -  | Disables OSD mode.
+          | Returns 0 on success.
+
+    -  ..
+
+       -  ``OSD_Clear``
+
+       -  -
+
+       -  | Sets all pixels to color 0.
+          | Returns 0 on success.
+
+    -  ..
+
+       -  ``OSD_Fill``
+
+       -  color
+
+       -  | Sets all pixels to color <color>.
+          | Returns 0 on success.
+
+    -  ..
+
+       -  ``OSD_SetColor``
+
+       -  | color,
+          | R{x0},G{y0},B{x1},
+          | opacity{y1}
+
+       -  | Set palette entry <num> to <r,g,b>, <mix> and <trans> apply
+          | R,G,B: 0..255
+          | R=Red, G=Green, B=Blue
+          | opacity=0:      pixel opacity 0% (only video pixel shows)
+          | opacity=1..254: pixel opacity as specified in header
+          | opacity=255:    pixel opacity 100% (only OSD pixel shows)
+          | Returns 0 on success, -1 on error.
+
+    -  ..
+
+       -  ``OSD_SetPalette``
+
+       -  | firstcolor{color},
+          | lastcolor{x0},data
+
+       -  | Set a number of entries in the palette.
+          | Sets the entries "firstcolor" through "lastcolor" from the
+            array "data".
+          | Data has 4 bytes for each color:
+          | R,G,B, and an opacity value: 0->transparent, 1..254->mix,
+            255->pixel
+
+    -  ..
+
+       -  ``OSD_SetTrans``
+
+       -  transparency{color}
+
+       -  | Sets transparency of mixed pixel (0..15).
+          | Returns 0 on success.
+
+    -  ..
+
+       -  ``OSD_SetPixel``
+
+       -  x0,y0,color
+
+       -  | Sets pixel <x>,<y> to color number <color>.
+          | Returns 0 on success, -1 on error.
+
+    -  ..
+
+       -  ``OSD_GetPixel``
+
+       -  x0,y0
+
+       -  | Returns color number of pixel <x>,<y>,  or -1.
+          | Command currently not supported by the AV7110!
+
+    -  ..
+
+       -  ``OSD_SetRow``
+
+       -  x0,y0,x1,data
+
+       -  | Fills pixels x0,y through  x1,y with the content of data[].
+          | Returns 0 on success, -1 on clipping all pixels (no pixel
+            drawn).
+
+    -  ..
+
+       -  ``OSD_SetBlock``
+
+       -  | x0,y0,x1,y1,
+          | increment{color},
+          | data
+
+       -  | Fills pixels x0,y0 through  x1,y1 with the content of data[].
+          | Inc contains the width of one line in the data block,
+          | inc<=0 uses block width as line width.
+          | Returns 0 on success, -1 on clipping all pixels.
+
+    -  ..
+
+       -  ``OSD_FillRow``
+
+       -  x0,y0,x1,color
+
+       -  | Fills pixels x0,y through  x1,y with the color <color>.
+          | Returns 0 on success, -1 on clipping all pixels.
+
+    -  ..
+
+       -  ``OSD_FillBlock``
+
+       -  x0,y0,x1,y1,color
+
+       -  | Fills pixels x0,y0 through  x1,y1 with the color <color>.
+          | Returns 0 on success, -1 on clipping all pixels.
+
+    -  ..
+
+       -  ``OSD_Line``
+
+       -  x0,y0,x1,y1,color
+
+       -  | Draw a line from x0,y0 to x1,y1 with the color <color>.
+          | Returns 0 on success.
+
+    -  ..
+
+       -  ``OSD_Query``
+
+       -  | x0,y0,x1,y1,
+          | xasp{color}; yasp=11
+
+       -  | Fills parameters with the picture dimensions and the pixel
+            aspect ratio.
+          | Returns 0 on success.
+          | Command currently not supported by the AV7110!
+
+    -  ..
+
+       -  ``OSD_Test``
+
+       -  -
+
+       -  | Draws a test picture.
+          | For debugging purposes only.
+          | Returns 0 on success.
+
+    -  ..
+
+       -  ``OSD_Text``
+
+       -  x0,y0,size,color,text
+
+       -  Draws a text at position x0,y0 with the color <color>.
+
+    -  ..
+
+       -  ``OSD_SetWindow``
+
+       -  x0
+
+       -  Set window with number 0<x0<8 as current.
+
+    -  ..
+
+       -  ``OSD_MoveWindow``
+
+       -  x0,y0
+
+       -  Move current window to (x0, y0).
+
+    -  ..
+
+       -  ``OSD_OpenRaw``
+
+       -  | x0,y0,x1,y1,
+          | `osd_raw_window_t`_ {color}
+
+       -  Open other types of OSD windows.
+
+Description
+~~~~~~~~~~~
+
+The ``OSD_Command`` data type is used with the `OSD_SEND_CMD`_ ioctl to
+tell the driver which OSD_Command to execute.
+
+
+-----
+
+osd_cmd_t
+---------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    typedef struct osd_cmd_s {
+       OSD_Command cmd;
+       int x0;
+       int y0;
+       int x1;
+       int y1;
+       int color;
+       void __user *data;
+    } osd_cmd_t;
+
+Variables
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``OSD_Command cmd``
+
+       -  `OSD_Command`_ to be executed.
+
+    -  ..
+
+       -  ``int x0``
+
+       -  First horizontal position.
+
+    -  ..
+
+       -  ``int y0``
+
+       -  First vertical position.
+
+    -  ..
+
+       -  ``int x1``
+
+       -  Second horizontal position.
+
+    -  ..
+
+       -  ``int y1``
+
+       -  Second vertical position.
+
+    -  ..
+
+       -  ``int color``
+
+       -  Number of the color in the palette.
+
+    -  ..
+
+       -  ``void __user *data``
+
+       -  Command specific data.
+
+Description
+~~~~~~~~~~~
+
+The ``osd_cmd_t`` data type is used with the `OSD_SEND_CMD`_ ioctl.
+It contains the `OSD_Command`_ itself and the data belonging to it.
+The structure is passed to the driver, which may modify its
+components.
+
+
+-----
+
+
+osd_raw_window_t
+----------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    typedef enum {
+       OSD_BITMAP1,
+       OSD_BITMAP2,
+       OSD_BITMAP4,
+       OSD_BITMAP8,
+       OSD_BITMAP1HR,
+       OSD_BITMAP2HR,
+       OSD_BITMAP4HR,
+       OSD_BITMAP8HR,
+       OSD_YCRCB422,
+       OSD_YCRCB444,
+       OSD_YCRCB444HR,
+       OSD_VIDEOTSIZE,
+       OSD_VIDEOHSIZE,
+       OSD_VIDEOQSIZE,
+       OSD_VIDEODSIZE,
+       OSD_VIDEOTHSIZE,
+       OSD_VIDEOTQSIZE,
+       OSD_VIDEOTDSIZE,
+       OSD_VIDEONSIZE,
+       OSD_CURSOR
+    } osd_raw_window_t;
+
+Constants
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``OSD_BITMAP1``
+
+       -  :cspan:`1` 1 bit bitmap
+
+    -  ..
+
+       -  ``OSD_BITMAP2``
+
+       -  2 bit bitmap
+
+    -  ..
+
+       -  ``OSD_BITMAP4``
+
+       -  4 bit bitmap
+
+    -  ..
+
+       -  ``OSD_BITMAP8``
+
+       -  8 bit bitmap
+
+    -  ..
+
+       -  ``OSD_BITMAP1HR``
+
+       -  1 Bit bitmap half resolution
+
+    -  ..
+
+       -  ``OSD_BITMAP2HR``
+
+       -  2 Bit bitmap half resolution
+
+    -  ..
+
+       -  ``OSD_BITMAP4HR``
+
+       -  4 Bit bitmap half resolution
+
+    -  ..
+
+       -  ``OSD_BITMAP8HR``
+
+       -  8 Bit bitmap half resolution
+
+    -  ..
+
+       -  ``OSD_YCRCB422``
+
+       -  4:2:2 YCRCB Graphic Display
+
+    -  ..
+
+       -  ``OSD_YCRCB444``
+
+       -  4:4:4 YCRCB Graphic Display
+
+    -  ..
+
+       -  ``OSD_YCRCB444HR``
+
+       -  4:4:4 YCRCB graphic half resolution
+
+    -  ..
+
+       -  ``OSD_VIDEOTSIZE``
+
+       -  True Size Normal MPEG Video Display
+
+    -  ..
+
+       -  ``OSD_VIDEOHSIZE``
+
+       -  MPEG Video Display Half Resolution
+
+    -  ..
+
+       -  ``OSD_VIDEOQSIZE``
+
+       -  MPEG Video Display Quarter Resolution
+
+    -  ..
+
+       -  ``OSD_VIDEODSIZE``
+
+       -  MPEG Video Display Double Resolution
+
+    -  ..
+
+       -  ``OSD_VIDEOTHSIZE``
+
+       -  True Size MPEG Video Display Half Resolution
+
+    -  ..
+
+       -  ``OSD_VIDEOTQSIZE``
+
+       -  True Size MPEG Video Display Quarter Resolution
+
+    -  ..
+
+       -  ``OSD_VIDEOTDSIZE``
+
+       -  True Size MPEG Video Display Double Resolution
+
+    -  ..
+
+       -  ``OSD_VIDEONSIZE``
+
+       -  Full Size MPEG Video Display
+
+    -  ..
+
+       -  ``OSD_CURSOR``
+
+       -  Cursor
+
+Description
+~~~~~~~~~~~
+
+The ``osd_raw_window_t`` data type is used with the `OSD_Command`_
+``OSD_OpenRaw`` to tell the driver which type of OSD window to open.
+
+
+-----
+
+
+osd_cap_t
+---------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    typedef struct osd_cap_s {
+       int  cmd;
+    #define OSD_CAP_MEMSIZE         1
+       long val;
+    } osd_cap_t;
+
+Variables
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int  cmd``
+
+       -  Capability to query.
+
+    -  ..
+
+       -  ``long val``
+
+       -  Used to store the data.
+
+Supported capabilities
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``OSD_CAP_MEMSIZE``
+
+       -  Memory size installed on the card.
+
+Description
+~~~~~~~~~~~
+
+This data structure is used with the `OSD_GET_CAPABILITY`_ call.
+
+
+-----
+
+
+OSD Function Calls
+==================
+
+OSD_SEND_CMD
+------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: OSD_SEND_CMD
+
+.. code-block:: c
+
+    int ioctl(int fd, int request = OSD_SEND_CMD, osd_cmd_t *cmd)
+
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``OSD_SEND_CMD`` for this command.
+
+    -  ..
+
+       -  ``osd_cmd_t *cmd``
+
+       -  Pointer to the location of the structure `osd_cmd_t`_ for this
+          command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl sends the `OSD_Command`_ to the card.
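+
+An illustrative sketch (window size and colors are arbitrary): open a
+4 bit deep 100x100 window and fill it with palette entry 1:
+
+.. code-block:: c
+
+    #include <string.h>
+    #include <sys/ioctl.h>
+    #include <linux/dvb/osd.h>
+
+    /* fd was returned by a previous call to open(). */
+    osd_cmd_t cmd;
+
+    memset(&cmd, 0, sizeof(cmd));
+    cmd.cmd   = OSD_Open;
+    cmd.x1    = 99;             /* x0 and y0 stay 0 */
+    cmd.y1    = 99;
+    cmd.color = 4;              /* bits per pixel, mix = 0 */
+    ioctl(fd, OSD_SEND_CMD, &cmd);
+
+    cmd.cmd   = OSD_FillBlock;
+    cmd.color = 1;              /* palette entry */
+    ioctl(fd, OSD_SEND_CMD, &cmd);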
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``EINVAL``
+
+       -  Command is out of range.
+
+
+-----
+
+
+OSD_GET_CAPABILITY
+------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: OSD_GET_CAPABILITY
+
+.. code-block:: c
+
+    int ioctl(int fd, int request = OSD_GET_CAPABILITY,
+    osd_cap_t *cap)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``OSD_GET_CAPABILITY`` for this command.
+
+    -  ..
+
+       -  ``osd_cap_t *cap``
+
+       -  Pointer to the location of the structure `osd_cap_t`_ for this
+          command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl is used to get the capabilities of the OSD of the AV7110 based
+DVB-decoder-card in use.
+
+.. note::
+    The structure osd_cap_t has to be set up by the user and passed to the
+    driver.
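+
+A sketch querying the installed memory size:
+
+.. code-block:: c
+
+    #include <stdio.h>
+    #include <sys/ioctl.h>
+    #include <linux/dvb/osd.h>
+
+    /* fd was returned by a previous call to open(). */
+    osd_cap_t cap = { .cmd = OSD_CAP_MEMSIZE, .val = 0 };
+
+    if (ioctl(fd, OSD_GET_CAPABILITY, &cap) == 0)
+        printf("installed OSD memory: %ld\n", cap.val);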
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+
+    -  ..
+
+       -  ``EINVAL``
+
+       -  Unsupported capability.
+
+
+-----
+
+
+open()
+------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    #include <fcntl.h>
+
+.. c:function:: int open(const char *deviceName, int flags)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``const char *deviceName``
+
+       -  Name of specific OSD device.
+
+    -  ..
+
+       -  :rspan:`3` ``int flags``
+
+       -  :cspan:`1` A bit-wise OR of the following flags:
+
+    -  ..
+
+       -  ``O_RDONLY``
+
+       -  read-only access
+
+    -  ..
+
+       -  ``O_RDWR``
+
+       -  read/write access
+
+    -  ..
+
+       -  ``O_NONBLOCK``
+       -  | Open in non-blocking mode
+          | (blocking mode is the default)
+
+Description
+~~~~~~~~~~~
+
+This system call opens a named OSD device (e.g.
+``/dev/dvb/adapter?/osd0``) for subsequent use.
+
+Return Value
+~~~~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``ENODEV``
+
+       -  Device driver not loaded/available.
+
+    -  ..
+
+       -  ``EINTERNAL``
+
+       -  Internal error.
+
+    -  ..
+
+       -  ``EBUSY``
+
+       -  Device or resource busy.
+
+    -  ..
+
+       -  ``EINVAL``
+
+       -  Invalid argument.
+
+
+-----
+
+
+close()
+-------
+
+Synopsis
+~~~~~~~~
+
+.. c:function:: int close(int fd)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_ .
+
+Description
+~~~~~~~~~~~
+
+This system call closes a previously opened OSD device.
+
+Return Value
+~~~~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``EBADF``
+
+       -  fd is not a valid open file descriptor.
diff --git a/Documentation/userspace-api/media/dvb/legacy_dvb_video.rst b/Documentation/userspace-api/media/dvb/legacy_dvb_video.rst
new file mode 100644 (file)
index 0000000..b9fd5ca
--- /dev/null
@@ -0,0 +1,2430 @@
+.. SPDX-License-Identifier: GFDL-1.1-no-invariants-or-later OR GPL-2.0
+
+.. c:namespace:: dtv.legacy.video
+
+.. _dvb_video:
+
+================
+DVB Video Device
+================
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+The DVB video device controls the MPEG2 video decoder of the DVB
+hardware. It can be accessed through ``/dev/dvb/adapter0/video0``. Data
+types and ioctl definitions can be accessed by including
+``linux/dvb/video.h`` in your application.
+
+Note that the DVB video device only controls decoding of the MPEG video
+stream, not its presentation on the TV or computer screen. On PCs this
+is typically handled by an associated video4linux device, e.g.
+``/dev/video``, which allows scaling and defining output windows.
+
+Most DVB cards don’t have their own MPEG decoder, which results in the
+omission of the audio and video device as well as the video4linux
+device.
+
+These ioctls were also used by V4L2 to control MPEG decoders implemented
+in V4L2. The use of these ioctls for that purpose has been made obsolete
+and proper V4L2 ioctls or controls have been created to replace that
+functionality. Use :ref:`V4L2 ioctls<video>` for new drivers!
+
+
+Video Data Types
+================
+
+
+
+video_format_t
+--------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    typedef enum {
+       VIDEO_FORMAT_4_3,
+       VIDEO_FORMAT_16_9,
+       VIDEO_FORMAT_221_1
+    } video_format_t;
+
+Constants
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``VIDEO_FORMAT_4_3``
+
+       -  Select 4:3 format.
+
+    -  ..
+
+       -  ``VIDEO_FORMAT_16_9``
+
+       -  Select 16:9 format.
+
+    -  ..
+
+       -  ``VIDEO_FORMAT_221_1``
+
+       -  Select 2.21:1 format.
+
+Description
+~~~~~~~~~~~
+
+The ``video_format_t`` data type
+is used in the `VIDEO_SET_FORMAT`_ function to tell the driver which
+aspect ratio the output hardware (e.g. TV) has. It is also used in the
+data structures `video_status`_ returned by `VIDEO_GET_STATUS`_
+and `video_event`_ returned by `VIDEO_GET_EVENT`_ which report
+about the display format of the current video stream.
+
+
+-----
+
+
+video_displayformat_t
+---------------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    typedef enum {
+       VIDEO_PAN_SCAN,
+       VIDEO_LETTER_BOX,
+       VIDEO_CENTER_CUT_OUT
+    } video_displayformat_t;
+
+Constants
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``VIDEO_PAN_SCAN``
+
+       -  Use pan and scan format.
+
+    -  ..
+
+       -  ``VIDEO_LETTER_BOX``
+
+       -  Use letterbox format.
+
+    -  ..
+
+       -  ``VIDEO_CENTER_CUT_OUT``
+
+       -  Use center cut out format.
+
+Description
+~~~~~~~~~~~
+
+In case the display format of the video stream and of the display
+hardware differ, the application has to specify how to handle the
+cropping of the picture. This can be done using the
+`VIDEO_SET_DISPLAY_FORMAT`_ call which accepts this enum as argument.
+
+
+-----
+
+
+video_size_t
+------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    typedef struct {
+       int w;
+       int h;
+       video_format_t aspect_ratio;
+    } video_size_t;
+
+Variables
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int w``
+
+       -  Video width in pixels.
+
+    -  ..
+
+       -  ``int h``
+
+       -  Video height in pixels.
+
+    -  ..
+
+       -  `video_format_t`_ ``aspect_ratio``
+
+       -  Aspect ratio.
+
+Description
+~~~~~~~~~~~
+
+Used in the struct `video_event`_. It stores the resolution and
+aspect ratio of the video.
+
+
+-----
+
+
+video_stream_source_t
+---------------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    typedef enum {
+       VIDEO_SOURCE_DEMUX,
+       VIDEO_SOURCE_MEMORY
+    } video_stream_source_t;
+
+Constants
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``VIDEO_SOURCE_DEMUX``
+
+       -  :cspan:`1` Select the demux as the main source.
+
+    -  ..
+
+       -  ``VIDEO_SOURCE_MEMORY``
+
+       -  If this source is selected, the stream
+          comes from the user through the write
+          system call.
+
+Description
+~~~~~~~~~~~
+
+The video stream source is set through the `VIDEO_SELECT_SOURCE`_ call
+and can take the following values, depending on whether we are replaying
+from an internal (demuxer) or external (user write) source.
+VIDEO_SOURCE_DEMUX selects the demultiplexer (fed either by the
+frontend or the DVR device) as the source of the video stream. If
+VIDEO_SOURCE_MEMORY is selected the stream comes from the application
+through the `write()`_ system call.
+
+
+-----
+
+
+video_play_state_t
+------------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    typedef enum {
+       VIDEO_STOPPED,
+       VIDEO_PLAYING,
+       VIDEO_FREEZED
+    } video_play_state_t;
+
+Constants
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``VIDEO_STOPPED``
+
+       -  Video is stopped.
+
+    -  ..
+
+       -  ``VIDEO_PLAYING``
+
+       -  Video is currently playing.
+
+    -  ..
+
+       -  ``VIDEO_FREEZED``
+
+       -  Video is frozen.
+
+Description
+~~~~~~~~~~~
+
+These values can be returned by the `VIDEO_GET_STATUS`_ call
+representing the state of video playback.
+
+
+-----
+
+
+struct video_command
+--------------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    struct video_command {
+       __u32 cmd;
+       __u32 flags;
+       union {
+           struct {
+               __u64 pts;
+           } stop;
+
+           struct {
+               __s32 speed;
+               __u32 format;
+           } play;
+
+           struct {
+               __u32 data[16];
+           } raw;
+       };
+    };
+
+
+Variables
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``__u32 cmd``
+
+       -  `Decoder command`_
+
+    -  ..
+
+       -  ``__u32 flags``
+
+       -  Flags for the `Decoder command`_.
+
+    -  ..
+
+       -  ``struct stop``
+
+       -  ``__u64 pts``
+
+       -  MPEG PTS
+
+    -  ..
+
+       -  :rspan:`5` ``struct play``
+
+       -  :rspan:`4` ``__s32 speed``
+
+       -   0 or 1000 specifies normal speed,
+
+    -  ..
+
+       -   1:  specifies forward single stepping,
+
+    -  ..
+
+       -   -1: specifies backward single stepping,
+
+    -  ..
+
+       -   >1: playback at speed / 1000 of the normal speed
+
+    -  ..
+
+       -   <-1: reverse playback at ( -speed / 1000 ) of the normal speed.
+
+    -  ..
+
+       -  ``__u32 format``
+
+       -  `Play input formats`_
+
+    -  ..
+
+       -  ``__u32 data[16]``
+
+       -  Reserved
+
+Description
+~~~~~~~~~~~
+
+The structure must be zeroed before use by the application. This ensures
+it can be extended safely in the future.
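+
+A sketch under the assumption that the structure is used with the
+``VIDEO_COMMAND`` ioctl from ``linux/dvb/video.h`` (not covered in this
+document): start playback at twice the normal speed.
+
+.. code-block:: c
+
+    #include <string.h>
+    #include <sys/ioctl.h>
+    #include <linux/dvb/video.h>
+
+    /* fd was returned by a previous call to open(). */
+    struct video_command cmd;
+
+    memset(&cmd, 0, sizeof(cmd));       /* zero before use, see above */
+    cmd.cmd         = VIDEO_CMD_PLAY;
+    cmd.play.speed  = 2000;             /* speed / 1000 of normal */
+    cmd.play.format = VIDEO_PLAY_FMT_NONE;
+    ioctl(fd, VIDEO_COMMAND, &cmd);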
+
+
+-----
+
+
+Predefined decoder commands and flags
+-------------------------------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    #define VIDEO_CMD_PLAY                      (0)
+    #define VIDEO_CMD_STOP                      (1)
+    #define VIDEO_CMD_FREEZE                    (2)
+    #define VIDEO_CMD_CONTINUE                  (3)
+
+    #define VIDEO_CMD_FREEZE_TO_BLACK      (1 << 0)
+
+    #define VIDEO_CMD_STOP_TO_BLACK        (1 << 0)
+    #define VIDEO_CMD_STOP_IMMEDIATELY     (1 << 1)
+
+    #define VIDEO_PLAY_FMT_NONE                 (0)
+    #define VIDEO_PLAY_FMT_GOP                  (1)
+
+    #define VIDEO_VSYNC_FIELD_UNKNOWN           (0)
+    #define VIDEO_VSYNC_FIELD_ODD               (1)
+    #define VIDEO_VSYNC_FIELD_EVEN              (2)
+    #define VIDEO_VSYNC_FIELD_PROGRESSIVE       (3)
+
+Constants
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  :rspan:`3` _`Decoder command`
+
+       -  ``VIDEO_CMD_PLAY``
+
+       -  Start playback.
+
+    -  ..
+
+       -  ``VIDEO_CMD_STOP``
+
+       -  Stop playback.
+
+    -  ..
+
+       -  ``VIDEO_CMD_FREEZE``
+
+       -  Freeze playback.
+
+    -  ..
+
+       -  ``VIDEO_CMD_CONTINUE``
+
+       -  Continue playback after freeze.
+
+    -  ..
+
+       -  Flags for ``VIDEO_CMD_FREEZE``
+
+       -  ``VIDEO_CMD_FREEZE_TO_BLACK``
+
+       -  Show black picture on freeze.
+
+    -  ..
+
+       -  :rspan:`1` Flags for ``VIDEO_CMD_STOP``
+
+       -  ``VIDEO_CMD_STOP_TO_BLACK``
+
+       -  Show black picture on stop.
+
+    -  ..
+
+       -  ``VIDEO_CMD_STOP_IMMEDIATELY``
+
+       -  Stop immediately, without emptying buffers.
+
+    -  ..
+
+       -  :rspan:`1` _`Play input formats`
+
+       -  ``VIDEO_PLAY_FMT_NONE``
+
+       -  The decoder has no special format requirements
+
+    -  ..
+
+       -  ``VIDEO_PLAY_FMT_GOP``
+
+       -  The decoder requires full GOPs
+
+    -  ..
+
+       -  :rspan:`3` Field order
+
+       -  ``VIDEO_VSYNC_FIELD_UNKNOWN``
+
+       -  FIELD_UNKNOWN can be used if the hardware does not know
+          whether the Vsync is for an odd, even or progressive
+          (i.e. non-interlaced) field.
+
+    -  ..
+
+       -  ``VIDEO_VSYNC_FIELD_ODD``
+
+       -  Vsync is for an odd field.
+
+    -  ..
+
+       -  ``VIDEO_VSYNC_FIELD_EVEN``
+
+       -  Vsync is for an even field.
+
+    -  ..
+
+       -  ``VIDEO_VSYNC_FIELD_PROGRESSIVE``
+
+       -  progressive (i.e. non-interlaced)
+
+
+-----
+
+
+video_event
+-----------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    struct video_event {
+       __s32 type;
+    #define VIDEO_EVENT_SIZE_CHANGED        1
+    #define VIDEO_EVENT_FRAME_RATE_CHANGED  2
+    #define VIDEO_EVENT_DECODER_STOPPED     3
+    #define VIDEO_EVENT_VSYNC               4
+       long timestamp;
+       union {
+           video_size_t size;
+           unsigned int frame_rate;
+           unsigned char vsync_field;
+       } u;
+    };
+
+Variables
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  :rspan:`4` ``__s32 type``
+
+       -  :cspan:`1` Event type.
+
+    -  ..
+
+       -  ``VIDEO_EVENT_SIZE_CHANGED``
+
+       -  Size changed.
+
+    -  ..
+
+       -  ``VIDEO_EVENT_FRAME_RATE_CHANGED``
+
+       -  Framerate changed.
+
+    -  ..
+
+       -  ``VIDEO_EVENT_DECODER_STOPPED``
+
+       -  Decoder stopped.
+
+    -  ..
+
+       -  ``VIDEO_EVENT_VSYNC``
+
+       -  Vsync occurred.
+
+    -  ..
+
+       -  ``long timestamp``
+
+       -  :cspan:`1` MPEG PTS at occurrence.
+
+    -  ..
+
+       -  :rspan:`2` ``union u``
+
+       -  `video_size_t`_ size
+
+       -  Resolution and aspect ratio of the video.
+
+    -  ..
+
+       -  ``unsigned int frame_rate``
+
+       -  In frames per 1000 sec.
+
+    -  ..
+
+       -  ``unsigned char vsync_field``
+
+       -  | unknown / odd / even / progressive
+          | See: `Predefined decoder commands and flags`_
+
+Description
+~~~~~~~~~~~
+
+This is the structure of a video event as it is returned by the
+`VIDEO_GET_EVENT`_ call. See there for more details.
+
+
+-----
+
+
+video_status
+------------
+
+Synopsis
+~~~~~~~~
+
+The `VIDEO_GET_STATUS`_ call returns the following structure informing
+about various states of the playback operation.
+
+.. code-block:: c
+
+    struct video_status {
+       int                    video_blank;
+       video_play_state_t     play_state;
+       video_stream_source_t  stream_source;
+       video_format_t         video_format;
+       video_displayformat_t  display_format;
+    };
+
+Variables
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  :rspan:`2` ``int video_blank``
+
+       -  :cspan:`1` Show blank video on freeze?
+
+    -  ..
+
+       -  TRUE  ( != 0 )
+
+       -  Blank screen when freeze.
+
+    -  ..
+
+       -  FALSE ( == 0 )
+
+       -  Show last decoded frame.
+
+    -  ..
+
+       -  `video_play_state_t`_ ``play_state``
+
+       -  Current state of playback.
+
+    -  ..
+
+       -  `video_stream_source_t`_ ``stream_source``
+
+       -  Current source (demux/memory).
+
+    -  ..
+
+       -  `video_format_t`_ ``video_format``
+
+       -  Current aspect ratio of stream.
+
+    -  ..
+
+       -  `video_displayformat_t`_ ``display_format``
+
+       -  Applied cropping mode.
+
+Description
+~~~~~~~~~~~
+
+If ``video_blank`` is set ``TRUE`` video will be blanked out if the
+channel is changed or if playback is stopped. Otherwise, the last picture
+will be displayed. ``play_state`` indicates if the video is currently
+frozen, stopped, or being played back. The ``stream_source`` corresponds
+to the selected source for the video stream. It can come either from the
+demultiplexer or from memory. The ``video_format`` indicates the aspect
+ratio (one of 4:3, 16:9 or 2.21:1) of the currently played video stream.
+Finally, ``display_format`` corresponds to the applied cropping mode in
+case the source video format is not the same as the format of the output
+device.
+
+
+-----
+
+
+video_still_picture
+-------------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    struct video_still_picture {
+       char *iFrame;
+       int32_t size;
+    };
+
+Variables
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``char *iFrame``
+
+       -  Pointer to a single iframe in memory.
+
+    -  ..
+
+       -  ``int32_t size``
+
+       -  Size of the iframe.
+
+
+Description
+~~~~~~~~~~~
+
+An I-frame displayed via the `VIDEO_STILLPICTURE`_ call is passed on
+within this structure.
+
+
+-----
+
+
+video capabilities
+------------------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    #define VIDEO_CAP_MPEG1   1
+    #define VIDEO_CAP_MPEG2   2
+    #define VIDEO_CAP_SYS     4
+    #define VIDEO_CAP_PROG    8
+
+Constants
+~~~~~~~~~
+Bit definitions for capabilities:
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``VIDEO_CAP_MPEG1``
+
+       -  :cspan:`1` The hardware can decode MPEG1.
+
+    -  ..
+
+       -  ``VIDEO_CAP_MPEG2``
+
+       -  The hardware can decode MPEG2.
+
+    -  ..
+
+       -  ``VIDEO_CAP_SYS``
+
+       -  The video device accepts system stream.
+
+          You still have to open the video and the audio device
+          but only send the stream to the video device.
+
+    -  ..
+
+       -  ``VIDEO_CAP_PROG``
+
+       -  The video device accepts program stream.
+
+          You still have to open the video and the audio device
+          but only send the stream to the video device.
+
+Description
+~~~~~~~~~~~
+
+A call to `VIDEO_GET_CAPABILITIES`_ returns an unsigned integer with the
+following bits set according to the hardware's capabilities.
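+
+A sketch testing for program stream support:
+
+.. code-block:: c
+
+    #include <stdio.h>
+    #include <sys/ioctl.h>
+    #include <linux/dvb/video.h>
+
+    /* fd was returned by a previous call to open(). */
+    unsigned int cap;
+
+    if (ioctl(fd, VIDEO_GET_CAPABILITIES, &cap) == 0 &&
+        (cap & VIDEO_CAP_PROG))
+            printf("decoder accepts program streams\n");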
+
+
+-----
+
+
+Video Function Calls
+====================
+
+
+VIDEO_STOP
+----------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_STOP
+
+.. code-block:: c
+
+       int ioctl(fd, VIDEO_STOP, int mode)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  :cspan:`1` Equals ``VIDEO_STOP`` for this command.
+
+    -  ..
+
+       -  :rspan:`2` ``int mode``
+
+       -  :cspan:`1` Indicates how the screen shall be handled.
+
+    -  ..
+
+       -  TRUE  ( != 0 )
+
+       -  Blank screen when stop.
+
+    -  ..
+
+       -  FALSE ( == 0 )
+
+       -  Show last decoded frame.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl is for Digital TV devices only. To control a V4L2 decoder use
+the V4L2 :ref:`VIDIOC_DECODER_CMD` instead.
+
+This ioctl call asks the Video Device to stop playing the current
+stream. Depending on the input parameter, the screen can be blanked out
+or display the last decoded frame.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_PLAY
+----------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_PLAY
+
+.. code-block:: c
+
+       int ioctl(fd, VIDEO_PLAY)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_PLAY`` for this command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl is for Digital TV devices only. To control a V4L2 decoder use
+the V4L2 :ref:`VIDIOC_DECODER_CMD` instead.
+
+This ioctl call asks the Video Device to start playing a video stream
+from the selected source.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_FREEZE
+------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_FREEZE
+
+.. code-block:: c
+
+       int ioctl(fd, VIDEO_FREEZE)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_FREEZE`` for this command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl is for Digital TV devices only. To control a V4L2 decoder use
+the V4L2 :ref:`VIDIOC_DECODER_CMD` instead.
+
+This ioctl call suspends the live video stream being played, if
+VIDEO_SOURCE_DEMUX is selected. Decoding and playing are frozen.
+It is then possible to restart the decoding and playing process of the
+video stream using the `VIDEO_CONTINUE`_ command.
+If VIDEO_SOURCE_MEMORY is selected in the ioctl call
+`VIDEO_SELECT_SOURCE`_, the Digital TV subsystem will not decode any more
+data until the ioctl call `VIDEO_CONTINUE`_ or `VIDEO_PLAY`_ is performed.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_CONTINUE
+--------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_CONTINUE
+
+.. code-block:: c
+
+       int ioctl(fd, VIDEO_CONTINUE)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_CONTINUE`` for this command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl is for Digital TV devices only. To control a V4L2 decoder use
+the V4L2 :ref:`VIDIOC_DECODER_CMD` instead.
+
+This ioctl call restarts decoding and playing processes of the video
+stream which was played before a call to `VIDEO_FREEZE`_ was made.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_SELECT_SOURCE
+-------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_SELECT_SOURCE
+
+.. code-block:: c
+
+       int ioctl(fd, VIDEO_SELECT_SOURCE, video_stream_source_t source)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_SELECT_SOURCE`` for this command.
+
+    -  ..
+
+       -  `video_stream_source_t`_ ``source``
+
+       -  Indicates which source shall be used for the Video stream.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl is for Digital TV devices only. This ioctl was also supported
+by the V4L2 ivtv driver, but that has been replaced by the ivtv-specific
+``IVTV_IOC_PASSTHROUGH_MODE`` ioctl.
+
+This ioctl call informs the video device which source shall be used for
+the input data. The possible sources are demux or memory. If memory is
+selected via the struct `video_stream_source_t`_, the data is fed to the
+video device through the `write()`_ system call. If demux is selected,
+the data is directly transferred from the onboard demux-device to the
+decoder.
+
+The data fed to the decoder is also controlled by the PID-filter.
+Output selection: :c:type:`dmx_output` ``DMX_OUT_DECODER``.
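+
+A sketch switching the decoder input to memory so that data can be fed
+with `write()`_ afterwards:
+
+.. code-block:: c
+
+    #include <stdio.h>
+    #include <sys/ioctl.h>
+    #include <linux/dvb/video.h>
+
+    /* fd was returned by a previous call to open(). */
+    if (ioctl(fd, VIDEO_SELECT_SOURCE, VIDEO_SOURCE_MEMORY) < 0)
+            perror("VIDEO_SELECT_SOURCE");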
+
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_SET_BLANK
+---------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_SET_BLANK
+
+.. code-block:: c
+
+       int ioctl(fd, VIDEO_SET_BLANK, int mode)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  :cspan:`1` Equals ``VIDEO_SET_BLANK`` for this command.
+
+    -  ..
+
+       -  :rspan:`2` ``int mode``
+
+       -  :cspan:`1` Indicates if the screen shall be blanked.
+
+    -  ..
+
+       -  TRUE  ( != 0 )
+
+       -  Blank screen when stop.
+
+    -  ..
+
+       -  FALSE ( == 0 )
+
+       -  Show last decoded frame.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Video Device to blank out the picture.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_GET_STATUS
+----------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_GET_STATUS
+
+.. code-block:: c
+
+       int ioctl(fd, int request = VIDEO_GET_STATUS,
+       struct video_status *status)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_GET_STATUS`` for this command.
+
+    -  ..
+
+       -  ``struct`` `video_status`_ ``*status``
+
+       -  Returns the current status of the Video Device.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Video Device to return the current status of
+the device.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_GET_EVENT
+---------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_GET_EVENT
+
+.. code-block:: c
+
+       int ioctl(fd, int request = VIDEO_GET_EVENT,
+       struct video_event *ev)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_GET_EVENT`` for this command.
+
+    -  ..
+
+       -  ``struct`` `video_event`_ ``*ev``
+
+       -  Points to the location where the event, if any, is to be stored.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl is for DVB devices only. To get events from a V4L2 decoder
+use the V4L2 :ref:`VIDIOC_DQEVENT` ioctl instead.
+
+This ioctl call returns an event of type `video_event`_ if available. A
+certain number of the latest events will be queued and returned in order of
+occurrence. Older events may be discarded if not fetched in time. If
+an event is not available, the behavior depends on whether the device is
+in blocking or non-blocking mode. In the latter case, the call fails
+immediately with errno set to ``EWOULDBLOCK``. In the former case, the
+call blocks until an event becomes available. The standard Linux poll()
+and/or select() system calls can be used with the device file descriptor
+to watch for new events. For select(), the file descriptor should be
+included in the exceptfds argument, and for poll(), POLLPRI should be
+specified as the wake-up condition. Read-only permissions are sufficient
+for this ioctl call.
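+
+A sketch of an event loop using poll() with ``POLLPRI`` as described
+above:
+
+.. code-block:: c
+
+    #include <stdio.h>
+    #include <poll.h>
+    #include <sys/ioctl.h>
+    #include <linux/dvb/video.h>
+
+    /* fd was returned by a previous call to open(). */
+    struct video_event ev;
+    struct pollfd pfd = { .fd = fd, .events = POLLPRI };
+
+    while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI)) {
+            if (ioctl(fd, VIDEO_GET_EVENT, &ev) < 0)
+                    break;
+            if (ev.type == VIDEO_EVENT_SIZE_CHANGED)
+                    printf("new size: %dx%d\n",
+                           ev.u.size.w, ev.u.size.h);
+    }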
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``EWOULDBLOCK``
+
+       -  :cspan:`1` There is no event pending, and the device is in
+          non-blocking mode.
+
+    -  ..
+
+       -  ``EOVERFLOW``
+
+       -  Overflow in event queue - one or more events were lost.
+
+
+-----
+
+
+VIDEO_SET_DISPLAY_FORMAT
+------------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_SET_DISPLAY_FORMAT
+
+.. code-block:: c
+
+       int ioctl(fd, int request = VIDEO_SET_DISPLAY_FORMAT,
+       video_display_format_t format)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_SET_DISPLAY_FORMAT`` for this command.
+
+    -  ..
+
+       -  `video_displayformat_t`_ ``format``
+
+       -  Selects the video format to be used.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Video Device to select the video format to be
+applied by the MPEG chip on the video.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_STILLPICTURE
+------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_STILLPICTURE
+
+.. code-block:: c
+
+       int ioctl(fd, int request = VIDEO_STILLPICTURE,
+       struct video_still_picture *sp)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_STILLPICTURE`` for this command.
+
+    -  ..
+
+       -  ``struct`` `video_still_picture`_ ``*sp``
+
+       -  Pointer to the location where the struct with the I-frame
+          and size is stored.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Video Device to display a still picture
+(I-frame). The input data shall be the section of an elementary video
+stream containing an I-frame. Typically this section is extracted from a
+TS or PES recording. Resolution and codec (see `video capabilities`_) must
+be supported by the device. If the pointer is NULL, then the currently
+displayed still picture is blanked.
+
+For example, the AV7110 supports MPEG1 and MPEG2 with the common PAL SD
+resolutions.
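+
+A minimal sketch, assuming the I-frame section has already been loaded
+into memory (buffer extraction and error handling omitted):
+
+.. code-block:: c
+
+       #include <sys/ioctl.h>
+       #include <linux/dvb/video.h>
+
+       int show_iframe(int fd, char *iframe, int len)
+       {
+               struct video_still_picture sp = {
+                       .iFrame = iframe, /* ES section with the I-frame */
+                       .size   = len,
+               };
+
+               return ioctl(fd, VIDEO_STILLPICTURE, &sp);
+       }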
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_FAST_FORWARD
+------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_FAST_FORWARD
+
+.. code-block:: c
+
+       int ioctl(fd, int request = VIDEO_FAST_FORWARD, int nFrames)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_FAST_FORWARD`` for this command.
+
+    -  ..
+
+       -  ``int nFrames``
+
+       -  The number of frames to skip.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the Video Device to skip decoding of the next N
+I-frames. This call can only be used if ``VIDEO_SOURCE_MEMORY`` is
+selected.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``EPERM``
+
+       -  Mode ``VIDEO_SOURCE_MEMORY`` not selected.
+
+
+-----
+
+
+VIDEO_SLOWMOTION
+----------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_SLOWMOTION
+
+.. code-block:: c
+
+       int ioctl(fd, int request = VIDEO_SLOWMOTION, int nFrames)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_SLOWMOTION`` for this command.
+
+    -  ..
+
+       -  ``int nFrames``
+
+       -  The number of times to repeat each frame.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the video device to repeat the decoding of each
+frame N times. This call can only be used if ``VIDEO_SOURCE_MEMORY`` is
+selected.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``EPERM``
+
+       -  Mode ``VIDEO_SOURCE_MEMORY`` not selected.
+
+
+-----
+
+
+VIDEO_GET_CAPABILITIES
+----------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_GET_CAPABILITIES
+
+.. code-block:: c
+
+       int ioctl(fd, int request = VIDEO_GET_CAPABILITIES, unsigned int *cap)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_GET_CAPABILITIES`` for this command.
+
+    -  ..
+
+       -  ``unsigned int *cap``
+
+       -  Pointer to the location where the capability information is stored.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call asks the video device about its decoding capabilities.
+On success it returns an integer which has bits set according to the
+defines in `video capabilities`_.
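+
+For illustration, a minimal sketch testing one of the capability bits:
+
+.. code-block:: c
+
+       #include <sys/ioctl.h>
+       #include <linux/dvb/video.h>
+
+       unsigned int cap;
+
+       if (ioctl(fd, VIDEO_GET_CAPABILITIES, &cap) == 0 &&
+           (cap & VIDEO_CAP_MPEG2)) {
+               /* the decoder can handle MPEG2 streams */
+       }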
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_CLEAR_BUFFER
+------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_CLEAR_BUFFER
+
+.. code-block:: c
+
+       int ioctl(fd, int request = VIDEO_CLEAR_BUFFER)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_CLEAR_BUFFER`` for this command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl call clears all video buffers in the driver and in the
+decoder hardware.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_SET_STREAMTYPE
+--------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_SET_STREAMTYPE
+
+.. code-block:: c
+
+       int ioctl(fd, int request = VIDEO_SET_STREAMTYPE, int type)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_SET_STREAMTYPE`` for this command.
+
+    -  ..
+
+       -  ``int type``
+
+       -  Stream type.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl tells the driver which kind of stream to expect being written
+to it. Intelligent decoders (like the AV7110) might not support or might
+ignore this call and determine the stream type themselves.
+
+Currently used stream types:
+
+.. flat-table::
+    :header-rows:  1
+    :stub-columns: 0
+
+    -  ..
+
+       -  Codec
+
+       -  Stream type
+
+    -  ..
+
+       -  MPEG2
+
+       -  0
+
+    -  ..
+
+       -  MPEG4 h.264
+
+       -  1
+
+    -  ..
+
+       -  VC1
+
+       -  3
+
+    -  ..
+
+       -  MPEG4 Part2
+
+       -  4
+
+    -  ..
+
+       -  VC1 SM
+
+       -  5
+
+    -  ..
+
+       -  MPEG1
+
+       -  6
+
+    -  ..
+
+       -  HEVC h.265
+
+       -  | 7
+          | DREAMBOX: 22
+
+    -  ..
+
+       -  AVS
+
+       -  16
+
+    -  ..
+
+       -  AVS2
+
+       -  40
+
+Not every decoder supports all stream types.
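+
+As a sketch, announcing an MPEG2 stream (type 0 from the table above):
+
+.. code-block:: c
+
+       #include <stdio.h>
+       #include <sys/ioctl.h>
+       #include <linux/dvb/video.h>
+
+       if (ioctl(fd, VIDEO_SET_STREAMTYPE, 0) < 0)
+               perror("VIDEO_SET_STREAMTYPE");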
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_SET_FORMAT
+----------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_SET_FORMAT
+
+.. code-block:: c
+
+       int ioctl(fd, int request = VIDEO_SET_FORMAT, video_format_t format)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_SET_FORMAT`` for this command.
+
+    -  ..
+
+       -  `video_format_t`_ ``format``
+
+       -  Video format of the TV as defined in section `video_format_t`_.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl sets the screen format (aspect ratio) of the connected output
+device (TV) so that the output of the decoder can be adjusted
+accordingly.
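+
+For example, a minimal sketch telling the decoder that a 16:9 TV is
+connected:
+
+.. code-block:: c
+
+       #include <stdio.h>
+       #include <sys/ioctl.h>
+       #include <linux/dvb/video.h>
+
+       if (ioctl(fd, VIDEO_SET_FORMAT, VIDEO_FORMAT_16_9) < 0)
+               perror("VIDEO_SET_FORMAT");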
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_GET_SIZE
+--------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_GET_SIZE
+
+.. code-block:: c
+
+       int ioctl(int fd, int request = VIDEO_GET_SIZE, video_size_t *size)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_GET_SIZE`` for this command.
+
+    -  ..
+
+       -  `video_size_t`_ ``*size``
+
+       -  Returns the size and aspect ratio.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+This ioctl returns the size and aspect ratio.
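+
+A minimal sketch that queries and prints the current values:
+
+.. code-block:: c
+
+       #include <stdio.h>
+       #include <sys/ioctl.h>
+       #include <linux/dvb/video.h>
+
+       video_size_t size;
+
+       if (ioctl(fd, VIDEO_GET_SIZE, &size) == 0)
+               printf("%dx%d, aspect ratio code %d\n",
+                      size.w, size.h, size.aspect_ratio);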
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_GET_PTS
+-------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_GET_PTS
+
+.. code-block:: c
+
+       int ioctl(int fd, int request = VIDEO_GET_PTS, __u64 *pts)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_GET_PTS`` for this command.
+
+    -  ..
+
+       -  ``__u64 *pts``
+
+       -  Returns the 33-bit timestamp as defined in ITU T-REC-H.222.0 /
+          ISO/IEC 13818-1.
+
+          The PTS should belong to the currently played frame if possible,
+          but may also be a value close to it like the PTS of the last
+          decoded frame or the last PTS extracted by the PES parser.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+For V4L2 decoders this ioctl has been replaced by the
+``V4L2_CID_MPEG_VIDEO_DEC_PTS`` control.
+
+This ioctl call asks the Video Device to return the current PTS
+timestamp.
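+
+A minimal sketch: the PTS runs at 90 kHz as defined by ISO/IEC 13818-1,
+so the returned value can be converted to seconds as follows:
+
+.. code-block:: c
+
+       #include <stdio.h>
+       #include <sys/ioctl.h>
+       #include <linux/dvb/video.h>
+
+       __u64 pts;
+
+       if (ioctl(fd, VIDEO_GET_PTS, &pts) == 0)
+               printf("position: %.3f s\n", (double)pts / 90000.0);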
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_GET_FRAME_COUNT
+---------------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_GET_FRAME_COUNT
+
+.. code-block:: c
+
+       int ioctl(int fd, int request = VIDEO_GET_FRAME_COUNT, __u64 *pts)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_GET_FRAME_COUNT`` for this command.
+
+    -  ..
+
+       -  ``__u64 *pts``
+
+       -  Returns the number of frames displayed since the decoder was
+          started.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+For V4L2 decoders this ioctl has been replaced by the
+``V4L2_CID_MPEG_VIDEO_DEC_FRAME`` control.
+
+This ioctl call asks the Video Device to return the number of displayed
+frames since the decoder was started.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_COMMAND
+-------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_COMMAND
+
+.. code-block:: c
+
+       int ioctl(int fd, int request = VIDEO_COMMAND,
+       struct video_command *cmd)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_COMMAND`` for this command.
+
+    -  ..
+
+       -  `struct video_command`_ ``*cmd``
+
+       -  Commands the decoder.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+For V4L2 decoders this ioctl has been replaced by the
+:ref:`VIDIOC_DECODER_CMD` ioctl.
+
+This ioctl commands the decoder. The `struct video_command`_ is a
+subset of the ``v4l2_decoder_cmd`` struct, so refer to the
+:ref:`VIDIOC_DECODER_CMD` documentation for
+more information.
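+
+A minimal sketch issuing a stop command (error handling trimmed):
+
+.. code-block:: c
+
+       #include <string.h>
+       #include <sys/ioctl.h>
+       #include <linux/dvb/video.h>
+
+       struct video_command cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.cmd = VIDEO_CMD_STOP;
+
+       ioctl(fd, VIDEO_COMMAND, &cmd);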
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+VIDEO_TRY_COMMAND
+-----------------
+
+Synopsis
+~~~~~~~~
+
+.. c:macro:: VIDEO_TRY_COMMAND
+
+.. code-block:: c
+
+       int ioctl(int fd, int request = VIDEO_TRY_COMMAND,
+       struct video_command *cmd)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``int request``
+
+       -  Equals ``VIDEO_TRY_COMMAND`` for this command.
+
+    -  ..
+
+       -  `struct video_command`_ ``*cmd``
+
+       -  Try a decoder command.
+
+Description
+~~~~~~~~~~~
+
+.. attention:: Do **not** use in new drivers!
+             See: :ref:`legacy_dvb_decoder_notes`
+
+For V4L2 decoders this ioctl has been replaced by the
+:ref:`VIDIOC_TRY_DECODER_CMD <VIDIOC_DECODER_CMD>` ioctl.
+
+This ioctl tries a decoder command. The `struct video_command`_ is a
+subset of the ``v4l2_decoder_cmd`` struct, so refer to the
+:ref:`VIDIOC_TRY_DECODER_CMD <VIDIOC_DECODER_CMD>` documentation
+for more information.
+
+Return Value
+~~~~~~~~~~~~
+
+On success 0 is returned, on error -1 and the ``errno`` variable is set
+appropriately. The generic error codes are described at the
+:ref:`Generic Error Codes <gen-errors>` chapter.
+
+
+-----
+
+
+open()
+------
+
+Synopsis
+~~~~~~~~
+
+.. code-block:: c
+
+    #include <fcntl.h>
+
+.. c:function::        int open(const char *deviceName, int flags)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``const char *deviceName``
+
+       -  Name of specific video device.
+
+    -  ..
+
+       -  :rspan:`3` ``int flags``
+
+       -  :cspan:`1` A bit-wise OR of the following flags:
+
+    -  ..
+
+       -  ``O_RDONLY``
+
+       -  read-only access
+
+    -  ..
+
+       -  ``O_RDWR``
+
+       -  read/write access
+
+    -  ..
+
+       -  ``O_NONBLOCK``
+       -  | Open in non-blocking mode
+          | (blocking mode is the default)
+
+Description
+~~~~~~~~~~~
+
+This system call opens a named video device (e.g.
+/dev/dvb/adapter?/video?) for subsequent use.
+
+When an open() call has succeeded, the device will be ready for use. The
+significance of blocking or non-blocking mode is described in the
+documentation for functions where there is a difference. It does not
+affect the semantics of the open() call itself. A device opened in
+blocking mode can later be put into non-blocking mode (and vice versa)
+using the F_SETFL command of the fcntl system call. This is a standard
+system call, documented in the Linux manual page for fcntl. Only one
+user can open the Video Device in O_RDWR mode. All other attempts to
+open the device in this mode will fail, and an error code will be
+returned. If the Video Device is opened in O_RDONLY mode, the only
+ioctl call that can be used is `VIDEO_GET_STATUS`_. All other calls
+will return an error code.
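+
+A minimal sketch, assuming the usual device node layout (the adapter and
+device numbers are system specific):
+
+.. code-block:: c
+
+       #include <fcntl.h>
+
+       int fd = open("/dev/dvb/adapter0/video0", O_RDWR);
+
+       if (fd < 0) {
+               /* open failed; inspect errno */
+       }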
+
+Return Value
+~~~~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``ENODEV``
+
+       -  :cspan:`1` Device driver not loaded/available.
+
+    -  ..
+
+       -  ``EINTERNAL``
+
+       -  Internal error.
+
+    -  ..
+
+       -  ``EBUSY``
+
+       -  Device or resource busy.
+
+    -  ..
+
+       -  ``EINVAL``
+
+       -  Invalid argument.
+
+
+-----
+
+
+close()
+-------
+
+Synopsis
+~~~~~~~~
+
+.. c:function::        int close(int fd)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+Description
+~~~~~~~~~~~
+
+This system call closes a previously opened video device.
+
+Return Value
+~~~~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``EBADF``
+
+       -  fd is not a valid open file descriptor.
+
+
+-----
+
+
+write()
+-------
+
+Synopsis
+~~~~~~~~
+
+.. c:function:: size_t write(int fd, const void *buf, size_t count)
+
+Arguments
+~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``int fd``
+
+       -  :cspan:`1` File descriptor returned by a previous call
+          to `open()`_.
+
+    -  ..
+
+       -  ``void *buf``
+
+       -  Pointer to the buffer containing the PES data.
+
+    -  ..
+
+       -  ``size_t count``
+
+       -  Size of buf.
+
+Description
+~~~~~~~~~~~
+
+This system call can only be used if ``VIDEO_SOURCE_MEMORY`` is selected
+in the ioctl call `VIDEO_SELECT_SOURCE`_. The data provided shall be in
+PES format, unless the capability allows other formats. TS is the most
+common format for storing DVB data; it is usually supported too. If
+``O_NONBLOCK`` is not specified, the function will block until buffer
+space is available. The amount of data to be transferred is implied by
+count.
+
+.. note:: See: :ref:`DVB Data Formats <legacy_dvb_decoder_formats>`
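+
+A sketch of a feeding loop that handles partial writes (the helper name
+is illustrative):
+
+.. code-block:: c
+
+       #include <unistd.h>
+
+       ssize_t feed(int fd, const unsigned char *buf, size_t count)
+       {
+               size_t done = 0;
+
+               while (done < count) {
+                       ssize_t n = write(fd, buf + done, count - done);
+
+                       if (n < 0)
+                               return -1; /* inspect errno */
+                       done += n;
+               }
+
+               return done;
+       }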
+
+Return Value
+~~~~~~~~~~~~
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    -  ..
+
+       -  ``EPERM``
+
+       -  :cspan:`1` Mode ``VIDEO_SOURCE_MEMORY`` not selected.
+
+    -  ..
+
+       -  ``ENOMEM``
+
+       -  Attempted to write more data than the internal buffer can hold.
+
+    -  ..
+
+       -  ``EBADF``
+
+       -  fd is not a valid open file descriptor.
index 0ffeece1e0c8e94c3b25185b1de5ccc15350db52..6332e8395263b04935758d42a17adfe690fcd042 100644 (file)
@@ -375,12 +375,11 @@ Types and flags used to represent the media graph elements
          are origins of links.
 
     *  -  ``MEDIA_PAD_FL_MUST_CONNECT``
-       -  If this flag is set and the pad is linked to any other pad, then
-         at least one of those links must be enabled for the entity to be
-         able to stream. There could be temporary reasons (e.g. device
-         configuration dependent) for the pad to need enabled links even
-         when this flag isn't set; the absence of the flag doesn't imply
-         there is none.
+       -  If this flag is set, then for this pad to be able to stream, it must
+         be connected by at least one enabled link. There could be temporary
+         reasons (e.g. device configuration dependent) for the pad to need
+         enabled links even when this flag isn't set; the absence of the flag
+         doesn't imply there is none.
 
 
 One and only one of ``MEDIA_PAD_FL_SINK`` and ``MEDIA_PAD_FL_SOURCE``
index 810b6a859dc8d75897f70657f447147f605e7e5f..da4a358ce7621ceeb93f763e031de0064ffb280d 100644 (file)
@@ -61,6 +61,21 @@ been accepted. A common case for the kernel not accepting a capability is that
 the kernel is older than the headers the userspace uses, and thus the capability
 is unknown to the kernel.
 
+.. tabularcolumns:: |p{1.5cm}|p{2.9cm}|p{12.9cm}|
+
+.. c:type:: v4l2_subdev_client_capability
+
+.. flat-table:: struct v4l2_subdev_client_capability
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       3 4 20
+
+    * - __u64
+      - ``capabilities``
+      - Sub-device client capabilities of the opened device.
+
+.. tabularcolumns:: |p{6.8cm}|p{2.4cm}|p{8.1cm}|
+
 .. flat-table:: Client Capabilities
     :header-rows:  1
 
index 09c7e585ff5800da5a72a1f9dbd8b719f0b6d595..0b5a33ee71eea11e5dabe09ad77e62426a72fd11 100644 (file)
@@ -372,7 +372,7 @@ The bits in the dirty bitmap are cleared before the ioctl returns, unless
 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled.  For more information,
 see the description of the capability.
 
-Note that the Xen shared info page, if configured, shall always be assumed
+Note that the Xen shared_info page, if configured, shall always be assumed
 to be dirty. KVM will not explicitly mark it such.
 
 
@@ -5487,8 +5487,9 @@ KVM_PV_ASYNC_CLEANUP_PERFORM
                __u8 long_mode;
                __u8 vector;
                __u8 runstate_update_flag;
-               struct {
+               union {
                        __u64 gfn;
+                       __u64 hva;
                } shared_info;
                struct {
                        __u32 send_port;
@@ -5516,19 +5517,20 @@ type values:
 
 KVM_XEN_ATTR_TYPE_LONG_MODE
   Sets the ABI mode of the VM to 32-bit or 64-bit (long mode). This
-  determines the layout of the shared info pages exposed to the VM.
+  determines the layout of the shared_info page exposed to the VM.
 
 KVM_XEN_ATTR_TYPE_SHARED_INFO
-  Sets the guest physical frame number at which the Xen "shared info"
+  Sets the guest physical frame number at which the Xen shared_info
   page resides. Note that although Xen places vcpu_info for the first
   32 vCPUs in the shared_info page, KVM does not automatically do so
-  and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO be used
-  explicitly even when the vcpu_info for a given vCPU resides at the
-  "default" location in the shared_info page. This is because KVM may
-  not be aware of the Xen CPU id which is used as the index into the
-  vcpu_info[] array, so may know the correct default location.
-
-  Note that the shared info page may be constantly written to by KVM;
+  and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO or
+  KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA be used explicitly even when
+  the vcpu_info for a given vCPU resides at the "default" location
+  in the shared_info page. This is because KVM may not be aware of
+  the Xen CPU id which is used as the index into the vcpu_info[]
+  array, so may not know the correct default location.
+
+  Note that the shared_info page may be constantly written to by KVM;
   it contains the event channel bitmap used to deliver interrupts to
   a Xen guest, amongst other things. It is exempt from dirty tracking
   mechanisms — KVM will not explicitly mark the page as dirty each
@@ -5537,9 +5539,21 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
   any vCPU has been running or any event channel interrupts can be
   routed to the guest.
 
-  Setting the gfn to KVM_XEN_INVALID_GFN will disable the shared info
+  Setting the gfn to KVM_XEN_INVALID_GFN will disable the shared_info
   page.
 
+KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA
+  If the KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA flag is also set in the
+  Xen capabilities, then this attribute may be used to set the
+  userspace address at which the shared_info page resides, which
+  will always be fixed in the VMM regardless of where it is mapped
+  in guest physical address space. This attribute should be used in
+  preference to KVM_XEN_ATTR_TYPE_SHARED_INFO as it avoids
+  unnecessary invalidation of an internal cache when the page is
+  re-mapped in guest physical address space.
+
+  Setting the hva to zero will disable the shared_info page.
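+
+  As a sketch (the variable names are illustrative), a VMM might set
+  this attribute with the KVM_XEN_HVM_SET_ATTR ioctl::
+
+    struct kvm_xen_hvm_attr attr = {
+        .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
+        .u.shared_info.hva = (__u64)(unsigned long)shinfo_page,
+    };
+
+    ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &attr);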
+
 KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
   Sets the exception vector used to deliver Xen event channel upcalls.
   This is the HVM-wide vector injected directly by the hypervisor
@@ -5636,6 +5650,21 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO
   on dirty logging. Setting the gpa to KVM_XEN_INVALID_GPA will disable
   the vcpu_info.
 
+KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA
+  If the KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA flag is also set in the
+  Xen capabilities, then this attribute may be used to set the
+  userspace address of the vcpu_info for a given vCPU. It should
+  only be used when the vcpu_info resides at the "default" location
+  in the shared_info page. In this case it is safe to assume the
+  userspace address will not change, because the shared_info page is
+  an overlay on guest memory and remains at a fixed host address
+  regardless of where it is mapped in guest physical address space
+  and hence unnecessary invalidation of an internal cache may be
+  avoided if the guest memory layout is modified.
+  If the vcpu_info does not reside at the "default" location then
+  it is not guaranteed to remain at the same host address and
+  hence the aforementioned cache invalidation is required.
+
 KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
   Sets the guest physical address of an additional pvclock structure
   for a given vCPU. This is typically used for guest vsyscall support.
index 874f3645bf1776daa231ebd71a1ade76c662449c..1339918df52afe676890c5c6ccde71ce849a882e 100644 (file)
@@ -1064,7 +1064,7 @@ F:        include/linux/amd-pstate.h
 F:     tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
 
 AMD PTDMA DRIVER
-M:     Sanjay R Mehta <sanju.mehta@amd.com>
+M:     Basavaraj Natikar <Basavaraj.Natikar@amd.com>
 L:     dmaengine@vger.kernel.org
 S:     Maintained
 F:     drivers/dma/ptdma/
@@ -2761,6 +2761,7 @@ M:        Andrzej Hajda <andrzej.hajda@intel.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-media@vger.kernel.org
 S:     Maintained
+F:     Documentation/devicetree/bindings/media/samsung,s5p-mfc.yaml
 F:     drivers/media/platform/samsung/s5p-mfc/
 
 ARM/SOCFPGA ARCHITECTURE
@@ -3555,6 +3556,7 @@ R:        Brian Foster <bfoster@redhat.com>
 L:     linux-bcachefs@vger.kernel.org
 S:     Supported
 C:     irc://irc.oftc.net/bcache
+T:     git https://evilpiepirate.org/git/bcachefs.git
 F:     fs/bcachefs/
 
 BDISP ST MEDIA DRIVER
@@ -5319,6 +5321,7 @@ M:        Dan Williams <dan.j.williams@intel.com>
 L:     linux-cxl@vger.kernel.org
 S:     Maintained
 F:     drivers/cxl/
+F:     include/linux/cxl-einj.h
 F:     include/linux/cxl-event.h
 F:     include/uapi/linux/cxl_mem.h
 F:     tools/testing/cxl/
@@ -5413,6 +5416,7 @@ R:        Muchun Song <muchun.song@linux.dev>
 L:     cgroups@vger.kernel.org
 L:     linux-mm@kvack.org
 S:     Maintained
+F:     include/linux/memcontrol.h
 F:     mm/memcontrol.c
 F:     mm/swap_cgroup.c
 F:     samples/cgroup/*
@@ -10155,7 +10159,6 @@ S:      Maintained
 W:     https://i2c.wiki.kernel.org/
 Q:     https://patchwork.ozlabs.org/project/linux-i2c/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
-F:     Documentation/devicetree/bindings/i2c/i2c.txt
 F:     Documentation/i2c/
 F:     drivers/i2c/*
 F:     include/dt-bindings/i2c/i2c.h
@@ -10375,12 +10378,17 @@ M:    Nayna Jain <nayna@linux.ibm.com>
 M:     Paulo Flabiano Smorigo <pfsmorigo@gmail.com>
 L:     linux-crypto@vger.kernel.org
 S:     Supported
-F:     drivers/crypto/vmx/Kconfig
-F:     drivers/crypto/vmx/Makefile
-F:     drivers/crypto/vmx/aes*
-F:     drivers/crypto/vmx/ghash*
-F:     drivers/crypto/vmx/ppc-xlate.pl
-F:     drivers/crypto/vmx/vmx.c
+F:     arch/powerpc/crypto/Kconfig
+F:     arch/powerpc/crypto/Makefile
+F:     arch/powerpc/crypto/aes.c
+F:     arch/powerpc/crypto/aes_cbc.c
+F:     arch/powerpc/crypto/aes_ctr.c
+F:     arch/powerpc/crypto/aes_xts.c
+F:     arch/powerpc/crypto/aesp8-ppc.*
+F:     arch/powerpc/crypto/ghash.c
+F:     arch/powerpc/crypto/ghashp8-ppc.pl
+F:     arch/powerpc/crypto/ppc-xlate.pl
+F:     arch/powerpc/crypto/vmx.c
 
 IBM ServeRAID RAID DRIVER
 S:     Orphan
@@ -12454,7 +12462,6 @@ F:      drivers/*/*/*pasemi*
 F:     drivers/*/*pasemi*
 F:     drivers/char/tpm/tpm_ibmvtpm*
 F:     drivers/crypto/nx/
-F:     drivers/crypto/vmx/
 F:     drivers/i2c/busses/i2c-opal.c
 F:     drivers/net/ethernet/ibm/ibmveth.*
 F:     drivers/net/ethernet/ibm/ibmvnic.*
@@ -13628,6 +13635,7 @@ T:      git git://linuxtv.org/media_tree.git
 F:     Documentation/devicetree/bindings/media/renesas,csi2.yaml
 F:     Documentation/devicetree/bindings/media/renesas,isp.yaml
 F:     Documentation/devicetree/bindings/media/renesas,vin.yaml
+F:     drivers/media/platform/renesas/rcar-csi2.c
 F:     drivers/media/platform/renesas/rcar-isp.c
 F:     drivers/media/platform/renesas/rcar-vin/
 
@@ -13715,7 +13723,7 @@ L:      dmaengine@vger.kernel.org
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
-F:     Documentation/devicetree/bindings/dma/mtk-*
+F:     Documentation/devicetree/bindings/dma/mediatek,*
 F:     drivers/dma/mediatek/
 
 MEDIATEK ETHERNET DRIVER
@@ -13792,6 +13800,13 @@ F:     Documentation/devicetree/bindings/media/mediatek-vpu.txt
 F:     drivers/media/platform/mediatek/vcodec/
 F:     drivers/media/platform/mediatek/vpu/
 
+MEDIATEK MIPI-CSI CDPHY DRIVER
+M:     Julien Stephan <jstephan@baylibre.com>
+M:     Andy Hsieh <andy.hsieh@mediatek.com>
+S:     Supported
+F:     Documentation/devicetree/bindings/phy/mediatek,mt8365-csi-rx.yaml
+F:     drivers/phy/mediatek/phy-mtk-mipi-csi-0-5*
+
 MEDIATEK MMC/SD/SDIO DRIVER
 M:     Chaotian Jing <chaotian.jing@mediatek.com>
 S:     Maintained
@@ -14144,15 +14159,24 @@ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
 T:     quilt git://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new
 F:     include/linux/gfp.h
 F:     include/linux/gfp_types.h
+F:     include/linux/memfd.h
+F:     include/linux/memory.h
 F:     include/linux/memory_hotplug.h
+F:     include/linux/memory-tiers.h
+F:     include/linux/mempolicy.h
+F:     include/linux/mempool.h
+F:     include/linux/memremap.h
 F:     include/linux/mm.h
+F:     include/linux/mm_*.h
 F:     include/linux/mmzone.h
+F:     include/linux/mmu_notifier.h
 F:     include/linux/pagewalk.h
 F:     include/linux/rmap.h
 F:     include/trace/events/ksm.h
 F:     mm/
 F:     tools/mm/
 F:     tools/testing/selftests/mm/
+N:     include/linux/page[-_]*
 
 MEMORY MAPPING
 M:     Andrew Morton <akpm@linux-foundation.org>
@@ -14292,7 +14316,6 @@ F:      drivers/misc/xilinx_tmr_manager.c
 
 MICROCHIP AT91 DMA DRIVERS
 M:     Ludovic Desroches <ludovic.desroches@microchip.com>
-M:     Tudor Ambarus <tudor.ambarus@linaro.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     dmaengine@vger.kernel.org
 S:     Supported
@@ -14341,9 +14364,8 @@ F:      Documentation/devicetree/bindings/media/microchip,csi2dc.yaml
 F:     drivers/media/platform/microchip/microchip-csi2dc.c
 
 MICROCHIP ECC DRIVER
-M:     Tudor Ambarus <tudor.ambarus@linaro.org>
 L:     linux-crypto@vger.kernel.org
-S:     Maintained
+S:     Orphan
 F:     drivers/crypto/atmel-ecc.*
 
 MICROCHIP EIC DRIVER
@@ -14448,9 +14470,8 @@ S:      Maintained
 F:     drivers/mmc/host/atmel-mci.c
 
 MICROCHIP NAND DRIVER
-M:     Tudor Ambarus <tudor.ambarus@linaro.org>
 L:     linux-mtd@lists.infradead.org
-S:     Supported
+S:     Orphan
 F:     Documentation/devicetree/bindings/mtd/atmel-nand.txt
 F:     drivers/mtd/nand/raw/atmel/*
 
@@ -14729,6 +14750,17 @@ F:     arch/mips/
 F:     drivers/platform/mips/
 F:     include/dt-bindings/mips/
 
+MIPS BAIKAL-T1 PLATFORM
+M:     Serge Semin <fancer.lancer@gmail.com>
+L:     linux-mips@vger.kernel.org
+S:     Supported
+F:     Documentation/devicetree/bindings/bus/baikal,bt1-*.yaml
+F:     Documentation/devicetree/bindings/clock/baikal,bt1-*.yaml
+F:     drivers/bus/bt1-*.c
+F:     drivers/clk/baikal-t1/
+F:     drivers/memory/bt1-l2-ctl.c
+F:     drivers/mtd/maps/physmap-bt1-rom.[ch]
+
 MIPS BOSTON DEVELOPMENT BOARD
 M:     Paul Burton <paulburton@kernel.org>
 L:     linux-mips@vger.kernel.org
@@ -14849,6 +14881,17 @@ W:     https://linuxtv.org
 Q:     http://patchwork.linuxtv.org/project/linux-media/list/
 F:     drivers/media/dvb-frontends/mn88473*
 
+MOBILEYE MIPS SOCS
+M:     Vladimir Kondratiev <vladimir.kondratiev@mobileye.com>
+M:     Gregory CLEMENT <gregory.clement@bootlin.com>
+M:     Théo Lebrun <theo.lebrun@bootlin.com>
+L:     linux-mips@vger.kernel.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/mips/mobileye.yaml
+F:     arch/mips/boot/dts/mobileye/
+F:     arch/mips/configs/eyeq5_defconfig
+F:     arch/mips/mobileye/board-epm5.its.S
+
 MODULE SUPPORT
 M:     Luis Chamberlain <mcgrof@kernel.org>
 L:     linux-modules@vger.kernel.org
@@ -15491,7 +15534,6 @@ M:      Ryusuke Konishi <konishi.ryusuke@gmail.com>
 L:     linux-nilfs@vger.kernel.org
 S:     Supported
 W:     https://nilfs.sourceforge.io/
-W:     https://nilfs.osdn.jp/
 T:     git https://github.com/konis/nilfs2.git
 F:     Documentation/filesystems/nilfs2.rst
 F:     fs/nilfs2/
@@ -16422,8 +16464,8 @@ S:      Supported
 F:     drivers/infiniband/ulp/opa_vnic
 
 OPEN FIRMWARE AND FLATTENED DEVICE TREE
-M:     Rob Herring <robh+dt@kernel.org>
-M:     Frank Rowand <frowand.list@gmail.com>
+M:     Rob Herring <robh@kernel.org>
+M:     Saravana Kannan <saravanak@google.com>
 L:     devicetree@vger.kernel.org
 S:     Maintained
 W:     http://www.devicetree.org/
@@ -16439,7 +16481,7 @@ K:      of_overlay_fdt_apply
 K:     of_overlay_remove
 
 OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
-M:     Rob Herring <robh+dt@kernel.org>
+M:     Rob Herring <robh@kernel.org>
 M:     Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>
 M:     Conor Dooley <conor+dt@kernel.org>
 L:     devicetree@vger.kernel.org
@@ -16546,7 +16588,7 @@ M:      Miklos Szeredi <miklos@szeredi.hu>
 M:     Amir Goldstein <amir73il@gmail.com>
 L:     linux-unionfs@vger.kernel.org
 S:     Supported
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/overlayfs/vfs.git
 F:     Documentation/filesystems/overlayfs.rst
 F:     fs/overlayfs/
 
@@ -17647,7 +17689,7 @@ M:      Joel Granados <j.granados@samsung.com>
 L:     linux-kernel@vger.kernel.org
 L:     linux-fsdevel@vger.kernel.org
 S:     Maintained
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git sysctl-next
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sysctl/sysctl.git sysctl-next
 F:     fs/proc/proc_sysctl.c
 F:     include/linux/sysctl.h
 F:     kernel/sysctl-test.c
@@ -21993,7 +22035,6 @@ F:      drivers/i2c/busses/i2c-davinci.c
 
 TI DAVINCI SERIES CLOCK DRIVER
 M:     David Lechner <david@lechnology.com>
-R:     Sekhar Nori <nsekhar@ti.com>
 S:     Maintained
 F:     Documentation/devicetree/bindings/clock/ti/davinci/
 F:     drivers/clk/davinci/
@@ -23132,12 +23173,11 @@ L:    kvm@vger.kernel.org
 S:     Maintained
 F:     drivers/vfio/pci/mlx5/
 
-VFIO VIRTIO PCI DRIVER
-M:     Yishai Hadas <yishaih@nvidia.com>
+VFIO NVIDIA GRACE GPU DRIVER
+M:     Ankit Agrawal <ankita@nvidia.com>
 L:     kvm@vger.kernel.org
-L:     virtualization@lists.linux.dev
-S:     Maintained
-F:     drivers/vfio/pci/virtio
+S:     Supported
+F:     drivers/vfio/pci/nvgrace-gpu/
 
 VFIO PCI DEVICE SPECIFIC DRIVERS
 R:     Jason Gunthorpe <jgg@nvidia.com>
@@ -23162,6 +23202,13 @@ L:     kvm@vger.kernel.org
 S:     Maintained
 F:     drivers/vfio/platform/
 
+VFIO VIRTIO PCI DRIVER
+M:     Yishai Hadas <yishaih@nvidia.com>
+L:     kvm@vger.kernel.org
+L:     virtualization@lists.linux.dev
+S:     Maintained
+F:     drivers/vfio/pci/virtio
+
 VGA_SWITCHEROO
 R:     Lukas Wunner <lukas@wunner.de>
 S:     Maintained
@@ -24215,7 +24262,7 @@ XILINX SD-FEC IP CORES
 M:     Derek Kiernan <derek.kiernan@amd.com>
 M:     Dragan Cvetic <dragan.cvetic@amd.com>
 S:     Maintained
-F:     Documentation/devicetree/bindings/misc/xlnx,sd-fec.txt
+F:     Documentation/devicetree/bindings/misc/xlnx,sd-fec.yaml
 F:     Documentation/misc-devices/xilinx_sdfec.rst
 F:     drivers/misc/Kconfig
 F:     drivers/misc/Makefile
@@ -24447,6 +24494,7 @@ ZSWAP COMPRESSED SWAP CACHING
 M:     Johannes Weiner <hannes@cmpxchg.org>
 M:     Yosry Ahmed <yosryahmed@google.com>
 M:     Nhat Pham <nphamcs@gmail.com>
+R:     Chengming Zhou <chengming.zhou@linux.dev>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     Documentation/admin-guide/mm/zswap.rst
@@ -24454,6 +24502,7 @@ F:      include/linux/zpool.h
 F:     include/linux/zswap.h
 F:     mm/zpool.c
 F:     mm/zswap.c
+F:     tools/testing/selftests/cgroup/test_zswap.c
 
 THE REST
 M:     Linus Torvalds <torvalds@linux-foundation.org>
index 1cbf21c87b8308151bb5ce7677392d9241791b51..6fe2ae1a6d9214a8ff31253b7ac575fb7ef78f22 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -950,14 +950,6 @@ CC_FLAGS_LTO       += -fvisibility=hidden
 
 # Limit inlining across translation units to reduce binary size
 KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
-
-# Check for frame size exceeding threshold during prolog/epilog insertion
-# when using lld < 13.0.0.
-ifneq ($(CONFIG_FRAME_WARN),0)
-ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 130000),y)
-KBUILD_LDFLAGS += -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN)
-endif
-endif
 endif
 
 ifdef CONFIG_LTO
index 4092bec198beca44d6c019643db6829293f7320d..99d2845f3feb954d495253e938fb4dec73be1720 100644 (file)
@@ -6,6 +6,7 @@
 config ARC
        def_bool y
        select ARC_TIMERS
+       select ARCH_HAS_CPU_CACHE_ALIASING
        select ARCH_HAS_CACHE_LINE_SIZE
        select ARCH_HAS_DEBUG_VM_PGTABLE
        select ARCH_HAS_DMA_PREP_COHERENT
diff --git a/arch/arc/include/asm/cachetype.h b/arch/arc/include/asm/cachetype.h
new file mode 100644 (file)
index 0000000..05fc7ed
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ARC_CACHETYPE_H
+#define __ASM_ARC_CACHETYPE_H
+
+#include <linux/types.h>
+
+#define cpu_dcache_is_aliasing()       true
+
+#endif
index c46ec54c53631a09b0f59c44c092b800ea940eb6..702d97a9c30471a59dc6d2d3bfaf8436621d63df 100644 (file)
@@ -5,6 +5,7 @@ config ARM
        select ARCH_32BIT_OFF_T
        select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND
        select ARCH_HAS_BINFMT_FLAT
+       select ARCH_HAS_CPU_CACHE_ALIASING
        select ARCH_HAS_CPU_FINALIZE_INIT if MMU
        select ARCH_HAS_CURRENT_STACK_POINTER
        select ARCH_HAS_DEBUG_VIRTUAL if MMU
index 5fbbac1b708b0a7e11894b17a8ef5d2f6bcc94aa..f1fc278081d0354d1cf027f3c73179f2654b32c4 100644 (file)
@@ -17,7 +17,7 @@ config ARM_PTDUMP_DEBUGFS
          kernel.
          If in doubt, say "N"
 
-config DEBUG_WX
+config ARM_DEBUG_WX
        bool "Warn on W+X mappings at boot"
        depends on MMU
        select ARM_PTDUMP_CORE
index 7cd38de118c320e86fdc91ea1ebb5f148ee2d69c..485863f9c4203db67169163353ddc6f011d61058 100644 (file)
                        status = "disabled";
                };
 
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
+
                uart0: serial@12000 {
                        compatible = "arm,pl011", "arm,primecell";
                        reg = <0x12000 0x1000>;
index 4ef02283612bb416ed1223a8d6afb54e551fc0a7..e74ba6bf370da63d3c115e38b4f20c71baff2116 100644 (file)
                        reg-names = "nand", "nand-int-base";
                        status = "disabled";
                        interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
-                       interrupt-names = "nand";
+                       interrupt-names = "nand_ctlrdy";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
                };
 
                serial@4400 {
index 24431de1810ed028651a6b9fb842e2515c01b858..53703827ee3fe58ead1dbe70536d7293ad842d0c 100644 (file)
                        num-cs = <8>;
                        status = "disabled";
                };
+
+               nand_controller: nand-controller@2000 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x2000 0x600>, <0xf0 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
        };
 };
index 3f9aed96babfc75e11bb152aa673c6329f39106d..6d8d33498983acfc0c65ee155f64ddedc4a6b376 100644 (file)
                        status = "disabled";
                };
 
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
+
                uart0: serial@12000 {
                        compatible = "arm,pl011", "arm,primecell";
                        reg = <0x12000 0x1000>;
index 1d8d957d65dd08292e16b5c54b20316b12545e82..6433f8fa5effd76510ceb6b9d4a7083c1b6fabfc 100644 (file)
                        status = "disabled";
                };
 
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
+
                uart0: serial@12000 {
                        compatible = "arm,pl011", "arm,primecell";
                        reg = <0x12000 0x1000>;
index cf92cf8c4693304cf3d0c4dbf36365c5c49ff481..ee361cb00b7ca6ac12aaded84337146c289ff509 100644 (file)
                        num-cs = <8>;
                        status = "disabled";
                };
+
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
        };
 };
index 52d6bc89f9f828a725c51692791e135561429ed1..52915ec6f339335d87b4e50e1c03625fffb9a45d 100644 (file)
                        status = "disabled";
                };
 
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
+
                uart0: serial@12000 {
                        compatible = "arm,pl011", "arm,primecell";
                        reg = <0x12000 0x1000>;
index 2c5d706bac7e3def48e2d7f524a18598e3ce721b..70cf23a65fdb5ac7ed9eabc986f4ebb4df263c43 100644 (file)
                        status = "disabled";
                };
 
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
+
                uart0: serial@12000 {
                        compatible = "arm,pl011", "arm,primecell";
                        reg = <0x12000 0x1000>;
index 93b8ce22678d3ace8ade0cf148d413120369197b..6241485408d3b4058b4a379d93f08ba9b3d0fb0a 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index 1b405c2492137a9af8fc38234e06f7b1183c404c..7fd87e05ec20adfafdb33e4e4437a2ac29694a81 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index b5af61853a0726089c71703aa831e1e9c3520f36..f60d09908ab964615907f8e70166f68d9d86419a 100644 (file)
 };
 
 &nand_controller {
+       brcm,wp-not-connected;
        status = "okay";
+};
 
-       nand@0 {
-               compatible = "brcm,nandcs";
-               reg = <0>;
-               nand-ecc-strength = <4>;
-               nand-ecc-step-size = <512>;
-               brcm,nand-oob-sectors-size = <16>;
-       };
+&nandcs {
+       nand-ecc-strength = <4>;
+       nand-ecc-step-size = <512>;
+       brcm,nand-oob-sector-size = <16>;
+       nand-on-flash-bbt;
 };
 
 &ahci {
index 1f5d6d783f090f0e92983b1138551139246fb6bb..44bca063a3273d643a908617337380def5a5fa8d 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index d036e99dd8d16e526def14b030aa1543890a5284..098a222cd71a476eb8ea0a3ada055a794448e363 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index 8b104f3fb14ae76ff16f0bbd47e63e5762ef0c83..402038d3cd0c938f6273cd8a57ef3aec411f289b 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index 55852c2296087e79bba798dacab2829d0630e14d..943896afb7cc6899dd97d6ca093689c537b99099 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index 2ad880af210440c67c26bdad3f22abf27caf11f2..571663d9a1eac3357996d67c9f50aa7853786a90 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index b7af8ade7a9d009ca70e80aec4df148ddd23e285..8d6eddd54c6e48701557e5498c87a437165c2d23 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index b3dc0465796f9a81c59d641b05325172c2393a2f..28b724d59e7e23b627814bc85ab24aed5f557b00 100644 (file)
@@ -252,7 +252,7 @@ CONFIG_DEBUG_INFO_REDUCED=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_WX=y
+CONFIG_ARM_DEBUG_WX=y
 CONFIG_SCHED_STACK_END_CHECK=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_PANIC_TIMEOUT=-1
index 3fdf4dbfdea5dbb2f53417eb083340989f8bafa1..61cee1e7ebea6187b60053579c8e82bef1dadd24 100644 (file)
@@ -302,7 +302,7 @@ CONFIG_DEBUG_INFO_REDUCED=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_WX=y
+CONFIG_ARM_DEBUG_WX=y
 CONFIG_SCHED_STACK_END_CHECK=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_PANIC_TIMEOUT=-1
index 433ee4ddce6c81721978306b547d08693adf4643..f85933fdec75fd3ab627ebb02641debfefe46668 100644 (file)
@@ -24,8 +24,8 @@
 
 #include "sha256_glue.h"
 
-asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
-                                       unsigned int num_blks);
+asmlinkage void sha256_block_data_order(struct sha256_state *state,
+                                       const u8 *data, int num_blks);
 
 int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
                             unsigned int len)
@@ -33,23 +33,20 @@ int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
        /* make sure casting to sha256_block_fn() is safe */
        BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
 
-       return sha256_base_do_update(desc, data, len,
-                               (sha256_block_fn *)sha256_block_data_order);
+       return sha256_base_do_update(desc, data, len, sha256_block_data_order);
 }
 EXPORT_SYMBOL(crypto_sha256_arm_update);
 
 static int crypto_sha256_arm_final(struct shash_desc *desc, u8 *out)
 {
-       sha256_base_do_finalize(desc,
-                               (sha256_block_fn *)sha256_block_data_order);
+       sha256_base_do_finalize(desc, sha256_block_data_order);
        return sha256_base_finish(desc, out);
 }
 
 int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
                            unsigned int len, u8 *out)
 {
-       sha256_base_do_update(desc, data, len,
-                             (sha256_block_fn *)sha256_block_data_order);
+       sha256_base_do_update(desc, data, len, sha256_block_data_order);
        return crypto_sha256_arm_final(desc, out);
 }
 EXPORT_SYMBOL(crypto_sha256_arm_finup);
index 0635a65aa488ba201a10f19fea35fe8e551ccf8f..1be5bd498af36fb710d671228326af983e811903 100644 (file)
@@ -25,27 +25,25 @@ MODULE_ALIAS_CRYPTO("sha512");
 MODULE_ALIAS_CRYPTO("sha384-arm");
 MODULE_ALIAS_CRYPTO("sha512-arm");
 
-asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks);
+asmlinkage void sha512_block_data_order(struct sha512_state *state,
+                                       u8 const *src, int blocks);
 
 int sha512_arm_update(struct shash_desc *desc, const u8 *data,
                      unsigned int len)
 {
-       return sha512_base_do_update(desc, data, len,
-               (sha512_block_fn *)sha512_block_data_order);
+       return sha512_base_do_update(desc, data, len, sha512_block_data_order);
 }
 
 static int sha512_arm_final(struct shash_desc *desc, u8 *out)
 {
-       sha512_base_do_finalize(desc,
-               (sha512_block_fn *)sha512_block_data_order);
+       sha512_base_do_finalize(desc, sha512_block_data_order);
        return sha512_base_finish(desc, out);
 }
 
 int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
                     unsigned int len, u8 *out)
 {
-       sha512_base_do_update(desc, data, len,
-               (sha512_block_fn *)sha512_block_data_order);
+       sha512_base_do_update(desc, data, len, sha512_block_data_order);
        return sha512_arm_final(desc, out);
 }
 
index e8c30430be33f576bd328b3dc952b4c9c3bfdd47..b9dbe1d4c8fe08bec63ed15a12a2221b032c302e 100644 (file)
@@ -20,6 +20,8 @@ extern unsigned int cacheid;
 #define icache_is_vipt_aliasing()      cacheid_is(CACHEID_VIPT_I_ALIASING)
 #define icache_is_pipt()               cacheid_is(CACHEID_PIPT)
 
+#define cpu_dcache_is_aliasing()       (cache_is_vivt() || cache_is_vipt_aliasing())
+
 /*
  * __LINUX_ARM_ARCH__ is the minimum supported CPU architecture
  * Mask out support which will never be present on newer CPUs.
index 1e1178bf176da64dbdd628e61c3023baeed2e386..5225cb1c803b166ca45f8bef5c4496cbc9510e55 100644 (file)
@@ -18,18 +18,12 @@ static __always_inline __attribute_const__ struct task_struct *get_current(void)
 {
        struct task_struct *cur;
 
-#if __has_builtin(__builtin_thread_pointer) && \
-    defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) && \
-    !(defined(CONFIG_THUMB2_KERNEL) && \
-      defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 130001)
+#if __has_builtin(__builtin_thread_pointer) && defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO)
        /*
         * Use the __builtin helper when available - this results in better
         * code, especially when using GCC in combination with the per-task
         * stack protector, as the compiler will recognize that it needs to
         * load the TLS register only once in every function.
-        *
-        * Clang < 13.0.1 gets this wrong for Thumb2 builds:
-        * https://github.com/ClangBuiltLinux/linux/issues/1485
         */
        cur = __builtin_thread_pointer();
 #elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
index ce543cd9380cd527e571f793abc786e61665e013..b0a262566eb95abdde4a5606f2e364156c00869a 100644 (file)
@@ -213,7 +213,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 
 #define pmd_pfn(pmd)           (__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
-#define pmd_large(pmd)         (pmd_val(pmd) & 2)
 #define pmd_leaf(pmd)          (pmd_val(pmd) & 2)
 #define pmd_bad(pmd)           (pmd_val(pmd) & 2)
 #define pmd_present(pmd)       (pmd_val(pmd))
index 71c3add6417f91772d2303e779394a52b26a5027..4b1d9eb3908a2278fe9738a05a875a968dfd55c4 100644 (file)
                                                 PMD_TYPE_TABLE)
 #define pmd_sect(pmd)          ((pmd_val(pmd) & PMD_TYPE_MASK) == \
                                                 PMD_TYPE_SECT)
-#define pmd_large(pmd)         pmd_sect(pmd)
 #define pmd_leaf(pmd)          pmd_sect(pmd)
 
 #define pud_clear(pudp)                        \
index d657b84b6bf706a701d3e93a51a18e21755d0bde..be91e376df79e44c1e835eec8a762dac5e83430a 100644 (file)
@@ -209,6 +209,8 @@ static inline void __sync_icache_dcache(pte_t pteval)
 extern void __sync_icache_dcache(pte_t pteval);
 #endif
 
+#define PFN_PTE_SHIFT          PAGE_SHIFT
+
 void set_ptes(struct mm_struct *mm, unsigned long addr,
                      pte_t *ptep, pte_t pteval, unsigned int nr);
 #define set_ptes set_ptes
index aad1d034136cea4f31cb9316ae2cdb9b24adaf20..46a4575146ee85810fc948ef38d614d5082b2d8f 100644 (file)
@@ -32,10 +32,10 @@ void ptdump_check_wx(void);
 
 #endif /* CONFIG_ARM_PTDUMP_CORE */
 
-#ifdef CONFIG_DEBUG_WX
-#define debug_checkwx() ptdump_check_wx()
+#ifdef CONFIG_ARM_DEBUG_WX
+#define arm_debug_checkwx() ptdump_check_wx()
 #else
-#define debug_checkwx() do { } while (0)
+#define arm_debug_checkwx() do { } while (0)
 #endif
 
 #endif /* __ASM_PTDUMP_H */
index 771264d4726a732030c9af167ab535a3395a532b..6a9de826ffd3c035d938b8177d06ab1476cbf951 100644 (file)
@@ -60,6 +60,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE)  += ftrace.o insn.o patch.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)    += ftrace.o insn.o patch.o
 obj-$(CONFIG_JUMP_LABEL)       += jump_label.o insn.o patch.o
 obj-$(CONFIG_KEXEC_CORE)       += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_VMCORE_INFO)      += vmcore_info.o
 # Main staffs in KPROBES are in arch/arm/probes/ .
 obj-$(CONFIG_KPROBES)          += patch.o insn.o
 obj-$(CONFIG_OABI_COMPAT)      += sys_oabi-compat.o
index 5d07cf9e0044d152e7bb43cc8e3e2e164dd85f16..80ceb5bd2680bc253af2f8affe84d08231d1d4e6 100644 (file)
@@ -198,10 +198,3 @@ void machine_kexec(struct kimage *image)
 
        soft_restart(reboot_entry_phys);
 }
-
-void arch_crash_save_vmcoreinfo(void)
-{
-#ifdef CONFIG_ARM_LPAE
-       VMCOREINFO_CONFIG(ARM_LPAE);
-#endif
-}
index ff2299ce1ad7a3d16260f5141b7a1bbd1b978348..7b33b157fca0dc2007b5cbb714d7d15c6459bfb2 100644 (file)
@@ -979,7 +979,7 @@ static int __init init_machine_late(void)
 }
 late_initcall(init_machine_late);
 
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_CRASH_RESERVE
 /*
  * The crash region must be aligned to 128MB to avoid
  * zImage relocating below the reserved region.
@@ -1066,7 +1066,7 @@ static void __init reserve_crashkernel(void)
 }
 #else
 static inline void reserve_crashkernel(void) {}
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_CRASH_RESERVE */
 
 void __init hyp_mode_check(void)
 {
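
The 128MB alignment called out in the comment above is what the reservation itself must honour; a hedged sketch of such a reservation (CRASH_ADDR_LIMIT is a hypothetical bound, not this file's code):

    /* illustrative: reserve the crash kernel at 128 MiB alignment so a
     * relocating zImage cannot land inside the reserved region */
    crash_base = memblock_phys_alloc_range(crash_size, SZ_128M,
                                           0, CRASH_ADDR_LIMIT);
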
diff --git a/arch/arm/kernel/vmcore_info.c b/arch/arm/kernel/vmcore_info.c
new file mode 100644 (file)
index 0000000..1437aba
--- /dev/null
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/vmcore_info.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+#ifdef CONFIG_ARM_LPAE
+       VMCOREINFO_CONFIG(ARM_LPAE);
+#endif
+}
index a9381095ab3662cc6f2e7fdcbedb387c12efeed2..cd032522d902fff4cf8adc13f6a83d00f344fb12 100644 (file)
@@ -349,12 +349,12 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
        for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
                addr = start + i * PMD_SIZE;
                domain = get_domain_name(pmd);
-               if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd))
+               if (pmd_none(*pmd) || pmd_leaf(*pmd) || !pmd_present(*pmd))
                        note_page(st, addr, 4, pmd_val(*pmd), domain);
                else
                        walk_pte(st, pmd, addr, domain);
 
-               if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) {
+               if (SECTION_SIZE < PMD_SIZE && pmd_leaf(pmd[1])) {
                        addr += SECTION_SIZE;
                        pmd++;
                        domain = get_domain_name(pmd);
index a42e4cd11db2949875814e23cb21e8a3203ace60..4c3d78691279d3e4022bce8b2e8dde8df916a19f 100644 (file)
@@ -458,7 +458,7 @@ static int __mark_rodata_ro(void *unused)
 void mark_rodata_ro(void)
 {
        stop_machine(__mark_rodata_ro, NULL, NULL);
-       debug_checkwx();
+       arm_debug_checkwx();
 }
 
 #else
index 674ed71573a84c50ee690ae52afc27984192e87f..c24e29c0b9a48e94684b43add0403faa7cb5d300 100644 (file)
@@ -1814,6 +1814,6 @@ void set_ptes(struct mm_struct *mm, unsigned long addr,
                if (--nr == 0)
                        break;
                ptep++;
-               pte_val(pteval) += PAGE_SIZE;
+               pteval = pte_next_pfn(pteval);
        }
 }
index 6e1a2edb2be0df5d42c6837cb8b0826d7f38267e..77e05d4959f289313488dc5b03d9254db571e50f 100644 (file)
@@ -216,7 +216,6 @@ config ARM64
        select HAVE_HW_BREAKPOINT if PERF_EVENTS
        select HAVE_IOREMAP_PROT
        select HAVE_IRQ_TIME_ACCOUNTING
-       select HAVE_KVM
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_NMI
        select HAVE_PERF_EVENTS
@@ -379,8 +378,8 @@ config BROKEN_GAS_INST
 config BUILTIN_RETURN_ADDRESS_STRIPS_PAC
        bool
        # Clang's __builtin_return_address() strips the PAC since 12.0.0
-       # https://reviews.llvm.org/D75044
-       default y if CC_IS_CLANG && (CLANG_VERSION >= 120000)
+       # https://github.com/llvm/llvm-project/commit/2a96f47c5ffca84cd774ad402cacd137f4bf45e2
+       default y if CC_IS_CLANG
        # GCC's __builtin_return_address() strips the PAC since 11.1.0,
        # and this was backported to 10.2.0, 9.4.0, 8.5.0, but not earlier
        # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94891
@@ -1387,7 +1386,6 @@ choice
 
 config CPU_BIG_ENDIAN
        bool "Build big-endian kernel"
-       depends on !LD_IS_LLD || LLD_VERSION >= 130000
        # https://github.com/llvm/llvm-project/commit/1379b150991f70a5782e9a143c2ba5308da1161c
        depends on AS_IS_GNU || AS_VERSION >= 150000
        help
@@ -1519,7 +1517,7 @@ config ARCH_SUPPORTS_CRASH_DUMP
        def_bool y
 
 config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
-       def_bool CRASH_CORE
+       def_bool CRASH_RESERVE
 
 config TRANS_TABLE
        def_bool y
@@ -2018,8 +2016,6 @@ config ARM64_BTI_KERNEL
        depends on !CC_IS_GCC || GCC_VERSION >= 100100
        # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106671
        depends on !CC_IS_GCC
-       # https://github.com/llvm/llvm-project/commit/a88c722e687e6780dcd6a58718350dc76fcc4cc9
-       depends on !CC_IS_CLANG || CLANG_VERSION >= 120000
        depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_ARGS)
        help
          Build the kernel with Branch Target Identification annotations
@@ -2222,13 +2218,22 @@ config STACKPROTECTOR_PER_TASK
 
 config UNWIND_PATCH_PAC_INTO_SCS
        bool "Enable shadow call stack dynamically using code patching"
-       # needs Clang with https://reviews.llvm.org/D111780 incorporated
+       # needs Clang with https://github.com/llvm/llvm-project/commit/de07cde67b5d205d58690be012106022aea6d2b3 incorporated
        depends on CC_IS_CLANG && CLANG_VERSION >= 150000
        depends on ARM64_PTR_AUTH_KERNEL && CC_HAS_BRANCH_PROT_PAC_RET
        depends on SHADOW_CALL_STACK
        select UNWIND_TABLES
        select DYNAMIC_SCS
 
+config ARM64_CONTPTE
+       bool "Contiguous PTE mappings for user memory" if EXPERT
+       depends on TRANSPARENT_HUGEPAGE
+       default y
+       help
+         When enabled, user mappings are configured using the PTE contiguous
+         bit, for any mappings that meet the size and alignment requirements.
+         This reduces TLB pressure and improves performance.
+
 endmenu # "Kernel Features"
 
 menu "Boot options"
index 78204d71ecd21ffbb913656e0a2ce8ffff445d3f..999d937302406975cd3bdf93a9ce33fa8578ab3a 100644 (file)
        };
 };
 
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
 &nandcs {
        nand-ecc-strength = <4>;
        nand-ecc-step-size = <512>;
index fcf092c81b595ec2242f8622ab113dfa4a2cda68..19fc03ef47a08c508b88cd69fcff39706c4ce025 100644 (file)
        };
 };
 
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
 &nandcs {
        nand-ecc-strength = <4>;
        nand-ecc-step-size = <512>;
index d94a53d68320b3e764ef202b2e610cc094d9a204..52f928dbfa3cd7a878ace893b2d56a95b3f74ef3 100644 (file)
        };
 };
 
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
 &nandcs {
        nand-ecc-strength = <4>;
        nand-ecc-step-size = <512>;
        nand-on-flash-bbt;
-       brcm,nand-has-wp;
 
        #address-cells = <1>;
        #size-cells = <0>;
index 2f124b027bbf0a2507edcbe16614667bf7232536..336016e334d9374741ad2d18989d7922efed6082 100644 (file)
                        status = "disabled";
                };
 
-               nand-controller@1800 {
+               nand_controller: nand-controller@1800 {
                        #address-cells = <1>;
                        #size-cells = <0>;
                        compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
                        reg-names = "nand", "nand-int-base";
                        interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
                        interrupt-names = "nand_ctlrdy";
-                       status = "okay";
+                       status = "disabled";
 
                        nandcs: nand@0 {
                                compatible = "brcm,nandcs";
index d658c81f7285ece1eb405bc8781a493fcd0713f3..14b2adfb817c2ae6e62ef078a7d5206521fd9864 100644 (file)
                        status = "disabled";
                };
 
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
+
                uart0: serial@12000 {
                        compatible = "arm,pl011", "arm,primecell";
                        reg = <0x12000 0x1000>;
index 4f474d47022e261bafd0e876d1fa3ea3452ece36..589b8a1efc72f8c0435580cb3fddc5ea86d1c5c1 100644 (file)
                        status = "disabled";
                };
 
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
+
                uart0: serial@12000 {
                        compatible = "arm,pl011", "arm,primecell";
                        reg = <0x12000 0x1000>;
index 909f254dc47d1112549b0e3cf9576f18b3566fce..48d618e75866452a64adfdc781ac0ea3c2eff3e8 100644 (file)
                        status = "disabled";
                };
 
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
+
                uart0: serial@12000 {
                        compatible = "arm,pl011", "arm,primecell";
                        reg = <0x12000 0x1000>;
index 685ae32951c9c7313e509e046fe8df6df3ffc10d..1d1303cf90f307a100f15cddc1d70bdc8a25142a 100644 (file)
                        status = "disabled";
                };
 
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
+
                uart0: serial@12000 {
                        compatible = "arm,pl011", "arm,primecell";
                        reg = <0x12000 0x1000>;
index 820553ce541b4fd12f2fbcbf7bd1cbb73c42b512..00c62c1e5df00c722884a7adfcb7be08a43c0dc3 100644 (file)
                        num-cs = <8>;
                        status = "disabled";
                };
+
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
        };
 };
index 0eb93c298297221787ff0ee1bfc1d7f121baee63..caeaf428dc15db3089bf4dc62c4a272782c22c3f 100644 (file)
                        num-cs = <8>;
                        status = "disabled";
                };
+
+               nand_controller: nand-controller@1800 {
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand";
+                       reg = <0x1800 0x600>, <0x2000 0x10>;
+                       reg-names = "nand", "nand-int-base";
+                       status = "disabled";
+
+                       nandcs: nand@0 {
+                               compatible = "brcm,nandcs";
+                               reg = <0>;
+                       };
+               };
        };
 };
index c4e6e71f63107dbb13a8a498c8ec27bde9c974c4..030ffa5364fbc1245abf693185d4520d59e72f65 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index e69cd683211a99eca9faa95b54c2a69350a7f3d9..4b779e6c22e1cc031e94108cb435565022aecbfb 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index db2c82d6dfd82823b5ebc368c40f26ef940e211c..2851e8e41bf4e0ae9d0175e6d5237444751fbca2 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index 25c12bc63545d6ceb9570af5632fb36dea6fdcf2..17dc594fe83f213128ddb33ea0ec3caa3fe31c04 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index faba21f031203ff6f17ee3d85d9f28e63f8bb991..34832a734734059d7f6d50f112858bfdeb7d3f88 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index 9808331eede2cb34dc86bc609e41d14cd5e4b392..e1396b5544b7ccc87ecee71696b64b98ea37c33a 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index 1f561c8e13b0ece2c3555a29f51707cca2e67087..30bbf6f2917e75dfd1a202ef0db066628109a042 100644 (file)
 &hsspi {
        status = "okay";
 };
+
+&nand_controller {
+       brcm,wp-not-connected;
+       status = "okay";
+};
+
+&nandcs {
+       nand-on-flash-bbt;
+       brcm,nand-ecc-use-strap;
+};
index eb7b423ba46350838b8008f08a11a7a8b60af554..e7d9bd8e4709b6c4562fb07d0f4352a029b6ad06 100644 (file)
@@ -268,6 +268,7 @@ config CRYPTO_AES_ARM64_CE_CCM
        depends on ARM64 && KERNEL_MODE_NEON
        select CRYPTO_ALGAPI
        select CRYPTO_AES_ARM64_CE
+       select CRYPTO_AES_ARM64_CE_BLK
        select CRYPTO_AEAD
        select CRYPTO_LIB_AES
        help
index b03f7f71f893cd0fa376a625b63f43314b9dbb8c..f2624238fd9543d9894eea87f4e0c86df31be941 100644 (file)
@@ -1,8 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
+ * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd.
+ * Copyright (C) 2024 Google LLC
+ *
+ * Author: Ard Biesheuvel <ardb@kernel.org>
  */
 
 #include <linux/linkage.h>
        .text
        .arch   armv8-a+crypto
 
-       /*
-        * u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
-        *                          u32 macp, u8 const rk[], u32 rounds);
-        */
-SYM_FUNC_START(ce_aes_ccm_auth_data)
-       ld1     {v0.16b}, [x0]                  /* load mac */
-       cbz     w3, 1f
-       sub     w3, w3, #16
-       eor     v1.16b, v1.16b, v1.16b
-0:     ldrb    w7, [x1], #1                    /* get 1 byte of input */
-       subs    w2, w2, #1
-       add     w3, w3, #1
-       ins     v1.b[0], w7
-       ext     v1.16b, v1.16b, v1.16b, #1      /* rotate in the input bytes */
-       beq     8f                              /* out of input? */
-       cbnz    w3, 0b
-       eor     v0.16b, v0.16b, v1.16b
-1:     ld1     {v3.4s}, [x4]                   /* load first round key */
-       prfm    pldl1strm, [x1]
-       cmp     w5, #12                         /* which key size? */
-       add     x6, x4, #16
-       sub     w7, w5, #2                      /* modified # of rounds */
-       bmi     2f
-       bne     5f
-       mov     v5.16b, v3.16b
-       b       4f
-2:     mov     v4.16b, v3.16b
-       ld1     {v5.4s}, [x6], #16              /* load 2nd round key */
-3:     aese    v0.16b, v4.16b
-       aesmc   v0.16b, v0.16b
-4:     ld1     {v3.4s}, [x6], #16              /* load next round key */
-       aese    v0.16b, v5.16b
-       aesmc   v0.16b, v0.16b
-5:     ld1     {v4.4s}, [x6], #16              /* load next round key */
-       subs    w7, w7, #3
-       aese    v0.16b, v3.16b
-       aesmc   v0.16b, v0.16b
-       ld1     {v5.4s}, [x6], #16              /* load next round key */
-       bpl     3b
-       aese    v0.16b, v4.16b
-       subs    w2, w2, #16                     /* last data? */
-       eor     v0.16b, v0.16b, v5.16b          /* final round */
-       bmi     6f
-       ld1     {v1.16b}, [x1], #16             /* load next input block */
-       eor     v0.16b, v0.16b, v1.16b          /* xor with mac */
-       bne     1b
-6:     st1     {v0.16b}, [x0]                  /* store mac */
-       beq     10f
-       adds    w2, w2, #16
-       beq     10f
-       mov     w3, w2
-7:     ldrb    w7, [x1], #1
-       umov    w6, v0.b[0]
-       eor     w6, w6, w7
-       strb    w6, [x0], #1
-       subs    w2, w2, #1
-       beq     10f
-       ext     v0.16b, v0.16b, v0.16b, #1      /* rotate out the mac bytes */
-       b       7b
-8:     cbz     w3, 91f
-       mov     w7, w3
-       add     w3, w3, #16
-9:     ext     v1.16b, v1.16b, v1.16b, #1
-       adds    w7, w7, #1
-       bne     9b
-91:    eor     v0.16b, v0.16b, v1.16b
-       st1     {v0.16b}, [x0]
-10:    mov     w0, w3
-       ret
-SYM_FUNC_END(ce_aes_ccm_auth_data)
+       .macro  load_round_keys, rk, nr, tmp
+       sub     w\tmp, \nr, #10
+       add     \tmp, \rk, w\tmp, sxtw #4
+       ld1     {v10.4s-v13.4s}, [\rk]
+       ld1     {v14.4s-v17.4s}, [\tmp], #64
+       ld1     {v18.4s-v21.4s}, [\tmp], #64
+       ld1     {v3.4s-v5.4s}, [\tmp]
+       .endm
 
-       /*
-        * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
-        *                       u32 rounds);
-        */
-SYM_FUNC_START(ce_aes_ccm_final)
-       ld1     {v3.4s}, [x2], #16              /* load first round key */
-       ld1     {v0.16b}, [x0]                  /* load mac */
-       cmp     w3, #12                         /* which key size? */
-       sub     w3, w3, #2                      /* modified # of rounds */
-       ld1     {v1.16b}, [x1]                  /* load 1st ctriv */
-       bmi     0f
-       bne     3f
-       mov     v5.16b, v3.16b
-       b       2f
-0:     mov     v4.16b, v3.16b
-1:     ld1     {v5.4s}, [x2], #16              /* load next round key */
-       aese    v0.16b, v4.16b
-       aesmc   v0.16b, v0.16b
-       aese    v1.16b, v4.16b
-       aesmc   v1.16b, v1.16b
-2:     ld1     {v3.4s}, [x2], #16              /* load next round key */
-       aese    v0.16b, v5.16b
-       aesmc   v0.16b, v0.16b
-       aese    v1.16b, v5.16b
-       aesmc   v1.16b, v1.16b
-3:     ld1     {v4.4s}, [x2], #16              /* load next round key */
-       subs    w3, w3, #3
-       aese    v0.16b, v3.16b
-       aesmc   v0.16b, v0.16b
-       aese    v1.16b, v3.16b
-       aesmc   v1.16b, v1.16b
-       bpl     1b
-       aese    v0.16b, v4.16b
-       aese    v1.16b, v4.16b
-       /* final round key cancels out */
-       eor     v0.16b, v0.16b, v1.16b          /* en-/decrypt the mac */
-       st1     {v0.16b}, [x0]                  /* store result */
-       ret
-SYM_FUNC_END(ce_aes_ccm_final)
+       .macro  dround, va, vb, vk
+       aese    \va\().16b, \vk\().16b
+       aesmc   \va\().16b, \va\().16b
+       aese    \vb\().16b, \vk\().16b
+       aesmc   \vb\().16b, \vb\().16b
+       .endm
+
+       .macro  aes_encrypt, va, vb, nr
+       tbz     \nr, #2, .L\@
+       dround  \va, \vb, v10
+       dround  \va, \vb, v11
+       tbz     \nr, #1, .L\@
+       dround  \va, \vb, v12
+       dround  \va, \vb, v13
+.L\@:  .irp    v, v14, v15, v16, v17, v18, v19, v20, v21, v3
+       dround  \va, \vb, \v
+       .endr
+       aese    \va\().16b, v4.16b
+       aese    \vb\().16b, v4.16b
+       .endm
 
        .macro  aes_ccm_do_crypt,enc
-       cbz     x2, 5f
-       ldr     x8, [x6, #8]                    /* load lower ctr */
+       load_round_keys x3, w4, x10
+
        ld1     {v0.16b}, [x5]                  /* load mac */
+       cbz     x2, ce_aes_ccm_final
+       ldr     x8, [x6, #8]                    /* load lower ctr */
 CPU_LE(        rev     x8, x8                  )       /* keep swabbed ctr in reg */
 0:     /* outer loop */
        ld1     {v1.8b}, [x6]                   /* load upper ctr */
        prfm    pldl1strm, [x1]
        add     x8, x8, #1
        rev     x9, x8
-       cmp     w4, #12                         /* which key size? */
-       sub     w7, w4, #2                      /* get modified # of rounds */
        ins     v1.d[1], x9                     /* no carry in lower ctr */
-       ld1     {v3.4s}, [x3]                   /* load first round key */
-       add     x10, x3, #16
-       bmi     1f
-       bne     4f
-       mov     v5.16b, v3.16b
-       b       3f
-1:     mov     v4.16b, v3.16b
-       ld1     {v5.4s}, [x10], #16             /* load 2nd round key */
-2:     /* inner loop: 3 rounds, 2x interleaved */
-       aese    v0.16b, v4.16b
-       aesmc   v0.16b, v0.16b
-       aese    v1.16b, v4.16b
-       aesmc   v1.16b, v1.16b
-3:     ld1     {v3.4s}, [x10], #16             /* load next round key */
-       aese    v0.16b, v5.16b
-       aesmc   v0.16b, v0.16b
-       aese    v1.16b, v5.16b
-       aesmc   v1.16b, v1.16b
-4:     ld1     {v4.4s}, [x10], #16             /* load next round key */
-       subs    w7, w7, #3
-       aese    v0.16b, v3.16b
-       aesmc   v0.16b, v0.16b
-       aese    v1.16b, v3.16b
-       aesmc   v1.16b, v1.16b
-       ld1     {v5.4s}, [x10], #16             /* load next round key */
-       bpl     2b
-       aese    v0.16b, v4.16b
-       aese    v1.16b, v4.16b
+
+       aes_encrypt     v0, v1, w4
+
        subs    w2, w2, #16
-       bmi     6f                              /* partial block? */
+       bmi     ce_aes_ccm_crypt_tail
        ld1     {v2.16b}, [x1], #16             /* load next input block */
        .if     \enc == 1
        eor     v2.16b, v2.16b, v5.16b          /* final round enc+mac */
-       eor     v1.16b, v1.16b, v2.16b          /* xor with crypted ctr */
+       eor     v6.16b, v1.16b, v2.16b          /* xor with crypted ctr */
        .else
        eor     v2.16b, v2.16b, v1.16b          /* xor with crypted ctr */
-       eor     v1.16b, v2.16b, v5.16b          /* final round enc */
+       eor     v6.16b, v2.16b, v5.16b          /* final round enc */
        .endif
        eor     v0.16b, v0.16b, v2.16b          /* xor mac with pt ^ rk[last] */
-       st1     {v1.16b}, [x0], #16             /* write output block */
+       st1     {v6.16b}, [x0], #16             /* write output block */
        bne     0b
 CPU_LE(        rev     x8, x8                  )
-       st1     {v0.16b}, [x5]                  /* store mac */
        str     x8, [x6, #8]                    /* store lsb end of ctr (BE) */
-5:     ret
-
-6:     eor     v0.16b, v0.16b, v5.16b          /* final round mac */
-       eor     v1.16b, v1.16b, v5.16b          /* final round enc */
+       cbnz    x7, ce_aes_ccm_final
        st1     {v0.16b}, [x5]                  /* store mac */
-       add     w2, w2, #16                     /* process partial tail block */
-7:     ldrb    w9, [x1], #1                    /* get 1 byte of input */
-       umov    w6, v1.b[0]                     /* get top crypted ctr byte */
-       umov    w7, v0.b[0]                     /* get top mac byte */
-       .if     \enc == 1
-       eor     w7, w7, w9
-       eor     w9, w9, w6
-       .else
-       eor     w9, w9, w6
-       eor     w7, w7, w9
-       .endif
-       strb    w9, [x0], #1                    /* store out byte */
-       strb    w7, [x5], #1                    /* store mac byte */
-       subs    w2, w2, #1
-       beq     5b
-       ext     v0.16b, v0.16b, v0.16b, #1      /* shift out mac byte */
-       ext     v1.16b, v1.16b, v1.16b, #1      /* shift out ctr byte */
-       b       7b
+       ret
        .endm
 
+SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
+       eor     v0.16b, v0.16b, v5.16b          /* final round mac */
+       eor     v1.16b, v1.16b, v5.16b          /* final round enc */
+
+       add     x1, x1, w2, sxtw                /* rewind the input pointer (w2 < 0) */
+       add     x0, x0, w2, sxtw                /* rewind the output pointer */
+
+       adr_l   x8, .Lpermute                   /* load permute vectors */
+       add     x9, x8, w2, sxtw
+       sub     x8, x8, w2, sxtw
+       ld1     {v7.16b-v8.16b}, [x9]
+       ld1     {v9.16b}, [x8]
+
+       ld1     {v2.16b}, [x1]                  /* load a full block of input */
+       tbl     v1.16b, {v1.16b}, v7.16b        /* move keystream to end of register */
+       eor     v7.16b, v2.16b, v1.16b          /* encrypt partial input block */
+       bif     v2.16b, v7.16b, v22.16b         /* select plaintext */
+       tbx     v7.16b, {v6.16b}, v8.16b        /* insert output from previous iteration */
+       tbl     v2.16b, {v2.16b}, v9.16b        /* copy plaintext to start of v2 */
+       eor     v0.16b, v0.16b, v2.16b          /* fold plaintext into mac */
+
+       st1     {v7.16b}, [x0]                  /* store output block */
+       cbz     x7, 0f
+
+SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
+       ld1     {v1.16b}, [x7]                  /* load 1st ctriv */
+
+       aes_encrypt     v0, v1, w4
+
+       /* final round key cancels out */
+       eor     v0.16b, v0.16b, v1.16b          /* en-/decrypt the mac */
+0:     st1     {v0.16b}, [x5]                  /* store result */
+       ret
+SYM_FUNC_END(ce_aes_ccm_crypt_tail)
+
        /*
         * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
         *                         u8 const rk[], u32 rounds, u8 mac[],
-        *                         u8 ctr[]);
+        *                         u8 ctr[], u8 const final_iv[]);
         * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
         *                         u8 const rk[], u32 rounds, u8 mac[],
-        *                         u8 ctr[]);
+        *                         u8 ctr[], u8 const final_iv[]);
         */
 SYM_FUNC_START(ce_aes_ccm_encrypt)
+       movi    v22.16b, #255
        aes_ccm_do_crypt        1
 SYM_FUNC_END(ce_aes_ccm_encrypt)
 
 SYM_FUNC_START(ce_aes_ccm_decrypt)
+       movi    v22.16b, #0
        aes_ccm_do_crypt        0
 SYM_FUNC_END(ce_aes_ccm_decrypt)
+
+       .section ".rodata", "a"
+       .align  6
+       .fill   15, 1, 0xff
+.Lpermute:
+       .byte   0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+       .byte   0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+       .fill   15, 1, 0xff
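
The rewritten tail path replaces the old byte-at-a-time loop with full-block loads steered by the .Lpermute table (TBL indexes >= 16, i.e. the 0xff fill, yield zero bytes). A rough scalar model of its net effect for a final partial block of n bytes in the encrypt direction; this is illustrative only and elides the block-cipher step of the MAC:

    /* scalar model, not the real NEON code: encrypt the n-byte tail with
     * the first n keystream bytes and absorb the plaintext into the MAC */
    static void ccm_tail_model(u8 *dst, const u8 *src, int n,
                               const u8 ks[16], u8 mac[16])
    {
            for (int i = 0; i < n; i++) {
                    dst[i] = src[i] ^ ks[i];        /* CTR encryption */
                    mac[i] ^= src[i];               /* MAC absorbs plaintext */
            }
    }
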
index 25cd3808ecbe6757551964147a7c636ead0fc199..ce9b28e3c7d63462c02827b177dc22633edb5474 100644 (file)
@@ -1,8 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
+ * aes-ce-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd.
+ * Copyright (C) 2024 Google LLC
+ *
+ * Author: Ard Biesheuvel <ardb@kernel.org>
  */
 
 #include <asm/neon.h>
@@ -15,6 +18,8 @@
 
 #include "aes-ce-setkey.h"
 
+MODULE_IMPORT_NS(CRYPTO_INTERNAL);
+
 static int num_rounds(struct crypto_aes_ctx *ctx)
 {
        /*
@@ -27,19 +32,17 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
        return 6 + ctx->key_length / 4;
 }
 
-asmlinkage u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
-                                   u32 macp, u32 const rk[], u32 rounds);
+asmlinkage u32 ce_aes_mac_update(u8 const in[], u32 const rk[], int rounds,
+                                int blocks, u8 dg[], int enc_before,
+                                int enc_after);
 
 asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
                                   u32 const rk[], u32 rounds, u8 mac[],
-                                  u8 ctr[]);
+                                  u8 ctr[], u8 const final_iv[]);
 
 asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
                                   u32 const rk[], u32 rounds, u8 mac[],
-                                  u8 ctr[]);
-
-asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
-                                u32 rounds);
+                                  u8 ctr[], u8 const final_iv[]);
 
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
                      unsigned int key_len)
@@ -94,6 +97,41 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
        return 0;
 }
 
+static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
+                               u32 macp, u32 const rk[], u32 rounds)
+{
+       int enc_after = (macp + abytes) % AES_BLOCK_SIZE;
+
+       do {
+               u32 blocks = abytes / AES_BLOCK_SIZE;
+
+               if (macp == AES_BLOCK_SIZE || (!macp && blocks > 0)) {
+                       u32 rem = ce_aes_mac_update(in, rk, rounds, blocks, mac,
+                                                   macp, enc_after);
+                       u32 adv = (blocks - rem) * AES_BLOCK_SIZE;
+
+                       macp = enc_after ? 0 : AES_BLOCK_SIZE;
+                       in += adv;
+                       abytes -= adv;
+
+                       if (unlikely(rem)) {
+                               kernel_neon_end();
+                               kernel_neon_begin();
+                               macp = 0;
+                       }
+               } else {
+                       u32 l = min(AES_BLOCK_SIZE - macp, abytes);
+
+                       crypto_xor(&mac[macp], in, l);
+                       in += l;
+                       macp += l;
+                       abytes -= l;
+               }
+       } while (abytes > 0);
+
+       return macp;
+}
+
 static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 {
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -101,7 +139,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
        struct __packed { __be16 l; __be32 h; u16 len; } ltag;
        struct scatter_walk walk;
        u32 len = req->assoclen;
-       u32 macp = 0;
+       u32 macp = AES_BLOCK_SIZE;
 
        /* prepend the AAD with a length tag */
        if (len < 0xff00) {
@@ -125,16 +163,11 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
                        scatterwalk_start(&walk, sg_next(walk.sg));
                        n = scatterwalk_clamp(&walk, len);
                }
-               n = min_t(u32, n, SZ_4K); /* yield NEON at least every 4k */
                p = scatterwalk_map(&walk);
 
                macp = ce_aes_ccm_auth_data(mac, p, n, macp, ctx->key_enc,
                                            num_rounds(ctx));
 
-               if (len / SZ_4K > (len - n) / SZ_4K) {
-                       kernel_neon_end();
-                       kernel_neon_begin();
-               }
                len -= n;
 
                scatterwalk_unmap(p);
@@ -149,7 +182,7 @@ static int ccm_encrypt(struct aead_request *req)
        struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
        struct skcipher_walk walk;
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
-       u8 buf[AES_BLOCK_SIZE];
+       u8 orig_iv[AES_BLOCK_SIZE];
        u32 len = req->cryptlen;
        int err;
 
@@ -158,42 +191,55 @@ static int ccm_encrypt(struct aead_request *req)
                return err;
 
        /* preserve the original iv for the final round */
-       memcpy(buf, req->iv, AES_BLOCK_SIZE);
+       memcpy(orig_iv, req->iv, AES_BLOCK_SIZE);
 
        err = skcipher_walk_aead_encrypt(&walk, req, false);
+       if (unlikely(err))
+               return err;
 
        kernel_neon_begin();
 
        if (req->assoclen)
                ccm_calculate_auth_mac(req, mac);
 
-       while (walk.nbytes) {
+       do {
                u32 tail = walk.nbytes % AES_BLOCK_SIZE;
-               bool final = walk.nbytes == walk.total;
+               const u8 *src = walk.src.virt.addr;
+               u8 *dst = walk.dst.virt.addr;
+               u8 buf[AES_BLOCK_SIZE];
+               u8 *final_iv = NULL;
 
-               if (final)
+               if (walk.nbytes == walk.total) {
                        tail = 0;
+                       final_iv = orig_iv;
+               }
 
-               ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-                                  walk.nbytes - tail, ctx->key_enc,
-                                  num_rounds(ctx), mac, walk.iv);
+               if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+                       src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
+                                          src, walk.nbytes);
 
-               if (!final)
-                       kernel_neon_end();
-               err = skcipher_walk_done(&walk, tail);
-               if (!final)
-                       kernel_neon_begin();
-       }
+               ce_aes_ccm_encrypt(dst, src, walk.nbytes - tail,
+                                  ctx->key_enc, num_rounds(ctx),
+                                  mac, walk.iv, final_iv);
+
+               if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+                       memcpy(walk.dst.virt.addr, dst, walk.nbytes);
 
-       ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+               if (walk.nbytes)
+                       err = skcipher_walk_done(&walk, tail);
+       } while (walk.nbytes);
 
        kernel_neon_end();
 
+       if (unlikely(err))
+               return err;
+
        /* copy authtag to end of dst */
        scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
                                 crypto_aead_authsize(aead), 1);
 
-       return err;
+       return 0;
 }
 
 static int ccm_decrypt(struct aead_request *req)
@@ -203,7 +249,7 @@ static int ccm_decrypt(struct aead_request *req)
        unsigned int authsize = crypto_aead_authsize(aead);
        struct skcipher_walk walk;
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
-       u8 buf[AES_BLOCK_SIZE];
+       u8 orig_iv[AES_BLOCK_SIZE];
        u32 len = req->cryptlen - authsize;
        int err;
 
@@ -212,34 +258,44 @@ static int ccm_decrypt(struct aead_request *req)
                return err;
 
        /* preserve the original iv for the final round */
-       memcpy(buf, req->iv, AES_BLOCK_SIZE);
+       memcpy(orig_iv, req->iv, AES_BLOCK_SIZE);
 
        err = skcipher_walk_aead_decrypt(&walk, req, false);
+       if (unlikely(err))
+               return err;
 
        kernel_neon_begin();
 
        if (req->assoclen)
                ccm_calculate_auth_mac(req, mac);
 
-       while (walk.nbytes) {
+       do {
                u32 tail = walk.nbytes % AES_BLOCK_SIZE;
-               bool final = walk.nbytes == walk.total;
+               const u8 *src = walk.src.virt.addr;
+               u8 *dst = walk.dst.virt.addr;
+               u8 buf[AES_BLOCK_SIZE];
+               u8 *final_iv = NULL;
 
-               if (final)
+               if (walk.nbytes == walk.total) {
                        tail = 0;
+                       final_iv = orig_iv;
+               }
 
-               ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-                                  walk.nbytes - tail, ctx->key_enc,
-                                  num_rounds(ctx), mac, walk.iv);
+               if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+                       src = dst = memcpy(&buf[sizeof(buf) - walk.nbytes],
+                                          src, walk.nbytes);
 
-               if (!final)
-                       kernel_neon_end();
-               err = skcipher_walk_done(&walk, tail);
-               if (!final)
-                       kernel_neon_begin();
-       }
+               ce_aes_ccm_decrypt(dst, src, walk.nbytes - tail,
+                                  ctx->key_enc, num_rounds(ctx),
+                                  mac, walk.iv, final_iv);
+
+               if (unlikely(walk.nbytes < AES_BLOCK_SIZE))
+                       memcpy(walk.dst.virt.addr, dst, walk.nbytes);
 
-       ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
+               if (walk.nbytes)
+                       err = skcipher_walk_done(&walk, tail);
+       } while (walk.nbytes);
 
        kernel_neon_end();
 
@@ -247,11 +303,11 @@ static int ccm_decrypt(struct aead_request *req)
                return err;
 
        /* compare calculated auth tag with the stored one */
-       scatterwalk_map_and_copy(buf, req->src,
+       scatterwalk_map_and_copy(orig_iv, req->src,
                                 req->assoclen + req->cryptlen - authsize,
                                 authsize, 0);
 
-       if (crypto_memneq(mac, buf, authsize))
+       if (crypto_memneq(mac, orig_iv, authsize))
                return -EBADMSG;
        return 0;
 }
@@ -290,6 +346,6 @@ module_init(aes_mod_init);
 module_exit(aes_mod_exit);
 
 MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS_CRYPTO("ccm(aes)");
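
The ltag handling in ccm_calculate_auth_mac() above follows CCM's AAD length encoding from RFC 3610; a sketch of the rule, with a hypothetical helper name:

    /* CCM's AAD length prefix: a 2-byte big-endian length when it fits
     * below 0xff00, otherwise the 0xfffe marker plus a 4-byte length */
    static size_t ccm_encode_aad_len(u8 *out, u32 len)
    {
            if (len < 0xff00) {
                    put_unaligned_be16(len, out);
                    return 2;
            }
            put_unaligned_be16(0xfffe, out);
            put_unaligned_be32(len, out + 2);
            return 6;
    }
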
index 162787c7aa86500b551805eaf9e123a0944631e5..a147e847a5a181af2679398cab4d63f6259a6ba3 100644 (file)
@@ -1048,6 +1048,7 @@ unregister_ciphers:
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 module_cpu_feature_match(AES, aes_init);
+EXPORT_SYMBOL_NS(ce_aes_mac_update, CRYPTO_INTERNAL);
 #else
 module_init(aes_init);
 EXPORT_SYMBOL(neon_aes_ecb_encrypt);
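
This export pairs with the MODULE_IMPORT_NS(CRYPTO_INTERNAL) added to the CCM glue code above; the pattern, in brief:

    /* provider: export the symbol into a named namespace */
    EXPORT_SYMBOL_NS(ce_aes_mac_update, CRYPTO_INTERNAL);

    /* consumer: must declare the import, or modpost rejects the link */
    MODULE_IMPORT_NS(CRYPTO_INTERNAL);
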
index 96379be913cd54e45fd655b461a6a5a014d670b9..9b73fd0cd7212459e0ba0d1758610ee898fc81e1 100644 (file)
@@ -57,6 +57,7 @@ struct cpuinfo_arm64 {
        u64             reg_id_aa64mmfr1;
        u64             reg_id_aa64mmfr2;
        u64             reg_id_aa64mmfr3;
+       u64             reg_id_aa64mmfr4;
        u64             reg_id_aa64pfr0;
        u64             reg_id_aa64pfr1;
        u64             reg_id_aa64pfr2;
index df62b61ff4678ca4d3e926f51e443baec32f5c7e..8b904a757bd3417d935871e9b1dc91b3fac3bdcf 100644 (file)
@@ -364,6 +364,7 @@ struct arm64_cpu_capabilities {
                        u8 field_pos;
                        u8 field_width;
                        u8 min_field_value;
+                       u8 max_field_value;
                        u8 hwcap_type;
                        bool sign;
                        unsigned long hwcap;
similarity index 81%
rename from arch/arm64/include/asm/crash_core.h
rename to arch/arm64/include/asm/crash_reserve.h
index 9f5c8d339f44f5440d822ef64b277bb85d92833f..4afe027a4e7b2c11517b70785e629b72a8ff4634 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _ARM64_CRASH_CORE_H
-#define _ARM64_CRASH_CORE_H
+#ifndef _ARM64_CRASH_RESERVE_H
+#define _ARM64_CRASH_RESERVE_H
 
 /* Current arm64 boot protocol requires 2MB alignment */
 #define CRASH_ALIGN                     SZ_2M
index 9ac9572a3bbee2caec02b6d5797e84bfd5194f6b..4d9cc7a76d9ca16c78bc38ab06d68fb9ba08bd79 100644 (file)
@@ -80,7 +80,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
        }
 }
 
-#if defined(CONFIG_KEXEC_CORE) && defined(CONFIG_HIBERNATION)
+#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_HIBERNATION)
 extern bool crash_is_nosave(unsigned long pfn);
 extern void crash_prepare_suspend(void);
 extern void crash_post_resume(void);
index 7f45ce9170bb3edc85b4f9e857b588b277a6de4d..e01bb5ca13b7cce66ed0fc6d43a5a8edecc81413 100644 (file)
 #define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
-#define HCRX_GUEST_FLAGS \
-       (HCRX_EL2_SMPME | HCRX_EL2_TCR2En | \
-        (cpus_have_final_cap(ARM64_HAS_MOPS) ? (HCRX_EL2_MSCEn | HCRX_EL2_MCE2) : 0))
+#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En)
 #define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
 
 /* TCR_EL2 Registers bits */
index 6f5b41c70103869909442c854119d1e9ec19ed84..975af30af31fa239524ccd5a52aa453e8675835d 100644 (file)
@@ -209,7 +209,8 @@ static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
 
 static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt)
 {
-       return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H;
+       return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) ||
+               (ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H));
 }
 
 static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
index b779cbc2211cec9aaf1cbc206b5276c6e44f0c33..9e8a496fb284ea3aff570950fc2b3e15b5e4e586 100644 (file)
@@ -238,9 +238,32 @@ static inline u16 kvm_mpidr_index(struct kvm_mpidr_data *data, u64 mpidr)
        return index;
 }
 
+struct kvm_sysreg_masks;
+
+enum fgt_group_id {
+       __NO_FGT_GROUP__,
+       HFGxTR_GROUP,
+       HDFGRTR_GROUP,
+       HDFGWTR_GROUP = HDFGRTR_GROUP,
+       HFGITR_GROUP,
+       HAFGRTR_GROUP,
+
+       /* Must be last */
+       __NR_FGT_GROUP_IDS__
+};
+
 struct kvm_arch {
        struct kvm_s2_mmu mmu;
 
+       /*
+        * Fine-Grained UNDEF, mimicking the FGT layout defined by the
+        * architecture. We track them globally, as we present the
+        * same feature-set to all vcpus.
+        *
+        * Index 0 is currently spare.
+        */
+       u64 fgu[__NR_FGT_GROUP_IDS__];
+
        /* Interrupt controller */
        struct vgic_dist        vgic;
 
@@ -274,6 +297,8 @@ struct kvm_arch {
 #define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE             6
        /* Initial ID reg values loaded */
 #define KVM_ARCH_FLAG_ID_REGS_INITIALIZED              7
+       /* Fine-Grained UNDEF initialised */
+#define KVM_ARCH_FLAG_FGU_INITIALIZED                  8
        unsigned long flags;
 
        /* VM-wide vCPU feature set */
@@ -294,6 +319,9 @@ struct kvm_arch {
        /* PMCR_EL0.N value for the guest */
        u8 pmcr_n;
 
+       /* Iterator for idreg debugfs */
+       u8      idreg_debugfs_iter;
+
        /* Hypercall features firmware registers' descriptor */
        struct kvm_smccc_features smccc_feat;
        struct maple_tree smccc_filter;
@@ -312,6 +340,9 @@ struct kvm_arch {
 #define KVM_ARM_ID_REG_NUM     (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
        u64 id_regs[KVM_ARM_ID_REG_NUM];
 
+       /* Masks for VNCR-baked sysregs */
+       struct kvm_sysreg_masks *sysreg_masks;
+
        /*
         * For an untrusted host VM, 'pkvm.handle' is used to lookup
         * the associated pKVM instance in the hypervisor.
@@ -474,6 +505,13 @@ enum vcpu_sysreg {
        NR_SYS_REGS     /* Nothing after this line! */
 };
 
+struct kvm_sysreg_masks {
+       struct {
+               u64     res0;
+               u64     res1;
+       } mask[NR_SYS_REGS - __VNCR_START__];
+};
+
 struct kvm_cpu_context {
        struct user_pt_regs regs;       /* sp = sp_el0 */
 
@@ -550,6 +588,7 @@ struct kvm_vcpu_arch {
 
        /* Values of trap registers for the guest. */
        u64 hcr_el2;
+       u64 hcrx_el2;
        u64 mdcr_el2;
        u64 cptr_el2;
 
@@ -869,7 +908,15 @@ static inline u64 *__ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
 
 #define ctxt_sys_reg(c,r)      (*__ctxt_sys_reg(c,r))
 
-#define __vcpu_sys_reg(v,r)    (ctxt_sys_reg(&(v)->arch.ctxt, (r)))
+u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg);
+#define __vcpu_sys_reg(v,r)                                            \
+       (*({                                                            \
+               const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt;   \
+               u64 *__r = __ctxt_sys_reg(ctxt, (r));                   \
+               if (vcpu_has_nv((v)) && (r) >= __VNCR_START__)          \
+                       *__r = kvm_vcpu_sanitise_vncr_reg((v), (r));    \
+               __r;                                                    \
+       }))
 
 u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
 void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
@@ -1056,14 +1103,20 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
 int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
 int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
 
+void kvm_sys_regs_create_debugfs(struct kvm *kvm);
 void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
 
 int __init kvm_sys_reg_table_init(void);
+struct sys_reg_desc;
+int __init populate_sysreg_config(const struct sys_reg_desc *sr,
+                                 unsigned int idx);
 int __init populate_nv_trap_config(void);
 
 bool lock_all_vcpus(struct kvm *kvm);
 void unlock_all_vcpus(struct kvm *kvm);
 
+void kvm_init_sysreg(struct kvm_vcpu *);
+
 /* MMIO helpers */
 void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
 unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
@@ -1234,4 +1287,48 @@ static inline void kvm_hyp_reserve(void) { }
 void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
 bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
 
+#define __expand_field_sign_unsigned(id, fld, val)                     \
+       ((u64)SYS_FIELD_VALUE(id, fld, val))
+
+#define __expand_field_sign_signed(id, fld, val)                       \
+       ({                                                              \
+               u64 __val = SYS_FIELD_VALUE(id, fld, val);              \
+               sign_extend64(__val, id##_##fld##_WIDTH - 1);           \
+       })
+
+#define expand_field_sign(id, fld, val)                                        \
+       (id##_##fld##_SIGNED ?                                          \
+        __expand_field_sign_signed(id, fld, val) :                     \
+        __expand_field_sign_unsigned(id, fld, val))
+
+#define get_idreg_field_unsigned(kvm, id, fld)                         \
+       ({                                                              \
+               u64 __val = IDREG((kvm), SYS_##id);                     \
+               FIELD_GET(id##_##fld##_MASK, __val);                    \
+       })
+
+#define get_idreg_field_signed(kvm, id, fld)                           \
+       ({                                                              \
+               u64 __val = get_idreg_field_unsigned(kvm, id, fld);     \
+               sign_extend64(__val, id##_##fld##_WIDTH - 1);           \
+       })
+
+#define get_idreg_field_enum(kvm, id, fld)                             \
+       get_idreg_field_unsigned(kvm, id, fld)
+
+#define get_idreg_field(kvm, id, fld)                                  \
+       (id##_##fld##_SIGNED ?                                          \
+        get_idreg_field_signed(kvm, id, fld) :                         \
+        get_idreg_field_unsigned(kvm, id, fld))
+
+#define kvm_has_feat(kvm, id, fld, limit)                              \
+       (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, limit))
+
+#define kvm_has_feat_enum(kvm, id, fld, val)                           \
+       (get_idreg_field_unsigned((kvm), id, fld) == __expand_field_sign_unsigned(id, fld, val))
+
+#define kvm_has_feat_range(kvm, id, fld, min, max)                     \
+       (get_idreg_field((kvm), id, fld) >= expand_field_sign(id, fld, min) && \
+        get_idreg_field((kvm), id, fld) <= expand_field_sign(id, fld, max))
+
 #endif /* __ARM64_KVM_HOST_H__ */
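
A hypothetical use of the new helpers (the register and field here are only an example of the calling convention):

    /* example: test a VM's sanitised ID-register view of a feature */
    if (kvm_has_feat(kvm, ID_AA64MMFR0_EL1, FGT, IMP))
            ;       /* fine-grained traps are advertised to this guest */
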
index 145ce73fc16c950ee41a2db31d29186bdf9c0149..3e2a1ac0c9bb81629b4599b95ddd9a708c4d65f5 100644 (file)
@@ -70,7 +70,7 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 /*
  * Without an __arch_swab32(), we fall back to ___constant_swab32(), but the
  * static inline can allow the compiler to out-of-line this. KVM always wants
- * the macro version as its always inlined.
+ * the macro version as it's always inlined.
  */
 #define __kvm_swab32(x)        ___constant_swab32(x)
 
index e3e793d0ec30413491839f9e060bd9e27deff227..d5e48d870461baded85222fe3f55067376b4a19d 100644 (file)
 
 #include <asm/alternative.h>
 
-/*
- * Convert a kernel VA into a HYP VA.
- * reg: VA to be converted.
- *
- * The actual code generation takes place in kvm_update_va_mask, and
- * the instructions below are only there to reserve the space and
- * perform the register allocation (kvm_update_va_mask uses the
- * specific registers encoded in the instructions).
- */
-.macro kern_hyp_va     reg
-#ifndef __KVM_VHE_HYPERVISOR__
-alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
-       and     \reg, \reg, #1          /* mask with va_mask */
-       ror     \reg, \reg, #1          /* rotate to the first tag bit */
-       add     \reg, \reg, #0          /* insert the low 12 bits of the tag */
-       add     \reg, \reg, #0, lsl 12  /* insert the top 12 bits of the tag */
-       ror     \reg, \reg, #63         /* rotate back */
-alternative_cb_end
-#endif
-.endm
-
 /*
  * Convert a hypervisor VA to a PA
  * reg: hypervisor address to be converted in place
@@ -127,14 +106,29 @@ void kvm_apply_hyp_relocations(void);
 
 #define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset)
 
+/*
+ * Convert a kernel VA into a HYP VA.
+ *
+ * Can be called from hyp or non-hyp context.
+ *
+ * The actual code generation takes place in kvm_update_va_mask(), and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_va_mask() uses the
+ * specific registers encoded in the instructions).
+ */
 static __always_inline unsigned long __kern_hyp_va(unsigned long v)
 {
+/*
+ * This #ifndef is an optimisation for when this is called from VHE hyp
+ * context.  When called from a VHE non-hyp context, kvm_update_va_mask() will
+ * replace the instructions with `nop`s.
+ */
 #ifndef __KVM_VHE_HYPERVISOR__
-       asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
-                                   "ror %0, %0, #1\n"
-                                   "add %0, %0, #0\n"
-                                   "add %0, %0, #0, lsl 12\n"
-                                   "ror %0, %0, #63\n",
+       asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"         /* mask with va_mask */
+                                   "ror %0, %0, #1\n"         /* rotate to the first tag bit */
+                                   "add %0, %0, #0\n"         /* insert the low 12 bits of the tag */
+                                   "add %0, %0, #0, lsl 12\n" /* insert the top 12 bits of the tag */
+                                   "ror %0, %0, #63\n",       /* rotate back */
                                    ARM64_ALWAYS_SYSTEM,
                                    kvm_update_va_mask)
                     : "+r" (v));
index 4882905357f43b6b5146f5d4f38d13b100f8dbb8..c77d795556e130d0c2ae05be272043e65d1acddb 100644 (file)
@@ -60,7 +60,6 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
        return ttbr0 & ~GENMASK_ULL(63, 48);
 }
 
-extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
 
 int kvm_init_nv_sysregs(struct kvm *kvm);
 
index cfdf40f734b12264ea9b4227839ffa191d512e33..19278dfe79782561013d4c2b03e9a866615479b0 100644 (file)
@@ -197,6 +197,7 @@ enum kvm_pgtable_stage2_flags {
  * @KVM_PGTABLE_PROT_W:                Write permission.
  * @KVM_PGTABLE_PROT_R:                Read permission.
  * @KVM_PGTABLE_PROT_DEVICE:   Device attributes.
+ * @KVM_PGTABLE_PROT_NORMAL_NC:        Normal noncacheable attributes.
  * @KVM_PGTABLE_PROT_SW0:      Software bit 0.
  * @KVM_PGTABLE_PROT_SW1:      Software bit 1.
  * @KVM_PGTABLE_PROT_SW2:      Software bit 2.
@@ -208,6 +209,7 @@ enum kvm_pgtable_prot {
        KVM_PGTABLE_PROT_R                      = BIT(2),
 
        KVM_PGTABLE_PROT_DEVICE                 = BIT(3),
+       KVM_PGTABLE_PROT_NORMAL_NC              = BIT(4),
 
        KVM_PGTABLE_PROT_SW0                    = BIT(55),
        KVM_PGTABLE_PROT_SW1                    = BIT(56),
index b850b1b9147131b16c60856635f82ff339aeffe1..54fb014eba058220257766d195c683fa30a98522 100644 (file)
  * Memory types for Stage-2 translation
  */
 #define MT_S2_NORMAL           0xf
+#define MT_S2_NORMAL_NC                0x5
 #define MT_S2_DEVICE_nGnRE     0x1
 
 /*
  * Stage-2 enforces Normal-WB and Device-nGnRE
  */
 #define MT_S2_FWB_NORMAL       6
+#define MT_S2_FWB_NORMAL_NC    5
 #define MT_S2_FWB_DEVICE_nGnRE 1
 
 #ifdef CONFIG_ARM64_4K_PAGES
index 8bec85350865a173cab3b68cecf6c385d4409834..afdd56d26ad7004eecce8254cfe589c81eb8a285 100644 (file)
@@ -98,7 +98,8 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
        __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
 
 #define pte_none(pte)          (!pte_val(pte))
-#define pte_clear(mm,addr,ptep)        set_pte(ptep, __pte(0))
+#define __pte_clear(mm, addr, ptep) \
+                               __set_pte(ptep, __pte(0))
 #define pte_page(pte)          (pfn_to_page(pte_pfn(pte)))
 
 /*
@@ -137,12 +138,16 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
  */
 #define pte_valid_not_user(pte) \
        ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
+/*
+ * Returns true if the pte is valid and has the contiguous bit set.
+ */
+#define pte_valid_cont(pte)    (pte_valid(pte) && pte_cont(pte))
 /*
  * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
  * so that we don't erroneously return false for pages that have been
  * remapped as PROT_NONE but are yet to be flushed from the TLB.
  * Note that we can't make any assumptions based on the state of the access
- * flag, since ptep_clear_flush_young() elides a DSB when invalidating the
+ * flag, since __ptep_clear_flush_young() elides a DSB when invalidating the
  * TLB.
  */
 #define pte_accessible(mm, pte)        \
@@ -266,7 +271,7 @@ static inline pte_t pte_mkdevmap(pte_t pte)
        return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
 }
 
-static inline void set_pte(pte_t *ptep, pte_t pte)
+static inline void __set_pte(pte_t *ptep, pte_t pte)
 {
        WRITE_ONCE(*ptep, pte);
 
@@ -280,6 +285,11 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
        }
 }
 
+static inline pte_t __ptep_get(pte_t *ptep)
+{
+       return READ_ONCE(*ptep);
+}
+
 extern void __sync_icache_dcache(pte_t pteval);
 bool pgattr_change_is_safe(u64 old, u64 new);
 
@@ -307,7 +317,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
        if (!IS_ENABLED(CONFIG_DEBUG_VM))
                return;
 
-       old_pte = READ_ONCE(*ptep);
+       old_pte = __ptep_get(ptep);
 
        if (!pte_valid(old_pte) || !pte_valid(pte))
                return;
@@ -316,7 +326,7 @@ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
 
        /*
         * Check for potential race with hardware updates of the pte
-        * (ptep_set_access_flags safely changes valid ptes without going
+        * (__ptep_set_access_flags safely changes valid ptes without going
         * through an invalid entry).
         */
        VM_WARN_ONCE(!pte_young(pte),
@@ -346,23 +356,38 @@ static inline void __sync_cache_and_tags(pte_t pte, unsigned int nr_pages)
                mte_sync_tags(pte, nr_pages);
 }
 
-static inline void set_ptes(struct mm_struct *mm,
-                           unsigned long __always_unused addr,
-                           pte_t *ptep, pte_t pte, unsigned int nr)
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+       unsigned long pfn = pte_pfn(pte);
+
+       return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+}
+
+#define pte_advance_pfn pte_advance_pfn
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
+{
+       return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
+}
+
+static inline void __set_ptes(struct mm_struct *mm,
+                             unsigned long __always_unused addr,
+                             pte_t *ptep, pte_t pte, unsigned int nr)
 {
        page_table_check_ptes_set(mm, ptep, pte, nr);
        __sync_cache_and_tags(pte, nr);
 
        for (;;) {
                __check_safe_pte_update(mm, ptep, pte);
-               set_pte(ptep, pte);
+               __set_pte(ptep, pte);
                if (--nr == 0)
                        break;
                ptep++;
-               pte_val(pte) += PAGE_SIZE;
+               pte = pte_advance_pfn(pte, 1);
        }
 }
-#define set_ptes set_ptes
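
The pte_pgprot()/pte_advance_pfn() pair above replaces the old raw
`pte_val(pte) += PAGE_SIZE` stepping: the attributes are split off the pfn and
reapplied after the increment. A minimal user-space model of the arithmetic
(the pfn field width and the prot bits here are stand-ins, not the real arm64
descriptor layout):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PFN_MASK   0x0000fffffffff000ULL   /* stand-in pfn field */

    static uint64_t pte_pfn(uint64_t pte)  { return (pte & PFN_MASK) >> PAGE_SHIFT; }

    static uint64_t pfn_pte(uint64_t pfn, uint64_t prot)
    {
        return ((pfn << PAGE_SHIFT) & PFN_MASK) | prot;
    }

    /* pte_pgprot(): XOR away a pte built from the pfn alone, keeping attributes */
    static uint64_t pte_pgprot(uint64_t pte)
    {
        return pfn_pte(pte_pfn(pte), 0) ^ pte;
    }

    /* pte_advance_pfn(): rebuild around pfn + nr, attributes unchanged */
    static uint64_t pte_advance_pfn(uint64_t pte, uint64_t nr)
    {
        return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
    }

    int main(void)
    {
        uint64_t pte  = pfn_pte(0x1234, 0x3);
        uint64_t next = pte_advance_pfn(pte, 1);

        printf("pfn %llx -> %llx, prot %llx\n",
               (unsigned long long)pte_pfn(pte),
               (unsigned long long)pte_pfn(next),
               (unsigned long long)pte_pgprot(next));
        return 0;
    }

The XOR keeps the helper layout-agnostic: anything pfn_pte() does not set is,
by construction, an attribute bit.
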
 
 /*
  * Huge pte definitions.
@@ -438,16 +463,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
        return clear_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
 }
 
-/*
- * Select all bits except the pfn
- */
-static inline pgprot_t pte_pgprot(pte_t pte)
-{
-       unsigned long pfn = pte_pfn(pte);
-
-       return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
-}
-
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * See the comment in include/linux/pgtable.h
@@ -539,7 +554,7 @@ static inline void __set_pte_at(struct mm_struct *mm,
 {
        __sync_cache_and_tags(pte, nr);
        __check_safe_pte_update(mm, ptep, pte);
-       set_pte(ptep, pte);
+       __set_pte(ptep, pte);
 }
 
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@@ -1033,8 +1048,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
        return pte_pmd(pte_modify(pmd_pte(pmd), newprot));
 }
 
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-extern int ptep_set_access_flags(struct vm_area_struct *vma,
+extern int __ptep_set_access_flags(struct vm_area_struct *vma,
                                 unsigned long address, pte_t *ptep,
                                 pte_t entry, int dirty);
 
@@ -1044,7 +1058,8 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
                                        unsigned long address, pmd_t *pmdp,
                                        pmd_t entry, int dirty)
 {
-       return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty);
+       return __ptep_set_access_flags(vma, address, (pte_t *)pmdp,
+                                                       pmd_pte(entry), dirty);
 }
 
 static inline int pud_devmap(pud_t pud)
@@ -1078,12 +1093,13 @@ static inline bool pud_user_accessible_page(pud_t pud)
 /*
  * Atomic pte/pmd modifications.
  */
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int __ptep_test_and_clear_young(pte_t *ptep)
+static inline int __ptep_test_and_clear_young(struct vm_area_struct *vma,
+                                             unsigned long address,
+                                             pte_t *ptep)
 {
        pte_t old_pte, pte;
 
-       pte = READ_ONCE(*ptep);
+       pte = __ptep_get(ptep);
        do {
                old_pte = pte;
                pte = pte_mkold(pte);
@@ -1094,18 +1110,10 @@ static inline int __ptep_test_and_clear_young(pte_t *ptep)
        return pte_young(pte);
 }
 
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
-                                           unsigned long address,
-                                           pte_t *ptep)
-{
-       return __ptep_test_and_clear_young(ptep);
-}
-
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
                                         unsigned long address, pte_t *ptep)
 {
-       int young = ptep_test_and_clear_young(vma, address, ptep);
+       int young = __ptep_test_and_clear_young(vma, address, ptep);
 
        if (young) {
                /*
@@ -1128,12 +1136,11 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
                                            unsigned long address,
                                            pmd_t *pmdp)
 {
-       return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
+       return __ptep_test_and_clear_young(vma, address, (pte_t *)pmdp);
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
                                       unsigned long address, pte_t *ptep)
 {
        pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
@@ -1143,6 +1150,37 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
        return pte;
 }
 
+static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, unsigned int nr, int full)
+{
+       for (;;) {
+               __ptep_get_and_clear(mm, addr, ptep);
+               if (--nr == 0)
+                       break;
+               ptep++;
+               addr += PAGE_SIZE;
+       }
+}
+
+static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
+                               unsigned long addr, pte_t *ptep,
+                               unsigned int nr, int full)
+{
+       pte_t pte, tmp_pte;
+
+       pte = __ptep_get_and_clear(mm, addr, ptep);
+       while (--nr) {
+               ptep++;
+               addr += PAGE_SIZE;
+               tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
+               if (pte_dirty(tmp_pte))
+                       pte = pte_mkdirty(pte);
+               if (pte_young(tmp_pte))
+                       pte = pte_mkyoung(pte);
+       }
+       return pte;
+}
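
__get_and_clear_full_ptes() returns the first pte of the batch with the
access/dirty state of the whole range folded in, so the caller sees a single
summary entry for the batch. A toy model of the folding, assuming software
dirty/young flag bits:

    #include <stdio.h>

    #define PTE_DIRTY (1u << 0)
    #define PTE_YOUNG (1u << 1)

    static unsigned int get_and_clear_fold(unsigned int *ptes, unsigned int nr)
    {
        unsigned int pte = ptes[0];

        ptes[0] = 0;                            /* "clear" the entry */
        for (unsigned int i = 1; i < nr; i++) {
            /* any dirty/young entry taints the summary pte */
            pte |= ptes[i] & (PTE_DIRTY | PTE_YOUNG);
            ptes[i] = 0;
        }
        return pte;
    }

    int main(void)
    {
        unsigned int ptes[4] = { 0, PTE_DIRTY, 0, PTE_YOUNG };

        printf("summary: %#x\n", get_and_clear_fold(ptes, 4));  /* 0x3 */
        return 0;
    }
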
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
@@ -1156,16 +1194,12 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-/*
- * ptep_set_wrprotect - mark read-only while trasferring potential hardware
- * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
- */
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
+static inline void ___ptep_set_wrprotect(struct mm_struct *mm,
+                                       unsigned long address, pte_t *ptep,
+                                       pte_t pte)
 {
-       pte_t old_pte, pte;
+       pte_t old_pte;
 
-       pte = READ_ONCE(*ptep);
        do {
                old_pte = pte;
                pte = pte_wrprotect(pte);
@@ -1174,12 +1208,31 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
        } while (pte_val(pte) != pte_val(old_pte));
 }
 
+/*
+ * __ptep_set_wrprotect - mark read-only while transferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
+ */
+static inline void __ptep_set_wrprotect(struct mm_struct *mm,
+                                       unsigned long address, pte_t *ptep)
+{
+       ___ptep_set_wrprotect(mm, address, ptep, __ptep_get(ptep));
+}
+
+static inline void __wrprotect_ptes(struct mm_struct *mm, unsigned long address,
+                               pte_t *ptep, unsigned int nr)
+{
+       unsigned int i;
+
+       for (i = 0; i < nr; i++, address += PAGE_SIZE, ptep++)
+               __ptep_set_wrprotect(mm, address, ptep);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
                                      unsigned long address, pmd_t *pmdp)
 {
-       ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
+       __ptep_set_wrprotect(mm, address, (pte_t *)pmdp);
 }
 
 #define pmdp_establish pmdp_establish
@@ -1257,7 +1310,7 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
 #endif /* CONFIG_ARM64_MTE */
 
 /*
- * On AArch64, the cache coherency is handled via the set_pte_at() function.
+ * On AArch64, the cache coherency is handled via the __set_ptes() function.
  */
 static inline void update_mmu_cache_range(struct vm_fault *vmf,
                struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
@@ -1309,6 +1362,282 @@ extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
 extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
                                    unsigned long addr, pte_t *ptep,
                                    pte_t old_pte, pte_t new_pte);
+
+#ifdef CONFIG_ARM64_CONTPTE
+
+/*
+ * The contpte APIs are used to transparently manage the contiguous bit in ptes
+ * where it is possible and makes sense to do so. The PTE_CONT bit is considered
+ * a private implementation detail of the public ptep API (see below).
+ */
+extern void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, pte_t pte);
+extern void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, pte_t pte);
+extern pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte);
+extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep);
+extern void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, pte_t pte, unsigned int nr);
+extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, unsigned int nr, int full);
+extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
+                               unsigned long addr, pte_t *ptep,
+                               unsigned int nr, int full);
+extern int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
+                               unsigned long addr, pte_t *ptep);
+extern int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
+                               unsigned long addr, pte_t *ptep);
+extern void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, unsigned int nr);
+extern int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
+                               unsigned long addr, pte_t *ptep,
+                               pte_t entry, int dirty);
+
+static __always_inline void contpte_try_fold(struct mm_struct *mm,
+                               unsigned long addr, pte_t *ptep, pte_t pte)
+{
+       /*
+        * Only bother trying if both the virtual and physical addresses are
+        * aligned and correspond to the last entry in a contig range. The core
+        * code mostly modifies ranges from low to high, so this is likely the
+        * last modification in the contig range and therefore a good time to fold.
+        * We can't fold special mappings, because there is no associated folio.
+        */
+
+       const unsigned long contmask = CONT_PTES - 1;
+       bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask;
+
+       if (unlikely(valign)) {
+               bool palign = (pte_pfn(pte) & contmask) == contmask;
+
+               if (unlikely(palign &&
+                   pte_valid(pte) && !pte_cont(pte) && !pte_special(pte)))
+                       __contpte_try_fold(mm, addr, ptep, pte);
+       }
+}
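
The fold trigger only fires when both the virtual index and the pfn land on
the last slot of a contig block. A user-space sketch of the check, assuming
CONT_PTES == 16 (4K pages, 64K contig ranges):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define CONT_PTES  16

    static bool last_in_contig_range(uint64_t addr, uint64_t pfn)
    {
        const uint64_t contmask = CONT_PTES - 1;

        /* virtual index within the contig block... */
        bool valign = ((addr >> PAGE_SHIFT) & contmask) == contmask;
        /* ...and the physical pfn must sit on the same final slot */
        bool palign = (pfn & contmask) == contmask;

        return valign && palign;
    }

    int main(void)
    {
        /* 16th page of a 64K-aligned VA range, backed by a matching pfn: folds */
        printf("%d\n", last_in_contig_range(0x40000000 + 15 * 4096, 0x100f));
        /* misaligned pfn: no fold */
        printf("%d\n", last_in_contig_range(0x40000000 + 15 * 4096, 0x1010));
        return 0;
    }
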
+
+static __always_inline void contpte_try_unfold(struct mm_struct *mm,
+                               unsigned long addr, pte_t *ptep, pte_t pte)
+{
+       if (unlikely(pte_valid_cont(pte)))
+               __contpte_try_unfold(mm, addr, ptep, pte);
+}
+
+#define pte_batch_hint pte_batch_hint
+static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
+{
+       if (!pte_valid_cont(pte))
+               return 1;
+
+       return CONT_PTES - (((unsigned long)ptep >> 3) & (CONT_PTES - 1));
+}
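
The hint arithmetic turns the pte pointer itself into a position within the
contig block: ptes are 8 bytes, so `ptep >> 3` is the entry index, and the
hint is the number of entries left up to the block boundary. A model, again
assuming CONT_PTES == 16:

    #include <stdint.h>
    #include <stdio.h>

    #define CONT_PTES 16

    static unsigned int pte_batch_hint(uint64_t ptep)
    {
        /* 8-byte ptes: ptep >> 3 is the index into the pte page */
        return CONT_PTES - ((ptep >> 3) & (CONT_PTES - 1));
    }

    int main(void)
    {
        uint64_t base = 0xffff000012340000ULL;  /* hypothetical pte array */

        /* first slot: a full 16-entry batch; last slot: a batch of 1 */
        printf("%u %u\n", pte_batch_hint(base), pte_batch_hint(base + 15 * 8));
        return 0;
    }
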
+
+/*
+ * The below functions constitute the public API that arm64 presents to the
+ * core-mm to manipulate PTE entries within their page tables (or at least this
+ * is the subset of the API that arm64 needs to implement). These public
+ * versions will automatically and transparently apply the contiguous bit where
+ * it makes sense to do so. Therefore any users that are contig-aware (e.g.
+ * hugetlb, kernel mapper) should NOT use these APIs, but instead use the
+ * private versions, which are prefixed with double underscore. All of these
+ * APIs except for ptep_get_lockless() are expected to be called with the PTL
+ * held. Although the contiguous bit is considered private to the
+ * implementation, it is deliberately allowed to leak through the getters (e.g.
+ * ptep_get()), back to core code. This is required so that pte_leaf_size() can
+ * provide an accurate size for perf_get_pgtable_size(). But this leakage means
+ * it's possible a pte will be passed to a setter with the contiguous bit set, so
+ * we explicitly clear the contiguous bit in those cases to prevent accidentally
+ * setting it in the pgtable.
+ */
+
+#define ptep_get ptep_get
+static inline pte_t ptep_get(pte_t *ptep)
+{
+       pte_t pte = __ptep_get(ptep);
+
+       if (likely(!pte_valid_cont(pte)))
+               return pte;
+
+       return contpte_ptep_get(ptep, pte);
+}
+
+#define ptep_get_lockless ptep_get_lockless
+static inline pte_t ptep_get_lockless(pte_t *ptep)
+{
+       pte_t pte = __ptep_get(ptep);
+
+       if (likely(!pte_valid_cont(pte)))
+               return pte;
+
+       return contpte_ptep_get_lockless(ptep);
+}
+
+static inline void set_pte(pte_t *ptep, pte_t pte)
+{
+       /*
+        * We don't have the mm or vaddr so cannot unfold contig entries (since
+        * it requires tlb maintenance). set_pte() is not used in core code, so
+        * this should never even be called. Regardless do our best to service
+        * this should never even be called. Regardless, do our best to service
+        * top of an existing contig range.
+        */
+       pte_t orig_pte = __ptep_get(ptep);
+
+       WARN_ON_ONCE(pte_valid_cont(orig_pte));
+       __set_pte(ptep, pte_mknoncont(pte));
+}
+
+#define set_ptes set_ptes
+static __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, pte_t pte, unsigned int nr)
+{
+       pte = pte_mknoncont(pte);
+
+       if (likely(nr == 1)) {
+               contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+               __set_ptes(mm, addr, ptep, pte, 1);
+               contpte_try_fold(mm, addr, ptep, pte);
+       } else {
+               contpte_set_ptes(mm, addr, ptep, pte, nr);
+       }
+}
+
+static inline void pte_clear(struct mm_struct *mm,
+                               unsigned long addr, pte_t *ptep)
+{
+       contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+       __pte_clear(mm, addr, ptep);
+}
+
+#define clear_full_ptes clear_full_ptes
+static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, unsigned int nr, int full)
+{
+       if (likely(nr == 1)) {
+               contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+               __clear_full_ptes(mm, addr, ptep, nr, full);
+       } else {
+               contpte_clear_full_ptes(mm, addr, ptep, nr, full);
+       }
+}
+
+#define get_and_clear_full_ptes get_and_clear_full_ptes
+static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
+                               unsigned long addr, pte_t *ptep,
+                               unsigned int nr, int full)
+{
+       pte_t pte;
+
+       if (likely(nr == 1)) {
+               contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+               pte = __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+       } else {
+               pte = contpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+       }
+
+       return pte;
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+                               unsigned long addr, pte_t *ptep)
+{
+       contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+       return __ptep_get_and_clear(mm, addr, ptep);
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+                               unsigned long addr, pte_t *ptep)
+{
+       pte_t orig_pte = __ptep_get(ptep);
+
+       if (likely(!pte_valid_cont(orig_pte)))
+               return __ptep_test_and_clear_young(vma, addr, ptep);
+
+       return contpte_ptep_test_and_clear_young(vma, addr, ptep);
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+                               unsigned long addr, pte_t *ptep)
+{
+       pte_t orig_pte = __ptep_get(ptep);
+
+       if (likely(!pte_valid_cont(orig_pte)))
+               return __ptep_clear_flush_young(vma, addr, ptep);
+
+       return contpte_ptep_clear_flush_young(vma, addr, ptep);
+}
+
+#define wrprotect_ptes wrprotect_ptes
+static __always_inline void wrprotect_ptes(struct mm_struct *mm,
+                               unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+       if (likely(nr == 1)) {
+               /*
+                * Optimization: wrprotect_ptes() can only be called for present
+                * ptes, so we only need to check the contig bit as the condition
+                * for unfold, and we can remove the contig bit from the pte we
+                * read to avoid re-reading. This speeds up fork(), which is
+                * sensitive to this cost for order-0 folios. Equivalent to
+                * contpte_try_unfold().
+                */
+               pte_t orig_pte = __ptep_get(ptep);
+
+               if (unlikely(pte_cont(orig_pte))) {
+                       __contpte_try_unfold(mm, addr, ptep, orig_pte);
+                       orig_pte = pte_mknoncont(orig_pte);
+               }
+               ___ptep_set_wrprotect(mm, addr, ptep, orig_pte);
+       } else {
+               contpte_wrprotect_ptes(mm, addr, ptep, nr);
+       }
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+                               unsigned long addr, pte_t *ptep)
+{
+       wrprotect_ptes(mm, addr, ptep, 1);
+}
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline int ptep_set_access_flags(struct vm_area_struct *vma,
+                               unsigned long addr, pte_t *ptep,
+                               pte_t entry, int dirty)
+{
+       pte_t orig_pte = __ptep_get(ptep);
+
+       entry = pte_mknoncont(entry);
+
+       if (likely(!pte_valid_cont(orig_pte)))
+               return __ptep_set_access_flags(vma, addr, ptep, entry, dirty);
+
+       return contpte_ptep_set_access_flags(vma, addr, ptep, entry, dirty);
+}
+
+#else /* CONFIG_ARM64_CONTPTE */
+
+#define ptep_get                               __ptep_get
+#define set_pte                                        __set_pte
+#define set_ptes                               __set_ptes
+#define pte_clear                              __pte_clear
+#define clear_full_ptes                                __clear_full_ptes
+#define get_and_clear_full_ptes                        __get_and_clear_full_ptes
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define ptep_get_and_clear                     __ptep_get_and_clear
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young              __ptep_test_and_clear_young
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young                 __ptep_clear_flush_young
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+#define ptep_set_wrprotect                     __ptep_set_wrprotect
+#define wrprotect_ptes                         __wrprotect_ptes
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags                  __ptep_set_access_flags
+
+#endif /* CONFIG_ARM64_CONTPTE */
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
index 581caac525b03affb45808198bce14be5d7a13fa..5b1701c76d1cec2b26f23b198d8528fb2aec8806 100644 (file)
@@ -29,13 +29,6 @@ void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
 static inline void ptdump_debugfs_register(struct ptdump_info *info,
                                           const char *name) { }
 #endif
-void ptdump_check_wx(void);
 #endif /* CONFIG_PTDUMP_CORE */
 
-#ifdef CONFIG_DEBUG_WX
-#define debug_checkwx()        ptdump_check_wx()
-#else
-#define debug_checkwx()        do { } while (0)
-#endif
-
 #endif /* __ASM_PTDUMP_H */
index c3b19b376c86722c668a208b0be6cd122de17851..9e8999592f3af5068b27717f137337e5af500165 100644 (file)
        par;                                                            \
 })
 
+#define SYS_FIELD_VALUE(reg, field, val)       reg##_##field##_##val
+
 #define SYS_FIELD_GET(reg, field, val)         \
                 FIELD_GET(reg##_##field##_MASK, val)
 
 #define SYS_FIELD_PREP(reg, field, val)                \
                 FIELD_PREP(reg##_##field##_MASK, val)
 
 #define SYS_FIELD_PREP_ENUM(reg, field, val)           \
-                FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+                FIELD_PREP(reg##_##field##_MASK,       \
+                           SYS_FIELD_VALUE(reg, field, val))
 
 #endif
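
SYS_FIELD_VALUE() is pure token pasting, so SYS_FIELD_PREP_ENUM(reg, field, val)
still expands to FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val) as
before; the helper just makes the pasted name reusable on its own. A tiny
demonstration with a hypothetical FOO_BAR_BAZ constant:

    #include <stdio.h>

    #define SYS_FIELD_VALUE(reg, field, val)   reg##_##field##_##val

    #define FOO_BAR_BAZ 5   /* hypothetical enum value */

    int main(void)
    {
        /* pastes to the identifier FOO_BAR_BAZ */
        printf("%d\n", SYS_FIELD_VALUE(FOO, BAR, BAZ));
        return 0;
    }
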
 
index 1deb5d789c2e238abfbc228e4a6349ab210dd9ca..3b0e8248e1a41a1ead90bdbf4fea82054d9fcd90 100644 (file)
@@ -422,7 +422,7 @@ do {                                                                        \
 #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
        __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false, kvm_lpa2_is_enabled());
 
-static inline void __flush_tlb_range(struct vm_area_struct *vma,
+static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
                                     unsigned long start, unsigned long end,
                                     unsigned long stride, bool last_level,
                                     int tlb_level)
@@ -456,10 +456,19 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
                __flush_tlb_range_op(vae1is, start, pages, stride, asid,
                                     tlb_level, true, lpa2_is_enabled());
 
-       dsb(ish);
        mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
 }
 
+static inline void __flush_tlb_range(struct vm_area_struct *vma,
+                                    unsigned long start, unsigned long end,
+                                    unsigned long stride, bool last_level,
+                                    int tlb_level)
+{
+       __flush_tlb_range_nosync(vma, start, end, stride,
+                                last_level, tlb_level);
+       dsb(ish);
+}
+
 static inline void flush_tlb_range(struct vm_area_struct *vma,
                                   unsigned long start, unsigned long end)
 {
index 89d2fc872d9f5e63dce2e2a74dfb422c9e255030..964df31da9751c96c984358c66d6f73c8519b2e7 100644 (file)
@@ -37,9 +37,7 @@
 #include <asm/ptrace.h>
 #include <asm/sve_context.h>
 
-#define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
 #define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -76,11 +74,11 @@ struct kvm_regs {
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT      0
-#define KVM_ARM_DEVICE_TYPE_MASK       GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
-                                               KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_TYPE_MASK       __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
+                                                 KVM_ARM_DEVICE_TYPE_SHIFT)
 #define KVM_ARM_DEVICE_ID_SHIFT                16
-#define KVM_ARM_DEVICE_ID_MASK         GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
-                                               KVM_ARM_DEVICE_ID_SHIFT)
+#define KVM_ARM_DEVICE_ID_MASK         __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
+                                                 KVM_ARM_DEVICE_ID_SHIFT)
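
GENMASK() is kernel-internal, while this is a uapi header; __GENMASK() is the
variant exported to userspace through the uapi bits.h, which is why the masks
are rewritten rather than widened. The mask construction itself is simple; a
stand-in model (MY_GENMASK is illustrative, not the uapi macro):

    #include <stdio.h>

    /* set bits [h:l] of a 64-bit word */
    #define MY_GENMASK(h, l) \
        (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

    int main(void)
    {
        /* device type occupies bits [15:0], device id bits [31:16] */
        printf("%#llx %#llx\n",
               (unsigned long long)MY_GENMASK(15, 0),
               (unsigned long long)MY_GENMASK(31, 16));
        return 0;
    }
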
 
 /* Supported device IDs */
 #define KVM_ARM_DEVICE_VGIC_V2         0
@@ -162,6 +160,11 @@ struct kvm_sync_regs {
        __u64 device_irq_level;
 };
 
+/* Bits for run->s.regs.device_irq_level */
+#define KVM_ARM_DEV_EL1_VTIMER         (1 << 0)
+#define KVM_ARM_DEV_EL1_PTIMER         (1 << 1)
+#define KVM_ARM_DEV_PMU                        (1 << 2)
+
 /*
  * PMU filter structure. Describe a range of events with a particular
  * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
index 14b4a179bad313ead93f620f6377a395ea79e535..763824963ed157b24b8bf324d76b27fafc5374fd 100644 (file)
@@ -65,7 +65,7 @@ obj-$(CONFIG_KEXEC_FILE)              += machine_kexec_file.o kexec_image.o
 obj-$(CONFIG_ARM64_RELOC_TEST)         += arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 obj-$(CONFIG_CRASH_DUMP)               += crash_dump.o
-obj-$(CONFIG_CRASH_CORE)               += crash_core.o
+obj-$(CONFIG_VMCORE_INFO)              += vmcore_info.o
 obj-$(CONFIG_ARM_SDE_INTERFACE)                += sdei.o
 obj-$(CONFIG_ARM64_PTR_AUTH)           += pointer_auth.o
 obj-$(CONFIG_ARM64_MTE)                        += mte.o
index d6679d8b737e3ac2d7a516d11ef7836a61f5e354..56583677c1f294d66bd5ac50741089243bf5e0cd 100644 (file)
@@ -140,12 +140,42 @@ void dump_cpu_features(void)
        pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps);
 }
 
+#define __ARM64_MAX_POSITIVE(reg, field)                               \
+               ((reg##_##field##_SIGNED ?                              \
+                 BIT(reg##_##field##_WIDTH - 1) :                      \
+                 BIT(reg##_##field##_WIDTH)) - 1)
+
+#define __ARM64_MIN_NEGATIVE(reg, field)  BIT(reg##_##field##_WIDTH - 1)
+
+#define __ARM64_CPUID_FIELDS(reg, field, min_value, max_value)         \
+               .sys_reg = SYS_##reg,                                   \
+               .field_pos = reg##_##field##_SHIFT,                     \
+               .field_width = reg##_##field##_WIDTH,                   \
+               .sign = reg##_##field##_SIGNED,                         \
+               .min_field_value = min_value,                           \
+               .max_field_value = max_value,
+
+/*
+ * ARM64_CPUID_FIELDS() encodes a field with a range from min_value to
+ * an implicit maximum that depends on the signedness of the field.
+ *
+ * An unsigned field will be capped at all ones, while a signed field
+ * will be limited to the positive half only.
+ */
 #define ARM64_CPUID_FIELDS(reg, field, min_value)                      \
-               .sys_reg = SYS_##reg,                                                   \
-               .field_pos = reg##_##field##_SHIFT,                                             \
-               .field_width = reg##_##field##_WIDTH,                                           \
-               .sign = reg##_##field##_SIGNED,                                                 \
-               .min_field_value = reg##_##field##_##min_value,
+       __ARM64_CPUID_FIELDS(reg, field,                                \
+                            SYS_FIELD_VALUE(reg, field, min_value),    \
+                            __ARM64_MAX_POSITIVE(reg, field))
+
+/*
+ * ARM64_CPUID_FIELDS_NEG() encodes a field with a range from an
+ * implicit minimal value to max_value. This should be used when
+ * matching a non-implemented property.
+ */
+#define ARM64_CPUID_FIELDS_NEG(reg, field, max_value)                  \
+       __ARM64_CPUID_FIELDS(reg, field,                                \
+                            __ARM64_MIN_NEGATIVE(reg, field),          \
+                            SYS_FIELD_VALUE(reg, field, max_value))
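
The implicit bound depends on whether the field is signed: an unsigned field
can run all the way to all-ones, while a signed field must stop at the top of
its positive half so the comparison in feature_matches() stays meaningful. A
model of __ARM64_MAX_POSITIVE() for a 4-bit field:

    #include <stdbool.h>
    #include <stdio.h>

    #define BIT(n) (1u << (n))

    static unsigned int max_positive(bool is_signed, unsigned int width)
    {
        return (is_signed ? BIT(width - 1) : BIT(width)) - 1;
    }

    int main(void)
    {
        /* unsigned 4-bit field caps at 15; signed caps at 7 (positive half) */
        printf("%u %u\n", max_positive(false, 4), max_positive(true, 4));
        return 0;
    }
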
 
 #define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
        {                                               \
@@ -440,6 +470,11 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {
        ARM64_FTR_END,
 };
 
+static const struct arm64_ftr_bits ftr_id_aa64mmfr4[] = {
+       S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_E2H0_SHIFT, 4, 0),
+       ARM64_FTR_END,
+};
+
 static const struct arm64_ftr_bits ftr_ctr[] = {
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1),
@@ -764,6 +799,7 @@ static const struct __ftr_reg_entry {
        ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2,
                               &id_aa64mmfr2_override),
        ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3),
+       ARM64_FTR_REG(SYS_ID_AA64MMFR4_EL1, ftr_id_aa64mmfr4),
 
        /* Op1 = 1, CRn = 0, CRm = 0 */
        ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
@@ -959,7 +995,8 @@ static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
                                pr_warn("%s[%d:%d]: %s to %llx\n",
                                        reg->name,
                                        ftrp->shift + ftrp->width - 1,
-                                       ftrp->shift, str, tmp);
+                                       ftrp->shift, str,
+                                       tmp & (BIT(ftrp->width) - 1));
                } else if ((ftr_mask & reg->override->val) == ftr_mask) {
                        reg->override->val &= ~ftr_mask;
                        pr_warn("%s[%d:%d]: impossible override, ignored\n",
@@ -1088,6 +1125,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
        init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
        init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
        init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3);
+       init_cpu_ftr_reg(SYS_ID_AA64MMFR4_EL1, info->reg_id_aa64mmfr4);
        init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
        init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
        init_cpu_ftr_reg(SYS_ID_AA64PFR2_EL1, info->reg_id_aa64pfr2);
@@ -1470,6 +1508,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
        read_sysreg_case(SYS_ID_AA64MMFR1_EL1);
        read_sysreg_case(SYS_ID_AA64MMFR2_EL1);
        read_sysreg_case(SYS_ID_AA64MMFR3_EL1);
+       read_sysreg_case(SYS_ID_AA64MMFR4_EL1);
        read_sysreg_case(SYS_ID_AA64ISAR0_EL1);
        read_sysreg_case(SYS_ID_AA64ISAR1_EL1);
        read_sysreg_case(SYS_ID_AA64ISAR2_EL1);
@@ -1504,11 +1543,28 @@ has_always(const struct arm64_cpu_capabilities *entry, int scope)
 static bool
 feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
-       int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
-                                                   entry->field_width,
-                                                   entry->sign);
+       int val, min, max;
+       u64 tmp;
+
+       val = cpuid_feature_extract_field_width(reg, entry->field_pos,
+                                               entry->field_width,
+                                               entry->sign);
+
+       tmp = entry->min_field_value;
+       tmp <<= entry->field_pos;
 
-       return val >= entry->min_field_value;
+       min = cpuid_feature_extract_field_width(tmp, entry->field_pos,
+                                               entry->field_width,
+                                               entry->sign);
+
+       tmp = entry->max_field_value;
+       tmp <<= entry->field_pos;
+
+       max = cpuid_feature_extract_field_width(tmp, entry->field_pos,
+                                               entry->field_width,
+                                               entry->sign);
+
+       return val >= min && val <= max;
 }
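
Both bounds are pushed through cpuid_feature_extract_field_width() so that
signed fields compare correctly after sign extension. A user-space model of
the signed extraction (relying, as the kernel does, on arithmetic right
shift):

    #include <stdint.h>
    #include <stdio.h>

    /* shift the field up to bit 63, then arithmetic-shift back down */
    static int extract_signed(uint64_t reg, unsigned int pos, unsigned int width)
    {
        return (int)((int64_t)(reg << (64 - pos - width)) >> (64 - width));
    }

    int main(void)
    {
        /* a 4-bit field holding 0xf reads back as -1 when signed */
        printf("%d %d\n",
               extract_signed(0xfULL << 8, 8, 4),
               extract_signed(0x7ULL << 8, 8, 4));  /* -1 7 */
        return 0;
    }
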
 
 static u64
@@ -1752,6 +1808,28 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
        return !meltdown_safe;
 }
 
+static bool has_nv1(const struct arm64_cpu_capabilities *entry, int scope)
+{
+       /*
+        * Although the Apple M2 family appears to support NV1, the
+        * PTW barfs on the nVHE EL2 S1 page table format. Pretend
+        * that it doesn't support NV1 at all.
+        */
+       static const struct midr_range nv1_ni_list[] = {
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
+               MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
+               {}
+       };
+
+       return (__system_matches_cap(ARM64_HAS_NESTED_VIRT) &&
+               !(has_cpuid_feature(entry, scope) ||
+                 is_midr_in_range_list(read_cpuid_id(), nv1_ni_list)));
+}
+
 #if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2)
 static bool has_lpa2_at_stage1(u64 mmfr0)
 {
@@ -2776,6 +2854,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 #endif
        },
 #endif
+       {
+               .desc = "NV1",
+               .capability = ARM64_HAS_HCR_NV1,
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .matches = has_nv1,
+               ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1)
+       },
        {},
 };
 
index f0abb150f73efccfca75b020d989090630e72c81..09eeaa24d456053aeef86c79ac6a020dc5a29447 100644 (file)
@@ -463,6 +463,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
        info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
        info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1);
        info->reg_id_aa64mmfr3 = read_cpuid(ID_AA64MMFR3_EL1);
+       info->reg_id_aa64mmfr4 = read_cpuid(ID_AA64MMFR4_EL1);
        info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
        info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
        info->reg_id_aa64pfr2 = read_cpuid(ID_AA64PFR2_EL1);
index 0228001347beafa9d2f433149c175284a662eeac..9afcc690fe73c2e570e07a4757bc7e6b513cc81a 100644 (file)
@@ -103,7 +103,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
 {
        struct set_perm_data *spd = data;
        const efi_memory_desc_t *md = spd->md;
-       pte_t pte = READ_ONCE(*ptep);
+       pte_t pte = __ptep_get(ptep);
 
        if (md->attribute & EFI_MEMORY_RO)
                pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -111,7 +111,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
                pte = set_pte_bit(pte, __pgprot(PTE_PXN));
        else if (system_supports_bti_kernel() && spd->has_bti)
                pte = set_pte_bit(pte, __pgprot(PTE_GP));
-       set_pte(ptep, pte);
+       __set_pte(ptep, pte);
        return 0;
 }
 
index 405e9bce8c738ad7d13a51c2f9420968ebe66dc4..ce08b744aaab22fb2a5886b5c08005261ca7d5b0 100644 (file)
@@ -304,25 +304,32 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
        mov_q   x1, INIT_SCTLR_EL1_MMU_OFF
 
        /*
-        * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
-        * making it impossible to start in nVHE mode. Is that
-        * compliant with the architecture? Absolutely not!
+        * Compliant CPUs advertise their VHE-onlyness with
+        * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
+        * RES1 in that case.
+        *
+        * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but
+        * don't advertise it (they predate this relaxation).
         */
+       mrs_s   x0, SYS_ID_AA64MMFR4_EL1
+       ubfx    x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
+       tbnz    x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
+
        mrs     x0, hcr_el2
        and     x0, x0, #HCR_E2H
-       cbz     x0, 1f
-
+       cbz     x0, 2f
+1:
        /* Set a sane SCTLR_EL1, the VHE way */
        pre_disable_mmu_workaround
        msr_s   SYS_SCTLR_EL12, x1
        mov     x2, #BOOT_CPU_FLAG_E2H
-       b       2f
+       b       3f
 
-1:
+2:
        pre_disable_mmu_workaround
        msr     sctlr_el1, x1
        mov     x2, xzr
-2:
+3:
        __init_el2_nvhe_prepare_eret
 
        mov     w0, #BOOT_CPU_MODE_EL2
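
The new head.S logic treats a negative (top bit set) ID_AA64MMFR4_EL1.E2H0
field as "EL2 is VHE-only" and branches straight past the HCR_EL2.E2H probe.
A sketch of the sign test in C (the shift value here is illustrative; the
real position comes from the generated sysreg headers):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* illustrative field position, standing in for ID_AA64MMFR4_EL1_E2H0_SHIFT */
    #define E2H0_SHIFT 24
    #define E2H0_WIDTH 4

    static bool e2h0_is_negative(uint64_t mmfr4)
    {
        uint64_t field = (mmfr4 >> E2H0_SHIFT) & ((1ULL << E2H0_WIDTH) - 1);

        /* the tbnz tests the top field bit: "signed field is negative" */
        return field & (1ULL << (E2H0_WIDTH - 1));
    }

    int main(void)
    {
        printf("%d %d\n",
               e2h0_is_negative(0x0ULL << E2H0_SHIFT),   /* 0: nVHE possible */
               e2h0_is_negative(0xfULL << E2H0_SHIFT));  /* 1: VHE-only CPU */
        return 0;
    }
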
index b38aae5b488d074dc6e4d36f44d741ea200acaf9..82e2203d86a31f830435f0d6b0bd31414ddda31f 100644 (file)
@@ -255,7 +255,7 @@ void machine_crash_shutdown(struct pt_regs *regs)
        pr_info("Starting crashdump kernel...\n");
 }
 
-#ifdef CONFIG_HIBERNATION
+#if defined(CONFIG_CRASH_DUMP) && defined(CONFIG_HIBERNATION)
 /*
  * To preserve the crash dump kernel image, the relevant memory segments
  * should be mapped again around the hibernation.
index 0e017358f4ba64ed07fb2d0626725b680810c681..af1ca875c52ce2c7c1044cc021fbbd7f987848e2 100644 (file)
@@ -39,6 +39,7 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
        return kexec_image_post_load_cleanup_default(image);
 }
 
+#ifdef CONFIG_CRASH_DUMP
 static int prepare_elf_headers(void **addr, unsigned long *sz)
 {
        struct crash_mem *cmem;
@@ -80,6 +81,7 @@ out:
        kfree(cmem);
        return ret;
 }
+#endif
 
 /*
  * Tries to add the initrd and DTB to the image. If it is not possible to find
@@ -93,8 +95,8 @@ int load_other_segments(struct kimage *image,
                        char *cmdline)
 {
        struct kexec_buf kbuf;
-       void *headers, *dtb = NULL;
-       unsigned long headers_sz, initrd_load_addr = 0, dtb_len,
+       void *dtb = NULL;
+       unsigned long initrd_load_addr = 0, dtb_len,
                      orig_segments = image->nr_segments;
        int ret = 0;
 
@@ -102,7 +104,10 @@ int load_other_segments(struct kimage *image,
        /* not allocate anything below the kernel */
        kbuf.buf_min = kernel_load_addr + kernel_size;
 
+#ifdef CONFIG_CRASH_DUMP
        /* load elf core header */
+       void *headers;
+       unsigned long headers_sz;
        if (image->type == KEXEC_TYPE_CRASH) {
                ret = prepare_elf_headers(&headers, &headers_sz);
                if (ret) {
@@ -130,6 +135,7 @@ int load_other_segments(struct kimage *image,
                kexec_dprintk("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
                              image->elf_load_addr, kbuf.bufsz, kbuf.memsz);
        }
+#endif
 
        /* load initrd */
        if (initrd) {
index a41ef3213e1e9560ccfbf98d96443160eea2e784..dcdcccd40891c638951be93b6f5f2884502a2f7c 100644 (file)
@@ -67,7 +67,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
        /*
         * If the page content is identical but at least one of the pages is
         * tagged, return non-zero to avoid KSM merging. If only one of the
-        * pages is tagged, set_pte_at() may zero or change the tags of the
+        * pages is tagged, __set_ptes() may zero or change the tags of the
         * other page via mte_sync_tags().
         */
        if (page_mte_tagged(page1) || page_mte_tagged(page2))
similarity index 92%
rename from arch/arm64/kernel/crash_core.c
rename to arch/arm64/kernel/vmcore_info.c
index 66cde752cd7409fb64da615df14602697e43921a..b19d5d6cb8b3872ddf0fa729e412d69a0fe35b38 100644 (file)
@@ -4,7 +4,7 @@
  * Copyright (C) Huawei Futurewei Technologies.
  */
 
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
 #include <asm/cpufeature.h>
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
@@ -23,7 +23,6 @@ void arch_crash_save_vmcoreinfo(void)
        /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
        vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
        vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
-       vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
        vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
        vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
        vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
index 27ca89b628a02499d18505953ad1cee73ccf7f88..58f09370d17e017614c690000f55e9b3b816d77c 100644 (file)
@@ -19,7 +19,6 @@ if VIRTUALIZATION
 
 menuconfig KVM
        bool "Kernel-based Virtual Machine (KVM) support"
-       depends on HAVE_KVM
        select KVM_COMMON
        select KVM_GENERIC_HARDWARE_ENABLING
        select KVM_GENERIC_MMU_NOTIFIER
@@ -33,12 +32,11 @@ menuconfig KVM
        select HAVE_KVM_MSI
        select HAVE_KVM_IRQCHIP
        select HAVE_KVM_IRQ_ROUTING
-       select IRQ_BYPASS_MANAGER
        select HAVE_KVM_IRQ_BYPASS
+       select HAVE_KVM_READONLY_MEM
        select HAVE_KVM_VCPU_RUN_PID_CHANGE
        select SCHED_INFO
        select GUEST_PERF_EVENTS if PERF_EVENTS
-       select XARRAY_MULTI
        help
          Support hosting virtualized guest machines.
 
index 9dec8c419bf4028e11350f5bab3cc079cdaf67b7..879982b1cc739eaed31ee503a76c01e83d42a5af 100644 (file)
@@ -745,7 +745,7 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
                WARN_ON_ONCE(ret);
 
                /*
-                * The virtual offset behaviour is "interresting", as it
+                * The virtual offset behaviour is "interesting", as it
                 * always applies when HCR_EL2.E2H==0, but only when
                 * accessed from EL1 when HCR_EL2.E2H==1. So make sure we
                 * track E2H when putting the HV timer in "direct" mode.
index a25265aca4324e490298794f5418aaa85b64480c..3dee5490eea94dd08e4ff88cb79f41d5d60be139 100644 (file)
@@ -190,6 +190,10 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
        return VM_FAULT_SIGBUS;
 }
 
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+       kvm_sys_regs_create_debugfs(kvm);
+}
 
 /**
  * kvm_arch_destroy_vm - destroy the VM data structure
@@ -206,6 +210,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                pkvm_destroy_hyp_vm(kvm);
 
        kfree(kvm->arch.mpidr_data);
+       kfree(kvm->arch.sysreg_masks);
        kvm_destroy_vcpus(kvm);
 
        kvm_unshare_hyp(kvm, kvm + 1);
@@ -674,6 +679,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
                        return ret;
        }
 
+       /*
+        * This needs to happen after NV has imposed its own restrictions on
+        * the feature set
+        */
+       kvm_init_sysreg(vcpu);
+
        ret = kvm_timer_enable(vcpu);
        if (ret)
                return ret;
@@ -2591,7 +2602,8 @@ static __init int kvm_arm_init(void)
        } else if (in_hyp_mode) {
                kvm_info("VHE mode initialized successfully\n");
        } else {
-               kvm_info("Hyp mode initialized successfully\n");
+               char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n';
+               kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode);
        }
 
        /*
index 8725291cb00a185780ed1d795ff4ef0dcef31d36..ce8886122ed30c042df6b0d3786889b9f36bd1d4 100644 (file)
@@ -23,7 +23,7 @@
 
 static DEFINE_PER_CPU(u64, mdcr_el2);
 
-/**
+/*
  * save/restore_guest_debug_regs
  *
  * For some debug operations we need to tweak some guest registers. As
@@ -143,6 +143,7 @@ void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu)
 
 /**
  * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
+ * @vcpu:      the vcpu pointer
  */
 
 void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
index 431fd429932dfaa557ac0977fbbc3baa58eef99d..4697ba41b3a9c7723b862b35d43019f97d0945d2 100644 (file)
@@ -427,12 +427,14 @@ static const complex_condition_check ccc[] = {
  * [19:14]     bit number in the FGT register (6 bits)
  * [20]                trap polarity (1 bit)
  * [25:21]     FG filter (5 bits)
- * [62:26]     Unused (37 bits)
+ * [35:26]     Main SysReg table index (10 bits)
+ * [62:36]     Unused (27 bits)
  * [63]                RES0 - Must be zero, as lost on insertion in the xarray
  */
 #define TC_CGT_BITS    10
 #define TC_FGT_BITS    4
 #define TC_FGF_BITS    5
+#define TC_SRI_BITS    10
 
 union trap_config {
        u64     val;
@@ -442,7 +444,8 @@ union trap_config {
                unsigned long   bit:6;           /* Bit number */
                unsigned long   pol:1;           /* Polarity */
                unsigned long   fgf:TC_FGF_BITS; /* Fine Grained Filter */
-               unsigned long   unused:37;       /* Unused, should be zero */
+               unsigned long   sri:TC_SRI_BITS; /* SysReg Index */
+               unsigned long   unused:27;       /* Unused, should be zero */
                unsigned long   mbz:1;           /* Must Be Zero */
        };
 };
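
With the new sri field the descriptor still fits the documented u64 layout,
leaving 27 spare bits. A user-space mirror of the packing (bit-field layout
is compiler-dependent; this assumes GCC/Clang on little-endian, as the
kernel's own union does):

    #include <stdint.h>
    #include <stdio.h>

    union trap_cfg {
        uint64_t val;
        struct {
            uint64_t cgt:10;    /* [9:0]   coarse grained trap id */
            uint64_t fgt:4;     /* [13:10] fine grained trap id */
            uint64_t bit:6;     /* [19:14] bit number */
            uint64_t pol:1;     /* [20]    trap polarity */
            uint64_t fgf:5;     /* [25:21] fine grained filter */
            uint64_t sri:10;    /* [35:26] sysreg table index */
            uint64_t unused:27; /* [62:36] */
            uint64_t mbz:1;     /* [63]    must be zero */
        };
    };

    int main(void)
    {
        union trap_cfg tc = { .val = 0 };

        tc.sri = 42;
        printf("val=%#llx sri=%u\n",
               (unsigned long long)tc.val, (unsigned)tc.sri);
        return 0;
    }
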
@@ -1006,18 +1009,6 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
 
 static DEFINE_XARRAY(sr_forward_xa);
 
-enum fgt_group_id {
-       __NO_FGT_GROUP__,
-       HFGxTR_GROUP,
-       HDFGRTR_GROUP,
-       HDFGWTR_GROUP,
-       HFGITR_GROUP,
-       HAFGRTR_GROUP,
-
-       /* Must be last */
-       __NR_FGT_GROUP_IDS__
-};
-
 enum fg_filter_id {
        __NO_FGF__,
        HCRX_FGTnXS,
@@ -1757,6 +1748,28 @@ static __init void print_nv_trap_error(const struct encoding_to_trap_config *tc,
                err);
 }
 
+static u32 encoding_next(u32 encoding)
+{
+       u8 op0, op1, crn, crm, op2;
+
+       op0 = sys_reg_Op0(encoding);
+       op1 = sys_reg_Op1(encoding);
+       crn = sys_reg_CRn(encoding);
+       crm = sys_reg_CRm(encoding);
+       op2 = sys_reg_Op2(encoding);
+
+       if (op2 < Op2_mask)
+               return sys_reg(op0, op1, crn, crm, op2 + 1);
+       if (crm < CRm_mask)
+               return sys_reg(op0, op1, crn, crm + 1, 0);
+       if (crn < CRn_mask)
+               return sys_reg(op0, op1, crn + 1, 0, 0);
+       if (op1 < Op1_mask)
+               return sys_reg(op0, op1 + 1, 0, 0, 0);
+
+       return sys_reg(op0 + 1, 0, 0, 0, 0);
+}
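
encoding_next() steps through sysreg encodings like an odometer: op2
increments first, and carries ripple through crm, crn, op1 and finally op0.
A model with the architectural field widths (op2 and op1 are 3 bits, crm and
crn 4 bits):

    #include <stdio.h>

    #define OP2_MAX 7
    #define CRM_MAX 15
    #define CRN_MAX 15
    #define OP1_MAX 7

    struct enc { unsigned op0, op1, crn, crm, op2; };

    static struct enc enc_next(struct enc e)
    {
        if (e.op2 < OP2_MAX) { e.op2++; return e; }
        e.op2 = 0;
        if (e.crm < CRM_MAX) { e.crm++; return e; }
        e.crm = 0;
        if (e.crn < CRN_MAX) { e.crn++; return e; }
        e.crn = 0;
        if (e.op1 < OP1_MAX) { e.op1++; return e; }
        e.op1 = 0;
        e.op0++;
        return e;
    }

    int main(void)
    {
        struct enc e = { 3, 0, 0, 15, 7 };      /* last op2 and crm slot */

        e = enc_next(e);
        printf("3 0 0 15 7 -> %u %u %u %u %u\n",
               e.op0, e.op1, e.crn, e.crm, e.op2);  /* 3 0 1 0 0 */
        return 0;
    }
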
+
 int __init populate_nv_trap_config(void)
 {
        int ret = 0;
@@ -1775,23 +1788,18 @@ int __init populate_nv_trap_config(void)
                        ret = -EINVAL;
                }
 
-               if (cgt->encoding != cgt->end) {
-                       prev = xa_store_range(&sr_forward_xa,
-                                             cgt->encoding, cgt->end,
-                                             xa_mk_value(cgt->tc.val),
-                                             GFP_KERNEL);
-               } else {
-                       prev = xa_store(&sr_forward_xa, cgt->encoding,
+               for (u32 enc = cgt->encoding; enc <= cgt->end; enc = encoding_next(enc)) {
+                       prev = xa_store(&sr_forward_xa, enc,
                                        xa_mk_value(cgt->tc.val), GFP_KERNEL);
                        if (prev && !xa_is_err(prev)) {
                                ret = -EINVAL;
                                print_nv_trap_error(cgt, "Duplicate CGT", ret);
                        }
-               }
 
-               if (xa_is_err(prev)) {
-                       ret = xa_err(prev);
-                       print_nv_trap_error(cgt, "Failed CGT insertion", ret);
+                       if (xa_is_err(prev)) {
+                               ret = xa_err(prev);
+                               print_nv_trap_error(cgt, "Failed CGT insertion", ret);
+                       }
                }
        }
 
@@ -1804,6 +1812,7 @@ int __init populate_nv_trap_config(void)
        for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) {
                const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i];
                union trap_config tc;
+               void *prev;
 
                if (fgt->tc.fgt >= __NR_FGT_GROUP_IDS__) {
                        ret = -EINVAL;
@@ -1818,8 +1827,13 @@ int __init populate_nv_trap_config(void)
                }
 
                tc.val |= fgt->tc.val;
-               xa_store(&sr_forward_xa, fgt->encoding,
-                        xa_mk_value(tc.val), GFP_KERNEL);
+               prev = xa_store(&sr_forward_xa, fgt->encoding,
+                               xa_mk_value(tc.val), GFP_KERNEL);
+
+               if (xa_is_err(prev)) {
+                       ret = xa_err(prev);
+                       print_nv_trap_error(fgt, "Failed FGT insertion", ret);
+               }
        }
 
        kvm_info("nv: %ld fine grained trap handlers\n",
@@ -1845,6 +1859,38 @@ check_mcb:
        return ret;
 }
 
+int __init populate_sysreg_config(const struct sys_reg_desc *sr,
+                                 unsigned int idx)
+{
+       union trap_config tc;
+       u32 encoding;
+       void *ret;
+
+       /*
+        * 0 is a valid value for the index, but not for the storage.
+        * We'll store (idx+1), so check against an offset'd limit.
+        */
+       if (idx >= (BIT(TC_SRI_BITS) - 1)) {
+               kvm_err("sysreg %s (%d) out of range\n", sr->name, idx);
+               return -EINVAL;
+       }
+
+       encoding = sys_reg(sr->Op0, sr->Op1, sr->CRn, sr->CRm, sr->Op2);
+       tc = get_trap_config(encoding);
+
+       if (tc.sri) {
+               kvm_err("sysreg %s (%d) duplicate entry (%d)\n",
+                       sr->name, idx - 1, tc.sri);
+               return -EINVAL;
+       }
+
+       tc.sri = idx + 1;
+       ret = xa_store(&sr_forward_xa, encoding,
+                      xa_mk_value(tc.val), GFP_KERNEL);
+
+       return xa_err(ret);
+}
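
Storing idx + 1 keeps 0 free as the "no entry" marker inside the shared
trap_config word, which is why the limit check above is offset by one and why
triage_sysreg_trap() later subtracts one before handing the index back. The
convention in isolation:

    #include <stdio.h>

    #define NENTRIES 8

    static unsigned int table[NENTRIES];        /* 0 == empty */

    static void store_index(unsigned int key, unsigned int idx)
    {
        table[key] = idx + 1;                   /* keep 0 as the sentinel */
    }

    static int lookup_index(unsigned int key)
    {
        return table[key] ? (int)table[key] - 1 : -1;   /* -1 == not found */
    }

    int main(void)
    {
        store_index(3, 0);                      /* a valid index of 0... */
        printf("%d %d\n", lookup_index(3), lookup_index(4));    /* 0 -1 */
        return 0;
    }
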
+
 static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu,
                                         const struct trap_bits *tb)
 {
@@ -1892,20 +1938,64 @@ static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu,
        return __compute_trap_behaviour(vcpu, tc.cgt, b);
 }
 
-static bool check_fgt_bit(u64 val, const union trap_config tc)
+static u64 kvm_get_sysreg_res0(struct kvm *kvm, enum vcpu_sysreg sr)
 {
-       return ((val >> tc.bit) & 1) == tc.pol;
+       struct kvm_sysreg_masks *masks;
+
+       /* Only handle the VNCR-backed regs for now */
+       if (sr < __VNCR_START__)
+               return 0;
+
+       masks = kvm->arch.sysreg_masks;
+
+       return masks->mask[sr - __VNCR_START__].res0;
 }
 
-#define sanitised_sys_reg(vcpu, reg)                   \
-       ({                                              \
-               u64 __val;                              \
-               __val = __vcpu_sys_reg(vcpu, reg);      \
-               __val &= ~__ ## reg ## _RES0;           \
-               (__val);                                \
-       })
+static bool check_fgt_bit(struct kvm *kvm, bool is_read,
+                         u64 val, const union trap_config tc)
+{
+       enum vcpu_sysreg sr;
+
+       if (tc.pol)
+               return (val & BIT(tc.bit));
+
+       /*
+        * FGTs with negative polarities are an absolute nightmare, as
+        * we need to evaluate the bit in the light of the feature
+        * that defines it. WTF were they thinking?
+        *
+        * So let's check if the bit has been earmarked as RES0, as
+        * this indicates an unimplemented feature.
+        */
+       if (val & BIT(tc.bit))
+               return false;
+
+       switch ((enum fgt_group_id)tc.fgt) {
+       case HFGxTR_GROUP:
+               sr = is_read ? HFGRTR_EL2 : HFGWTR_EL2;
+               break;
+
+       case HDFGRTR_GROUP:
+               sr = is_read ? HDFGRTR_EL2 : HDFGWTR_EL2;
+               break;
+
+       case HAFGRTR_GROUP:
+               sr = HAFGRTR_EL2;
+               break;
+
+       case HFGITR_GROUP:
+               sr = HFGITR_EL2;
+               break;
+
+       default:
+               WARN_ONCE(1, "Unhandled FGT group");
+               return false;
+       }
+
+       return !(kvm_get_sysreg_res0(kvm, sr) & BIT(tc.bit));
+}
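
Positive-polarity bits trap when set; negative-polarity ("n") bits trap when
clear, but only if the feature that defines the bit is implemented at all,
hence the RES0 lookup. A truth-table model of the decision:

    #include <stdbool.h>
    #include <stdio.h>

    static bool fgt_traps(bool pol, bool bit_set, bool bit_is_res0)
    {
        if (pol)                        /* positive: bit set => trap */
            return bit_set;
        if (bit_set)                    /* negative: bit set => no trap */
            return false;
        /* bit clear: trap only if the defining feature exists (not RES0) */
        return !bit_is_res0;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               fgt_traps(true, true, false),    /* 1: positive, bit set */
               fgt_traps(false, false, false),  /* 1: negative, clear, implemented */
               fgt_traps(false, false, true));  /* 0: negative, clear, RES0 */
        return 0;
    }
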
 
-bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
+bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index)
 {
        union trap_config tc;
        enum trap_behaviour b;
@@ -1913,9 +2003,6 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
        u32 sysreg;
        u64 esr, val;
 
-       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
-               return false;
-
        esr = kvm_vcpu_get_esr(vcpu);
        sysreg = esr_sys64_to_sysreg(esr);
        is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
@@ -1926,13 +2013,27 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
         * A value of 0 for the whole entry means that we know nothing
         * for this sysreg, and that it cannot be re-injected into the
         * nested hypervisor. In this situation, let's cut it short.
-        *
-        * Note that ultimately, we could also make use of the xarray
-        * to store the index of the sysreg in the local descriptor
-        * array, avoiding another search... Hint, hint...
         */
        if (!tc.val)
-               return false;
+               goto local;
+
+       /*
+        * If a sysreg can be trapped using a FGT, first check whether we
+        * trap for the purpose of forbidding the feature. In that case,
+        * inject an UNDEF.
+        */
+       if (tc.fgt != __NO_FGT_GROUP__ &&
+           (vcpu->kvm->arch.fgu[tc.fgt] & BIT(tc.bit))) {
+               kvm_inject_undefined(vcpu);
+               return true;
+       }
+
+       /*
+        * If we're not nesting, immediately return to the caller, with the
+        * sysreg index, should we have it.
+        */
+       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+               goto local;
 
        switch ((enum fgt_group_id)tc.fgt) {
        case __NO_FGT_GROUP__:
@@ -1940,25 +2041,24 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
 
        case HFGxTR_GROUP:
                if (is_read)
-                       val = sanitised_sys_reg(vcpu, HFGRTR_EL2);
+                       val = __vcpu_sys_reg(vcpu, HFGRTR_EL2);
                else
-                       val = sanitised_sys_reg(vcpu, HFGWTR_EL2);
+                       val = __vcpu_sys_reg(vcpu, HFGWTR_EL2);
                break;
 
        case HDFGRTR_GROUP:
-       case HDFGWTR_GROUP:
                if (is_read)
-                       val = sanitised_sys_reg(vcpu, HDFGRTR_EL2);
+                       val = __vcpu_sys_reg(vcpu, HDFGRTR_EL2);
                else
-                       val = sanitised_sys_reg(vcpu, HDFGWTR_EL2);
+                       val = __vcpu_sys_reg(vcpu, HDFGWTR_EL2);
                break;
 
        case HAFGRTR_GROUP:
-               val = sanitised_sys_reg(vcpu, HAFGRTR_EL2);
+               val = __vcpu_sys_reg(vcpu, HAFGRTR_EL2);
                break;
 
        case HFGITR_GROUP:
-               val = sanitised_sys_reg(vcpu, HFGITR_EL2);
+               val = __vcpu_sys_reg(vcpu, HFGITR_EL2);
                switch (tc.fgf) {
                        u64 tmp;
 
@@ -1966,7 +2066,7 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
                        break;
 
                case HCRX_FGTnXS:
-                       tmp = sanitised_sys_reg(vcpu, HCRX_EL2);
+                       tmp = __vcpu_sys_reg(vcpu, HCRX_EL2);
                        if (tmp & HCRX_EL2_FGTnXS)
                                tc.fgt = __NO_FGT_GROUP__;
                }
@@ -1975,10 +2075,11 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
        case __NR_FGT_GROUP_IDS__:
                /* Something is really wrong, bail out */
                WARN_ONCE(1, "__NR_FGT_GROUP_IDS__");
-               return false;
+               goto local;
        }
 
-       if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(val, tc))
+       if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(vcpu->kvm, is_read,
+                                                       val, tc))
                goto inject;
 
        b = compute_trap_behaviour(vcpu, tc);
@@ -1987,6 +2088,26 @@ bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
            ((b & BEHAVE_FORWARD_WRITE) && !is_read))
                goto inject;
 
+local:
+       if (!tc.sri) {
+               struct sys_reg_params params;
+
+               params = esr_sys64_to_params(esr);
+
+               /*
+                * Check for the IMPDEF range, as per DDI0487 J.a,
+                * D18.3.2 Reserved encodings for IMPLEMENTATION
+                * DEFINED registers.
+                */
+               if (!(params.Op0 == 3 && (params.CRn & 0b1011) == 0b1011))
+                       print_sys_reg_msg(&params,
+                                         "Unsupported guest access at: %lx\n",
+                                         *vcpu_pc(vcpu));
+               kvm_inject_undefined(vcpu);
+               return true;
+       }
+
+       *sr_index = tc.sri - 1;
        return false;
 
 inject:
index e3e611e30e916af5687a60b12c0898dac86cef16..826307e19e3a5d8ba7ed7704a7457e66e19d948b 100644 (file)
@@ -117,7 +117,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 }
 
 /*
- * Called just before entering the guest once we are no longer preemptable
+ * Called just before entering the guest once we are no longer preemptible
  * and interrupts are disabled. If we have managed to run anything using
  * FP while we were preemptible (such as off the back of an interrupt),
  * then neither the host nor the guest own the FP hardware (and it was the
index aaf1d49397392b0b9d1b1284b65354db6f883dce..e2f762d959bb3325173ae05bd9ae93ad5c308cef 100644
@@ -711,6 +711,7 @@ static int copy_sve_reg_indices(const struct kvm_vcpu *vcpu,
 
 /**
  * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ * @vcpu: the vCPU pointer
  *
  * This is for all registers.
  */
@@ -729,6 +730,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 
 /**
  * kvm_arm_copy_reg_indices - get indices of all registers.
+ * @vcpu: the vCPU pointer
+ * @uindices: register list to copy
  *
  * We do core registers right here, then we append system regs.
  */
@@ -902,8 +905,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 
 /**
  * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
- * @kvm:       pointer to the KVM struct
- * @kvm_guest_debug: the ioctl data buffer
+ * @vcpu: the vCPU pointer
+ * @dbg: the ioctl data buffer
  *
  * This sets up and enables the VM for guest debugging. Userspace
  * passes in a control flag to enable different debug types and
@@ -1072,7 +1075,7 @@ int kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
                } else {
                        /*
                         * Only locking to serialise with a concurrent
-                        * set_pte_at() in the VMM but still overriding the
+                        * __set_ptes() in the VMM but still overriding the
                         * tags, hence ignoring the return value.
                         */
                        try_page_mte_tagging(page);
index f98cbe2626a1cb545da7a71ffd31fb11f0c6e8ae..8d9670e6615dc8767e3b1524e04fcd96ae6f63c8 100644
@@ -84,7 +84,7 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
 }
 
 /**
- * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * kvm_adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
  * @vcpu:      The VCPU pointer
  *
  * When exceptions occur while instructions are executed in Thumb IF-THEN
@@ -120,7 +120,7 @@ static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
 }
 
 /**
- * kvm_skip_instr - skip a trapped instruction and proceed to the next
+ * kvm_skip_instr32 - skip a trapped instruction and proceed to the next
  * @vcpu: The vcpu pointer
  */
 void kvm_skip_instr32(struct kvm_vcpu *vcpu)
index a038320cdb089074328a29419a5fa1c121a5052f..e3fcf8c4d5b4d4c847e0dd1522380463a6c92e44 100644
@@ -79,14 +79,48 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
                clr |= ~hfg & __ ## reg ## _nMASK;                      \
        } while(0)
 
-#define update_fgt_traps_cs(vcpu, reg, clr, set)                       \
+#define reg_to_fgt_group_id(reg)                                       \
+       ({                                                              \
+               enum fgt_group_id id;                                   \
+               switch(reg) {                                           \
+               case HFGRTR_EL2:                                        \
+               case HFGWTR_EL2:                                        \
+                       id = HFGxTR_GROUP;                              \
+                       break;                                          \
+               case HFGITR_EL2:                                        \
+                       id = HFGITR_GROUP;                              \
+                       break;                                          \
+               case HDFGRTR_EL2:                                       \
+               case HDFGWTR_EL2:                                       \
+                       id = HDFGRTR_GROUP;                             \
+                       break;                                          \
+               case HAFGRTR_EL2:                                       \
+                       id = HAFGRTR_GROUP;                             \
+                       break;                                          \
+               default:                                                \
+                       BUILD_BUG_ON(1);                                \
+               }                                                       \
+                                                                       \
+               id;                                                     \
+       })
+
+#define compute_undef_clr_set(vcpu, kvm, reg, clr, set)                        \
+       do {                                                            \
+               u64 hfg = kvm->arch.fgu[reg_to_fgt_group_id(reg)];      \
+               set |= hfg & __ ## reg ## _MASK;                        \
+               clr |= hfg & __ ## reg ## _nMASK;                       \
+       } while(0)
+
+#define update_fgt_traps_cs(hctxt, vcpu, kvm, reg, clr, set)           \
        do {                                                            \
-               struct kvm_cpu_context *hctxt =                         \
-                       &this_cpu_ptr(&kvm_host_data)->host_ctxt;       \
                u64 c = 0, s = 0;                                       \
                                                                        \
                ctxt_sys_reg(hctxt, reg) = read_sysreg_s(SYS_ ## reg);  \
-               compute_clr_set(vcpu, reg, c, s);                       \
+               if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))            \
+                       compute_clr_set(vcpu, reg, c, s);               \
+                                                                       \
+               compute_undef_clr_set(vcpu, kvm, reg, c, s);            \
+                                                                       \
                s |= set;                                               \
                c |= clr;                                               \
                if (c || s) {                                           \
@@ -97,8 +131,8 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
                }                                                       \
        } while(0)
 
-#define update_fgt_traps(vcpu, reg)            \
-       update_fgt_traps_cs(vcpu, reg, 0, 0)
+#define update_fgt_traps(hctxt, vcpu, kvm, reg)                \
+       update_fgt_traps_cs(hctxt, vcpu, kvm, reg, 0, 0)
 
 /*
  * Validate the fine grain trap masks.
@@ -122,8 +156,7 @@ static inline bool cpu_has_amu(void)
 static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
-       u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
-       u64 r_val, w_val;
+       struct kvm *kvm = kern_hyp_va(vcpu->kvm);
 
        CHECK_FGT_MASKS(HFGRTR_EL2);
        CHECK_FGT_MASKS(HFGWTR_EL2);
@@ -136,72 +169,45 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
        if (!cpus_have_final_cap(ARM64_HAS_FGT))
                return;
 
-       ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
-       ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
-
-       if (cpus_have_final_cap(ARM64_SME)) {
-               tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
-
-               r_clr |= tmp;
-               w_clr |= tmp;
-       }
-
-       /*
-        * Trap guest writes to TCR_EL1 to prevent it from enabling HA or HD.
-        */
-       if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
-               w_set |= HFGxTR_EL2_TCR_EL1_MASK;
-
-       if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
-               compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
-               compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
-       }
-
-       /* The default to trap everything not handled or supported in KVM. */
-       tmp = HFGxTR_EL2_nAMAIR2_EL1 | HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nS2POR_EL1 |
-             HFGxTR_EL2_nPOR_EL1 | HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nACCDATA_EL1;
-
-       r_val = __HFGRTR_EL2_nMASK & ~tmp;
-       r_val |= r_set;
-       r_val &= ~r_clr;
-
-       w_val = __HFGWTR_EL2_nMASK & ~tmp;
-       w_val |= w_set;
-       w_val &= ~w_clr;
-
-       write_sysreg_s(r_val, SYS_HFGRTR_EL2);
-       write_sysreg_s(w_val, SYS_HFGWTR_EL2);
-
-       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
-               return;
-
-       update_fgt_traps(vcpu, HFGITR_EL2);
-       update_fgt_traps(vcpu, HDFGRTR_EL2);
-       update_fgt_traps(vcpu, HDFGWTR_EL2);
+       update_fgt_traps(hctxt, vcpu, kvm, HFGRTR_EL2);
+       update_fgt_traps_cs(hctxt, vcpu, kvm, HFGWTR_EL2, 0,
+                           cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38) ?
+                           HFGxTR_EL2_TCR_EL1_MASK : 0);
+       update_fgt_traps(hctxt, vcpu, kvm, HFGITR_EL2);
+       update_fgt_traps(hctxt, vcpu, kvm, HDFGRTR_EL2);
+       update_fgt_traps(hctxt, vcpu, kvm, HDFGWTR_EL2);
 
        if (cpu_has_amu())
-               update_fgt_traps(vcpu, HAFGRTR_EL2);
+               update_fgt_traps(hctxt, vcpu, kvm, HAFGRTR_EL2);
 }
 
+#define __deactivate_fgt(hctxt, vcpu, kvm, reg)                                \
+       do {                                                            \
+               if ((vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) ||        \
+                   kvm->arch.fgu[reg_to_fgt_group_id(reg)])            \
+                       write_sysreg_s(ctxt_sys_reg(hctxt, reg),        \
+                                      SYS_ ## reg);                    \
+       } while(0)
+
 static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+       struct kvm *kvm = kern_hyp_va(vcpu->kvm);
 
        if (!cpus_have_final_cap(ARM64_HAS_FGT))
                return;
 
-       write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
-       write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
-
-       if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
-               return;
-
-       write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
-       write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
-       write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
+       __deactivate_fgt(hctxt, vcpu, kvm, HFGRTR_EL2);
+       if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
+               write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
+       else
+               __deactivate_fgt(hctxt, vcpu, kvm, HFGWTR_EL2);
+       __deactivate_fgt(hctxt, vcpu, kvm, HFGITR_EL2);
+       __deactivate_fgt(hctxt, vcpu, kvm, HDFGRTR_EL2);
+       __deactivate_fgt(hctxt, vcpu, kvm, HDFGWTR_EL2);
 
        if (cpu_has_amu())
-               write_sysreg_s(ctxt_sys_reg(hctxt, HAFGRTR_EL2), SYS_HAFGRTR_EL2);
+               __deactivate_fgt(hctxt, vcpu, kvm, HAFGRTR_EL2);
 }
 
 static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
@@ -230,7 +236,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
        write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 
        if (cpus_have_final_cap(ARM64_HAS_HCX)) {
-               u64 hcrx = HCRX_GUEST_FLAGS;
+               u64 hcrx = vcpu->arch.hcrx_el2;
                if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
                        u64 clr = 0, set = 0;
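
update_fgt_traps_cs() now folds three trap sources into a single (clear, set) pair: the guest hypervisor's own FGT state when running nested, the per-VM fine-grained-UNDEF (fgu) bits, and any caller-supplied overrides such as the Ampere erratum workaround. The composition follows the pattern visible in the removed open-coded version, assuming the usual FGT polarity convention (standalone sketch):

    #include <stdint.h>

    /* start from the "nothing trapped" nMASK default, then enable traps */
    static uint64_t fgt_compose(uint64_t nmask, uint64_t set, uint64_t clr)
    {
            uint64_t val = nmask;   /* default: no traps */

            val |= set;             /* positive-polarity trap bits */
            val &= ~clr;            /* negative-polarity (nXX) trap bits */
            return val;
    }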
 
index bb6b571ec627dede466c5fa1d05785a9c9f78764..4be6a7fa007082ac008c83422e9bb69b0f5da324 100644
@@ -27,16 +27,34 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
        ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
 }
 
-static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+static inline struct kvm_vcpu *ctxt_to_vcpu(struct kvm_cpu_context *ctxt)
 {
        struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
 
        if (!vcpu)
                vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
 
+       return vcpu;
+}
+
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+       struct kvm_vcpu *vcpu = ctxt_to_vcpu(ctxt);
+
        return kvm_has_mte(kern_hyp_va(vcpu->kvm));
 }
 
+static inline bool ctxt_has_s1pie(struct kvm_cpu_context *ctxt)
+{
+       struct kvm_vcpu *vcpu;
+
+       if (!cpus_have_final_cap(ARM64_HAS_S1PIE))
+               return false;
+
+       vcpu = ctxt_to_vcpu(ctxt);
+       return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1PIE, IMP);
+}
+
 static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
        ctxt_sys_reg(ctxt, SCTLR_EL1)   = read_sysreg_el1(SYS_SCTLR);
@@ -55,7 +73,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
        ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);
        ctxt_sys_reg(ctxt, AMAIR_EL1)   = read_sysreg_el1(SYS_AMAIR);
        ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL);
-       if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+       if (ctxt_has_s1pie(ctxt)) {
                ctxt_sys_reg(ctxt, PIR_EL1)     = read_sysreg_el1(SYS_PIR);
                ctxt_sys_reg(ctxt, PIRE0_EL1)   = read_sysreg_el1(SYS_PIRE0);
        }
@@ -131,7 +149,7 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
        write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR);
        write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR);
        write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL);
-       if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+       if (ctxt_has_s1pie(ctxt)) {
                write_sysreg_el1(ctxt_sys_reg(ctxt, PIR_EL1),   SYS_PIR);
                write_sysreg_el1(ctxt_sys_reg(ctxt, PIRE0_EL1), SYS_PIRE0);
        }
index 4558c02eb352d06be1c5861ea22a956c2c05954f..7746ea507b6f00ea0214310a5f52c840e3435b11 100644
@@ -31,8 +31,8 @@ static void __debug_save_spe(u64 *pmscr_el1)
                return;
 
        /* Yes; save the control register and disable data generation */
-       *pmscr_el1 = read_sysreg_s(SYS_PMSCR_EL1);
-       write_sysreg_s(0, SYS_PMSCR_EL1);
+       *pmscr_el1 = read_sysreg_el1(SYS_PMSCR);
+       write_sysreg_el1(0, SYS_PMSCR);
        isb();
 
        /* Now drain all buffered data to memory */
@@ -48,7 +48,7 @@ static void __debug_restore_spe(u64 pmscr_el1)
        isb();
 
        /* Re-enable data generation */
-       write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
+       write_sysreg_el1(pmscr_el1, SYS_PMSCR);
 }
 
 static void __debug_save_trace(u64 *trfcr_el1)
@@ -63,8 +63,8 @@ static void __debug_save_trace(u64 *trfcr_el1)
         * Since access to TRFCR_EL1 is trapped, the guest can't
         * modify the filtering set by the host.
         */
-       *trfcr_el1 = read_sysreg_s(SYS_TRFCR_EL1);
-       write_sysreg_s(0, SYS_TRFCR_EL1);
+       *trfcr_el1 = read_sysreg_el1(SYS_TRFCR);
+       write_sysreg_el1(0, SYS_TRFCR);
        isb();
        /* Drain the trace buffer to memory */
        tsb_csync();
@@ -76,7 +76,7 @@ static void __debug_restore_trace(u64 trfcr_el1)
                return;
 
        /* Restore trace filter controls */
-       write_sysreg_s(trfcr_el1, SYS_TRFCR_EL1);
+       write_sysreg_el1(trfcr_el1, SYS_TRFCR);
 }
 
 void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
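
Both the SPE and trace paths above follow the same save/stop/drain shape: snapshot the control register, write zero to stop new data generation, synchronise, then drain whatever is still buffered before the context switch. In standalone form (stub names hypothetical):

    #include <stdint.h>

    extern uint64_t read_ctl(void);
    extern void write_ctl(uint64_t v);
    extern void sync_barrier(void);    /* stands in for isb() */
    extern void drain_buffer(void);    /* stands in for psb/tsb csync */

    static uint64_t save_and_quiesce(void)
    {
            uint64_t saved = read_ctl();  /* kept for the restore path */

            write_ctl(0);                 /* stop new data generation */
            sync_barrier();               /* order the write vs the drain */
            drain_buffer();               /* flush buffered data to memory */
            return saved;
    }
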
index 7693a6757cd766b4fc166a6d1adf3f2b1a2ad1a3..135cfb294ee51f96c3578450f90daf8925b3a255 100644
@@ -110,7 +110,7 @@ SYM_FUNC_END(__host_enter)
  *                               u64 elr, u64 par);
  */
 SYM_FUNC_START(__hyp_do_panic)
-       /* Prepare and exit to the host's panic funciton. */
+       /* Prepare and exit to the host's panic function. */
        mov     lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
                      PSR_MODE_EL1h)
        msr     spsr_el2, lr
index b01a3d1078a8803f061496044066d749f9881f94..8850b591d775181a9f8545a21f05bb8fdc2b3a21 100644
@@ -155,7 +155,7 @@ int hyp_back_vmemmap(phys_addr_t back)
                start = hyp_memory[i].base;
                start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE);
                /*
-                * The begining of the hyp_vmemmap region for the current
+                * The beginning of the hyp_vmemmap region for the current
                 * memblock may already be backed by the page backing the end
                 * the previous region, so avoid mapping it twice.
                 */
@@ -408,7 +408,7 @@ static void *admit_host_page(void *arg)
        return pop_hyp_memcache(host_mc, hyp_phys_to_virt);
 }
 
-/* Refill our local memcache by poping pages from the one provided by the host. */
+/* Refill our local memcache by popping pages from the one provided by the host. */
 int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
                    struct kvm_hyp_memcache *host_mc)
 {
index ab9d05fcf98b23b992343d6a11a1daf9de806b3a..3fae5830f8d2c72f4ed4032cfd99fd285cbcb885 100644
@@ -717,15 +717,29 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
                                kvm_pte_t *ptep)
 {
-       bool device = prot & KVM_PGTABLE_PROT_DEVICE;
-       kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
-                           KVM_S2_MEMATTR(pgt, NORMAL);
+       kvm_pte_t attr;
        u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
 
+       switch (prot & (KVM_PGTABLE_PROT_DEVICE |
+                       KVM_PGTABLE_PROT_NORMAL_NC)) {
+       case KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC:
+               return -EINVAL;
+       case KVM_PGTABLE_PROT_DEVICE:
+               if (prot & KVM_PGTABLE_PROT_X)
+                       return -EINVAL;
+               attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE);
+               break;
+       case KVM_PGTABLE_PROT_NORMAL_NC:
+               if (prot & KVM_PGTABLE_PROT_X)
+                       return -EINVAL;
+               attr = KVM_S2_MEMATTR(pgt, NORMAL_NC);
+               break;
+       default:
+               attr = KVM_S2_MEMATTR(pgt, NORMAL);
+       }
+
        if (!(prot & KVM_PGTABLE_PROT_X))
                attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
-       else if (device)
-               return -EINVAL;
 
        if (prot & KVM_PGTABLE_PROT_R)
                attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
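
The new switch makes the stage-2 memory types mutually exclusive and keeps both device-type and Normal-NC memory non-executable: DEVICE combined with NORMAL_NC is rejected outright, and either of them combined with PROT_X is rejected too. The validation rules as a standalone sketch (flag values hypothetical):

    #include <errno.h>
    #include <stdint.h>

    #define PROT_DEVICE    (1u << 0)
    #define PROT_NORMAL_NC (1u << 1)
    #define PROT_X         (1u << 2)

    static int check_stage2_prot(uint32_t prot)
    {
            switch (prot & (PROT_DEVICE | PROT_NORMAL_NC)) {
            case PROT_DEVICE | PROT_NORMAL_NC:
                    return -EINVAL;          /* pick one memory type */
            case PROT_DEVICE:
            case PROT_NORMAL_NC:
                    if (prot & PROT_X)
                            return -EINVAL;  /* never executable */
                    break;
            default:
                    break;                   /* cacheable Normal memory */
            }
            return 0;
    }
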
index 8e1e0d5033b689243186e54b99f581a0fd50cd96..a8b9ea496706df291b479b12bf9e7ffed6aa204d 100644
@@ -95,7 +95,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
 }
 
 /**
- * __vcpu_put_switch_syregs - Restore host system registers to the physical CPU
+ * __vcpu_put_switch_sysregs - Restore host system registers to the physical CPU
  *
  * @vcpu: The VCPU pointer
  *
index 0bd93a5f21ce382506803d018ba7df7072ac0aa1..a640e839848e601f622c7eeacec7b3600608dbbd 100644
@@ -134,7 +134,7 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr)
        if (vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE) {
                fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE;
        } else {
-               /* no need to shuffle FS[4] into DFSR[10] as its 0 */
+               /* no need to shuffle FS[4] into DFSR[10] as it's 0 */
                fsr = DFSR_FSC_EXTABT_nLPAE;
        }
 
index cd9456a03e3860eec81bdd12da69dc2725cca807..18680771cdb0ea4c9ee2fcea29d1219189fda752 100644
@@ -305,7 +305,7 @@ static void invalidate_icache_guest_page(void *va, size_t size)
  * does.
  */
 /**
- * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * __unmap_stage2_range -- Clear stage2 page table entries to unmap a range
  * @mmu:   The KVM stage-2 MMU pointer
  * @start: The intermediate physical base address of the range to unmap
  * @size:  The size of the area to unmap
@@ -1381,7 +1381,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        int ret = 0;
        bool write_fault, writable, force_pte = false;
        bool exec_fault, mte_allowed;
-       bool device = false;
+       bool device = false, vfio_allow_any_uc = false;
        unsigned long mmu_seq;
        struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -1472,6 +1472,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        gfn = fault_ipa >> PAGE_SHIFT;
        mte_allowed = kvm_vma_mte_allowed(vma);
 
+       vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
+
        /* Don't use the VMA after the unlock -- it may have vanished */
        vma = NULL;
 
@@ -1557,10 +1559,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        if (exec_fault)
                prot |= KVM_PGTABLE_PROT_X;
 
-       if (device)
-               prot |= KVM_PGTABLE_PROT_DEVICE;
-       else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
+       if (device) {
+               if (vfio_allow_any_uc)
+                       prot |= KVM_PGTABLE_PROT_NORMAL_NC;
+               else
+                       prot |= KVM_PGTABLE_PROT_DEVICE;
+       } else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) {
                prot |= KVM_PGTABLE_PROT_X;
+       }
 
        /*
         * Under the premise of getting a FSC_PERM fault, we just need to relax
index ba95d044bc98fd397d2b59e48aeb92d56a39c656..ced30c90521a02713a4e0e06c1d7b1430df55ea6 100644
@@ -133,6 +133,13 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
                val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
                break;
 
+       case SYS_ID_AA64MMFR4_EL1:
+               val = 0;
+               if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
+                       val |= FIELD_PREP(NV_FTR(MMFR4, E2H0),
+                                         ID_AA64MMFR4_EL1_E2H0_NI_NV1);
+               break;
+
        case SYS_ID_AA64DFR0_EL1:
                /* Only limited support for PMU, Debug, BPs and WPs */
                val &= (NV_FTR(DFR0, PMUVer)    |
@@ -156,15 +163,280 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
 
        return val;
 }
+
+u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr)
+{
+       u64 v = ctxt_sys_reg(&vcpu->arch.ctxt, sr);
+       struct kvm_sysreg_masks *masks;
+
+       masks = vcpu->kvm->arch.sysreg_masks;
+
+       if (masks) {
+               sr -= __VNCR_START__;
+
+               v &= ~masks->mask[sr].res0;
+               v |= masks->mask[sr].res1;
+       }
+
+       return v;
+}
+
+static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1)
+{
+       int i = sr - __VNCR_START__;
+
+       kvm->arch.sysreg_masks->mask[i].res0 = res0;
+       kvm->arch.sysreg_masks->mask[i].res1 = res1;
+}
+
 int kvm_init_nv_sysregs(struct kvm *kvm)
 {
+       u64 res0, res1;
+       int ret = 0;
+
        mutex_lock(&kvm->arch.config_lock);
 
+       if (kvm->arch.sysreg_masks)
+               goto out;
+
+       kvm->arch.sysreg_masks = kzalloc(sizeof(*(kvm->arch.sysreg_masks)),
+                                        GFP_KERNEL);
+       if (!kvm->arch.sysreg_masks) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
        for (int i = 0; i < KVM_ARM_ID_REG_NUM; i++)
                kvm->arch.id_regs[i] = limit_nv_id_reg(IDX_IDREG(i),
                                                       kvm->arch.id_regs[i]);
 
+       /* VTTBR_EL2 */
+       res0 = res1 = 0;
+       if (!kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16))
+               res0 |= GENMASK(63, 56);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, CnP, IMP))
+               res0 |= VTTBR_CNP_BIT;
+       set_sysreg_masks(kvm, VTTBR_EL2, res0, res1);
+
+       /* VTCR_EL2 */
+       res0 = GENMASK(63, 32) | GENMASK(30, 20);
+       res1 = BIT(31);
+       set_sysreg_masks(kvm, VTCR_EL2, res0, res1);
+
+       /* VMPIDR_EL2 */
+       res0 = GENMASK(63, 40) | GENMASK(30, 24);
+       res1 = BIT(31);
+       set_sysreg_masks(kvm, VMPIDR_EL2, res0, res1);
+
+       /* HCR_EL2 */
+       res0 = BIT(48);
+       res1 = HCR_RW;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, TWED, IMP))
+               res0 |= GENMASK(63, 59);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, MTE, MTE2))
+               res0 |= (HCR_TID5 | HCR_DCT | HCR_ATA);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, EVT, TTLBxS))
+               res0 |= (HCR_TTLBIS | HCR_TTLBOS);
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, CSV2, CSV2_2) &&
+           !kvm_has_feat(kvm, ID_AA64PFR1_EL1, CSV2_frac, CSV2_1p2))
+               res0 |= HCR_ENSCXT;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, EVT, IMP))
+               res0 |= (HCR_TOCU | HCR_TICAB | HCR_TID4);
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1))
+               res0 |= HCR_AMVOFFEN;
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, V1P1))
+               res0 |= HCR_FIEN;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, FWB, IMP))
+               res0 |= HCR_FWB;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, NV2))
+               res0 |= HCR_NV2;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, IMP))
+               res0 |= (HCR_AT | HCR_NV1 | HCR_NV);
+       if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
+             __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
+               res0 |= (HCR_API | HCR_APK);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TME, IMP))
+               res0 |= BIT(39);
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP))
+               res0 |= (HCR_TEA | HCR_TERR);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
+               res0 |= HCR_TLOR;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, E2H0, IMP))
+               res1 |= HCR_E2H;
+       set_sysreg_masks(kvm, HCR_EL2, res0, res1);
+
+       /* HCRX_EL2 */
+       res0 = HCRX_EL2_RES0;
+       res1 = HCRX_EL2_RES1;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR3_EL1, PACM, TRIVIAL_IMP))
+               res0 |= HCRX_EL2_PACMEn;
+       if (!kvm_has_feat(kvm, ID_AA64PFR2_EL1, FPMR, IMP))
+               res0 |= HCRX_EL2_EnFPM;
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP))
+               res0 |= HCRX_EL2_GCSEn;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, SYSREG_128, IMP))
+               res0 |= HCRX_EL2_EnIDCP128;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, ADERR, DEV_ASYNC))
+               res0 |= (HCRX_EL2_EnSDERR | HCRX_EL2_EnSNERR);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, DF2, IMP))
+               res0 |= HCRX_EL2_TMEA;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, D128, IMP))
+               res0 |= HCRX_EL2_D128En;
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
+               res0 |= HCRX_EL2_PTTWI;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, SCTLRX, IMP))
+               res0 |= HCRX_EL2_SCTLR2En;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
+               res0 |= HCRX_EL2_TCR2En;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
+               res0 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, CMOW, IMP))
+               res0 |= HCRX_EL2_CMOW;
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, NMI, IMP))
+               res0 |= (HCRX_EL2_VFNMI | HCRX_EL2_VINMI | HCRX_EL2_TALLINT);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP) ||
+           !(read_sysreg_s(SYS_SMIDR_EL1) & SMIDR_EL1_SMPS))
+               res0 |= HCRX_EL2_SMPME;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP))
+               res0 |= (HCRX_EL2_FGTnXS | HCRX_EL2_FnXS);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_V))
+               res0 |= HCRX_EL2_EnASR;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64))
+               res0 |= HCRX_EL2_EnALS;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA))
+               res0 |= HCRX_EL2_EnAS0;
+       set_sysreg_masks(kvm, HCRX_EL2, res0, res1);
+
+       /* HFG[RW]TR_EL2 */
+       res0 = res1 = 0;
+       if (!(__vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
+             __vcpu_has_feature(&kvm->arch, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
+               res0 |= (HFGxTR_EL2_APDAKey | HFGxTR_EL2_APDBKey |
+                        HFGxTR_EL2_APGAKey | HFGxTR_EL2_APIAKey |
+                        HFGxTR_EL2_APIBKey);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
+               res0 |= (HFGxTR_EL2_LORC_EL1 | HFGxTR_EL2_LOREA_EL1 |
+                        HFGxTR_EL2_LORID_EL1 | HFGxTR_EL2_LORN_EL1 |
+                        HFGxTR_EL2_LORSA_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, CSV2, CSV2_2) &&
+           !kvm_has_feat(kvm, ID_AA64PFR1_EL1, CSV2_frac, CSV2_1p2))
+               res0 |= (HFGxTR_EL2_SCXTNUM_EL1 | HFGxTR_EL2_SCXTNUM_EL0);
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP))
+               res0 |= HFGxTR_EL2_ICC_IGRPENn_EL1;
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, RAS, IMP))
+               res0 |= (HFGxTR_EL2_ERRIDR_EL1 | HFGxTR_EL2_ERRSELR_EL1 |
+                        HFGxTR_EL2_ERXFR_EL1 | HFGxTR_EL2_ERXCTLR_EL1 |
+                        HFGxTR_EL2_ERXSTATUS_EL1 | HFGxTR_EL2_ERXMISCn_EL1 |
+                        HFGxTR_EL2_ERXPFGF_EL1 | HFGxTR_EL2_ERXPFGCTL_EL1 |
+                        HFGxTR_EL2_ERXPFGCDN_EL1 | HFGxTR_EL2_ERXADDR_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, LS64, LS64_ACCDATA))
+               res0 |= HFGxTR_EL2_nACCDATA_EL1;
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP))
+               res0 |= (HFGxTR_EL2_nGCS_EL0 | HFGxTR_EL2_nGCS_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, SME, IMP))
+               res0 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
+               res0 |= HFGxTR_EL2_nRCWMASK_EL1;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
+               res0 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+               res0 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
+               res0 |= HFGxTR_EL2_nS2POR_EL1;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP))
+               res0 |= (HFGxTR_EL2_nMAIR2_EL1 | HFGxTR_EL2_nAMAIR2_EL1);
+       set_sysreg_masks(kvm, HFGRTR_EL2, res0 | __HFGRTR_EL2_RES0, res1);
+       set_sysreg_masks(kvm, HFGWTR_EL2, res0 | __HFGWTR_EL2_RES0, res1);
+
+       /* HDFG[RW]TR_EL2 */
+       res0 = res1 = 0;
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, DoubleLock, IMP))
+               res0 |= HDFGRTR_EL2_OSDLR_EL1;
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP))
+               res0 |= (HDFGRTR_EL2_PMEVCNTRn_EL0 | HDFGRTR_EL2_PMEVTYPERn_EL0 |
+                        HDFGRTR_EL2_PMCCFILTR_EL0 | HDFGRTR_EL2_PMCCNTR_EL0 |
+                        HDFGRTR_EL2_PMCNTEN | HDFGRTR_EL2_PMINTEN |
+                        HDFGRTR_EL2_PMOVS | HDFGRTR_EL2_PMSELR_EL0 |
+                        HDFGRTR_EL2_PMMIR_EL1 | HDFGRTR_EL2_PMUSERENR_EL0 |
+                        HDFGRTR_EL2_PMCEIDn_EL0);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, IMP))
+               res0 |= (HDFGRTR_EL2_PMBLIMITR_EL1 | HDFGRTR_EL2_PMBPTR_EL1 |
+                        HDFGRTR_EL2_PMBSR_EL1 | HDFGRTR_EL2_PMSCR_EL1 |
+                        HDFGRTR_EL2_PMSEVFR_EL1 | HDFGRTR_EL2_PMSFCR_EL1 |
+                        HDFGRTR_EL2_PMSICR_EL1 | HDFGRTR_EL2_PMSIDR_EL1 |
+                        HDFGRTR_EL2_PMSIRR_EL1 | HDFGRTR_EL2_PMSLATFR_EL1 |
+                        HDFGRTR_EL2_PMBIDR_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceVer, IMP))
+               res0 |= (HDFGRTR_EL2_TRC | HDFGRTR_EL2_TRCAUTHSTATUS |
+                        HDFGRTR_EL2_TRCAUXCTLR | HDFGRTR_EL2_TRCCLAIM |
+                        HDFGRTR_EL2_TRCCNTVRn | HDFGRTR_EL2_TRCID |
+                        HDFGRTR_EL2_TRCIMSPECn | HDFGRTR_EL2_TRCOSLSR |
+                        HDFGRTR_EL2_TRCPRGCTLR | HDFGRTR_EL2_TRCSEQSTR |
+                        HDFGRTR_EL2_TRCSSCSRn | HDFGRTR_EL2_TRCSTATR |
+                        HDFGRTR_EL2_TRCVICTLR);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceBuffer, IMP))
+               res0 |= (HDFGRTR_EL2_TRBBASER_EL1 | HDFGRTR_EL2_TRBIDR_EL1 |
+                        HDFGRTR_EL2_TRBLIMITR_EL1 | HDFGRTR_EL2_TRBMAR_EL1 |
+                        HDFGRTR_EL2_TRBPTR_EL1 | HDFGRTR_EL2_TRBSR_EL1 |
+                        HDFGRTR_EL2_TRBTRG_EL1);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP))
+               res0 |= (HDFGRTR_EL2_nBRBIDR | HDFGRTR_EL2_nBRBCTL |
+                        HDFGRTR_EL2_nBRBDATA);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMSVer, V1P2))
+               res0 |= HDFGRTR_EL2_nPMSNEVFR_EL1;
+       set_sysreg_masks(kvm, HDFGRTR_EL2, res0 | HDFGRTR_EL2_RES0, res1);
+
+       /* Reuse the bits from the read-side and add the write-specific stuff */
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, PMUVer, IMP))
+               res0 |= (HDFGWTR_EL2_PMCR_EL0 | HDFGWTR_EL2_PMSWINC_EL0);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceVer, IMP))
+               res0 |= HDFGWTR_EL2_TRCOSLAR;
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, TraceFilt, IMP))
+               res0 |= HDFGWTR_EL2_TRFCR_EL1;
+       set_sysreg_masks(kvm, HDFGWTR_EL2, res0 | HDFGWTR_EL2_RES0, res1);
+
+       /* HFGITR_EL2 */
+       res0 = HFGITR_EL2_RES0;
+       res1 = HFGITR_EL2_RES1;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, DPB, DPB2))
+               res0 |= HFGITR_EL2_DCCVADP;
+       if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2))
+               res0 |= (HFGITR_EL2_ATS1E1RP | HFGITR_EL2_ATS1E1WP);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+               res0 |= (HFGITR_EL2_TLBIRVAALE1OS | HFGITR_EL2_TLBIRVALE1OS |
+                        HFGITR_EL2_TLBIRVAAE1OS | HFGITR_EL2_TLBIRVAE1OS |
+                        HFGITR_EL2_TLBIVAALE1OS | HFGITR_EL2_TLBIVALE1OS |
+                        HFGITR_EL2_TLBIVAAE1OS | HFGITR_EL2_TLBIASIDE1OS |
+                        HFGITR_EL2_TLBIVAE1OS | HFGITR_EL2_TLBIVMALLE1OS);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
+               res0 |= (HFGITR_EL2_TLBIRVAALE1 | HFGITR_EL2_TLBIRVALE1 |
+                        HFGITR_EL2_TLBIRVAAE1 | HFGITR_EL2_TLBIRVAE1 |
+                        HFGITR_EL2_TLBIRVAALE1IS | HFGITR_EL2_TLBIRVALE1IS |
+                        HFGITR_EL2_TLBIRVAAE1IS | HFGITR_EL2_TLBIRVAE1IS |
+                        HFGITR_EL2_TLBIRVAALE1OS | HFGITR_EL2_TLBIRVALE1OS |
+                        HFGITR_EL2_TLBIRVAAE1OS | HFGITR_EL2_TLBIRVAE1OS);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, SPECRES, IMP))
+               res0 |= (HFGITR_EL2_CFPRCTX | HFGITR_EL2_DVPRCTX |
+                        HFGITR_EL2_CPPRCTX);
+       if (!kvm_has_feat(kvm, ID_AA64DFR0_EL1, BRBE, IMP))
+               res0 |= (HFGITR_EL2_nBRBINJ | HFGITR_EL2_nBRBIALL);
+       if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, GCS, IMP))
+               res0 |= (HFGITR_EL2_nGCSPUSHM_EL1 | HFGITR_EL2_nGCSSTR_EL1 |
+                        HFGITR_EL2_nGCSEPP);
+       if (!kvm_has_feat(kvm, ID_AA64ISAR1_EL1, SPECRES, COSP_RCTX))
+               res0 |= HFGITR_EL2_COSPRCTX;
+       if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, ATS1A, IMP))
+               res0 |= HFGITR_EL2_ATS1E1A;
+       set_sysreg_masks(kvm, HFGITR_EL2, res0, res1);
+
+       /* HAFGRTR_EL2 - not a lot to see here */
+       res0 = HAFGRTR_EL2_RES0;
+       res1 = HAFGRTR_EL2_RES1;
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1))
+               res0 |= ~(res0 | res1);
+       set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1);
+out:
        mutex_unlock(&kvm->arch.config_lock);
 
-       return 0;
+       return ret;
 }
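
Every mask pair installed by set_sysreg_masks() is consumed the way kvm_vcpu_sanitise_vncr_reg() does it above: RES0 bits are forced to zero, RES1 bits are forced to one, and everything else passes through untouched. As a standalone sketch:

    #include <stdint.h>

    struct reg_mask {
            uint64_t res0;   /* bits that must read as zero */
            uint64_t res1;   /* bits that must read as one */
    };

    static uint64_t sanitise_reg(uint64_t v, const struct reg_mask *m)
    {
            v &= ~m->res0;
            v |= m->res1;
            return v;
    }
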
index 3d9467ff73bcbff82f8f22f51dd0784a83cecede..a35ce10e0a9f3efe554cbf4b1d11fc2300ed54e4 100644
@@ -64,12 +64,11 @@ u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
 {
        u64 mask = ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMU_EXCLUDE_EL0 |
                   kvm_pmu_event_mask(kvm);
-       u64 pfr0 = IDREG(kvm, SYS_ID_AA64PFR0_EL1);
 
-       if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL2, pfr0))
+       if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL2, IMP))
                mask |= ARMV8_PMU_INCLUDE_EL2;
 
-       if (SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr0))
+       if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP))
                mask |= ARMV8_PMU_EXCLUDE_NS_EL0 |
                        ARMV8_PMU_EXCLUDE_NS_EL1 |
                        ARMV8_PMU_EXCLUDE_EL3;
@@ -83,8 +82,10 @@ u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
  */
 static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
 {
+       struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
+
        return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
-               kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
+               kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5));
 }
 
 static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
@@ -419,7 +420,7 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
        kvm_pmu_update_state(vcpu);
 }
 
-/**
+/*
  * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
  * to the event.
  * This is why we need a callback to do it once outside of the NMI context.
@@ -490,7 +491,7 @@ static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
        return val;
 }
 
-/**
+/*
  * When the perf event overflows, set the overflow status and inform the vcpu.
  */
 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
@@ -556,7 +557,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
                return;
 
        /* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
-       if (!kvm_pmu_is_3p5(vcpu))
+       if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
                val &= ~ARMV8_PMU_PMCR_LP;
 
        /* The reset bits don't indicate any state, and shouldn't be saved. */
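
The kvm_has_feat() conversions replace open-coded SYS_FIELD_GET() comparisons with a named check against a minimum feature level. Conceptually it reduces to the following (simplified sketch; the real macro also handles signed ID register fields):

    #include <stdbool.h>
    #include <stdint.h>

    /* does an ID register field meet a minimum feature value? */
    static bool field_at_least(uint64_t idreg, uint64_t mask,
                               unsigned int shift, uint64_t min)
    {
            return ((idreg & mask) >> shift) >= min;
    }
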
index 30253bd19917f46640ecffc914d91a7d1102dc81..c9f4f387155f8cc0097f03a8366105682a81f1a0 100644
@@ -12,6 +12,7 @@
 #include <linux/bitfield.h>
 #include <linux/bsearch.h>
 #include <linux/cacheinfo.h>
+#include <linux/debugfs.h>
 #include <linux/kvm_host.h>
 #include <linux/mm.h>
 #include <linux/printk.h>
@@ -505,10 +506,9 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
                          struct sys_reg_params *p,
                          const struct sys_reg_desc *r)
 {
-       u64 val = IDREG(vcpu->kvm, SYS_ID_AA64MMFR1_EL1);
        u32 sr = reg_to_encoding(r);
 
-       if (!(val & (0xfUL << ID_AA64MMFR1_EL1_LO_SHIFT))) {
+       if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, LO, IMP)) {
                kvm_inject_undefined(vcpu);
                return false;
        }
@@ -1685,7 +1685,8 @@ static u64 read_sanitised_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
        u64 __f_val = FIELD_GET(reg##_##field##_MASK, val);                    \
        (val) &= ~reg##_##field##_MASK;                                        \
        (val) |= FIELD_PREP(reg##_##field##_MASK,                              \
-                       min(__f_val, (u64)reg##_##field##_##limit));           \
+                           min(__f_val,                                       \
+                               (u64)SYS_FIELD_VALUE(reg, field, limit)));     \
        (val);                                                                 \
 })
 
@@ -2174,6 +2175,16 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
        return true;
 }
 
+static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+       u64 val = r->val;
+
+       if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
+               val |= HCR_E2H;
+
+       return __vcpu_sys_reg(vcpu, r->reg) = val;
+}
+
 /*
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -2186,16 +2197,6 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
  * guest...
  */
 static const struct sys_reg_desc sys_reg_descs[] = {
-       { SYS_DESC(SYS_DC_ISW), access_dcsw },
-       { SYS_DESC(SYS_DC_IGSW), access_dcgsw },
-       { SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
-       { SYS_DESC(SYS_DC_CSW), access_dcsw },
-       { SYS_DESC(SYS_DC_CGSW), access_dcgsw },
-       { SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
-       { SYS_DESC(SYS_DC_CISW), access_dcsw },
-       { SYS_DESC(SYS_DC_CIGSW), access_dcgsw },
-       { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw },
-
        DBG_BCR_BVR_WCR_WVR_EL1(0),
        DBG_BCR_BVR_WCR_WVR_EL1(1),
        { SYS_DESC(SYS_MDCCINT_EL1), trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
@@ -2349,7 +2350,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
                                        ID_AA64MMFR2_EL1_NV |
                                        ID_AA64MMFR2_EL1_CCIDX)),
        ID_SANITISED(ID_AA64MMFR3_EL1),
-       ID_UNALLOCATED(7,4),
+       ID_SANITISED(ID_AA64MMFR4_EL1),
        ID_UNALLOCATED(7,5),
        ID_UNALLOCATED(7,6),
        ID_UNALLOCATED(7,7),
@@ -2665,7 +2666,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        EL2_REG_VNCR(VMPIDR_EL2, reset_unknown, 0),
        EL2_REG(SCTLR_EL2, access_rw, reset_val, SCTLR_EL2_RES1),
        EL2_REG(ACTLR_EL2, access_rw, reset_val, 0),
-       EL2_REG_VNCR(HCR_EL2, reset_val, 0),
+       EL2_REG_VNCR(HCR_EL2, reset_hcr, 0),
        EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
        EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
        EL2_REG_VNCR(HSTR_EL2, reset_val, 0),
@@ -2727,6 +2728,18 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        EL2_REG(SP_EL2, NULL, reset_unknown, 0),
 };
 
+static struct sys_reg_desc sys_insn_descs[] = {
+       { SYS_DESC(SYS_DC_ISW), access_dcsw },
+       { SYS_DESC(SYS_DC_IGSW), access_dcgsw },
+       { SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
+       { SYS_DESC(SYS_DC_CSW), access_dcsw },
+       { SYS_DESC(SYS_DC_CGSW), access_dcgsw },
+       { SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
+       { SYS_DESC(SYS_DC_CISW), access_dcsw },
+       { SYS_DESC(SYS_DC_CIGSW), access_dcgsw },
+       { SYS_DESC(SYS_DC_CIGDSW), access_dcgsw },
+};
+
 static const struct sys_reg_desc *first_idreg;
 
 static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
@@ -2737,8 +2750,7 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
                return ignore_write(vcpu, p);
        } else {
                u64 dfr = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1);
-               u64 pfr = IDREG(vcpu->kvm, SYS_ID_AA64PFR0_EL1);
-               u32 el3 = !!SYS_FIELD_GET(ID_AA64PFR0_EL1, EL3, pfr);
+               u32 el3 = kvm_has_feat(vcpu->kvm, ID_AA64PFR0_EL1, EL3, IMP);
 
                p->regval = ((SYS_FIELD_GET(ID_AA64DFR0_EL1, WRPs, dfr) << 28) |
                             (SYS_FIELD_GET(ID_AA64DFR0_EL1, BRPs, dfr) << 24) |
@@ -3159,7 +3171,8 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
 /**
  * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
- * @run:  The kvm_run struct
+ * @global: &struct sys_reg_desc
+ * @nr_global: size of the @global array
  */
 static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
                            const struct sys_reg_desc *global,
@@ -3326,7 +3339,9 @@ static int kvm_emulate_cp15_id_reg(struct kvm_vcpu *vcpu,
 /**
  * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
  * @vcpu: The VCPU pointer
- * @run:  The kvm_run struct
+ * @params: &struct sys_reg_params
+ * @global: &struct sys_reg_desc
+ * @nr_global: size of the @global array
  */
 static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
                            struct sys_reg_params *params,
@@ -3384,12 +3399,6 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu)
        return kvm_handle_cp_32(vcpu, &params, cp14_regs, ARRAY_SIZE(cp14_regs));
 }
 
-static bool is_imp_def_sys_reg(struct sys_reg_params *params)
-{
-       // See ARM DDI 0487E.a, section D12.3.2
-       return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011;
-}
-
 /**
  * emulate_sys_reg - Emulate a guest access to an AArch64 system register
  * @vcpu: The VCPU pointer
@@ -3398,26 +3407,106 @@ static bool is_imp_def_sys_reg(struct sys_reg_params *params)
  * Return: true if the system register access was successful, false otherwise.
  */
 static bool emulate_sys_reg(struct kvm_vcpu *vcpu,
-                          struct sys_reg_params *params)
+                           struct sys_reg_params *params)
 {
        const struct sys_reg_desc *r;
 
        r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
-
        if (likely(r)) {
                perform_access(vcpu, params, r);
                return true;
        }
 
-       if (is_imp_def_sys_reg(params)) {
-               kvm_inject_undefined(vcpu);
+       print_sys_reg_msg(params,
+                         "Unsupported guest sys_reg access at: %lx [%08lx]\n",
+                         *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
+       kvm_inject_undefined(vcpu);
+
+       return false;
+}
+
+static void *idregs_debug_start(struct seq_file *s, loff_t *pos)
+{
+       struct kvm *kvm = s->private;
+       u8 *iter;
+
+       mutex_lock(&kvm->arch.config_lock);
+
+       iter = &kvm->arch.idreg_debugfs_iter;
+       if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags) &&
+           *iter == (u8)~0) {
+               *iter = *pos;
+               if (*iter >= KVM_ARM_ID_REG_NUM)
+                       iter = NULL;
        } else {
-               print_sys_reg_msg(params,
-                                 "Unsupported guest sys_reg access at: %lx [%08lx]\n",
-                                 *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
-               kvm_inject_undefined(vcpu);
+               iter = ERR_PTR(-EBUSY);
        }
-       return false;
+
+       mutex_unlock(&kvm->arch.config_lock);
+
+       return iter;
+}
+
+static void *idregs_debug_next(struct seq_file *s, void *v, loff_t *pos)
+{
+       struct kvm *kvm = s->private;
+
+       (*pos)++;
+
+       if ((kvm->arch.idreg_debugfs_iter + 1) < KVM_ARM_ID_REG_NUM) {
+               kvm->arch.idreg_debugfs_iter++;
+
+               return &kvm->arch.idreg_debugfs_iter;
+       }
+
+       return NULL;
+}
+
+static void idregs_debug_stop(struct seq_file *s, void *v)
+{
+       struct kvm *kvm = s->private;
+
+       if (IS_ERR(v))
+               return;
+
+       mutex_lock(&kvm->arch.config_lock);
+
+       kvm->arch.idreg_debugfs_iter = ~0;
+
+       mutex_unlock(&kvm->arch.config_lock);
+}
+
+static int idregs_debug_show(struct seq_file *s, void *v)
+{
+       struct kvm *kvm = s->private;
+       const struct sys_reg_desc *desc;
+
+       desc = first_idreg + kvm->arch.idreg_debugfs_iter;
+
+       if (!desc->name)
+               return 0;
+
+       seq_printf(s, "%20s:\t%016llx\n",
+                  desc->name, IDREG(kvm, IDX_IDREG(kvm->arch.idreg_debugfs_iter)));
+
+       return 0;
+}
+
+static const struct seq_operations idregs_debug_sops = {
+       .start  = idregs_debug_start,
+       .next   = idregs_debug_next,
+       .stop   = idregs_debug_stop,
+       .show   = idregs_debug_show,
+};
+
+DEFINE_SEQ_ATTRIBUTE(idregs_debug);
+
+void kvm_sys_regs_create_debugfs(struct kvm *kvm)
+{
+       kvm->arch.idreg_debugfs_iter = ~0;
+
+       debugfs_create_file("idregs", 0444, kvm->debugfs_dentry, kvm,
+                           &idregs_debug_fops);
 }
 
 static void kvm_reset_id_regs(struct kvm_vcpu *vcpu)
@@ -3467,28 +3556,39 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
 }
 
 /**
- * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
+ * kvm_handle_sys_reg -- handles a system instruction or mrs/msr instruction
+ *                      trap during guest execution
  * @vcpu: The VCPU pointer
  */
 int kvm_handle_sys_reg(struct kvm_vcpu *vcpu)
 {
+       const struct sys_reg_desc *desc = NULL;
        struct sys_reg_params params;
        unsigned long esr = kvm_vcpu_get_esr(vcpu);
        int Rt = kvm_vcpu_sys_get_rt(vcpu);
+       int sr_idx;
 
        trace_kvm_handle_sys_reg(esr);
 
-       if (__check_nv_sr_forward(vcpu))
+       if (triage_sysreg_trap(vcpu, &sr_idx))
                return 1;
 
        params = esr_sys64_to_params(esr);
        params.regval = vcpu_get_reg(vcpu, Rt);
 
-       if (!emulate_sys_reg(vcpu, &params))
-               return 1;
+       /* System registers have Op0=={2,3}, as per DDI0487 J.a C5.1.2 */
+       if (params.Op0 == 2 || params.Op0 == 3)
+               desc = &sys_reg_descs[sr_idx];
+       else
+               desc = &sys_insn_descs[sr_idx];
+
+       perform_access(vcpu, &params, desc);
 
-       if (!params.is_write)
+       /* Read from system register? */
+       if (!params.is_write &&
+           (params.Op0 == 2 || params.Op0 == 3))
                vcpu_set_reg(vcpu, Rt, params.regval);
+
        return 1;
 }
 
@@ -3930,11 +4030,84 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *
        return 0;
 }
 
+void kvm_init_sysreg(struct kvm_vcpu *vcpu)
+{
+       struct kvm *kvm = vcpu->kvm;
+
+       mutex_lock(&kvm->arch.config_lock);
+
+       /*
+        * In the absence of FGT, we cannot independently trap TLBI
+        * Range instructions. This isn't great, but trapping all
+        * TLBIs would be far worse. Live with it...
+        */
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+               vcpu->arch.hcr_el2 |= HCR_TTLBOS;
+
+       if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+               vcpu->arch.hcrx_el2 = HCRX_GUEST_FLAGS;
+
+               if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
+                       vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
+       }
+
+       if (test_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags))
+               goto out;
+
+       kvm->arch.fgu[HFGxTR_GROUP] = (HFGxTR_EL2_nAMAIR2_EL1           |
+                                      HFGxTR_EL2_nMAIR2_EL1            |
+                                      HFGxTR_EL2_nS2POR_EL1            |
+                                      HFGxTR_EL2_nPOR_EL1              |
+                                      HFGxTR_EL2_nPOR_EL0              |
+                                      HFGxTR_EL2_nACCDATA_EL1          |
+                                      HFGxTR_EL2_nSMPRI_EL1_MASK       |
+                                      HFGxTR_EL2_nTPIDR2_EL0_MASK);
+
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+               kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_TLBIRVAALE1OS|
+                                               HFGITR_EL2_TLBIRVALE1OS |
+                                               HFGITR_EL2_TLBIRVAAE1OS |
+                                               HFGITR_EL2_TLBIRVAE1OS  |
+                                               HFGITR_EL2_TLBIVAALE1OS |
+                                               HFGITR_EL2_TLBIVALE1OS  |
+                                               HFGITR_EL2_TLBIVAAE1OS  |
+                                               HFGITR_EL2_TLBIASIDE1OS |
+                                               HFGITR_EL2_TLBIVAE1OS   |
+                                               HFGITR_EL2_TLBIVMALLE1OS);
+
+       if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
+               kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_TLBIRVAALE1  |
+                                               HFGITR_EL2_TLBIRVALE1   |
+                                               HFGITR_EL2_TLBIRVAAE1   |
+                                               HFGITR_EL2_TLBIRVAE1    |
+                                               HFGITR_EL2_TLBIRVAALE1IS|
+                                               HFGITR_EL2_TLBIRVALE1IS |
+                                               HFGITR_EL2_TLBIRVAAE1IS |
+                                               HFGITR_EL2_TLBIRVAE1IS  |
+                                               HFGITR_EL2_TLBIRVAALE1OS|
+                                               HFGITR_EL2_TLBIRVALE1OS |
+                                               HFGITR_EL2_TLBIRVAAE1OS |
+                                               HFGITR_EL2_TLBIRVAE1OS);
+
+       if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
+               kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 |
+                                               HFGxTR_EL2_nPIR_EL1);
+
+       if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, IMP))
+               kvm->arch.fgu[HAFGRTR_GROUP] |= ~(HAFGRTR_EL2_RES0 |
+                                                 HAFGRTR_EL2_RES1);
+
+       set_bit(KVM_ARCH_FLAG_FGU_INITIALIZED, &kvm->arch.flags);
+out:
+       mutex_unlock(&kvm->arch.config_lock);
+}
+
 int __init kvm_sys_reg_table_init(void)
 {
        struct sys_reg_params params;
        bool valid = true;
        unsigned int i;
+       int ret = 0;
 
        /* Make sure tables are unique and in order. */
        valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
@@ -3943,6 +4116,7 @@ int __init kvm_sys_reg_table_init(void)
        valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true);
        valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true);
        valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false);
+       valid &= check_sysreg_table(sys_insn_descs, ARRAY_SIZE(sys_insn_descs), false);
 
        if (!valid)
                return -EINVAL;
@@ -3957,8 +4131,13 @@ int __init kvm_sys_reg_table_init(void)
        if (!first_idreg)
                return -EINVAL;
 
-       if (kvm_get_mode() == KVM_MODE_NV)
-               return populate_nv_trap_config();
+       ret = populate_nv_trap_config();
 
-       return 0;
+       for (i = 0; !ret && i < ARRAY_SIZE(sys_reg_descs); i++)
+               ret = populate_sysreg_config(sys_reg_descs + i, i);
+
+       for (i = 0; !ret && i < ARRAY_SIZE(sys_insn_descs); i++)
+               ret = populate_sysreg_config(sys_insn_descs + i, i);
+
+       return ret;
 }
index c65c129b35001049ed4b14c02ec6fcfb8e37b1fa..997eea21ba2ab3d7b08ae8f6178d208b74453606 100644
@@ -233,6 +233,8 @@ int kvm_sys_reg_get_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
 int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
                         const struct sys_reg_desc table[], unsigned int num);
 
+bool triage_sysreg_trap(struct kvm_vcpu *vcpu, int *sr_index);
+
 #define AA32(_x)       .aarch32_map = AA32_##_x
 #define Op0(_x)        .Op0 = _x
 #define Op1(_x)        .Op1 = _x
index 85606a531dc3987fc04c18a179c577efd2113ff8..389025ce7749bc32e889f80f2a395552831b0f77 100644
@@ -149,7 +149,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
        seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
        seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
        if (v3)
-               seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count);
+               seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count));
        seq_printf(s, "enabled:\t%d\n", dist->enabled);
        seq_printf(s, "\n");
 
index e949e1d0fd9f7ba80d3a1e5d6cda02354dfe0403..f20941f83a077cbc2be093b487e4afe5a29f30ae 100644
@@ -53,9 +53,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
 
-       INIT_LIST_HEAD(&dist->lpi_list_head);
        INIT_LIST_HEAD(&dist->lpi_translation_cache);
        raw_spin_lock_init(&dist->lpi_list_lock);
+       xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
 }
 
 /* CREATION */
@@ -309,7 +309,7 @@ int vgic_init(struct kvm *kvm)
                vgic_lpi_translation_cache_init(kvm);
 
        /*
-        * If we have GICv4.1 enabled, unconditionnaly request enable the
+        * If we have GICv4.1 enabled, unconditionally request enabling the
         * v4 support so that we get HW-accelerated vSGIs. Otherwise, only
         * enable it if we present a virtual ITS to the guest.
         */
@@ -366,6 +366,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
 
        if (vgic_supports_direct_msis(kvm))
                vgic_v4_teardown(kvm);
+
+       xa_destroy(&dist->lpi_xa);
 }
 
 static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -445,13 +447,15 @@ int vgic_lazy_init(struct kvm *kvm)
 /* RESOURCE MAPPING */
 
 /**
+ * kvm_vgic_map_resources - map the MMIO regions
+ * @kvm: kvm struct pointer
+ *
  * Map the MMIO regions depending on the VGIC model exposed to the guest
  * called on the first VCPU run.
  * Also map the virtual CPU interface into the VM.
  * v2 calls vgic_init() if not already done.
  * v3 and derivatives return an error if the VGIC is not initialized.
  * vgic_ready() returns true if this function has succeeded.
- * @kvm: kvm struct pointer
  */
 int kvm_vgic_map_resources(struct kvm *kvm)
 {
index 28a93074eca17dbb10c7c75e23baf72edf126391..e85a495ada9c193aa75ac4e2d404070d6390c758 100644 (file)
@@ -52,7 +52,12 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
        if (!irq)
                return ERR_PTR(-ENOMEM);
 
-       INIT_LIST_HEAD(&irq->lpi_list);
+       ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
+       if (ret) {
+               kfree(irq);
+               return ERR_PTR(ret);
+       }
+
        INIT_LIST_HEAD(&irq->ap_list);
        raw_spin_lock_init(&irq->irq_lock);
 
@@ -68,30 +73,30 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
         * There could be a race with another vgic_add_lpi(), so we need to
         * check that we don't add a second list entry with the same LPI.
         */
-       list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) {
-               if (oldirq->intid != intid)
-                       continue;
-
+       oldirq = xa_load(&dist->lpi_xa, intid);
+       if (vgic_try_get_irq_kref(oldirq)) {
                /* Someone was faster with adding this LPI, let's use that. */
                kfree(irq);
                irq = oldirq;
 
-               /*
-                * This increases the refcount, the caller is expected to
-                * call vgic_put_irq() on the returned pointer once it's
-                * finished with the IRQ.
-                */
-               vgic_get_irq_kref(irq);
+               goto out_unlock;
+       }
 
+       ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0));
+       if (ret) {
+               xa_release(&dist->lpi_xa, intid);
+               kfree(irq);
                goto out_unlock;
        }
 
-       list_add_tail(&irq->lpi_list, &dist->lpi_list_head);
-       dist->lpi_list_count++;
+       atomic_inc(&dist->lpi_count);
 
 out_unlock:
        raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 
+       if (ret)
+               return ERR_PTR(ret);
+
        /*
         * We "cache" the configuration table entries in our struct vgic_irq's.
         * However we only have those structs for mapped IRQs, so we read in
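
Editorial aside, not part of the patch: stripped to its bones, the
insert-or-reuse dance above is a three-step xarray pattern. The sketch below
uses illustrative names and assumes, as vgic_add_lpi() does via
lpi_list_lock, that an external lock serializes concurrent adders:

	/* 1) Reserve the slot so the later store cannot fail with -ENOMEM. */
	ret = xa_reserve(&obj_xa, id, GFP_KERNEL);
	if (ret)
		return ERR_PTR(ret);

	/* 2) Re-check for a racing insert; reuse the winner's object. */
	old = xa_load(&obj_xa, id);
	if (old) {
		xa_release(&obj_xa, id);	/* drop the unused reservation */
		kfree(new);
		return old;
	}

	/* 3) Publish; GFP 0 is fine, xa_reserve() preallocated the node. */
	ret = xa_err(xa_store(&obj_xa, id, new, 0));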
@@ -158,7 +163,7 @@ struct vgic_translation_cache_entry {
  * @cte_esz: collection table entry size
  * @dte_esz: device table entry size
  * @ite_esz: interrupt translation table entry size
- * @save tables: save the ITS tables into guest RAM
+ * @save_tables: save the ITS tables into guest RAM
  * @restore_tables: restore the ITS internal structs from tables
  *  stored in guest RAM
  * @commit: initialize the registers which expose the ABI settings,
@@ -311,6 +316,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
        return 0;
 }
 
+#define GIC_LPI_MAX_INTID      ((1 << INTERRUPT_ID_BITS_ITS) - 1)
+
 /*
  * Create a snapshot of the current LPIs targeting @vcpu, so that we can
  * enumerate those LPIs without holding any lock.
@@ -319,6 +326,7 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
 int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
+       XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
        struct vgic_irq *irq;
        unsigned long flags;
        u32 *intids;
@@ -331,13 +339,15 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
         * command). If coming from another path (such as enabling LPIs),
         * we must be careful not to overrun the array.
         */
-       irq_count = READ_ONCE(dist->lpi_list_count);
+       irq_count = atomic_read(&dist->lpi_count);
        intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
        if (!intids)
                return -ENOMEM;
 
        raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-       list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+       rcu_read_lock();
+
+       xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) {
                if (i == irq_count)
                        break;
                /* We don't need to "get" the IRQ, as we hold the list lock. */
@@ -345,6 +355,8 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
                        continue;
                intids[i++] = irq->intid;
        }
+
+       rcu_read_unlock();
        raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 
        *intid_ptr = intids;
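
Editorial aside, not part of the patch: xas_for_each() walks present entries
in ascending index order starting from the index baked into XA_STATE(). A
minimal stand-alone form of the iteration above, with illustrative bounds:

	XA_STATE(xas, &obj_xa, FIRST_ID);
	struct obj *o;

	rcu_read_lock();
	xas_for_each(&xas, o, LAST_ID)
		pr_info("present at index %lu\n", xas.xa_index);
	rcu_read_unlock();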
@@ -595,8 +607,8 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
        raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
 
        irq = __vgic_its_check_cache(dist, db, devid, eventid);
-       if (irq)
-               vgic_get_irq_kref(irq);
+       if (!vgic_try_get_irq_kref(irq))
+               irq = NULL;
 
        raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 
@@ -640,8 +652,13 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
         * was in the cache, and increment it on the new interrupt.
         */
        if (cte->irq)
-               __vgic_put_lpi_locked(kvm, cte->irq);
+               vgic_put_irq(kvm, cte->irq);
 
+       /*
+        * The irq refcount is guaranteed to be nonzero while holding the
+        * its_lock, as the ITE (and the reference it holds) cannot be freed.
+        */
+       lockdep_assert_held(&its->its_lock);
        vgic_get_irq_kref(irq);
 
        cte->db         = db;
@@ -672,7 +689,7 @@ void vgic_its_invalidate_cache(struct kvm *kvm)
                if (!cte->irq)
                        break;
 
-               __vgic_put_lpi_locked(kvm, cte->irq);
+               vgic_put_irq(kvm, cte->irq);
                cte->irq = NULL;
        }
 
@@ -1345,8 +1362,8 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
 }
 
 /**
- * vgic_its_invall - invalidate all LPIs targetting a given vcpu
- * @vcpu: the vcpu for which the RD is targetted by an invalidation
+ * vgic_its_invall - invalidate all LPIs targeting a given vcpu
+ * @vcpu: the vcpu for which the RD is targeted by an invalidation
  *
  * Contrary to the INVALL command, this targets a RD instead of a
  * collection, and we don't need to hold the its_lock, since no ITS is
@@ -2144,7 +2161,7 @@ static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite)
 }
 
 /**
- * entry_fn_t - Callback called on a table entry restore path
+ * typedef entry_fn_t - Callback called on a table entry restore path
  * @its: its handle
  * @id: id of the entry
  * @entry: pointer to the entry
index 9465d3706ab9bcf8fb7fd72a8904d6b87ede574a..4ea3340786b950527fe87821ec26252a99e384bb 100644 (file)
@@ -380,6 +380,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
        struct vgic_irq *irq;
        gpa_t last_ptr = ~(gpa_t)0;
        bool vlpi_avail = false;
+       unsigned long index;
        int ret = 0;
        u8 val;
 
@@ -396,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
                vlpi_avail = true;
        }
 
-       list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+       xa_for_each(&dist->lpi_xa, index, irq) {
                int byte_offset, bit_nr;
                struct kvm_vcpu *vcpu;
                gpa_t pendbase, ptr;
index db2a95762b1b663e7ccc2c45184f8a8af1fd4ffd..4ec93587c8cd2b07d11e08efe0614e211c44ef51 100644 (file)
@@ -30,7 +30,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
  *         its->its_lock (mutex)
  *           vgic_cpu->ap_list_lock            must be taken with IRQs disabled
  *             kvm->lpi_list_lock              must be taken with IRQs disabled
- *               vgic_irq->irq_lock            must be taken with IRQs disabled
+ *               vgic_dist->lpi_xa.xa_lock     must be taken with IRQs disabled
+ *                 vgic_irq->irq_lock          must be taken with IRQs disabled
  *
  * As the ap_list_lock might be taken from the timer interrupt handler,
  * we have to disable IRQs before taking this lock and everything lower
@@ -54,32 +55,22 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
  */
 
 /*
- * Iterate over the VM's list of mapped LPIs to find the one with a
- * matching interrupt ID and return a reference to the IRQ structure.
+ * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
+ * structure. The caller is expected to call vgic_put_irq() later once it's
+ * finished with the IRQ.
  */
 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct vgic_irq *irq = NULL;
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
 
-       list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
-               if (irq->intid != intid)
-                       continue;
+       rcu_read_lock();
 
-               /*
-                * This increases the refcount, the caller is expected to
-                * call vgic_put_irq() later once it's finished with the IRQ.
-                */
-               vgic_get_irq_kref(irq);
-               goto out_unlock;
-       }
-       irq = NULL;
+       irq = xa_load(&dist->lpi_xa, intid);
+       if (!vgic_try_get_irq_kref(irq))
+               irq = NULL;
 
-out_unlock:
-       raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+       rcu_read_unlock();
 
        return irq;
 }
@@ -120,22 +111,6 @@ static void vgic_irq_release(struct kref *ref)
 {
 }
 
-/*
- * Drop the refcount on the LPI. Must be called with lpi_list_lock held.
- */
-void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq)
-{
-       struct vgic_dist *dist = &kvm->arch.vgic;
-
-       if (!kref_put(&irq->refcount, vgic_irq_release))
-               return;
-
-       list_del(&irq->lpi_list);
-       dist->lpi_list_count--;
-
-       kfree(irq);
-}
-
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
@@ -144,9 +119,15 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
        if (irq->intid < VGIC_MIN_LPI)
                return;
 
-       raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-       __vgic_put_lpi_locked(kvm, irq);
-       raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+       if (!kref_put(&irq->refcount, vgic_irq_release))
+               return;
+
+       xa_lock_irqsave(&dist->lpi_xa, flags);
+       __xa_erase(&dist->lpi_xa, irq->intid);
+       xa_unlock_irqrestore(&dist->lpi_xa, flags);
+
+       atomic_dec(&dist->lpi_count);
+       kfree_rcu(irq, rcu);
 }
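
Editorial aside, not part of the patch: the new get/put pair is the standard
RCU-plus-kref lifecycle. A self-contained sketch of the idiom with
illustrative names; the release callback is a no-op exactly as
vgic_irq_release() is, because unpublish and free happen in the put path:

	struct obj {
		unsigned long id;
		struct kref ref;
		struct rcu_head rcu;
	};

	static void obj_release(struct kref *ref) { }

	static struct obj *obj_get(struct xarray *xa, unsigned long id)
	{
		struct obj *o;

		rcu_read_lock();
		o = xa_load(xa, id);		/* RCU pins the memory... */
		if (o && !kref_get_unless_zero(&o->ref))
			o = NULL;		/* ...the kref decides liveness */
		rcu_read_unlock();

		return o;
	}

	static void obj_put(struct xarray *xa, struct obj *o)
	{
		if (!kref_put(&o->ref, obj_release))
			return;

		xa_erase(xa, o->id);
		kfree_rcu(o, rcu);	/* free only after all RCU readers */
	}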
 
 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
@@ -203,7 +184,7 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
 }
 
 /**
- * kvm_vgic_target_oracle - compute the target vcpu for an irq
+ * vgic_target_oracle - compute the target vcpu for an irq
  *
  * @irq:       The irq to route. Must be already locked.
  *
@@ -404,7 +385,8 @@ retry:
 
        /*
         * Grab a reference to the irq to reflect the fact that it is
-        * now in the ap_list.
+        * now in the ap_list. This is safe as the caller must already hold a
+        * reference on the irq.
         */
        vgic_get_irq_kref(irq);
        list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
index 8d134569d0a1f6c26f9a4735aa0c89847d9add48..0c2b82de8fa3c723279695c8641de02e7775065d 100644 (file)
@@ -180,7 +180,6 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
                     gpa_t addr, int len);
 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
                              u32 intid);
-void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq);
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
 bool vgic_get_phys_line_level(struct vgic_irq *irq);
 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
@@ -220,12 +219,20 @@ void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
 void vgic_v2_save_state(struct kvm_vcpu *vcpu);
 void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
 
-static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+static inline bool vgic_try_get_irq_kref(struct vgic_irq *irq)
 {
+       if (!irq)
+               return false;
+
        if (irq->intid < VGIC_MIN_LPI)
-               return;
+               return true;
 
-       kref_get(&irq->refcount);
+       return kref_get_unless_zero(&irq->refcount);
+}
+
+static inline void vgic_get_irq_kref(struct vgic_irq *irq)
+{
+       WARN_ON_ONCE(!vgic_try_get_irq_kref(irq));
 }
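
Editorial note: with this split, vgic_get_irq_kref() keeps its old contract
(the caller already guarantees the IRQ is alive, now enforced by the
WARN_ON_ONCE()), while any lookup that can race with the final put must use
vgic_try_get_irq_kref() and cope with failure, as vgic_get_lpi(),
vgic_add_lpi() and vgic_its_check_cache() do above.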
 
 void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
index dbd1bc95967d00d364e27e27c1f5fff2bc48e81f..60454256945b8b3ff47bdb609b51b3d5ad9b0572 100644 (file)
@@ -3,6 +3,7 @@ obj-y                           := dma-mapping.o extable.o fault.o init.o \
                                   cache.o copypage.o flush.o \
                                   ioremap.o mmap.o pgd.o mmu.o \
                                   context.o proc.o pageattr.o fixmap.o
+obj-$(CONFIG_ARM64_CONTPTE)    += contpte.o
 obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
 obj-$(CONFIG_PTDUMP_CORE)      += ptdump.o
 obj-$(CONFIG_PTDUMP_DEBUGFS)   += ptdump_debugfs.o
diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
new file mode 100644 (file)
index 0000000..1b64b4c
--- /dev/null
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Ltd.
+ */
+
+#include <linux/mm.h>
+#include <linux/efi.h>
+#include <linux/export.h>
+#include <asm/tlbflush.h>
+
+static inline bool mm_is_user(struct mm_struct *mm)
+{
+       /*
+        * Don't attempt to apply the contig bit to kernel mappings, because
+        * dynamically adding/removing the contig bit can cause page faults.
+        * These racing faults are ok for user space, since they get serialized
+        * on the PTL. But kernel mappings can't tolerate faults.
+        */
+       if (unlikely(mm_is_efi(mm)))
+               return false;
+       return mm != &init_mm;
+}
+
+static inline pte_t *contpte_align_down(pte_t *ptep)
+{
+       return PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
+}
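
Editorial aside, not part of the patch: PTR_ALIGN_DOWN() reduces to a simple
mask here because sizeof(pte_t) * CONT_PTES is a power of two for every
arm64 granule size. An equivalent open-coded form:

	static inline pte_t *contpte_align_down_open_coded(pte_t *ptep)
	{
		unsigned long p = (unsigned long)ptep;

		return (pte_t *)(p & ~(sizeof(pte_t) * CONT_PTES - 1));
	}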
+
+static void contpte_try_unfold_partial(struct mm_struct *mm, unsigned long addr,
+                                       pte_t *ptep, unsigned int nr)
+{
+       /*
+        * Unfold any partially covered contpte block at the beginning and end
+        * of the range.
+        */
+
+       if (ptep != contpte_align_down(ptep) || nr < CONT_PTES)
+               contpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+
+       if (ptep + nr != contpte_align_down(ptep + nr)) {
+               unsigned long last_addr = addr + PAGE_SIZE * (nr - 1);
+               pte_t *last_ptep = ptep + nr - 1;
+
+               contpte_try_unfold(mm, last_addr, last_ptep,
+                                  __ptep_get(last_ptep));
+       }
+}
+
+static void contpte_convert(struct mm_struct *mm, unsigned long addr,
+                           pte_t *ptep, pte_t pte)
+{
+       struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
+       unsigned long start_addr;
+       pte_t *start_ptep;
+       int i;
+
+       start_ptep = ptep = contpte_align_down(ptep);
+       start_addr = addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
+       pte = pfn_pte(ALIGN_DOWN(pte_pfn(pte), CONT_PTES), pte_pgprot(pte));
+
+       for (i = 0; i < CONT_PTES; i++, ptep++, addr += PAGE_SIZE) {
+               pte_t ptent = __ptep_get_and_clear(mm, addr, ptep);
+
+               if (pte_dirty(ptent))
+                       pte = pte_mkdirty(pte);
+
+               if (pte_young(ptent))
+                       pte = pte_mkyoung(pte);
+       }
+
+       __flush_tlb_range(&vma, start_addr, addr, PAGE_SIZE, true, 3);
+
+       __set_ptes(mm, start_addr, start_ptep, pte, CONT_PTES);
+}
+
+void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
+                       pte_t *ptep, pte_t pte)
+{
+       /*
+        * We have already checked that the virtual and physical addresses are
+        * correctly aligned for a contpte mapping in contpte_try_fold() so the
+        * remaining checks are to ensure that the contpte range is fully
+        * covered by a single folio, and ensure that all the ptes are valid
+        * with contiguous PFNs and matching prots. We ignore the state of the
+        * access and dirty bits for the purpose of deciding if it's a contiguous
+        * range; the folding process will generate a single contpte entry which
+        * has a single access and dirty bit. Those 2 bits are the logical OR of
+        * their respective bits in the constituent pte entries. In order to
+        * ensure the contpte range is covered by a single folio, we must
+        * recover the folio from the pfn, but special mappings don't have a
+        * folio backing them. Fortunately contpte_try_fold() already checked
+        * that the pte is not special - we never try to fold special mappings.
+        * Note we can't use vm_normal_page() for this since we don't have the
+        * vma.
+        */
+
+       unsigned long folio_start, folio_end;
+       unsigned long cont_start, cont_end;
+       pte_t expected_pte, subpte;
+       struct folio *folio;
+       struct page *page;
+       unsigned long pfn;
+       pte_t *orig_ptep;
+       pgprot_t prot;
+
+       int i;
+
+       if (!mm_is_user(mm))
+               return;
+
+       page = pte_page(pte);
+       folio = page_folio(page);
+       folio_start = addr - (page - &folio->page) * PAGE_SIZE;
+       folio_end = folio_start + folio_nr_pages(folio) * PAGE_SIZE;
+       cont_start = ALIGN_DOWN(addr, CONT_PTE_SIZE);
+       cont_end = cont_start + CONT_PTE_SIZE;
+
+       if (folio_start > cont_start || folio_end < cont_end)
+               return;
+
+       pfn = ALIGN_DOWN(pte_pfn(pte), CONT_PTES);
+       prot = pte_pgprot(pte_mkold(pte_mkclean(pte)));
+       expected_pte = pfn_pte(pfn, prot);
+       orig_ptep = ptep;
+       ptep = contpte_align_down(ptep);
+
+       for (i = 0; i < CONT_PTES; i++) {
+               subpte = pte_mkold(pte_mkclean(__ptep_get(ptep)));
+               if (!pte_same(subpte, expected_pte))
+                       return;
+               expected_pte = pte_advance_pfn(expected_pte, 1);
+               ptep++;
+       }
+
+       pte = pte_mkcont(pte);
+       contpte_convert(mm, addr, orig_ptep, pte);
+}
+EXPORT_SYMBOL_GPL(__contpte_try_fold);
+
+void __contpte_try_unfold(struct mm_struct *mm, unsigned long addr,
+                       pte_t *ptep, pte_t pte)
+{
+       /*
+        * We have already checked that the ptes are contiguous in
+        * contpte_try_unfold(), so just check that the mm is user space.
+        */
+       if (!mm_is_user(mm))
+               return;
+
+       pte = pte_mknoncont(pte);
+       contpte_convert(mm, addr, ptep, pte);
+}
+EXPORT_SYMBOL_GPL(__contpte_try_unfold);
+
+pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte)
+{
+       /*
+        * Gather access/dirty bits, which may be populated in any of the ptes
+        * of the contig range. We are guaranteed to be holding the PTL, so any
+        * contiguous range cannot be unfolded or otherwise modified under our
+        * feet.
+        */
+
+       pte_t pte;
+       int i;
+
+       ptep = contpte_align_down(ptep);
+
+       for (i = 0; i < CONT_PTES; i++, ptep++) {
+               pte = __ptep_get(ptep);
+
+               if (pte_dirty(pte))
+                       orig_pte = pte_mkdirty(orig_pte);
+
+               if (pte_young(pte))
+                       orig_pte = pte_mkyoung(orig_pte);
+       }
+
+       return orig_pte;
+}
+EXPORT_SYMBOL_GPL(contpte_ptep_get);
+
+pte_t contpte_ptep_get_lockless(pte_t *orig_ptep)
+{
+       /*
+        * The ptep_get_lockless() API requires us to read and return *orig_ptep
+        * so that it is self-consistent, without the PTL held, so we may be
+        * racing with other threads modifying the pte. Usually a READ_ONCE()
+        * would suffice, but for the contpte case, we also need to gather the
+        * access and dirty bits from across all ptes in the contiguous block,
+        * and we can't read all of those neighbouring ptes atomically, so any
+        * contiguous range may be unfolded/modified/refolded under our feet.
+        * Therefore we ensure we read a _consistent_ contpte range by checking
+        * that all ptes in the range are valid and have CONT_PTE set, that all
+        * pfns are contiguous and that all pgprots are the same (ignoring
+        * access/dirty). If we find a pte that is not consistent, then we must
+        * be racing with an update so start again. If the target pte does not
+        * have CONT_PTE set then that is considered consistent on its own
+        * because it is not part of a contpte range.
+        */
+
+       pgprot_t orig_prot;
+       unsigned long pfn;
+       pte_t orig_pte;
+       pgprot_t prot;
+       pte_t *ptep;
+       pte_t pte;
+       int i;
+
+retry:
+       orig_pte = __ptep_get(orig_ptep);
+
+       if (!pte_valid_cont(orig_pte))
+               return orig_pte;
+
+       orig_prot = pte_pgprot(pte_mkold(pte_mkclean(orig_pte)));
+       ptep = contpte_align_down(orig_ptep);
+       pfn = pte_pfn(orig_pte) - (orig_ptep - ptep);
+
+       for (i = 0; i < CONT_PTES; i++, ptep++, pfn++) {
+               pte = __ptep_get(ptep);
+               prot = pte_pgprot(pte_mkold(pte_mkclean(pte)));
+
+               if (!pte_valid_cont(pte) ||
+                  pte_pfn(pte) != pfn ||
+                  pgprot_val(prot) != pgprot_val(orig_prot))
+                       goto retry;
+
+               if (pte_dirty(pte))
+                       orig_pte = pte_mkdirty(orig_pte);
+
+               if (pte_young(pte))
+                       orig_pte = pte_mkyoung(orig_pte);
+       }
+
+       return orig_pte;
+}
+EXPORT_SYMBOL_GPL(contpte_ptep_get_lockless);
+
+void contpte_set_ptes(struct mm_struct *mm, unsigned long addr,
+                                       pte_t *ptep, pte_t pte, unsigned int nr)
+{
+       unsigned long next;
+       unsigned long end;
+       unsigned long pfn;
+       pgprot_t prot;
+
+       /*
+        * The set_ptes() spec guarantees that when nr > 1, the initial state of
+        * all ptes is not-present. Therefore we never need to unfold or
+        * otherwise invalidate a range before we set the new ptes.
+        * contpte_set_ptes() should never be called for nr < 2.
+        */
+       VM_WARN_ON(nr == 1);
+
+       if (!mm_is_user(mm))
+               return __set_ptes(mm, addr, ptep, pte, nr);
+
+       end = addr + (nr << PAGE_SHIFT);
+       pfn = pte_pfn(pte);
+       prot = pte_pgprot(pte);
+
+       do {
+               next = pte_cont_addr_end(addr, end);
+               nr = (next - addr) >> PAGE_SHIFT;
+               pte = pfn_pte(pfn, prot);
+
+               if (((addr | next | (pfn << PAGE_SHIFT)) & ~CONT_PTE_MASK) == 0)
+                       pte = pte_mkcont(pte);
+               else
+                       pte = pte_mknoncont(pte);
+
+               __set_ptes(mm, addr, ptep, pte, nr);
+
+               addr = next;
+               ptep += nr;
+               pfn += nr;
+
+       } while (addr != end);
+}
+EXPORT_SYMBOL_GPL(contpte_set_ptes);
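
Editorial aside, not part of the patch: a worked trace of the splitting loop,
assuming 4K pages and CONT_PTES == 16 (so CONT_PTE_SIZE == 64K):

	addr = 0x2000, nr = 20                    =>  end = 0x16000
	  chunk 1: 0x2000..0x10000, 14 ptes, start unaligned -> pte_mknoncont()
	  chunk 2: 0x10000..0x16000, 6 ptes, end unaligned   -> pte_mknoncont()

	addr = 0x10000, nr = 16, pfn 64K-aligned  =>  one chunk 0x10000..0x20000
	  addr, next and (pfn << PAGE_SHIFT) all 64K-aligned -> pte_mkcont()

Only a chunk whose start, end and physical address are all aligned to
CONT_PTE_SIZE gets the contiguous bit.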
+
+void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, unsigned int nr, int full)
+{
+       contpte_try_unfold_partial(mm, addr, ptep, nr);
+       __clear_full_ptes(mm, addr, ptep, nr, full);
+}
+EXPORT_SYMBOL_GPL(contpte_clear_full_ptes);
+
+pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
+                               unsigned long addr, pte_t *ptep,
+                               unsigned int nr, int full)
+{
+       contpte_try_unfold_partial(mm, addr, ptep, nr);
+       return __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+}
+EXPORT_SYMBOL_GPL(contpte_get_and_clear_full_ptes);
+
+int contpte_ptep_test_and_clear_young(struct vm_area_struct *vma,
+                                       unsigned long addr, pte_t *ptep)
+{
+       /*
+        * ptep_clear_flush_young() technically requires us to clear the access
+        * flag for a _single_ pte. However, the core-mm code actually tracks
+        * access/dirty per folio, not per page. And since we only create a
+        * contig range when the range is covered by a single folio, we can get
+        * away with clearing young for the whole contig range here, so we avoid
+        * having to unfold.
+        */
+
+       int young = 0;
+       int i;
+
+       ptep = contpte_align_down(ptep);
+       addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
+
+       for (i = 0; i < CONT_PTES; i++, ptep++, addr += PAGE_SIZE)
+               young |= __ptep_test_and_clear_young(vma, addr, ptep);
+
+       return young;
+}
+EXPORT_SYMBOL_GPL(contpte_ptep_test_and_clear_young);
+
+int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
+                                       unsigned long addr, pte_t *ptep)
+{
+       int young;
+
+       young = contpte_ptep_test_and_clear_young(vma, addr, ptep);
+
+       if (young) {
+               /*
+                * See comment in __ptep_clear_flush_young(); same rationale for
+                * eliding the trailing DSB applies here.
+                */
+               addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
+               __flush_tlb_range_nosync(vma, addr, addr + CONT_PTE_SIZE,
+                                        PAGE_SIZE, true, 3);
+       }
+
+       return young;
+}
+EXPORT_SYMBOL_GPL(contpte_ptep_clear_flush_young);
+
+void contpte_wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
+                                       pte_t *ptep, unsigned int nr)
+{
+       /*
+        * If wrprotecting an entire contig range, we can avoid unfolding. Just
+        * set wrprotect and wait for the later mmu_gather flush to invalidate
+        * the tlb. Until the flush, the page may or may not be wrprotected.
+        * After the flush, it is guaranteed wrprotected. If it's a partial
+        * range though, we must unfold, because we can't have a case where
+        * CONT_PTE is set but wrprotect applies to a subset of the PTEs; this
+        * would cause it to continue to be unpredictable after the flush.
+        */
+
+       contpte_try_unfold_partial(mm, addr, ptep, nr);
+       __wrprotect_ptes(mm, addr, ptep, nr);
+}
+EXPORT_SYMBOL_GPL(contpte_wrprotect_ptes);
+
+int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
+                                       unsigned long addr, pte_t *ptep,
+                                       pte_t entry, int dirty)
+{
+       unsigned long start_addr;
+       pte_t orig_pte;
+       int i;
+
+       /*
+        * Gather the access/dirty bits for the contiguous range. If nothing has
+        * changed, it's a no-op.
+        */
+       orig_pte = pte_mknoncont(ptep_get(ptep));
+       if (pte_val(orig_pte) == pte_val(entry))
+               return 0;
+
+       /*
+        * We can fix up access/dirty bits without having to unfold the contig
+        * range. But if the write bit is changing, we must unfold.
+        */
+       if (pte_write(orig_pte) == pte_write(entry)) {
+               /*
+                * For HW access management, we technically only need to update
+                * the flag on a single pte in the range. But for SW access
+                * management, we need to update all the ptes to prevent extra
+                * faults. Avoid per-page tlb flush in __ptep_set_access_flags()
+                * and instead flush the whole range at the end.
+                */
+               ptep = contpte_align_down(ptep);
+               start_addr = addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
+
+               for (i = 0; i < CONT_PTES; i++, ptep++, addr += PAGE_SIZE)
+                       __ptep_set_access_flags(vma, addr, ptep, entry, 0);
+
+               if (dirty)
+                       __flush_tlb_range(vma, start_addr, addr,
+                                                       PAGE_SIZE, true, 3);
+       } else {
+               __contpte_try_unfold(vma->vm_mm, addr, ptep, orig_pte);
+               __ptep_set_access_flags(vma, addr, ptep, entry, dirty);
+       }
+
+       return 1;
+}
+EXPORT_SYMBOL_GPL(contpte_ptep_set_access_flags);
index 60265ede48fe3aa72a336d34d901d1b94e439372..8251e2fea9c7573ce89a55770656ce43509cfc14 100644 (file)
@@ -191,7 +191,7 @@ static void show_pte(unsigned long addr)
                if (!ptep)
                        break;
 
-               pte = READ_ONCE(*ptep);
+               pte = __ptep_get(ptep);
                pr_cont(", pte=%016llx", pte_val(pte));
                pte_unmap(ptep);
        } while(0);
@@ -205,16 +205,16 @@ static void show_pte(unsigned long addr)
  *
  * It needs to cope with hardware update of the accessed/dirty state by other
  * agents in the system and can safely skip the __sync_icache_dcache() call as,
- * like set_pte_at(), the PTE is never changed from no-exec to exec here.
+ * like __set_ptes(), the PTE is never changed from no-exec to exec here.
  *
  * Returns whether or not the PTE actually changed.
  */
-int ptep_set_access_flags(struct vm_area_struct *vma,
-                         unsigned long address, pte_t *ptep,
-                         pte_t entry, int dirty)
+int __ptep_set_access_flags(struct vm_area_struct *vma,
+                           unsigned long address, pte_t *ptep,
+                           pte_t entry, int dirty)
 {
        pteval_t old_pteval, pteval;
-       pte_t pte = READ_ONCE(*ptep);
+       pte_t pte = __ptep_get(ptep);
 
        if (pte_same(pte, entry))
                return 0;
index d22506e9c7fdaf2150455146160ad699dfd909d4..de1e09d986ad230f22eff8def3416c5d94d82bb3 100644 (file)
@@ -124,9 +124,9 @@ void __set_fixmap(enum fixed_addresses idx,
        ptep = fixmap_pte(addr);
 
        if (pgprot_val(flags)) {
-               set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
+               __set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
        } else {
-               pte_clear(&init_mm, addr, ptep);
+               __pte_clear(&init_mm, addr, ptep);
                flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
        }
 }
index 8116ac599f801d5b621ef8af96178094c877c5a4..0f0e10bb0a9540a9a18ab6c9cf0ed509d2a11e14 100644 (file)
@@ -45,13 +45,6 @@ void __init arm64_hugetlb_cma_reserve(void)
        else
                order = CONT_PMD_SHIFT - PAGE_SHIFT;
 
-       /*
-        * HugeTLB CMA reservation is required for gigantic
-        * huge pages which could not be allocated via the
-        * page allocator. Just warn if there is any change
-        * breaking this assumption.
-        */
-       WARN_ON(order <= MAX_PAGE_ORDER);
        hugetlb_cma_reserve(order);
 }
 #endif /* CONFIG_CMA */
@@ -152,14 +145,14 @@ pte_t huge_ptep_get(pte_t *ptep)
 {
        int ncontig, i;
        size_t pgsize;
-       pte_t orig_pte = ptep_get(ptep);
+       pte_t orig_pte = __ptep_get(ptep);
 
        if (!pte_present(orig_pte) || !pte_cont(orig_pte))
                return orig_pte;
 
        ncontig = num_contig_ptes(page_size(pte_page(orig_pte)), &pgsize);
        for (i = 0; i < ncontig; i++, ptep++) {
-               pte_t pte = ptep_get(ptep);
+               pte_t pte = __ptep_get(ptep);
 
                if (pte_dirty(pte))
                        orig_pte = pte_mkdirty(orig_pte);
@@ -184,11 +177,11 @@ static pte_t get_clear_contig(struct mm_struct *mm,
                             unsigned long pgsize,
                             unsigned long ncontig)
 {
-       pte_t orig_pte = ptep_get(ptep);
+       pte_t orig_pte = __ptep_get(ptep);
        unsigned long i;
 
        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) {
-               pte_t pte = ptep_get_and_clear(mm, addr, ptep);
+               pte_t pte = __ptep_get_and_clear(mm, addr, ptep);
 
                /*
                 * If HW_AFDBM is enabled, then the HW could turn on
@@ -236,7 +229,7 @@ static void clear_flush(struct mm_struct *mm,
        unsigned long i, saddr = addr;
 
        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
-               ptep_clear(mm, addr, ptep);
+               __ptep_get_and_clear(mm, addr, ptep);
 
        flush_tlb_range(&vma, saddr, addr);
 }
@@ -254,12 +247,12 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 
        if (!pte_present(pte)) {
                for (i = 0; i < ncontig; i++, ptep++, addr += pgsize)
-                       set_pte_at(mm, addr, ptep, pte);
+                       __set_ptes(mm, addr, ptep, pte, 1);
                return;
        }
 
        if (!pte_cont(pte)) {
-               set_pte_at(mm, addr, ptep, pte);
+               __set_ptes(mm, addr, ptep, pte, 1);
                return;
        }
 
@@ -270,7 +263,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
        clear_flush(mm, addr, ptep, pgsize, ncontig);
 
        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
-               set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
+               __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);
 }
 
 pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -400,7 +393,7 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
        ncontig = num_contig_ptes(sz, &pgsize);
 
        for (i = 0; i < ncontig; i++, addr += pgsize, ptep++)
-               pte_clear(mm, addr, ptep);
+               __pte_clear(mm, addr, ptep);
 }
 
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
@@ -408,10 +401,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 {
        int ncontig;
        size_t pgsize;
-       pte_t orig_pte = ptep_get(ptep);
+       pte_t orig_pte = __ptep_get(ptep);
 
        if (!pte_cont(orig_pte))
-               return ptep_get_and_clear(mm, addr, ptep);
+               return __ptep_get_and_clear(mm, addr, ptep);
 
        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
 
@@ -431,11 +424,11 @@ static int __cont_access_flags_changed(pte_t *ptep, pte_t pte, int ncontig)
 {
        int i;
 
-       if (pte_write(pte) != pte_write(ptep_get(ptep)))
+       if (pte_write(pte) != pte_write(__ptep_get(ptep)))
                return 1;
 
        for (i = 0; i < ncontig; i++) {
-               pte_t orig_pte = ptep_get(ptep + i);
+               pte_t orig_pte = __ptep_get(ptep + i);
 
                if (pte_dirty(pte) != pte_dirty(orig_pte))
                        return 1;
@@ -459,7 +452,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
        pte_t orig_pte;
 
        if (!pte_cont(pte))
-               return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+               return __ptep_set_access_flags(vma, addr, ptep, pte, dirty);
 
        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
        dpfn = pgsize >> PAGE_SHIFT;
@@ -478,7 +471,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
 
        hugeprot = pte_pgprot(pte);
        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
-               set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
+               __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);
 
        return 1;
 }
@@ -492,8 +485,8 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
        size_t pgsize;
        pte_t pte;
 
-       if (!pte_cont(READ_ONCE(*ptep))) {
-               ptep_set_wrprotect(mm, addr, ptep);
+       if (!pte_cont(__ptep_get(ptep))) {
+               __ptep_set_wrprotect(mm, addr, ptep);
                return;
        }
 
@@ -507,7 +500,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
        pfn = pte_pfn(pte);
 
        for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
-               set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
+               __set_ptes(mm, addr, ptep, pfn_pte(pfn, hugeprot), 1);
 }
 
 pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
@@ -517,7 +510,7 @@ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
        size_t pgsize;
        int ncontig;
 
-       if (!pte_cont(READ_ONCE(*ptep)))
+       if (!pte_cont(__ptep_get(ptep)))
                return ptep_clear_flush(vma, addr, ptep);
 
        ncontig = find_num_contig(mm, addr, ptep, &pgsize);
@@ -550,7 +543,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr
                 * when the permission changes from executable to non-executable
                 * in cases where cpu is affected with errata #2645198.
                 */
-               if (pte_user_exec(READ_ONCE(*ptep)))
+               if (pte_user_exec(__ptep_get(ptep)))
                        return huge_ptep_clear_flush(vma, addr, ptep);
        }
        return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
index 0f427b50fdc30d2a0a469cdeff85c75bc9188c56..03efd86dce0ae9735bb0bfae74caecfe758666d0 100644 (file)
@@ -100,7 +100,7 @@ static void __init arch_reserve_crashkernel(void)
        bool high = false;
        int ret;
 
-       if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+       if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
                return;
 
        ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
index fbddbf9faf1966786e8846e8c619cdefeac8ffca..b65a29440a0c96d54a457909f10ee2e5124f52a9 100644 (file)
@@ -125,8 +125,8 @@ static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
                if (!early)
                        memset(__va(page_phys), KASAN_SHADOW_INIT, PAGE_SIZE);
                next = addr + PAGE_SIZE;
-               set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
-       } while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));
+               __set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+       } while (ptep++, addr = next, addr != end && pte_none(__ptep_get(ptep)));
 }
 
 static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
@@ -366,7 +366,7 @@ static void __init kasan_init_shadow(void)
         * so we should make sure that it maps the zero page read-only.
         */
        for (i = 0; i < PTRS_PER_PTE; i++)
-               set_pte(&kasan_early_shadow_pte[i],
+               __set_pte(&kasan_early_shadow_pte[i],
                        pfn_pte(sym_to_pfn(kasan_early_shadow_page),
                                PAGE_KERNEL_RO));
 
index bf5b1c426ad0dd8137968f9484d90ae2e757d9f3..495b732d5af36f208291b28e1f1f167231b01e2d 100644 (file)
@@ -179,16 +179,16 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
 
        ptep = pte_set_fixmap_offset(pmdp, addr);
        do {
-               pte_t old_pte = READ_ONCE(*ptep);
+               pte_t old_pte = __ptep_get(ptep);
 
-               set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
+               __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
 
                /*
                 * After the PTE entry has been populated once, we
                 * only allow updates to the permission attributes.
                 */
                BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
-                                             READ_ONCE(pte_val(*ptep))));
+                                             pte_val(__ptep_get(ptep))));
 
                phys += PAGE_SIZE;
        } while (ptep++, addr += PAGE_SIZE, addr != end);
@@ -682,8 +682,6 @@ void mark_rodata_ro(void)
        WRITE_ONCE(rodata_is_rw, false);
        update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
                            section_size, PAGE_KERNEL_RO);
-
-       debug_checkwx();
 }
 
 static void __init declare_vma(struct vm_struct *vma,
@@ -846,12 +844,12 @@ static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
 
        do {
                ptep = pte_offset_kernel(pmdp, addr);
-               pte = READ_ONCE(*ptep);
+               pte = __ptep_get(ptep);
                if (pte_none(pte))
                        continue;
 
                WARN_ON(!pte_present(pte));
-               pte_clear(&init_mm, addr, ptep);
+               __pte_clear(&init_mm, addr, ptep);
                flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
                if (free_mapped)
                        free_hotplug_page_range(pte_page(pte),
@@ -979,7 +977,7 @@ static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
 
        do {
                ptep = pte_offset_kernel(pmdp, addr);
-               pte = READ_ONCE(*ptep);
+               pte = __ptep_get(ptep);
 
                /*
                 * This is just a sanity check here which verifies that
@@ -998,7 +996,7 @@ static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr,
         */
        ptep = pte_offset_kernel(pmdp, 0UL);
        for (i = 0; i < PTRS_PER_PTE; i++) {
-               if (!pte_none(READ_ONCE(ptep[i])))
+               if (!pte_none(__ptep_get(&ptep[i])))
                        return;
        }
 
@@ -1494,7 +1492,7 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte
                 * when the permission changes from executable to non-executable
                 * in cases where cpu is affected with errata #2645198.
                 */
-               if (pte_user_exec(READ_ONCE(*ptep)))
+               if (pte_user_exec(ptep_get(ptep)))
                        return ptep_clear_flush(vma, addr, ptep);
        }
        return ptep_get_and_clear(vma->vm_mm, addr, ptep);
index 924843f1f661bfe1ff5c6b8f9eff753872416040..0c4e3ecf989d434ae96b6620e72448c57e9eeb67 100644 (file)
@@ -36,12 +36,12 @@ bool can_set_direct_map(void)
 static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
 {
        struct page_change_data *cdata = data;
-       pte_t pte = READ_ONCE(*ptep);
+       pte_t pte = __ptep_get(ptep);
 
        pte = clear_pte_bit(pte, cdata->clear_mask);
        pte = set_pte_bit(pte, cdata->set_mask);
 
-       set_pte(ptep, pte);
+       __set_pte(ptep, pte);
        return 0;
 }
 
@@ -245,5 +245,5 @@ bool kernel_page_present(struct page *page)
                return true;
 
        ptep = pte_offset_kernel(pmdp, addr);
-       return pte_valid(READ_ONCE(*ptep));
+       return pte_valid(__ptep_get(ptep));
 }
index 5b87f8d623f7a0a22a53687d00136a5a5612bcb6..6986827e0d64519bdddcb706bb36f7e0baf269ab 100644 (file)
@@ -322,7 +322,7 @@ static struct ptdump_info kernel_ptdump_info __ro_after_init = {
        .mm             = &init_mm,
 };
 
-void ptdump_check_wx(void)
+bool ptdump_check_wx(void)
 {
        struct pg_state st = {
                .seq = NULL,
@@ -343,11 +343,16 @@ void ptdump_check_wx(void)
 
        ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 
-       if (st.wx_pages || st.uxn_pages)
+       if (st.wx_pages || st.uxn_pages) {
                pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
                        st.wx_pages, st.uxn_pages);
-       else
+
+               return false;
+       } else {
                pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+
+               return true;
+       }
 }
 
 static int __init ptdump_init(void)
index 7b14df3c64776f4c61a81aa85af317344e741df2..5139a28130c0888555395d574c2b1373e840f014 100644 (file)
@@ -33,7 +33,7 @@ static void *trans_alloc(struct trans_pgd_info *info)
 
 static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
 {
-       pte_t pte = READ_ONCE(*src_ptep);
+       pte_t pte = __ptep_get(src_ptep);
 
        if (pte_valid(pte)) {
                /*
@@ -41,7 +41,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
                 * read only (code, rodata). Clear the RDONLY bit from
                 * the temporary mappings we use during restore.
                 */
-               set_pte(dst_ptep, pte_mkwrite_novma(pte));
+               __set_pte(dst_ptep, pte_mkwrite_novma(pte));
        } else if ((debug_pagealloc_enabled() ||
                   is_kfence_address((void *)addr)) && !pte_none(pte)) {
                /*
@@ -55,7 +55,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
                 */
                BUG_ON(!pfn_valid(pte_pfn(pte)));
 
-               set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte)));
+               __set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte)));
        }
 }
 
index 46ae9252bc3f9e7a28a830293b7a9962636df7ff..62b2838a231adae55ca8493c2409ae7f4b70d82e 100644 (file)
@@ -36,6 +36,7 @@ HAS_GENERIC_AUTH_IMP_DEF
 HAS_GIC_CPUIF_SYSREGS
 HAS_GIC_PRIO_MASKING
 HAS_GIC_PRIO_RELAXED_SYNC
+HAS_HCR_NV1
 HAS_HCX
 HAS_LDAPR
 HAS_LPA2
index 3fc1650a329ebe5dacaec67270d09f621bfb4099..a4c1dd4741a470741a88ad37951c5b67e11928c1 100644 (file)
@@ -1399,6 +1399,7 @@ EndEnum
 UnsignedEnum   43:40   SPECRES
        0b0000  NI
        0b0001  IMP
+       0b0010  COSP_RCTX
 EndEnum
 UnsignedEnum   39:36   SB
        0b0000  NI
@@ -1525,7 +1526,12 @@ EndEnum
 EndSysreg
 
 Sysreg ID_AA64ISAR3_EL1        3       0       0       6       3
-Res0   63:12
+Res0   63:16
+UnsignedEnum   15:12   PACM
+       0b0000  NI
+       0b0001  TRIVIAL_IMP
+       0b0010  FULL_IMP
+EndEnum
 UnsignedEnum   11:8    TLBIW
        0b0000  NI
        0b0001  IMP
@@ -1824,6 +1830,43 @@ UnsignedEnum     3:0     TCRX
 EndEnum
 EndSysreg
 
+Sysreg ID_AA64MMFR4_EL1        3       0       0       7       4
+Res0   63:40
+UnsignedEnum   39:36   E3DSE
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+Res0   35:28
+SignedEnum     27:24   E2H0
+       0b0000  IMP
+       0b1110  NI_NV1
+       0b1111  NI
+EndEnum
+UnsignedEnum   23:20   NV_frac
+       0b0000  NV_NV2
+       0b0001  NV2_ONLY
+EndEnum
+UnsignedEnum   19:16   FGWTE3
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+UnsignedEnum   15:12   HACDBS
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+UnsignedEnum   11:8    ASID2
+       0b0000  NI
+       0b0001  IMP
+EndEnum
+SignedEnum     7:4     EIESB
+       0b0000  NI
+       0b0001  ToEL3
+       0b0010  ToELx
+       0b1111  ANY
+EndEnum
+Res0   3:0
+EndSysreg
+
 Sysreg SCTLR_EL1       3       0       1       0       0
 Field  63      TIDCP
 Field  62      SPINTMASK
index 9c2723ab1c9415e9bab37e3b81b54b0a8936c76b..d3ac36751ad1f6ededf86a2d0553883898140989 100644 (file)
@@ -2,6 +2,7 @@
 config CSKY
        def_bool y
        select ARCH_32BIT_OFF_T
+       select ARCH_HAS_CPU_CACHE_ALIASING
        select ARCH_HAS_DMA_PREP_COHERENT
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_SYNC_DMA_FOR_CPU
diff --git a/arch/csky/include/asm/cachetype.h b/arch/csky/include/asm/cachetype.h
new file mode 100644 (file)
index 0000000..98cbe3a
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_CSKY_CACHETYPE_H
+#define __ASM_CSKY_CACHETYPE_H
+
+#include <linux/types.h>
+
+#define cpu_dcache_is_aliasing()       true
+
+#endif
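
Editorial aside, not part of the patch: these per-architecture cachetype.h
stubs back a generic cpu_dcache_is_aliasing() predicate, letting common code
guard paths that are unsafe on virtually-aliasing data caches. A hedged
sketch of such a guard (the surrounding logic is illustrative):

	#include <asm/cachetype.h>

	if (cpu_dcache_is_aliasing())
		return -EOPNOTSUPP;	/* aliasing D-cache: refuse */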
index b274784c2e261a0e51369de70e7874b81b87a536..c139d0d728029e200899c857c9cbb50e45c9d7c2 100644 (file)
@@ -133,7 +133,6 @@ config LOONGARCH
        select HAVE_KPROBES
        select HAVE_KPROBES_ON_FTRACE
        select HAVE_KRETPROBES
-       select HAVE_KVM
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_NMI
        select HAVE_PCI
index 923d0bd382941acc5794d7622f7adfe1b2533422..109785922cf94e455b3945f18ec7b1f0c82b233d 100644 (file)
@@ -14,8 +14,6 @@
  * Some parts derived from the x86 version of this file.
  */
 
-#define __KVM_HAVE_READONLY_MEM
-
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_DIRTY_LOG_PAGE_OFFSET      64
 
index fd915ad69c09b8f0d0da3f2934b4018078015706..2b72eb326b44288a7ee4aba76e69c25bd75789d4 100644 (file)
@@ -260,7 +260,7 @@ static void __init arch_reserve_crashkernel(void)
        char *cmdline = boot_command_line;
        bool high = false;
 
-       if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+       if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
                return;
 
        ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
index 61f7e33b1f95731c3c1a207337e30a63ff028ad6..c4ef2b4d97974762cf99228affa2ec8da4b82622 100644 (file)
@@ -20,7 +20,6 @@ if VIRTUALIZATION
 config KVM
        tristate "Kernel-based Virtual Machine (KVM) support"
        depends on AS_HAS_LVZ_EXTENSION
-       depends on HAVE_KVM
        select HAVE_KVM_DIRTY_RING_ACQ_REL
        select HAVE_KVM_VCPU_ASYNC_IOCTL
        select KVM_COMMON
@@ -28,6 +27,7 @@ config KVM
        select KVM_GENERIC_HARDWARE_ENABLING
        select KVM_GENERIC_MMU_NOTIFIER
        select KVM_MMIO
+       select HAVE_KVM_READONLY_MEM
        select KVM_XFER_TO_GUEST_WORK
        help
          Support hosting virtualized guest machines using
index 50a6acd7ffe4c94b986c5f7a9802420f090a7d79..a556cff3574023f97d4d7c021100bcb067e06e11 100644 (file)
@@ -723,7 +723,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
        /*
         * Read each entry once.  As above, a non-leaf entry can be promoted to
         * a huge page _during_ this walk.  Re-reading the entry could send the
-        * walk into the weeks, e.g. p*d_large() returns false (sees the old
+        * walk into the weeds, e.g. p*d_leaf() returns false (sees the old
         * value) and then p*d_offset() walks into the target huge page instead
         * of the old page table (sees the new value).
         */
index ba976509bfe819ec51fdaa08f2a1ba4a334755cd..3634431db18a4a4992ea9b7d1ed44545214ff3df 100644 (file)
@@ -213,12 +213,6 @@ SYM_FUNC_START(kvm_enter_guest)
        /* Save host GPRs */
        kvm_save_host_gpr a2
 
-       /* Save host CRMD, PRMD to stack */
-       csrrd   a3, LOONGARCH_CSR_CRMD
-       st.d    a3, a2, PT_CRMD
-       csrrd   a3, LOONGARCH_CSR_PRMD
-       st.d    a3, a2, PT_PRMD
-
        addi.d  a2, a1, KVM_VCPU_ARCH
        st.d    sp, a2, KVM_ARCH_HSP
        st.d    tp, a2, KVM_ARCH_HTP
index 111328f6087285a01ccf4077672cf4cc85266866..bcc6b6d063d914dbf820b43f2c1308803646b395 100644 (file)
@@ -23,24 +23,6 @@ static inline u64 tick_to_ns(struct kvm_vcpu *vcpu, u64 tick)
        return div_u64(tick * MNSEC_PER_SEC, vcpu->arch.timer_mhz);
 }
 
-/*
- * Push timer forward on timeout.
- * Handle an hrtimer event by push the hrtimer forward a period.
- */
-static enum hrtimer_restart kvm_count_timeout(struct kvm_vcpu *vcpu)
-{
-       unsigned long cfg, period;
-
-       /* Add periodic tick to current expire time */
-       cfg = kvm_read_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TCFG);
-       if (cfg & CSR_TCFG_PERIOD) {
-               period = tick_to_ns(vcpu, cfg & CSR_TCFG_VAL);
-               hrtimer_add_expires_ns(&vcpu->arch.swtimer, period);
-               return HRTIMER_RESTART;
-       } else
-               return HRTIMER_NORESTART;
-}
-
 /* Low level hrtimer wake routine */
 enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer)
 {
@@ -50,7 +32,7 @@ enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer)
        kvm_queue_irq(vcpu, INT_TI);
        rcuwait_wake_up(&vcpu->wait);
 
-       return kvm_count_timeout(vcpu);
+       return HRTIMER_NORESTART;
 }
 
 /*
@@ -93,7 +75,8 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu)
        /*
         * Freeze the soft-timer and sync the guest stable timer with it.
         */
-       hrtimer_cancel(&vcpu->arch.swtimer);
+       if (kvm_vcpu_is_blocking(vcpu))
+               hrtimer_cancel(&vcpu->arch.swtimer);
 
        /*
         * From LoongArch Reference Manual Volume 1 Chapter 7.6.2
@@ -168,26 +151,20 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu)
         * Here judge one-shot timer fired by checking whether TVAL is larger
         * than TCFG
         */
-       if (ticks < cfg) {
+       if (ticks < cfg)
                delta = tick_to_ns(vcpu, ticks);
-               expire = ktime_add_ns(ktime_get(), delta);
-               vcpu->arch.expire = expire;
+       else
+               delta = 0;
+
+       expire = ktime_add_ns(ktime_get(), delta);
+       vcpu->arch.expire = expire;
+       if (kvm_vcpu_is_blocking(vcpu)) {
 
                /*
                 * HRTIMER_MODE_PINNED is suggested since the vcpu may run on
                 * the same physical cpu next time
                 */
                hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
-       } else if (vcpu->stat.generic.blocking) {
-               /*
-                * Inject timer interrupt so that halt polling can dectect and exit.
-                * VCPU is scheduled out already and sleeps in rcuwait queue and
-                * will not poll pending events again. kvm_queue_irq() is not enough,
-                * hrtimer swtimer should be used here.
-                */
-               expire = ktime_add_ns(ktime_get(), 10);
-               vcpu->arch.expire = expire;
-               hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED);
        }
 }
 
index 36106922b5d75b7f7de70df5df0d72a697440f0f..3a8779065f73b45425f69b2cf204545dd2e0c908 100644 (file)
@@ -304,11 +304,18 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v)
                return -EINVAL;
 
        switch (id) {
-       case 2:
+       case LOONGARCH_CPUCFG0:
+               *v = GENMASK(31, 0);
+               return 0;
+       case LOONGARCH_CPUCFG1:
+               /* CPUCFG1_MSGINT is not supported by KVM */
+               *v = GENMASK(25, 0);
+               return 0;
+       case LOONGARCH_CPUCFG2:
                /* CPUCFG2 features unconditionally supported by KVM */
                *v = CPUCFG2_FP     | CPUCFG2_FPSP  | CPUCFG2_FPDP     |
                     CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV |
-                    CPUCFG2_LAM;
+                    CPUCFG2_LSPW | CPUCFG2_LAM;
                /*
                 * For the ISA extensions listed below, if one is supported
                 * by the host, then it is also supported by KVM.
@@ -318,14 +325,26 @@ static int _kvm_get_cpucfg_mask(int id, u64 *v)
                if (cpu_has_lasx)
                        *v |= CPUCFG2_LASX;
 
+               return 0;
+       case LOONGARCH_CPUCFG3:
+               *v = GENMASK(16, 0);
+               return 0;
+       case LOONGARCH_CPUCFG4:
+       case LOONGARCH_CPUCFG5:
+               *v = GENMASK(31, 0);
+               return 0;
+       case LOONGARCH_CPUCFG16:
+               *v = GENMASK(16, 0);
+               return 0;
+       case LOONGARCH_CPUCFG17 ... LOONGARCH_CPUCFG20:
+               *v = GENMASK(30, 0);
                return 0;
        default:
                /*
-                * No restrictions on other valid CPUCFG IDs' values, but
-                * CPUCFG data is limited to 32 bits as the LoongArch ISA
-                * manual says (Volume 1, Section 2.2.10.5 "CPUCFG").
+                * CPUCFG bits should be zero if reserved by HW or not
+                * supported by KVM.
                 */
-               *v = U32_MAX;
+               *v = 0;
                return 0;
        }
 }
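
Editorial aside, not part of the patch: the masks above describe which
CPUCFG bits userspace may set; everything reserved by hardware or not
modelled by KVM stays zero. The validity check then reduces to "no bits
outside the advertised mask", roughly:

	static int check_against_mask(u64 val, u64 mask)
	{
		return (val & ~mask) ? -EINVAL : 0;
	}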
@@ -344,7 +363,7 @@ static int kvm_check_cpucfg(int id, u64 val)
                return -EINVAL;
 
        switch (id) {
-       case 2:
+       case LOONGARCH_CPUCFG2:
                if (!(val & CPUCFG2_LLFTP))
                        /* Guests must have a constant timer */
                        return -EINVAL;
index 7b709453d5e7eb054a897fcfabc4e80a1d594600..6ffa29585194560a2221167ae6a074c14c9875c7 100644 (file)
@@ -3,6 +3,7 @@ config M68K
        bool
        default y
        select ARCH_32BIT_OFF_T
+       select ARCH_HAS_CPU_CACHE_ALIASING
        select ARCH_HAS_BINFMT_FLAT
        select ARCH_HAS_CPU_FINALIZE_INIT if MMU
        select ARCH_HAS_CURRENT_STACK_POINTER
diff --git a/arch/m68k/include/asm/cachetype.h b/arch/m68k/include/asm/cachetype.h
new file mode 100644 (file)
index 0000000..7fad5d9
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_M68K_CACHETYPE_H
+#define __ASM_M68K_CACHETYPE_H
+
+#include <linux/types.h>
+
+#define cpu_dcache_is_aliasing()       true
+
+#endif
index e2d623621a00ee69d5517cae5d4242c2640ace08..e901bf554483bd66f89139fd63d2e27eb693010a 100644 (file)
@@ -11,6 +11,7 @@ obj- := $(platform-y)
 # mips object files
 # The object files are linked as core-y files would be linked
 
+obj-y += generic/
 obj-y += kernel/
 obj-y += mm/
 obj-y += net/
index a2311c4bce6a6ac17044b6a1d3ea29782abf5395..5c145b67d3bf435df52852ff68756033286ed607 100644 (file)
@@ -17,6 +17,7 @@ platform-$(CONFIG_MACH_LOONGSON2EF)   += loongson2ef/
 platform-$(CONFIG_MACH_LOONGSON32)     += loongson32/
 platform-$(CONFIG_MACH_LOONGSON64)     += loongson64/
 platform-$(CONFIG_MIPS_MALTA)          += mti-malta/
+platform-$(CONFIG_MACH_EYEQ5)          += mobileye/
 platform-$(CONFIG_MACH_NINTENDO64)     += n64/
 platform-$(CONFIG_PIC32MZDA)           += pic32/
 platform-$(CONFIG_RALINK)              += ralink/
index 6f251746777d21e731d6e2816cf2bbb2b3a850f8..06ef440d16ce71bef0092d64e47526690735197c 100644 (file)
@@ -4,6 +4,7 @@ config MIPS
        default y
        select ARCH_32BIT_OFF_T if !64BIT
        select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT
+       select ARCH_HAS_CPU_CACHE_ALIASING
        select ARCH_HAS_CPU_FINALIZE_INIT
        select ARCH_HAS_CURRENT_STACK_POINTER if !CC_IS_CLANG || CLANG_VERSION >= 140000
        select ARCH_HAS_DEBUG_VIRTUAL if !64BIT
@@ -112,6 +113,9 @@ config MIPS_FIXUP_BIGPHYS_ADDR
 config MIPS_GENERIC
        bool
 
+config MACH_GENERIC_CORE
+       bool
+
 config MACH_INGENIC
        bool
        select SYS_SUPPORTS_32BIT_KERNEL
@@ -148,6 +152,7 @@ config MIPS_GENERIC_KERNEL
        select DMA_NONCOHERENT
        select HAVE_PCI
        select IRQ_MIPS_CPU
+       select MACH_GENERIC_CORE
        select MIPS_AUTO_PFN_OFFSET
        select MIPS_CPU_SCACHE
        select MIPS_GIC
@@ -416,6 +421,7 @@ config MACH_INGENIC_SOC
        bool "Ingenic SoC based machines"
        select MIPS_GENERIC
        select MACH_INGENIC
+       select MACH_GENERIC_CORE
        select SYS_SUPPORTS_ZBOOT_UART16550
        select CPU_SUPPORTS_CPUFREQ
        select MIPS_EXTERNAL_TIMER
@@ -569,6 +575,59 @@ config MACH_PIC32
          Microchip PIC32 is a family of general-purpose 32 bit MIPS core
          microcontrollers.
 
+config MACH_EYEQ5
+       bool "Mobileye EyeQ5 SoC"
+       select MACH_GENERIC_CORE
+       select ARM_AMBA
+       select PHYSICAL_START_BOOL
+       select ARCH_SPARSEMEM_DEFAULT if 64BIT
+       select BOOT_RAW
+       select BUILTIN_DTB
+       select CEVT_R4K
+       select CLKSRC_MIPS_GIC
+       select COMMON_CLK
+       select CPU_MIPSR2_IRQ_EI
+       select CPU_MIPSR2_IRQ_VI
+       select CSRC_R4K
+       select DMA_NONCOHERENT
+       select HAVE_PCI
+       select IRQ_MIPS_CPU
+       select MIPS_AUTO_PFN_OFFSET
+       select MIPS_CPU_SCACHE
+       select MIPS_GIC
+       select MIPS_L1_CACHE_SHIFT_7
+       select PCI_DRIVERS_GENERIC
+       select SMP_UP if SMP
+       select SWAP_IO_SPACE
+       select SYS_HAS_CPU_MIPS64_R6
+       select SYS_SUPPORTS_64BIT_KERNEL
+       select SYS_SUPPORTS_HIGHMEM
+       select SYS_SUPPORTS_LITTLE_ENDIAN
+       select SYS_SUPPORTS_MIPS_CPS
+       select SYS_SUPPORTS_RELOCATABLE
+       select SYS_SUPPORTS_ZBOOT
+       select UHI_BOOT
+       select USB_EHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+       select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+       select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+       select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+       select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
+       select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+       select USE_OF
+       help
+         Select this to build a kernel supporting EyeQ5 SoC from Mobileye.
+
+config FIT_IMAGE_FDT_EPM5
+       bool "Include FDT for Mobileye EyeQ5 development platforms"
+       depends on MACH_EYEQ5
+       default n
+       help
+         Enable this to include the FDT for the EyeQ5 development platforms
+         from Mobileye in the FIT kernel image.
+         This requires u-boot on the platform.
+
 config MACH_NINTENDO64
        bool "Nintendo 64 console"
        select CEVT_R4K
@@ -602,6 +661,7 @@ config RALINK
 config MACH_REALTEK_RTL
        bool "Realtek RTL838x/RTL839x based machines"
        select MIPS_GENERIC
+       select MACH_GENERIC_CORE
        select DMA_NONCOHERENT
        select IRQ_MIPS_CPU
        select CSRC_R4K
@@ -1253,6 +1313,7 @@ config CPU_LOONGSON64
        select CPU_SUPPORTS_HIGHMEM
        select CPU_SUPPORTS_HUGEPAGES
        select CPU_SUPPORTS_MSA
+       select CPU_SUPPORTS_VZ
        select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT
        select CPU_MIPSR2_IRQ_VI
        select DMA_NONCOHERENT
@@ -1264,7 +1325,6 @@ config CPU_LOONGSON64
        select MIPS_FP_SUPPORT
        select GPIOLIB
        select SWIOTLB
-       select HAVE_KVM
        help
          The Loongson GSx64(GS264/GS464/GS464E/GS464V) series of processor
          cores implements the MIPS64R2 instruction set with many extensions,
@@ -1272,44 +1332,6 @@ config CPU_LOONGSON64
          3B1000, 3B1500, 3A2000, 3A3000 and 3A4000) processors. However, old
          Loongson-2E/2F is not covered here and will be removed in future.
 
-config LOONGSON3_ENHANCEMENT
-       bool "New Loongson-3 CPU Enhancements"
-       default n
-       depends on CPU_LOONGSON64
-       help
-         New Loongson-3 cores (since Loongson-3A R2, as opposed to Loongson-3A
-         R1, Loongson-3B R1 and Loongson-3B R2) has many enhancements, such as
-         FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPr2 ASE, User
-         Local register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer),
-         Fast TLB refill support, etc.
-
-         This option enable those enhancements which are not probed at run
-         time. If you want a generic kernel to run on all Loongson 3 machines,
-         please say 'N' here. If you want a high-performance kernel to run on
-         new Loongson-3 machines only, please say 'Y' here.
-
-config CPU_LOONGSON3_WORKAROUNDS
-       bool "Loongson-3 LLSC Workarounds"
-       default y if SMP
-       depends on CPU_LOONGSON64
-       help
-         Loongson-3 processors have the llsc issues which require workarounds.
-         Without workarounds the system may hang unexpectedly.
-
-         Say Y, unless you know what you are doing.
-
-config CPU_LOONGSON3_CPUCFG_EMULATION
-       bool "Emulate the CPUCFG instruction on older Loongson cores"
-       default y
-       depends on CPU_LOONGSON64
-       help
-         Loongson-3A R4 and newer have the CPUCFG instruction available for
-         userland to query CPU capabilities, much like CPUID on x86. This
-         option provides emulation of the instruction on older Loongson
-         cores, back to Loongson-3A1000.
-
-         If unsure, please say Y.
-
 config CPU_LOONGSON2E
        bool "Loongson 2E"
        depends on SYS_HAS_CPU_LOONGSON2E
@@ -1377,7 +1399,6 @@ config CPU_MIPS32_R2
        select CPU_SUPPORTS_32BIT_KERNEL
        select CPU_SUPPORTS_HIGHMEM
        select CPU_SUPPORTS_MSA
-       select HAVE_KVM
        help
          Choose this option to build a kernel for release 2 or later of the
          MIPS32 architecture.  Most modern embedded systems with a 32-bit
@@ -1392,7 +1413,7 @@ config CPU_MIPS32_R5
        select CPU_SUPPORTS_32BIT_KERNEL
        select CPU_SUPPORTS_HIGHMEM
        select CPU_SUPPORTS_MSA
-       select HAVE_KVM
+       select CPU_SUPPORTS_VZ
        select MIPS_O32_FP64_SUPPORT
        help
          Choose this option to build a kernel for release 5 or later of the
@@ -1408,7 +1429,7 @@ config CPU_MIPS32_R6
        select CPU_SUPPORTS_32BIT_KERNEL
        select CPU_SUPPORTS_HIGHMEM
        select CPU_SUPPORTS_MSA
-       select HAVE_KVM
+       select CPU_SUPPORTS_VZ
        select MIPS_O32_FP64_SUPPORT
        help
          Choose this option to build a kernel for release 6 or later of the
@@ -1444,7 +1465,6 @@ config CPU_MIPS64_R2
        select CPU_SUPPORTS_HIGHMEM
        select CPU_SUPPORTS_HUGEPAGES
        select CPU_SUPPORTS_MSA
-       select HAVE_KVM
        help
          Choose this option to build a kernel for release 2 or later of the
          MIPS64 architecture.  Many modern embedded systems with a 64-bit
@@ -1462,7 +1482,7 @@ config CPU_MIPS64_R5
        select CPU_SUPPORTS_HUGEPAGES
        select CPU_SUPPORTS_MSA
        select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32
-       select HAVE_KVM
+       select CPU_SUPPORTS_VZ
        help
          Choose this option to build a kernel for release 5 or later of the
          MIPS64 architecture.  This is a intermediate MIPS architecture
@@ -1480,7 +1500,7 @@ config CPU_MIPS64_R6
        select CPU_SUPPORTS_HUGEPAGES
        select CPU_SUPPORTS_MSA
        select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32
-       select HAVE_KVM
+       select CPU_SUPPORTS_VZ
        help
          Choose this option to build a kernel for release 6 or later of the
          MIPS64 architecture.  New MIPS processors, starting with the Warrior
@@ -1495,9 +1515,9 @@ config CPU_P5600
        select CPU_SUPPORTS_HIGHMEM
        select CPU_SUPPORTS_MSA
        select CPU_SUPPORTS_CPUFREQ
+       select CPU_SUPPORTS_VZ
        select CPU_MIPSR2_IRQ_VI
        select CPU_MIPSR2_IRQ_EI
-       select HAVE_KVM
        select MIPS_O32_FP64_SUPPORT
        help
          Choose this option to build a kernel for MIPS Warrior P5600 CPU.
@@ -1619,7 +1639,7 @@ config CPU_CAVIUM_OCTEON
        select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
        select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
        select MIPS_L1_CACHE_SHIFT_7
-       select HAVE_KVM
+       select CPU_SUPPORTS_VZ
        help
          The Cavium Octeon processor is a highly integrated chip containing
          many ethernet hardware widgets for networking tasks. The processor
@@ -1649,6 +1669,44 @@ config CPU_BMIPS
 
 endchoice
 
+config LOONGSON3_ENHANCEMENT
+       bool "New Loongson-3 CPU Enhancements"
+       default n
+       depends on CPU_LOONGSON64
+       help
+         New Loongson-3 cores (since Loongson-3A R2, as opposed to Loongson-3A
+         R1, Loongson-3B R1 and Loongson-3B R2) have many enhancements, such as
+         FTLB, L1-VCache, EI/DI/Wait/Prefetch instruction, DSP/DSPr2 ASE, User
+         Local register, Read-Inhibit/Execute-Inhibit, SFB (Store Fill Buffer),
+         Fast TLB refill support, etc.
+
+         This option enables those enhancements which are not probed at run
+         time. If you want a generic kernel to run on all Loongson 3 machines,
+         please say 'N' here. If you want a high-performance kernel to run on
+         new Loongson-3 machines only, please say 'Y' here.
+
+config CPU_LOONGSON3_WORKAROUNDS
+       bool "Loongson-3 LLSC Workarounds"
+       default y if SMP
+       depends on CPU_LOONGSON64
+       help
+         Loongson-3 processors have LL/SC issues which require workarounds.
+         Without workarounds the system may hang unexpectedly.
+
+         Say Y, unless you know what you are doing.
+
+config CPU_LOONGSON3_CPUCFG_EMULATION
+       bool "Emulate the CPUCFG instruction on older Loongson cores"
+       default y
+       depends on CPU_LOONGSON64
+       help
+         Loongson-3A R4 and newer have the CPUCFG instruction available for
+         userland to query CPU capabilities, much like CPUID on x86. This
+         option provides emulation of the instruction on older Loongson
+         cores, back to Loongson-3A1000.
+
+         If unsure, please say Y.
+
 config CPU_MIPS32_3_5_FEATURES
        bool "MIPS32 Release 3.5 Features"
        depends on SYS_HAS_CPU_MIPS32_R3_5
@@ -1974,6 +2032,8 @@ config CPU_SUPPORTS_ADDRWINCFG
 config CPU_SUPPORTS_HUGEPAGES
        bool
        depends on !(32BIT && (PHYS_ADDR_T_64BIT || EVA))
+config CPU_SUPPORTS_VZ
+       bool
 config MIPS_PGD_C0_CONTEXT
        bool
        depends on 64BIT
@@ -2123,7 +2183,8 @@ config CPU_R4K_CACHE_TLB
 config MIPS_MT_SMP
        bool "MIPS MT SMP support (1 TC on each available VPE)"
        default y
-       depends on SYS_SUPPORTS_MULTITHREADING && !CPU_MIPSR6 && !CPU_MICROMIPS
+       depends on TARGET_ISA_REV > 0 && TARGET_ISA_REV < 6
+       depends on SYS_SUPPORTS_MULTITHREADING && !CPU_MICROMIPS
        select CPU_MIPSR2_IRQ_VI
        select CPU_MIPSR2_IRQ_EI
        select SYNC_R4K
index f49807e1f19bc56ac2fd536aef4f4041d3bcad7a..80aecba2489223e31cfd78fd85c9bc924ed82a13 100644 (file)
@@ -148,10 +148,10 @@ cflags-y += $(call cc-option,-Wa$(comma)-mno-fix-loongson3-llsc,)
 #
 # CPU-dependent compiler/assembler options for optimization.
 #
-cflags-$(CONFIG_CPU_R3000)     += -march=r3000
-cflags-$(CONFIG_CPU_R4300)     += -march=r4300 -Wa,--trap
-cflags-$(CONFIG_CPU_R4X00)     += -march=r4600 -Wa,--trap
-cflags-$(CONFIG_CPU_TX49XX)    += -march=r4600 -Wa,--trap
+cflags-$(CONFIG_CPU_R3000)     += $(call cc-option,-march=r3000,-march=mips1)
+cflags-$(CONFIG_CPU_R4300)     += $(call cc-option,-march=r4300,-march=mips3) -Wa,--trap
+cflags-$(CONFIG_CPU_R4X00)     += $(call cc-option,-march=r4600,-march=mips3) -Wa,--trap
+cflags-$(CONFIG_CPU_TX49XX)    += $(call cc-option,-march=r4600,-march=mips3) -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS32_R1) += -march=mips32 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS32_R2) += -march=mips32r2 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS32_R5) += -march=mips32r5 -Wa,--trap -modd-spreg
@@ -160,37 +160,35 @@ cflags-$(CONFIG_CPU_MIPS64_R1)    += -march=mips64 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS64_R5) += -march=mips64r5 -Wa,--trap
 cflags-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,--trap
-cflags-$(CONFIG_CPU_P5600)     += -march=p5600 -Wa,--trap -modd-spreg
-cflags-$(CONFIG_CPU_R5000)     += -march=r5000 -Wa,--trap
-cflags-$(CONFIG_CPU_R5500)     += $(call cc-option,-march=r5500,-march=r5000) \
+cflags-$(CONFIG_CPU_P5600)     += $(call cc-option,-march=p5600,-march=mips32r5) \
+                       -Wa,--trap -modd-spreg
+cflags-$(CONFIG_CPU_R5000)     += $(call cc-option,-march=r5000,-march=mips4) \
                        -Wa,--trap
-cflags-$(CONFIG_CPU_NEVADA)    += $(call cc-option,-march=rm5200,-march=r5000) \
+cflags-$(CONFIG_CPU_R5500)     += $(call cc-option,-march=r5500,-march=mips4) \
                        -Wa,--trap
-cflags-$(CONFIG_CPU_RM7000)    += $(call cc-option,-march=rm7000,-march=r5000) \
+cflags-$(CONFIG_CPU_NEVADA)    += $(call cc-option,-march=rm5200,-march=mips4) \
                        -Wa,--trap
-cflags-$(CONFIG_CPU_SB1)       += $(call cc-option,-march=sb1,-march=r5000) \
+cflags-$(CONFIG_CPU_RM7000)    += $(call cc-option,-march=rm7000,-march=mips4) \
+                       -Wa,--trap
+cflags-$(CONFIG_CPU_SB1)       += $(call cc-option,-march=sb1,-march=mips64r1) \
                        -Wa,--trap
 cflags-$(CONFIG_CPU_SB1)       += $(call cc-option,-mno-mdmx)
 cflags-$(CONFIG_CPU_SB1)       += $(call cc-option,-mno-mips3d)
-cflags-$(CONFIG_CPU_R10000)    += $(call cc-option,-march=r10000,-march=r8000) \
+cflags-$(CONFIG_CPU_R10000)    += $(call cc-option,-march=r10000,-march=mips4) \
                        -Wa,--trap
-cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += $(call cc-option,-march=octeon) -Wa,--trap
-ifeq (,$(findstring march=octeon, $(cflags-$(CONFIG_CPU_CAVIUM_OCTEON))))
-cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += -Wa,-march=octeon
-endif
+cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += -march=octeon -Wa,--trap
 cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
-cflags-$(CONFIG_CPU_BMIPS)     += -march=mips32 -Wa,-mips32 -Wa,--trap
+cflags-$(CONFIG_CPU_BMIPS)     += -march=mips32 -Wa,--trap
 
-cflags-$(CONFIG_CPU_LOONGSON2E) += -march=loongson2e -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON2F) += -march=loongson2f -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2E) += \
+                       $(call cc-option,-march=loongson2e,-march=mips3) -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2F) += \
+                       $(call cc-option,-march=loongson2f,-march=mips3) -Wa,--trap
 # Some -march= flags enable MMI instructions, and GCC complains about that
 # support being enabled alongside -msoft-float. Thus explicitly disable MMI.
 cflags-$(CONFIG_CPU_LOONGSON2EF) += $(call cc-option,-mno-loongson-mmi)
-ifdef CONFIG_CPU_LOONGSON64
-cflags-$(CONFIG_CPU_LOONGSON64)        += -Wa,--trap
-cflags-$(CONFIG_CC_IS_GCC) += -march=loongson3a
-cflags-$(CONFIG_CC_IS_CLANG) += -march=mips64r2
-endif
+cflags-$(CONFIG_CPU_LOONGSON64)        += \
+                       $(call cc-option,-march=loongson3a,-march=mips64r2) -Wa,--trap
 cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-mno-loongson-mmi)
 
 cflags-$(CONFIG_CPU_R4000_WORKAROUNDS) += $(call cc-option,-mfix-r4000,)
@@ -299,7 +297,7 @@ drivers-$(CONFIG_PCI)               += arch/mips/pci/
 ifdef CONFIG_64BIT
   ifndef KBUILD_SYM32
     ifeq ($(shell expr $(load-y) \< 0xffffffff80000000), 0)
-      KBUILD_SYM32 = y
+      KBUILD_SYM32 = $(call cc-option-yn, -msym32)
     endif
   endif
 
index c01be8c452719210e60098a2f1120816e2bf680c..6c8996e20a7ddc4ddaf924fc43a98c882c4110f7 100644 (file)
@@ -771,7 +771,7 @@ static int __init alchemy_clk_init_fgens(int ctype)
        }
        id.flags = CLK_SET_RATE_PARENT | CLK_GET_RATE_NOCACHE;
 
-       a = kzalloc((sizeof(*a)) * 6, GFP_KERNEL);
+       a = kcalloc(6, sizeof(*a), GFP_KERNEL);
        if (!a)
                return -ENOMEM;
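
The kzalloc()-to-kcalloc() conversion is the standard overflow-safety cleanup: kcalloc(n, size, flags) fails instead of wrapping when n * size overflows. A userspace analogue of the check it performs:

#include <stdlib.h>
#include <stdint.h>

static void *checked_calloc(size_t n, size_t size)
{
	/* Refuse requests where n * size would wrap, as kcalloc() does. */
	if (size && n > SIZE_MAX / size)
		return NULL;
	return calloc(n, size);		/* zeroed, like kzalloc()/kcalloc() */
}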
 
index 09dcd2c561d9afed5688ebb953e4b8678869c2ff..db618e72a0c42a9da610a881eeb851c1b357c745 100644 (file)
@@ -8,6 +8,8 @@
 
 #include <asm/addrspace.h>
 
+#include "decompress.h"
+
 #if defined(CONFIG_MACH_LOONGSON64) || defined(CONFIG_MIPS_MALTA)
 #define UART_BASE 0x1fd003f8
 #define PORT(offset) (CKSEG1ADDR(UART_BASE) + (offset))
index 8ec63011e7dcf0030fe8e429d7bd8c10a5440df9..003967c084b35066634505a1d514004d862807e5 100644 (file)
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <asm/mach-au1x00/au1000.h>
 
+#include "decompress.h"
+
 void putc(char c)
 {
        alchemy_uart_putchar(AU1000_UART0_PHYS_ADDR, c);
index a8a0a32e05d1b3139f6b108556ae75d6694d21ec..5fa3b99453338e3222350484e4d92e7dd1acaa40 100644 (file)
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <asm/setup.h>
 
+#include "decompress.h"
+
 void putc(char c)
 {
        prom_putchar(c);
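
These decompressor stubs each gain an include of a local "decompress.h", whose contents are not shown in this diff; presumably it carries shared prototypes for putc() and friends, the usual fix for missing-prototype warnings. A hedged sketch of the pattern:

/* A plausible decompress.h (the real header's contents aren't shown): */
void putc(char c);

/*
 * With that prototype in scope, a platform definition whose signature
 * drifts no longer compiles silently; each stub is checked against the
 * shared declaration.
 */
void putc(char c)
{
	extern void prom_putchar(char c);	/* platform console hook */

	prom_putchar(c);
}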
index 928f38a79dff985d8d1ae22f78e3aeb2e9a2ebef..efff87cb33a9e7d58c7cc1f99cc1a63103bd4c11 100644 (file)
@@ -8,6 +8,7 @@ subdir-$(CONFIG_LANTIQ)                 += lantiq
 subdir-$(CONFIG_MACH_LOONGSON64)       += loongson
 subdir-$(CONFIG_SOC_VCOREIII)          += mscc
 subdir-$(CONFIG_MIPS_MALTA)            += mti
+subdir-$(CONFIG_MACH_EYEQ5)            += mobileye
 subdir-$(CONFIG_LEGACY_BOARD_SEAD3)    += mti
 subdir-$(CONFIG_FIT_IMAGE_FDT_NI169445)        += ni
 subdir-$(CONFIG_MACH_PIC32)            += pic32
diff --git a/arch/mips/boot/dts/mobileye/Makefile b/arch/mips/boot/dts/mobileye/Makefile
new file mode 100644 (file)
index 0000000..01c01c3
--- /dev/null
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright 2023 Mobileye Vision Technologies Ltd.
+
+dtb-$(CONFIG_MACH_EYEQ5)               += eyeq5-epm5.dtb
diff --git a/arch/mips/boot/dts/mobileye/eyeq5-epm5.dts b/arch/mips/boot/dts/mobileye/eyeq5-epm5.dts
new file mode 100644 (file)
index 0000000..6898b2d
--- /dev/null
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright 2023 Mobileye Vision Technologies Ltd.
+ */
+
+/dts-v1/;
+
+#include "eyeq5.dtsi"
+
+/ {
+       compatible = "mobileye,eyeq5-epm5", "mobileye,eyeq5";
+       model = "Mobile EyeQ5 MP5 Evaluation board";
+
+       chosen {
+               stdout-path = "serial2:115200n8";
+       };
+
+       memory@0 {
+               device_type = "memory";
+               reg = <0x0 0x40000000 0x0 0x02000000>,
+                     <0x8 0x02000000 0x0 0x7E000000>;
+       };
+};
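
The memory@0 node is decoded with the parent's #address-cells = <2> and #size-cells = <2> (set in eyeq5.dtsi below), so each reg entry is four 32-bit cells. A hedged sketch of the decoding:

#include <stdint.h>

struct range64 { uint64_t addr, size; };

/* Each reg entry is <addr_hi addr_lo size_hi size_lo>. */
static struct range64 decode_reg(const uint32_t c[4])
{
	struct range64 r = {
		.addr = ((uint64_t)c[0] << 32) | c[1],
		.size = ((uint64_t)c[2] << 32) | c[3],
	};
	return r; /* <0x8 0x02000000 0x0 0x7E000000> -> 0x802000000, ~2 GiB */
}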
diff --git a/arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi b/arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi
new file mode 100644 (file)
index 0000000..78f5533
--- /dev/null
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright 2023 Mobileye Vision Technologies Ltd.
+ */
+
+/ {
+       /* Fixed clock */
+       pll_cpu: pll-cpu {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <1500000000>;
+       };
+
+       pll_vdi: pll-vdi {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <1280000000>;
+       };
+
+       pll_per: pll-per {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <2000000000>;
+       };
+
+       pll_ddr0: pll-ddr0 {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <1857210000>;
+       };
+
+       pll_ddr1: pll-ddr1 {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <1857210000>;
+       };
+
+/* PLL_CPU derivatives */
+       occ_cpu: occ-cpu {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_cpu>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       si_css0_ref_clk: si-css0-ref-clk { /* gate ClkRstGen_si_css0_ref */
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_cpu>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       cpc_clk: cpc-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&si_css0_ref_clk>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       core0_clk: core0-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&si_css0_ref_clk>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       core1_clk: core1-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&si_css0_ref_clk>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       core2_clk: core2-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&si_css0_ref_clk>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       core3_clk: core3-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&si_css0_ref_clk>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       cm_clk: cm-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&si_css0_ref_clk>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       mem_clk: mem-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&si_css0_ref_clk>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       occ_isram: occ-isram {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_cpu>;
+               #clock-cells = <0>;
+               clock-div = <2>;
+               clock-mult = <1>;
+       };
+       isram_clk: isram-clk { /* gate ClkRstGen_isram */
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_isram>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       occ_dbu: occ-dbu {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_cpu>;
+               #clock-cells = <0>;
+               clock-div = <10>;
+               clock-mult = <1>;
+       };
+       si_dbu_tp_pclk: si-dbu-tp-pclk { /* gate ClkRstGen_dbu */
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_dbu>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+/* PLL_VDI derivatives */
+       occ_vdi: occ-vdi {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_vdi>;
+               #clock-cells = <0>;
+               clock-div = <2>;
+               clock-mult = <1>;
+       };
+       vdi_clk: vdi-clk { /* gate ClkRstGen_vdi */
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_vdi>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       occ_can_ser: occ-can-ser {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_vdi>;
+               #clock-cells = <0>;
+               clock-div = <16>;
+               clock-mult = <1>;
+       };
+       can_ser_clk: can-ser-clk { /* gate ClkRstGen_can_ser */
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_can_ser>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       i2c_ser_clk: i2c-ser-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_vdi>;
+               #clock-cells = <0>;
+               clock-div = <20>;
+               clock-mult = <1>;
+       };
+/* PLL_PER derivatives */
+       occ_periph: occ-periph {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_per>;
+               #clock-cells = <0>;
+               clock-div = <16>;
+               clock-mult = <1>;
+       };
+       periph_clk: periph-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_periph>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       can_clk: can-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_periph>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       spi_clk: spi-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_periph>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       uart_clk: uart-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_periph>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+       };
+       i2c_clk: i2c-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_periph>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+               clock-output-names = "i2c_clk";
+       };
+       timer_clk: timer-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_periph>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+               clock-output-names = "timer_clk";
+       };
+       gpio_clk: gpio-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_periph>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+               clock-output-names = "gpio_clk";
+       };
+       emmc_sys_clk: emmc-sys-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_per>;
+               #clock-cells = <0>;
+               clock-div = <10>;
+               clock-mult = <1>;
+               clock-output-names = "emmc_sys_clk";
+       };
+       ccf_ctrl_clk: ccf-ctrl-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_per>;
+               #clock-cells = <0>;
+               clock-div = <4>;
+               clock-mult = <1>;
+               clock-output-names = "ccf_ctrl_clk";
+       };
+       occ_mjpeg_core: occ-mjpeg-core {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_per>;
+               #clock-cells = <0>;
+               clock-div = <2>;
+               clock-mult = <1>;
+               clock-output-names = "occ_mjpeg_core";
+       };
+       hsm_clk: hsm-clk { /* gate ClkRstGen_hsm */
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_mjpeg_core>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+               clock-output-names = "hsm_clk";
+       };
+       mjpeg_core_clk: mjpeg-core-clk { /* gate ClkRstGen_mjpeg_gen */
+               compatible = "fixed-factor-clock";
+               clocks = <&occ_mjpeg_core>;
+               #clock-cells = <0>;
+               clock-div = <1>;
+               clock-mult = <1>;
+               clock-output-names = "mjpeg_core_clk";
+       };
+       fcmu_a_clk: fcmu-a-clk {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_per>;
+               #clock-cells = <0>;
+               clock-div = <20>;
+               clock-mult = <1>;
+               clock-output-names = "fcmu_a_clk";
+       };
+       occ_pci_sys: occ-pci-sys {
+               compatible = "fixed-factor-clock";
+               clocks = <&pll_per>;
+               #clock-cells = <0>;
+               clock-div = <8>;
+               clock-mult = <1>;
+               clock-output-names = "occ_pci_sys";
+       };
+       pclk: pclk {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <250000000>;  /* 250MHz */
+       };
+       tsu_clk: tsu-clk {
+               compatible = "fixed-clock";
+               #clock-cells = <0>;
+               clock-frequency = <125000000>;  /* 125MHz */
+       };
+};
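
Each fixed-factor-clock above encodes child rate = parent rate * mult / div; for example, occ_isram is pll_cpu / 2 and occ_dbu is pll_cpu / 10. A small sketch of the arithmetic:

#include <stdio.h>

/* child = parent * mult / div (divide first; mult is 1 throughout here). */
static unsigned long fixed_factor_hz(unsigned long parent, unsigned int mult,
				     unsigned int div)
{
	return parent / div * mult;
}

int main(void)
{
	unsigned long pll_cpu = 1500000000UL;

	printf("occ_isram: %lu Hz\n", fixed_factor_hz(pll_cpu, 1, 2));  /* 750 MHz */
	printf("occ_dbu:   %lu Hz\n", fixed_factor_hz(pll_cpu, 1, 10)); /* 150 MHz */
	return 0;
}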
diff --git a/arch/mips/boot/dts/mobileye/eyeq5.dtsi b/arch/mips/boot/dts/mobileye/eyeq5.dtsi
new file mode 100644 (file)
index 0000000..6cc5980
--- /dev/null
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright 2023 Mobileye Vision Technologies Ltd.
+ */
+
+#include <dt-bindings/interrupt-controller/mips-gic.h>
+
+#include "eyeq5-fixed-clocks.dtsi"
+
+/ {
+       #address-cells = <2>;
+       #size-cells = <2>;
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               cpu@0 {
+                       device_type = "cpu";
+                       compatible = "img,i6500";
+                       reg = <0>;
+                       clocks = <&core0_clk>;
+               };
+       };
+
+       reserved-memory {
+               #address-cells = <2>;
+               #size-cells = <2>;
+               ranges;
+
+               /*
+                * These reserved memory regions are also defined in bootmanager
+                * for configuring inbound translation for BARs; don't change
+                * these without syncing with bootmanager.
+                */
+               shmem0_reserved: shmem@804000000 {
+                       reg = <0x8 0x04000000 0x0 0x1000000>;
+               };
+               shmem1_reserved: shmem@805000000 {
+                       reg = <0x8 0x05000000 0x0 0x1000000>;
+               };
+               pci0_msi_reserved: pci0-msi@806000000 {
+                       reg = <0x8 0x06000000 0x0 0x100000>;
+               };
+               pci1_msi_reserved: pci1-msi@806100000 {
+                       reg = <0x8 0x06100000 0x0 0x100000>;
+               };
+
+               mini_coredump0_reserved: mini-coredump0@806200000 {
+                       reg = <0x8 0x06200000 0x0 0x100000>;
+               };
+               mhm_reserved_0: the-mhm-reserved-0@0 {
+                       reg = <0x8 0x00000000 0x0 0x0000800>;
+               };
+       };
+
+       aliases {
+               serial0 = &uart0;
+               serial1 = &uart1;
+               serial2 = &uart2;
+       };
+
+       cpu_intc: interrupt-controller {
+               compatible = "mti,cpu-interrupt-controller";
+               interrupt-controller;
+               #address-cells = <0>;
+               #interrupt-cells = <1>;
+       };
+
+       soc: soc {
+               #address-cells = <2>;
+               #size-cells = <2>;
+               ranges;
+               compatible = "simple-bus";
+
+               uart0: serial@800000 {
+                       compatible = "arm,pl011", "arm,primecell";
+                       reg = <0 0x800000 0x0 0x1000>;
+                       reg-io-width = <4>;
+                       interrupt-parent = <&gic>;
+                       interrupts = <GIC_SHARED 6 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks  = <&uart_clk>, <&occ_periph>;
+                       clock-names = "uartclk", "apb_pclk";
+               };
+
+               uart1: serial@900000 {
+                       compatible = "arm,pl011", "arm,primecell";
+                       reg = <0 0x900000 0x0 0x1000>;
+                       reg-io-width = <4>;
+                       interrupt-parent = <&gic>;
+                       interrupts = <GIC_SHARED 6 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks  = <&uart_clk>, <&occ_periph>;
+                       clock-names = "uartclk", "apb_pclk";
+               };
+
+               uart2: serial@a00000 {
+                       compatible = "arm,pl011", "arm,primecell";
+                       reg = <0 0xa00000 0x0 0x1000>;
+                       reg-io-width = <4>;
+                       interrupt-parent = <&gic>;
+                       interrupts = <GIC_SHARED 6 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks  = <&uart_clk>, <&occ_periph>;
+                       clock-names = "uartclk", "apb_pclk";
+               };
+
+               gic: interrupt-controller@140000 {
+                       compatible = "mti,gic";
+                       reg = <0x0 0x140000 0x0 0x20000>;
+                       interrupt-controller;
+                       #interrupt-cells = <3>;
+
+                       /*
+                        * Declare the interrupt-parent even though the mti,gic
+                        * binding doesn't require it, such that the kernel can
+                        * figure out that cpu_intc is the root interrupt
+                        * controller & should be probed first.
+                        */
+                       interrupt-parent = <&cpu_intc>;
+
+                       timer {
+                               compatible = "mti,gic-timer";
+                               interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>;
+                               clocks = <&core0_clk>;
+                       };
+               };
+       };
+};
index 35a10258f2357bba8f95834c8b84092c73871aaf..6e95e6f19a6a86da90c794a766ba08b359ce1537 100644 (file)
                        compatible = "ns16550a";
                        reg = <0xc00 0x100>;
 
+                       reg-io-width = <4>;
+                       reg-shift = <2>;
+
                        clocks = <&sysc MT7621_CLK_UART1>;
 
                        interrupt-parent = <&gic>;
                        interrupts = <GIC_SHARED 26 IRQ_TYPE_LEVEL_HIGH>;
 
+                       no-loopback-test;
+
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&uart1_pins>;
+               };
+
+               serial1: serial@d00 {
+                       compatible = "ns16550a";
+                       reg = <0xd00 0x100>;
+
+                       reg-io-width = <4>;
                        reg-shift = <2>;
+
+                       clocks = <&sysc MT7621_CLK_UART2>;
+
+                       interrupt-parent = <&gic>;
+                       interrupts = <GIC_SHARED 27 IRQ_TYPE_LEVEL_HIGH>;
+
+                       no-loopback-test;
+
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&uart2_pins>;
+
+                       status = "disabled";
+               };
+
+               serial2: serial@e00 {
+                       compatible = "ns16550a";
+                       reg = <0xe00 0x100>;
+
                        reg-io-width = <4>;
+                       reg-shift = <2>;
+
+                       clocks = <&sysc MT7621_CLK_UART3>;
+
+                       interrupt-parent = <&gic>;
+                       interrupts = <GIC_SHARED 28 IRQ_TYPE_LEVEL_HIGH>;
+
                        no-loopback-test;
+
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&uart3_pins>;
+
+                       status = "disabled";
                };
 
                spi0: spi@b00 {
                       0x1e1d0700 0x0100>;
                reg-names = "mac", "ippc";
 
+               #address-cells = <1>;
+               #size-cells = <0>;
+
                clocks = <&sysc MT7621_CLK_XTAL>;
                clock-names = "sys_ck";
 
diff --git a/arch/mips/configs/eyeq5_defconfig b/arch/mips/configs/eyeq5_defconfig
new file mode 100644 (file)
index 0000000..c35c29a
--- /dev/null
@@ -0,0 +1,108 @@
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_TASKSTATS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_MEMCG=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_MACH_EYEQ5=y
+CONFIG_FIT_IMAGE_FDT_EPM5=y
+CONFIG_PAGE_SIZE_16KB=y
+CONFIG_MIPS_CPS=y
+CONFIG_CPU_HAS_MSA=y
+CONFIG_NR_CPUS=16
+CONFIG_MIPS_RAW_APPENDED_DTB=y
+CONFIG_JUMP_LABEL=y
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_TRIM_UNUSED_KSYMS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SPARSEMEM_MANUAL=y
+CONFIG_USERFAULTFD=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_NETFILTER=y
+CONFIG_CAN=y
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_PCI_DEBUG=y
+CONFIG_PCI_ENDPOINT=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_BLOCK=y
+CONFIG_SCSI=y
+CONFIG_NETDEVICES=y
+CONFIG_MACVLAN=y
+CONFIG_IPVLAN=y
+CONFIG_MACB=y
+CONFIG_MARVELL_PHY=y
+CONFIG_MICREL_PHY=y
+CONFIG_CAN_M_CAN=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_HW_RANDOM=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_PINCTRL=y
+CONFIG_MFD_SYSCON=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_ITE=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_REDRAGON=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_RESET_CONTROLLER=y
+# CONFIG_NVMEM is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_FS_ENCRYPTION=y
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_UBIFS_FS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
+CONFIG_CRYPTO_CRC32_MIPS=y
+CONFIG_FRAME_WARN=1024
+CONFIG_DEBUG_FS=y
+# CONFIG_RCU_TRACE is not set
+# CONFIG_FTRACE is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="earlycon"
index e37a59bae0a62072784540e5adee4298dedb9fc1..56011d738441fc7f2bb6722e86b9edeca9584e20 100644 (file)
@@ -4,9 +4,9 @@
 # Author: Paul Burton <paul.burton@mips.com>
 #
 
-obj-y += init.o
-obj-y += irq.o
-obj-y += proc.o
+obj-$(CONFIG_MACH_GENERIC_CORE) += init.o
+obj-$(CONFIG_MACH_GENERIC_CORE) += irq.o
+obj-$(CONFIG_MACH_GENERIC_CORE) += proc.o
 
 obj-$(CONFIG_YAMON_DT_SHIM)            += yamon-dt.o
 obj-$(CONFIG_LEGACY_BOARD_SEAD3)       += board-sead3.o
index 59a48c60a065ce31ea12d7bf6aa833cd4e448020..7e9ef01cb182b3dd71766d1bc43ded45d4d02b34 100644 (file)
  */
 #define KSEGX(a)               ((_ACAST32_(a)) & _ACAST32_(0xe0000000))
 
+/*
+ * Gives the size of each kernel segment
+ */
+#define CSEGX_SIZE             0x20000000
+
 /*
  * Returns the physical address of a CKSEGx / XKPHYS address
  */
index 067a635d3bc8f518279b37cbb262ee9546ecb501..18c2ae58cdf30f440a3d8e680986f38582075a7a 100644 (file)
  * Temporary until all gas have MT ASE support
  */
        .macro  DMT     reg=0
-       .word   0x41600bc1 | (\reg << 16)
+       insn_if_mips    0x41600bc1 | (\reg << 16)
+       insn32_if_mm    0x0000057C | (\reg << 21)
        .endm
 
        .macro  EMT     reg=0
-       .word   0x41600be1 | (\reg << 16)
+       insn_if_mips    0x41600be1 | (\reg << 16)
+       insn32_if_mm    0x0000257C | (\reg << 21)
        .endm
 
        .macro  DVPE    reg=0
-       .word   0x41600001 | (\reg << 16)
+       insn_if_mips    0x41600001 | (\reg << 16)
+       insn32_if_mm    0x0000157C | (\reg << 21)
        .endm
 
        .macro  EVPE    reg=0
-       .word   0x41600021 | (\reg << 16)
+       insn_if_mips    0x41600021 | (\reg << 16)
+       insn32_if_mm    0x0000357C | (\reg << 21)
        .endm
 
-       .macro  MFTR    rt=0, rd=0, u=0, sel=0
-        .word  0x41000000 | (\rt << 16) | (\rd << 11) | (\u << 5) | (\sel)
+       .macro  MFTR    rs=0, rt=0, u=0, sel=0
+       insn_if_mips    0x41000000 | (\rt << 16) | (\rs << 11) | (\u << 5) | (\sel)
+       insn32_if_mm    0x0000000E | (\rt << 21) | (\rs << 16) | (\u << 10) | (\sel << 4)
        .endm
 
-       .macro  MTTR    rt=0, rd=0, u=0, sel=0
-        .word  0x41800000 | (\rt << 16) | (\rd << 11) | (\u << 5) | (\sel)
+       .macro  MTTR    rt=0, rs=0, u=0, sel=0
+       insn_if_mips    0x41800000 | (\rt << 16) | (\rs << 11) | (\u << 5) | (\sel)
+       insn32_if_mm    0x00000006 | (\rt << 21) | (\rs << 16) | (\u << 10) | (\sel << 4)
        .endm
 
 #ifdef TOOLCHAIN_SUPPORTS_MSA
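
The classic-MIPS .word encodings are unchanged; what's new is that each MT ASE macro also gets a microMIPS encoding, emitted via insn32_if_mm when assembling for microMIPS. A sketch of the two DMT encodings, mirroring the constants above:

#include <stdint.h>

/* Classic MIPS encoding: rt in bits 20:16 of the 0x41600bc1 template. */
static uint32_t dmt_mips(unsigned int rt)
{
	return 0x41600bc1u | (rt << 16);
}

/* microMIPS32 encoding: rt in bits 25:21 of the 0x0000057c template. */
static uint32_t dmt_micromips(unsigned int rt)
{
	return 0x0000057cu | (rt << 21);
}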
diff --git a/arch/mips/include/asm/cachetype.h b/arch/mips/include/asm/cachetype.h
new file mode 100644 (file)
index 0000000..9f4ba2f
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MIPS_CACHETYPE_H
+#define __ASM_MIPS_CACHETYPE_H
+
+#include <asm/cpu-features.h>
+
+#define cpu_dcache_is_aliasing()       cpu_has_dc_aliases
+
+#endif
index c06dbf8ba937093d81b33cd14e00d9c74c944de6..81fa99084178e7215691306dad9287cfb6e6c40c 100644 (file)
@@ -63,7 +63,7 @@ struct mips_cdmm_driver {
  */
 phys_addr_t mips_cdmm_phys_base(void);
 
-extern struct bus_type mips_cdmm_bustype;
+extern const struct bus_type mips_cdmm_bustype;
 void __iomem *mips_cdmm_early_probe(unsigned int dev_type);
 
 #define to_mips_cdmm_device(d) container_of(d, struct mips_cdmm_device, dev)
index b247575c5e6992ad4629e1cc4100504b8f0814fa..f8783d339fb0d07dcc2ed01703af45799ce9c1c2 100644 (file)
@@ -49,6 +49,8 @@
 #define HIGHMEM_START          _AC(0x20000000, UL)
 #endif
 
+#define CKSEG0ADDR_OR_64BIT(x) CKSEG0ADDR(x)
+#define CKSEG1ADDR_OR_64BIT(x) CKSEG1ADDR(x)
 #endif /* CONFIG_32BIT */
 
 #ifdef CONFIG_64BIT
@@ -82,6 +84,8 @@
 #define TO_CAC(x)              (CAC_BASE   | ((x) & TO_PHYS_MASK))
 #define TO_UNCAC(x)            (UNCAC_BASE | ((x) & TO_PHYS_MASK))
 
+#define CKSEG0ADDR_OR_64BIT(x) TO_CAC(x)
+#define CKSEG1ADDR_OR_64BIT(x) TO_UNCAC(x)
 #endif /* CONFIG_64BIT */
 
 /*
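
CKSEG0ADDR_OR_64BIT()/CKSEG1ADDR_OR_64BIT() give callers one spelling for "cached/uncached kernel view of a physical address" on both kernel widths: the CKSEG windows on 32-bit, and TO_CAC()/TO_UNCAC() through XKPHYS on 64-bit. A hedged sketch of the 64-bit expansion, with illustrative constants that may differ per configuration:

#include <stdint.h>

/* Illustrative XKPHYS constants for the 64-bit case (cf. TO_CAC/TO_UNCAC). */
#define CAC_BASE	0x9800000000000000ull	/* cached, CCA 3 */
#define UNCAC_BASE	0x9000000000000000ull	/* uncached, CCA 2 */
#define TO_PHYS_MASK	0x07ffffffffffffffull

#define CKSEG0ADDR_OR_64BIT(x)	(CAC_BASE | ((uint64_t)(x) & TO_PHYS_MASK))
#define CKSEG1ADDR_OR_64BIT(x)	(UNCAC_BASE | ((uint64_t)(x) & TO_PHYS_MASK))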
index 23c67c0871b17c91969b0c04169cb90f38bfdd86..c2930a75b7e44b956b3ef63731d71b25be57628c 100644 (file)
@@ -22,16 +22,28 @@ extern void __iomem *mips_gcr_base;
 extern void __iomem *mips_cm_l2sync_base;
 
 /**
- * __mips_cm_phys_base - retrieve the physical base address of the CM
+ * mips_cm_phys_base - retrieve the physical base address of the CM
  *
  * This function returns the physical base address of the Coherence Manager
  * global control block, or 0 if no Coherence Manager is present. It provides
  * a default implementation which reads the CMGCRBase register where available,
  * and may be overridden by platforms which determine this address in a
- * different way by defining a function with the same prototype except for the
- * name mips_cm_phys_base (without underscores).
+ * different way by defining a function with the same prototype.
  */
-extern phys_addr_t __mips_cm_phys_base(void);
+extern phys_addr_t mips_cm_phys_base(void);
+
+/**
+ * mips_cm_l2sync_phys_base - retrieve the physical base address of the CM
+ *                            L2-sync region
+ *
+ * This function returns the physical base address of the Coherence Manager
+ * L2-cache only region. It provides a default implementation which reads the
+ * CMGCRL2OnlySyncBase register where available or returns a 4K region just
+ * behind the CM GCR base address. It may be overridden by platforms which
+ * determine this address in a different way by defining a function with the
+ * same prototype.
+ */
+extern phys_addr_t mips_cm_l2sync_phys_base(void);
 
 /*
  * mips_cm_is64 - determine CM register width
@@ -311,6 +323,7 @@ GCR_CX_ACCESSOR_RW(32, 0x018, other)
 /* GCR_Cx_RESET_BASE - Configure where powered up cores will fetch from */
 GCR_CX_ACCESSOR_RW(32, 0x020, reset_base)
 #define CM_GCR_Cx_RESET_BASE_BEVEXCBASE                GENMASK(31, 12)
+#define CM_GCR_Cx_RESET_BASE_MODE              BIT(1)
 
 /* GCR_Cx_ID - Identify the current core */
 GCR_CX_ACCESSOR_RO(32, 0x028, id)
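
The renamed hooks drop their leading underscores because, per the updated kernel-doc, a platform now overrides them simply by "defining a function with the same prototype". A weak default is the natural way to implement that; a hedged sketch, with a purely illustrative return value:

#include <stdint.h>

typedef uint64_t phys_addr_t;

/*
 * Generic default; a platform overrides it by providing a strong
 * definition with the same prototype.
 */
__attribute__((weak)) phys_addr_t mips_cm_phys_base(void)
{
	return 0x1fbf8000;	/* illustrative, not a real platform value */
}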
index b444523ecd500800cd2bf6ea5bc78c52fbaa6f88..28917f1582b311bec7d512a07b46df5788514500 100644 (file)
@@ -26,6 +26,6 @@ static inline void mips_mt_set_cpuoptions(void) { }
 #endif
 
 struct class;
-extern struct class *mt_class;
+extern const struct class mt_class;
 
 #endif /* __ASM_MIPS_MT_H */
index a8d67c2f4f7b33b94ec9835194bf51bbe265aba0..30e86861c206ce2a2d2aeaaf047ab6aa98c8df12 100644 (file)
@@ -189,19 +189,24 @@ static inline unsigned core_nvpes(void)
        return ((conf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
 }
 
+#define _ASM_SET_DVPE                                                  \
+       _ASM_MACRO_1R(dvpe, rt,                                         \
+                       _ASM_INSN_IF_MIPS(0x41600001 | __rt << 16)      \
+                       _ASM_INSN32_IF_MM(0x0000157C | __rt << 21))
+#define _ASM_UNSET_DVPE ".purgem dvpe\n\t"
+
 static inline unsigned int dvpe(void)
 {
        int res = 0;
 
        __asm__ __volatile__(
-       "       .set    push                                            \n"
-       "       .set    noreorder                                       \n"
-       "       .set    noat                                            \n"
-       "       .set    mips32r2                                        \n"
-       "       .word   0x41610001              # dvpe $1               \n"
-       "       move    %0, $1                                          \n"
-       "       ehb                                                     \n"
-       "       .set    pop                                             \n"
+       "       .set    push                                    \n"
+       "       .set    "MIPS_ISA_LEVEL"                        \n"
+       _ASM_SET_DVPE
+       "       dvpe    %0                                      \n"
+       "       ehb                                             \n"
+       _ASM_UNSET_DVPE
+       "       .set    pop                                     \n"
        : "=r" (res));
 
        instruction_hazard();
@@ -209,16 +214,22 @@ static inline unsigned int dvpe(void)
        return res;
 }
 
+#define _ASM_SET_EVPE                                                  \
+       _ASM_MACRO_1R(evpe, rt,                                 \
+                       _ASM_INSN_IF_MIPS(0x41600021 | __rt << 16)      \
+                       _ASM_INSN32_IF_MM(0x0000357C | __rt << 21))
+#define _ASM_UNSET_EVPE ".purgem evpe\n\t"
+
 static inline void __raw_evpe(void)
 {
        __asm__ __volatile__(
-       "       .set    push                                            \n"
-       "       .set    noreorder                                       \n"
-       "       .set    noat                                            \n"
-       "       .set    mips32r2                                        \n"
-       "       .word   0x41600021              # evpe                  \n"
-       "       ehb                                                     \n"
-       "       .set    pop                                             \n");
+       "       .set    push                                    \n"
+       "       .set    "MIPS_ISA_LEVEL"                        \n"
+       _ASM_SET_EVPE
+       "       evpe    $0                                      \n"
+       "       ehb                                             \n"
+       _ASM_UNSET_EVPE
+       "       .set    pop                                     \n");
 }
 
 /* Enable virtual processor execution if previous suggested it should be.
@@ -232,18 +243,24 @@ static inline void evpe(int previous)
                __raw_evpe();
 }
 
+#define _ASM_SET_DMT                                                   \
+       _ASM_MACRO_1R(dmt, rt,                                          \
+                       _ASM_INSN_IF_MIPS(0x41600bc1 | __rt << 16)      \
+                       _ASM_INSN32_IF_MM(0x0000057C | __rt << 21))
+#define _ASM_UNSET_DMT ".purgem dmt\n\t"
+
 static inline unsigned int dmt(void)
 {
        int res;
 
        __asm__ __volatile__(
-       "       .set    push                                            \n"
-       "       .set    mips32r2                                        \n"
-       "       .set    noat                                            \n"
-       "       .word   0x41610BC1                      # dmt $1        \n"
-       "       ehb                                                     \n"
-       "       move    %0, $1                                          \n"
-       "       .set    pop                                             \n"
+       "       .set    push                                    \n"
+       "       .set    "MIPS_ISA_LEVEL"                        \n"
+       _ASM_SET_DMT
+       "       dmt     %0                                      \n"
+       "       ehb                                             \n"
+       _ASM_UNSET_DMT
+       "       .set    pop                                     \n"
        : "=r" (res));
 
        instruction_hazard();
@@ -251,14 +268,21 @@ static inline unsigned int dmt(void)
        return res;
 }
 
+#define _ASM_SET_EMT                                                   \
+       _ASM_MACRO_1R(emt, rt,                                          \
+                       _ASM_INSN_IF_MIPS(0x41600be1 | __rt << 16)      \
+                       _ASM_INSN32_IF_MM(0x0000257C | __rt << 21))
+#define _ASM_UNSET_EMT ".purgem emt\n\t"
+
 static inline void __raw_emt(void)
 {
        __asm__ __volatile__(
-       "       .set    push                                            \n"
-       "       .set    noreorder                                       \n"
-       "       .set    mips32r2                                        \n"
-       "       .word   0x41600be1                      # emt           \n"
-       "       ehb                                                     \n"
+       "       .set    push                                    \n"
+       "       .set    "MIPS_ISA_LEVEL"                        \n"
+       _ASM_SET_EMT
+       "       emt     $0                                      \n"
+       _ASM_UNSET_EMT
+       "       ehb                                             \n"
        "       .set    pop");
 }
 
@@ -276,41 +300,55 @@ static inline void emt(int previous)
 static inline void ehb(void)
 {
        __asm__ __volatile__(
-       "       .set    push                                    \n"
-       "       .set    mips32r2                                \n"
-       "       ehb                                             \n"
-       "       .set    pop                                     \n");
+       "       .set    push                            \n"
+       "       .set    "MIPS_ISA_LEVEL"                \n"
+       "       ehb                                     \n"
+       "       .set    pop                             \n");
 }
 
-#define mftc0(rt,sel)                                                  \
+#define _ASM_SET_MFTC0                                                 \
+       _ASM_MACRO_2R_1S(mftc0, rs, rt, sel,                            \
+                       _ASM_INSN_IF_MIPS(0x41000000 | __rt << 16 |     \
+                               __rs << 11 | \\sel)                     \
+                       _ASM_INSN32_IF_MM(0x0000000E | __rt << 21 |     \
+                               __rs << 16 | \\sel << 4))
+#define _ASM_UNSET_MFTC0 ".purgem mftc0\n\t"
+
+#define mftc0(rt, sel)                                                 \
 ({                                                                     \
-        unsigned long  __res;                                          \
+       unsigned long   __res;                                          \
                                                                        \
        __asm__ __volatile__(                                           \
-       "       .set    push                                    \n"     \
-       "       .set    mips32r2                                \n"     \
-       "       .set    noat                                    \n"     \
-       "       # mftc0 $1, $" #rt ", " #sel "                  \n"     \
-       "       .word   0x41000800 | (" #rt " << 16) | " #sel " \n"     \
-       "       move    %0, $1                                  \n"     \
-       "       .set    pop                                     \n"     \
+       "       .set    push                            \n"     \
+       "       .set    "MIPS_ISA_LEVEL"                \n"     \
+       _ASM_SET_MFTC0                                                  \
+       "       mftc0   $1, " #rt ", " #sel "           \n"     \
+       _ASM_UNSET_MFTC0                                                \
+       "       .set    pop                             \n"     \
        : "=r" (__res));                                                \
                                                                        \
        __res;                                                          \
 })
 
+#define _ASM_SET_MFTGPR                                                        \
+       _ASM_MACRO_2R(mftgpr, rs, rt,                                   \
+                       _ASM_INSN_IF_MIPS(0x41000020 | __rt << 16 |     \
+                               __rs << 11)                             \
+                       _ASM_INSN32_IF_MM(0x0000040E | __rt << 21 |     \
+                               __rs << 16))
+#define _ASM_UNSET_MFTGPR ".purgem mftgpr\n\t"
+
 #define mftgpr(rt)                                                     \
 ({                                                                     \
        unsigned long __res;                                            \
                                                                        \
        __asm__ __volatile__(                                           \
-       "       .set    push                                    \n"     \
-       "       .set    noat                                    \n"     \
-       "       .set    mips32r2                                \n"     \
-       "       # mftgpr $1," #rt "                             \n"     \
-       "       .word   0x41000820 | (" #rt " << 16)            \n"     \
-       "       move    %0, $1                                  \n"     \
-       "       .set    pop                                     \n"     \
+       "       .set    push                            \n"     \
+       "       .set    "MIPS_ISA_LEVEL"                \n"     \
+       _ASM_SET_MFTGPR                                                 \
+       "       mftgpr  %0," #rt "                      \n"     \
+       _ASM_UNSET_MFTGPR                                               \
+       "       .set    pop                             \n"     \
        : "=r" (__res));                                                \
                                                                        \
        __res;                                                          \
@@ -321,35 +359,49 @@ static inline void ehb(void)
        unsigned long __res;                                            \
                                                                        \
        __asm__ __volatile__(                                           \
-       "       mftr    %0, " #rt ", " #u ", " #sel "           \n"     \
+       "       mftr    %0, " #rt ", " #u ", " #sel "   \n"     \
        : "=r" (__res));                                                \
                                                                        \
        __res;                                                          \
 })
 
-#define mttgpr(rd,v)                                                   \
+#define _ASM_SET_MTTGPR                                                        \
+       _ASM_MACRO_2R(mttgpr, rt, rs,                                   \
+                       _ASM_INSN_IF_MIPS(0x41800020 | __rt << 16 |     \
+                               __rs << 11)                             \
+                       _ASM_INSN32_IF_MM(0x00000406 | __rt << 21 |     \
+                               __rs << 16))
+#define _ASM_UNSET_MTTGPR ".purgem mttgpr\n\t"
+
+#define mttgpr(rs, v)                                                  \
 do {                                                                   \
        __asm__ __volatile__(                                           \
-       "       .set    push                                    \n"     \
-       "       .set    mips32r2                                \n"     \
-       "       .set    noat                                    \n"     \
-       "       move    $1, %0                                  \n"     \
-       "       # mttgpr $1, " #rd "                            \n"     \
-       "       .word   0x41810020 | (" #rd " << 11)            \n"     \
-       "       .set    pop                                     \n"     \
+       "       .set    push                            \n"     \
+       "       .set    "MIPS_ISA_LEVEL"                \n"     \
+       _ASM_SET_MTTGPR                                                 \
+       "       mttgpr  %0, " #rs "                     \n"     \
+       _ASM_UNSET_MTTGPR                                               \
+       "       .set    pop                             \n"     \
        : : "r" (v));                                                   \
 } while (0)
 
-#define mttc0(rd, sel, v)                                                      \
+#define _ASM_SET_MTTC0                                                 \
+       _ASM_MACRO_2R_1S(mttc0, rt, rs, sel,                            \
+                       _ASM_INSN_IF_MIPS(0x41800000 | __rt << 16 |     \
+                               __rs << 11 | \\sel)                     \
+                       _ASM_INSN32_IF_MM(0x0000040E | __rt << 21 |     \
+                               __rs << 16 | \\sel << 4))
+#define _ASM_UNSET_MTTC0 ".purgem mttc0\n\t"
+
+#define mttc0(rs, sel, v)                                                      \
 ({                                                                     \
        __asm__ __volatile__(                                           \
-       "       .set    push                                    \n"     \
-       "       .set    mips32r2                                \n"     \
-       "       .set    noat                                    \n"     \
-       "       move    $1, %0                                  \n"     \
-       "       # mttc0 %0," #rd ", " #sel "                    \n"     \
-       "       .word   0x41810000 | (" #rd " << 11) | " #sel " \n"     \
-       "       .set    pop                                     \n"     \
+       "       .set    push                            \n"     \
+       "       .set    "MIPS_ISA_LEVEL"                \n"     \
+       _ASM_SET_MTTC0                                                  \
+       "       mttc0   %0," #rs ", " #sel "            \n"     \
+       _ASM_UNSET_MTTC0                                                \
+       "       .set    pop                             \n"     \
        :                                                               \
        : "r" (v));                                                     \
 })
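
Mechanically, each of these wrappers now works the same way: the _ASM_SET_* fragment defines a scoped assembler macro, parse_r resolves the register names to numbers, the encoded instruction word is emitted, and .purgem removes the macro again. Roughly, a mttc0($12, 0, val) call on a non-microMIPS build expands to the following (a sketch; $2 stands in for whichever register the compiler allocates to %0):

	/*
	 *	.set	push
	 *	.set	mips32r2		# MIPS_ISA_LEVEL for this config
	 *	.macro	mttc0 rt, rs, sel	# from _ASM_SET_MTTC0
	 *	parse_r	__rt, \rt		# register name -> number
	 *	parse_r	__rs, \rs
	 *	.word	(0x41800000 | (__rt << 16) | (__rs << 11) | \sel)
	 *	.endm
	 *	mttc0	$2, $12, 0		# the wrapped instruction
	 *	.purgem	mttc0			# from _ASM_UNSET_MTTC0
	 *	.set	pop
	 */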
@@ -371,49 +423,49 @@ do {                                                                      \
 
 
 /* you *must* set the target tc (settc) before trying to use these */
-#define read_vpe_c0_vpecontrol()       mftc0(1, 1)
-#define write_vpe_c0_vpecontrol(val)   mttc0(1, 1, val)
-#define read_vpe_c0_vpeconf0()         mftc0(1, 2)
-#define write_vpe_c0_vpeconf0(val)     mttc0(1, 2, val)
-#define read_vpe_c0_vpeconf1()         mftc0(1, 3)
-#define write_vpe_c0_vpeconf1(val)     mttc0(1, 3, val)
-#define read_vpe_c0_count()            mftc0(9, 0)
-#define write_vpe_c0_count(val)                mttc0(9, 0, val)
-#define read_vpe_c0_status()           mftc0(12, 0)
-#define write_vpe_c0_status(val)       mttc0(12, 0, val)
-#define read_vpe_c0_cause()            mftc0(13, 0)
-#define write_vpe_c0_cause(val)                mttc0(13, 0, val)
-#define read_vpe_c0_config()           mftc0(16, 0)
-#define write_vpe_c0_config(val)       mttc0(16, 0, val)
-#define read_vpe_c0_config1()          mftc0(16, 1)
-#define write_vpe_c0_config1(val)      mttc0(16, 1, val)
-#define read_vpe_c0_config7()          mftc0(16, 7)
-#define write_vpe_c0_config7(val)      mttc0(16, 7, val)
-#define read_vpe_c0_ebase()            mftc0(15, 1)
-#define write_vpe_c0_ebase(val)                mttc0(15, 1, val)
-#define write_vpe_c0_compare(val)      mttc0(11, 0, val)
-#define read_vpe_c0_badvaddr()         mftc0(8, 0)
-#define read_vpe_c0_epc()              mftc0(14, 0)
-#define write_vpe_c0_epc(val)          mttc0(14, 0, val)
+#define read_vpe_c0_vpecontrol()       mftc0($1, 1)
+#define write_vpe_c0_vpecontrol(val)   mttc0($1, 1, val)
+#define read_vpe_c0_vpeconf0()         mftc0($1, 2)
+#define write_vpe_c0_vpeconf0(val)     mttc0($1, 2, val)
+#define read_vpe_c0_vpeconf1()         mftc0($1, 3)
+#define write_vpe_c0_vpeconf1(val)     mttc0($1, 3, val)
+#define read_vpe_c0_count()            mftc0($9, 0)
+#define write_vpe_c0_count(val)                mttc0($9, 0, val)
+#define read_vpe_c0_status()           mftc0($12, 0)
+#define write_vpe_c0_status(val)       mttc0($12, 0, val)
+#define read_vpe_c0_cause()            mftc0($13, 0)
+#define write_vpe_c0_cause(val)                mttc0($13, 0, val)
+#define read_vpe_c0_config()           mftc0($16, 0)
+#define write_vpe_c0_config(val)       mttc0($16, 0, val)
+#define read_vpe_c0_config1()          mftc0($16, 1)
+#define write_vpe_c0_config1(val)      mttc0($16, 1, val)
+#define read_vpe_c0_config7()          mftc0($16, 7)
+#define write_vpe_c0_config7(val)      mttc0($16, 7, val)
+#define read_vpe_c0_ebase()            mftc0($15, 1)
+#define write_vpe_c0_ebase(val)                mttc0($15, 1, val)
+#define write_vpe_c0_compare(val)      mttc0($11, 0, val)
+#define read_vpe_c0_badvaddr()         mftc0($8, 0)
+#define read_vpe_c0_epc()              mftc0($14, 0)
+#define write_vpe_c0_epc(val)          mttc0($14, 0, val)
 
 
 /* TC */
-#define read_tc_c0_tcstatus()          mftc0(2, 1)
-#define write_tc_c0_tcstatus(val)      mttc0(2, 1, val)
-#define read_tc_c0_tcbind()            mftc0(2, 2)
-#define write_tc_c0_tcbind(val)                mttc0(2, 2, val)
-#define read_tc_c0_tcrestart()         mftc0(2, 3)
-#define write_tc_c0_tcrestart(val)     mttc0(2, 3, val)
-#define read_tc_c0_tchalt()            mftc0(2, 4)
-#define write_tc_c0_tchalt(val)                mttc0(2, 4, val)
-#define read_tc_c0_tccontext()         mftc0(2, 5)
-#define write_tc_c0_tccontext(val)     mttc0(2, 5, val)
+#define read_tc_c0_tcstatus()          mftc0($2, 1)
+#define write_tc_c0_tcstatus(val)      mttc0($2, 1, val)
+#define read_tc_c0_tcbind()            mftc0($2, 2)
+#define write_tc_c0_tcbind(val)                mttc0($2, 2, val)
+#define read_tc_c0_tcrestart()         mftc0($2, 3)
+#define write_tc_c0_tcrestart(val)     mttc0($2, 3, val)
+#define read_tc_c0_tchalt()            mftc0($2, 4)
+#define write_tc_c0_tchalt(val)                mttc0($2, 4, val)
+#define read_tc_c0_tccontext()         mftc0($2, 5)
+#define write_tc_c0_tccontext(val)     mttc0($2, 5, val)
 
 /* GPR */
-#define read_tc_gpr_sp()               mftgpr(29)
-#define write_tc_gpr_sp(val)           mttgpr(29, val)
-#define read_tc_gpr_gp()               mftgpr(28)
-#define write_tc_gpr_gp(val)           mttgpr(28, val)
+#define read_tc_gpr_sp()               mftgpr($29)
+#define write_tc_gpr_sp(val)           mttgpr($29, val)
+#define read_tc_gpr_gp()               mftgpr($28)
+#define write_tc_gpr_gp(val)           mttgpr($28, val)
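
Note the knock-on change above: the accessors now pass $-prefixed register names (mftgpr($29) rather than mftgpr(29)) so the assembler-side parse_r can resolve them. A usage sketch, honouring the settc() requirement stated in the comment above:

	unsigned long sp;

	settc(tc);			/* target the TC first */
	sp = read_tc_gpr_sp();		/* expands to mftgpr($29) */
	write_tc_gpr_sp(sp);		/* expands to mttgpr($29, sp) */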
 
 __BUILD_SET_C0(mvpcontrol)
 
index ec58cb76d076d54234d189b0cec4f26643a670ac..3c6ddc0c2c7ac51292feaa412346f81110498871 100644
 
 /*
  * Coprocessor 0 register names
+ *
+ * The CP0_REGISTER variant is meant to be used in assembly code, while the
+ * C0_REGISTER variant is meant to be used in C (uasm) code.
  */
-#define CP0_INDEX $0
-#define CP0_RANDOM $1
-#define CP0_ENTRYLO0 $2
-#define CP0_ENTRYLO1 $3
-#define CP0_CONF $3
-#define CP0_GLOBALNUMBER $3, 1
-#define CP0_CONTEXT $4
-#define CP0_PAGEMASK $5
-#define CP0_PAGEGRAIN $5, 1
-#define CP0_SEGCTL0 $5, 2
-#define CP0_SEGCTL1 $5, 3
-#define CP0_SEGCTL2 $5, 4
-#define CP0_WIRED $6
-#define CP0_INFO $7
-#define CP0_HWRENA $7
-#define CP0_BADVADDR $8
-#define CP0_BADINSTR $8, 1
-#define CP0_COUNT $9
-#define CP0_ENTRYHI $10
-#define CP0_GUESTCTL1 $10, 4
-#define CP0_GUESTCTL2 $10, 5
-#define CP0_GUESTCTL3 $10, 6
-#define CP0_COMPARE $11
-#define CP0_GUESTCTL0EXT $11, 4
-#define CP0_STATUS $12
-#define CP0_GUESTCTL0 $12, 6
-#define CP0_GTOFFSET $12, 7
-#define CP0_CAUSE $13
-#define CP0_EPC $14
-#define CP0_PRID $15
-#define CP0_EBASE $15, 1
-#define CP0_CMGCRBASE $15, 3
-#define CP0_CONFIG $16
-#define CP0_CONFIG3 $16, 3
-#define CP0_CONFIG5 $16, 5
-#define CP0_CONFIG6 $16, 6
-#define CP0_LLADDR $17
-#define CP0_WATCHLO $18
-#define CP0_WATCHHI $19
-#define CP0_XCONTEXT $20
-#define CP0_FRAMEMASK $21
-#define CP0_DIAGNOSTIC $22
-#define CP0_DIAGNOSTIC1 $22, 1
-#define CP0_DEBUG $23
-#define CP0_DEPC $24
-#define CP0_PERFORMANCE $25
-#define CP0_ECC $26
-#define CP0_CACHEERR $27
-#define CP0_TAGLO $28
-#define CP0_TAGHI $29
-#define CP0_ERROREPC $30
-#define CP0_DESAVE $31
+#define CP0_INDEX              $0
+#define C0_INDEX               0, 0
+
+#define CP0_RANDOM             $1
+#define C0_RANDOM              1, 0
+
+#define CP0_ENTRYLO0           $2
+#define C0_ENTRYLO0            2, 0
+
+#define CP0_ENTRYLO1           $3
+#define C0_ENTRYLO1            3, 0
+
+#define CP0_CONF               $3
+#define C0_CONF                        3, 0
+
+#define CP0_GLOBALNUMBER       $3, 1
+#define C0_GLOBALNUMBER                3, 1
+
+#define CP0_CONTEXT            $4
+#define C0_CONTEXT             4, 0
+
+#define CP0_PAGEMASK           $5
+#define C0_PAGEMASK            5, 0
+
+#define CP0_PAGEGRAIN          $5, 1
+#define C0_PAGEGRAIN           5, 1
+
+#define CP0_SEGCTL0            $5, 2
+#define C0_SEGCTL0             5, 2
+
+#define CP0_SEGCTL1            $5, 3
+#define C0_SEGCTL1             5, 3
+
+#define CP0_SEGCTL2            $5, 4
+#define C0_SEGCTL2             5, 4
+
+#define CP0_PWBASE             $5, 5
+#define C0_PWBASE              5, 5
+
+#define CP0_PWFIELD            $5, 6
+#define C0_PWFIELD             5, 6
+
+#define CP0_PWCTL              $5, 7
+#define C0_PWCTL               5, 7
+
+#define CP0_WIRED              $6
+#define C0_WIRED               6, 0
+
+#define CP0_INFO               $7
+#define C0_INFO                        7, 0
+
+#define CP0_HWRENA             $7
+#define C0_HWRENA              7, 0
+
+#define CP0_BADVADDR           $8
+#define C0_BADVADDR            8, 0
+
+#define CP0_BADINSTR           $8, 1
+#define C0_BADINSTR            8, 1
+
+#define CP0_BADINSTRP          $8, 2
+#define C0_BADINSTRP           8, 2
+
+#define CP0_COUNT              $9
+#define C0_COUNT               9, 0
+
+#define CP0_PGD                        $9, 7
+#define C0_PGD                 9, 7
+
+#define CP0_ENTRYHI            $10
+#define C0_ENTRYHI             10, 0
+
+#define CP0_GUESTCTL1          $10, 4
+#define C0_GUESTCTL1           10, 4
+
+#define CP0_GUESTCTL2          $10, 5
+#define C0_GUESTCTL2           10, 5
+
+#define CP0_GUESTCTL3          $10, 6
+#define C0_GUESTCTL3           10, 6
+
+#define CP0_COMPARE            $11
+#define C0_COMPARE             11, 0
+
+#define CP0_GUESTCTL0EXT       $11, 4
+#define C0_GUESTCTL0EXT                11, 4
+
+#define CP0_STATUS             $12
+#define C0_STATUS              12, 0
+
+#define CP0_GUESTCTL0          $12, 6
+#define C0_GUESTCTL0           12, 6
+
+#define CP0_GTOFFSET           $12, 7
+#define C0_GTOFFSET            12, 7
+
+#define CP0_CAUSE              $13
+#define C0_CAUSE               13, 0
+
+#define CP0_EPC                        $14
+#define C0_EPC                 14, 0
+
+#define CP0_PRID               $15
+#define C0_PRID                        15, 0
+
+#define CP0_EBASE              $15, 1
+#define C0_EBASE               15, 1
+
+#define CP0_CMGCRBASE          $15, 3
+#define C0_CMGCRBASE           15, 3
+
+#define CP0_CONFIG             $16
+#define C0_CONFIG              16, 0
+
+#define CP0_CONFIG1            $16, 1
+#define C0_CONFIG1             16, 1
+
+#define CP0_CONFIG2            $16, 2
+#define C0_CONFIG2             16, 2
+
+#define CP0_CONFIG3            $16, 3
+#define C0_CONFIG3             16, 3
+
+#define CP0_CONFIG4            $16, 4
+#define C0_CONFIG4             16, 4
+
+#define CP0_CONFIG5            $16, 5
+#define C0_CONFIG5             16, 5
+
+#define CP0_CONFIG6            $16, 6
+#define C0_CONFIG6             16, 6
+
+#define CP0_LLADDR             $17
+#define C0_LLADDR              17, 0
+
+#define CP0_WATCHLO            $18
+#define C0_WATCHLO             18, 0
+
+#define CP0_WATCHHI            $19
+#define C0_WATCHHI             19, 0
+
+#define CP0_XCONTEXT           $20
+#define C0_XCONTEXT            20, 0
+
+#define CP0_FRAMEMASK          $21
+#define C0_FRAMEMASK           21, 0
+
+#define CP0_DIAGNOSTIC         $22
+#define C0_DIAGNOSTIC          22, 0
+
+#define CP0_DIAGNOSTIC1                $22, 1
+#define C0_DIAGNOSTIC1         22, 1
+
+#define CP0_DEBUG              $23
+#define C0_DEBUG               23, 0
+
+#define CP0_DEPC               $24
+#define C0_DEPC                        24, 0
+
+#define CP0_PERFORMANCE                $25
+#define C0_PERFORMANCE         25, 0
+
+#define CP0_ECC                        $26
+#define C0_ECC                 26, 0
+
+#define CP0_CACHEERR           $27
+#define C0_CACHEERR            27, 0
+
+#define CP0_TAGLO              $28
+#define C0_TAGLO               28, 0
+
+#define CP0_DTAGLO             $28, 2
+#define C0_DTAGLO              28, 2
+
+#define CP0_DDATALO            $28, 3
+#define C0_DDATALO             28, 3
+
+#define CP0_STAGLO             $28, 4
+#define C0_STAGLO              28, 4
+
+#define CP0_TAGHI              $29
+#define C0_TAGHI               29, 0
+
+#define CP0_ERROREPC           $30
+#define C0_ERROREPC            30, 0
+
+#define CP0_DESAVE             $31
+#define C0_DESAVE              31, 0
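
A minimal sketch of how the two namespaces are meant to be consumed (the uasm call shape matches the later hunks in this series):

	/* In C (uasm) code, C0_* supplies the "reg, sel" pair: */
	uasm_i_mfc0(&p, GPR_K0, C0_STATUS);	/* emits mfc0 k0, $12, 0 */

	/*
	 * In assembly, CP0_* supplies the $-prefixed register name:
	 *	mfc0	k0, CP0_STATUS
	 */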
 
 /*
  * R4640/R4650 cp0 register names.  These registers are listed
 #define ST0_DE                 0x00010000
 #define ST0_CE                 0x00020000
 
+#ifdef CONFIG_64BIT
+#define ST0_KX_IF_64   ST0_KX
+#else
+#define ST0_KX_IF_64   0
+#endif
+
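This lets 32/64-bit-neutral code compose a Status value without an #ifdef at the use site; the smp-cps trampoline added later in this series uses it as:

	val = ST0_CU1 | ST0_CU0 | ST0_BEV | ST0_KX_IF_64;
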
 /*
  * Setting c0_status.co enables Hit_Writeback and Hit_Writeback_Invalidate
  * cacheops in userspace.  This bit exists only on RM7000 and RM9000
@@ -1277,11 +1422,13 @@ static inline int mm_insn_16bit(u16 insn)
  */
 
 /* Match an individual register number and assign to \var */
-#define _IFC_REG(n)                            \
-       ".ifc   \\r, $" #n "\n\t"               \
+#define _IFC_REG_NAME(name, n)                 \
+       ".ifc   \\r, $" #name "\n\t"            \
        "\\var  = " #n "\n\t"                   \
        ".endif\n\t"
 
+#define _IFC_REG(n)    _IFC_REG_NAME(n, n)
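
A host-side illustration of the stringification (not kernel code; the two macros are copied from above so the snippet compiles anywhere): the named and the numeric forms produce assembler fragments assigning the same register number.

	#include <assert.h>
	#include <string.h>

	#define _IFC_REG_NAME(name, n)			\
		".ifc	\\r, $" #name "\n\t"		\
		"\\var	= " #n "\n\t"			\
		".endif\n\t"
	#define _IFC_REG(n)	_IFC_REG_NAME(n, n)

	int main(void)
	{
		/* $sp is GPR 29: the named form matches on "$sp"... */
		assert(strstr(_IFC_REG_NAME(sp, 29), "$sp") != NULL);
		/* ...and assigns the same "= 29" as the numeric form. */
		assert(strstr(_IFC_REG_NAME(sp, 29), "= 29") != NULL);
		assert(strstr(_IFC_REG(29), "= 29") != NULL);
		return 0;
	}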
+
 #define _ASM_SET_PARSE_R                                               \
        ".macro parse_r var r\n\t"                                      \
        "\\var  = -1\n\t"                                               \
@@ -1293,6 +1440,7 @@ static inline int mm_insn_16bit(u16 insn)
        _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)             \
        _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)             \
        _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)             \
+       _IFC_REG_NAME(sp, 29) _IFC_REG_NAME(fp, 30)                     \
        ".iflt  \\var\n\t"                                              \
        ".error \"Unable to parse register name \\r\"\n\t"              \
        ".endif\n\t"                                                    \
@@ -1307,6 +1455,15 @@ static inline int mm_insn_16bit(u16 insn)
  * the ENC encodings.
  */
 
+/* Instructions with 1 register operand */
+#define _ASM_MACRO_1R(OP, R1, ENC)                             \
+               ".macro " #OP " " #R1 "\n\t"                    \
+               _ASM_SET_PARSE_R                                        \
+               "parse_r __" #R1 ", \\" #R1 "\n\t"                      \
+               ENC                                                     \
+               _ASM_UNSET_PARSE_R                                      \
+               ".endm\n\t"
+
 /* Instructions with 1 register operand & 1 immediate operand */
 #define _ASM_MACRO_1R1I(OP, R1, I2, ENC)                               \
                ".macro " #OP " " #R1 ", " #I2 "\n\t"                   \
@@ -2078,7 +2235,14 @@ do {                                                                     \
                _ASM_INSN_IF_MIPS(0x4200000c)                           \
                _ASM_INSN32_IF_MM(0x0000517c)
 #else  /* !TOOLCHAIN_SUPPORTS_VIRT */
-#define _ASM_SET_VIRT ".set\tvirt\n\t"
+#if MIPS_ISA_REV >= 5
+#define _ASM_SET_VIRT_ISA
+#elif defined(CONFIG_64BIT)
+#define _ASM_SET_VIRT_ISA ".set\tmips64r5\n\t"
+#else
+#define _ASM_SET_VIRT_ISA ".set\tmips32r5\n\t"
+#endif
+#define _ASM_SET_VIRT _ASM_SET_VIRT_ISA ".set\tvirt\n\t"
 #define _ASM_SET_MFGC0 _ASM_SET_VIRT
 #define _ASM_SET_DMFGC0        _ASM_SET_VIRT
 #define _ASM_SET_MTGC0 _ASM_SET_VIRT
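
The effect is that the guest-CP0 accessors now raise the assembler's ISA level themselves before .set virt, which is why the hardcoded .set mips32r5 / .set mips64r5 lines disappear from every call site below. For a 32-bit pre-r5 build, a read expands roughly to (a sketch):

	/*
	 *	.set	push
	 *	.set	mips32r5	# _ASM_SET_VIRT_ISA
	 *	.set	virt
	 *	mfgc0	$2, $12, 0	# e.g. read_gc0_status()
	 *	.set	pop
	 */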
@@ -2099,7 +2263,6 @@ do {                                                                      \
 ({ int __res;                                                          \
        __asm__ __volatile__(                                           \
                ".set\tpush\n\t"                                        \
-               ".set\tmips32r5\n\t"                                    \
                _ASM_SET_MFGC0                                          \
                "mfgc0\t%0, " #source ", %1\n\t"                        \
                _ASM_UNSET_MFGC0                                        \
@@ -2113,7 +2276,6 @@ do {                                                                      \
 ({ unsigned long long __res;                                           \
        __asm__ __volatile__(                                           \
                ".set\tpush\n\t"                                        \
-               ".set\tmips64r5\n\t"                                    \
                _ASM_SET_DMFGC0                                         \
                "dmfgc0\t%0, " #source ", %1\n\t"                       \
                _ASM_UNSET_DMFGC0                                       \
@@ -2127,7 +2289,6 @@ do {                                                                      \
 do {                                                                   \
        __asm__ __volatile__(                                           \
                ".set\tpush\n\t"                                        \
-               ".set\tmips32r5\n\t"                                    \
                _ASM_SET_MTGC0                                          \
                "mtgc0\t%z0, " #register ", %1\n\t"                     \
                _ASM_UNSET_MTGC0                                        \
@@ -2140,7 +2301,6 @@ do {                                                                      \
 do {                                                                   \
        __asm__ __volatile__(                                           \
                ".set\tpush\n\t"                                        \
-               ".set\tmips64r5\n\t"                                    \
                _ASM_SET_DMTGC0                                         \
                "dmtgc0\t%z0, " #register ", %1\n\t"                    \
                _ASM_UNSET_DMTGC0                                       \
index 3c687df1d5150656bda1a18a485b7b8c9ee5cfb0..236051364f78e2e7586a3fc2097e0a55de2de36e 100644
 
 #if _MIPS_SIM == _MIPS_SIM_ABI32
 
+/*
+ * General purpose register numbers for 32 bit ABI
+ */
+#define GPR_ZERO       0       /* wired zero */
+#define GPR_AT 1       /* assembler temp */
+#define GPR_V0 2       /* return value */
+#define GPR_V1 3
+#define GPR_A0 4       /* argument registers */
+#define GPR_A1 5
+#define GPR_A2 6
+#define GPR_A3 7
+#define GPR_T0 8       /* caller saved */
+#define GPR_T1 9
+#define GPR_T2 10
+#define GPR_T3 11
+#define GPR_T4 12
+#define GPR_TA0        12
+#define GPR_T5 13
+#define GPR_TA1        13
+#define GPR_T6 14
+#define GPR_TA2        14
+#define GPR_T7 15
+#define GPR_TA3        15
+#define GPR_S0 16      /* callee saved */
+#define GPR_S1 17
+#define GPR_S2 18
+#define GPR_S3 19
+#define GPR_S4 20
+#define GPR_S5 21
+#define GPR_S6 22
+#define GPR_S7 23
+#define GPR_T8 24      /* caller saved */
+#define GPR_T9 25
+#define GPR_JP 25      /* PIC jump register */
+#define GPR_K0 26      /* kernel scratch */
+#define GPR_K1 27
+#define GPR_GP 28      /* global pointer */
+#define GPR_SP 29      /* stack pointer */
+#define GPR_FP 30      /* frame pointer */
+#define GPR_S8 30      /* same as fp! */
+#define GPR_RA 31      /* return address */
+
+#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
+
+#if _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32
+
+#define GPR_ZERO       0       /* wired zero */
+#define GPR_AT 1       /* assembler temp */
+#define GPR_V0 2       /* return value - caller saved */
+#define GPR_V1 3
+#define GPR_A0 4       /* argument registers */
+#define GPR_A1 5
+#define GPR_A2 6
+#define GPR_A3 7
+#define GPR_A4 8       /* arg reg 64 bit; caller saved in 32 bit */
+#define GPR_TA0        8
+#define GPR_A5 9
+#define GPR_TA1        9
+#define GPR_A6 10
+#define GPR_TA2        10
+#define GPR_A7 11
+#define GPR_TA3        11
+#define GPR_T0 12      /* caller saved */
+#define GPR_T1 13
+#define GPR_T2 14
+#define GPR_T3 15
+#define GPR_S0 16      /* callee saved */
+#define GPR_S1 17
+#define GPR_S2 18
+#define GPR_S3 19
+#define GPR_S4 20
+#define GPR_S5 21
+#define GPR_S6 22
+#define GPR_S7 23
+#define GPR_T8 24      /* caller saved */
+#define GPR_T9 25      /* callee address for PIC/temp */
+#define GPR_JP 25      /* PIC jump register */
+#define GPR_K0 26      /* kernel temporary */
+#define GPR_K1 27
+#define GPR_GP 28      /* global pointer - caller saved for PIC */
+#define GPR_SP 29      /* stack pointer */
+#define GPR_FP 30      /* frame pointer */
+#define GPR_S8 30      /* callee saved */
+#define GPR_RA 31      /* return address */
+
+#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */
+
+#ifdef __ASSEMBLY__
+#if _MIPS_SIM == _MIPS_SIM_ABI32
+
 /*
  * Symbolic register names for 32 bit ABI
  */
 #define ra     $31     /* return address */
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */
+#endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_REGDEF_H */
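
The numeric GPR_* names exist so that C code can drop its private register enums, as the pm-cps and KVM hunks below do; a uasm usage sketch:

	u32 *p = buf;

	uasm_i_addiu(&p, GPR_T0, GPR_ZERO, 1);	/* t0 = 1 */
	uasm_i_jr(&p, GPR_RA);			/* return to caller */
	uasm_i_nop(&p);				/* branch delay slot */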
index 22a572b70fe31c1d9f266235b5c94a504ec210e7..ab94e50f62b8735f55c6694aa0fd1b6398df2b17 100644
@@ -24,7 +24,7 @@ struct core_boot_config {
 
 extern struct core_boot_config *mips_cps_core_bootcfg;
 
-extern void mips_cps_core_entry(void);
+extern void mips_cps_core_boot(int cca, void __iomem *gcr_base);
 extern void mips_cps_core_init(void);
 
 extern void mips_cps_boot_vpes(struct core_boot_config *cfg, unsigned vpe);
@@ -32,7 +32,12 @@ extern void mips_cps_boot_vpes(struct core_boot_config *cfg, unsigned vpe);
 extern void mips_cps_pm_save(void);
 extern void mips_cps_pm_restore(void);
 
-extern void *mips_cps_core_entry_patch_end;
+extern void excep_tlbfill(void);
+extern void excep_xtlbfill(void);
+extern void excep_cache(void);
+extern void excep_genex(void);
+extern void excep_intex(void);
+extern void excep_ejtag(void);
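
Taken together, these externs replace the old scheme of patching instructions into mips_cps_core_entry; the new flow, in sketch form (names from the smp-cps.c hunks below):

	/*
	 * 1. allocate_cps_vecs() reserves a BEV_VEC_SIZE block in low memory.
	 * 2. mips_cps_build_core_entry() emits a uasm trampoline there that
	 *    loads the CCA into a0 and the GCR base into a1, then jumps to
	 *    mips_cps_core_boot(cca, gcr_base).
	 * 3. The excep_* stubs are memcpy'd to fixed offsets in that block.
	 */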
 
 #ifdef CONFIG_MIPS_CPS
 
index edcf717c432717fce72096b1e5a2905c3f8c63ba..9673dc9cb31575a3553a4cf0e73d3e73ec24494f 100644
@@ -20,8 +20,6 @@
  * Some parts derived from the x86 version of this file.
  */
 
-#define __KVM_HAVE_READONLY_MEM
-
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
 /*
index 64ecfdac6580b8341ccefe8ebf200b44fb7ce928..f876309130ad1b605e4fd2241843216fd7cde7b1 100644
@@ -4,6 +4,7 @@
  * Author: Paul Burton <paul.burton@mips.com>
  */
 
+#include <linux/init.h>
 #include <asm/addrspace.h>
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
        .endm
 
 
-.balign 0x1000
-
-LEAF(mips_cps_core_entry)
-       /*
-        * These first several instructions will be patched by cps_smp_setup to load the
-        * CCA to use into register s0 and GCR base address to register s1.
-        */
-       .rept   CPS_ENTRY_PATCH_INSNS
-       nop
-       .endr
-
-       .global mips_cps_core_entry_patch_end
-mips_cps_core_entry_patch_end:
-
-       /* Check whether we're here due to an NMI */
-       mfc0    k0, CP0_STATUS
-       and     k0, k0, ST0_NMI
-       beqz    k0, not_nmi
-        nop
-
-       /* This is an NMI */
-       PTR_LA  k0, nmi_handler
-       jr      k0
-        nop
-
-not_nmi:
-       /* Setup Cause */
-       li      t0, CAUSEF_IV
-       mtc0    t0, CP0_CAUSE
-
-       /* Setup Status */
-       li      t0, ST0_CU1 | ST0_CU0 | ST0_BEV | STATUS_BITDEPS
-       mtc0    t0, CP0_STATUS
+LEAF(mips_cps_core_boot)
+       /* Save CCA and GCR base */
+       move   s0, a0
+       move   s1, a1
 
        /* We don't know how to do coherence setup on earlier ISA */
 #if MIPS_ISA_REV > 0
@@ -178,49 +150,45 @@ not_nmi:
        PTR_L   sp, VPEBOOTCFG_SP(v1)
        jr      t1
         nop
-       END(mips_cps_core_entry)
+       END(mips_cps_core_boot)
 
-.org 0x200
+       __INIT
 LEAF(excep_tlbfill)
        DUMP_EXCEP("TLB Fill")
        b       .
         nop
        END(excep_tlbfill)
 
-.org 0x280
 LEAF(excep_xtlbfill)
        DUMP_EXCEP("XTLB Fill")
        b       .
         nop
        END(excep_xtlbfill)
 
-.org 0x300
 LEAF(excep_cache)
        DUMP_EXCEP("Cache")
        b       .
         nop
        END(excep_cache)
 
-.org 0x380
 LEAF(excep_genex)
        DUMP_EXCEP("General")
        b       .
         nop
        END(excep_genex)
 
-.org 0x400
 LEAF(excep_intex)
        DUMP_EXCEP("Interrupt")
        b       .
         nop
        END(excep_intex)
 
-.org 0x480
 LEAF(excep_ejtag)
        PTR_LA  k0, ejtag_debug_handler
        jr      k0
         nop
        END(excep_ejtag)
+       __FINIT
 
 LEAF(mips_cps_core_init)
 #ifdef CONFIG_MIPS_MT_SMP
@@ -428,7 +396,7 @@ LEAF(mips_cps_boot_vpes)
        /* Calculate a pointer to the VPEs struct vpe_boot_config */
        li      t0, VPEBOOTCFG_SIZE
        mul     t0, t0, ta1
-       addu    t0, t0, ta3
+       PTR_ADDU t0, t0, ta3
 
        /* Set the TC restart PC */
        lw      t1, VPEBOOTCFG_PC(t0)
@@ -603,10 +571,10 @@ dcache_done:
        lw      $1, TI_CPU(gp)
        sll     $1, $1, LONGLOG
        PTR_LA  \dest, __per_cpu_offset
-       addu    $1, $1, \dest
+       PTR_ADDU $1, $1, \dest
        lw      $1, 0($1)
        PTR_LA  \dest, cps_cpu_state
-       addu    \dest, \dest, $1
+       PTR_ADDU \dest, \dest, $1
        .set    pop
        .endm
 
index 84b3affb9de88a8e1741286b39f12c70b87636fd..3a115fab55739c8ff1de3c637da08621923a63d9 100644
@@ -179,7 +179,7 @@ static char *cm3_causes[32] = {
 static DEFINE_PER_CPU_ALIGNED(spinlock_t, cm_core_lock);
 static DEFINE_PER_CPU_ALIGNED(unsigned long, cm_core_lock_flags);
 
-phys_addr_t __mips_cm_phys_base(void)
+phys_addr_t __weak mips_cm_phys_base(void)
 {
        unsigned long cmgcr;
 
@@ -198,10 +198,7 @@ phys_addr_t __mips_cm_phys_base(void)
        return (cmgcr & MIPS_CMGCRF_BASE) << (36 - 32);
 }
 
-phys_addr_t mips_cm_phys_base(void)
-       __attribute__((weak, alias("__mips_cm_phys_base")));
-
-static phys_addr_t __mips_cm_l2sync_phys_base(void)
+phys_addr_t __weak mips_cm_l2sync_phys_base(void)
 {
        u32 base_reg;
 
@@ -217,9 +214,6 @@ static phys_addr_t __mips_cm_l2sync_phys_base(void)
        return mips_cm_phys_base() + MIPS_CM_GCR_SIZE;
 }
 
-phys_addr_t mips_cm_l2sync_phys_base(void)
-       __attribute__((weak, alias("__mips_cm_l2sync_phys_base")));
-
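
With __weak, the alias boilerplate goes away and a platform override becomes a plain definition; a hypothetical example (the address is made up):

	/* Platform code (sketch): overrides the __weak default above. */
	phys_addr_t mips_cm_phys_base(void)
	{
		return 0x1fbf8000;	/* hypothetical fixed GCR base */
	}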
 static void mips_cm_probe_l2sync(void)
 {
        unsigned major_rev;
index c07d64438b5bfbaef382af3b92e43ab10d2f5040..c938ba208fc0f2e23f410ecf799ffde1d5972460 100644
@@ -229,19 +229,13 @@ void mips_mt_set_cpuoptions(void)
        }
 }
 
-struct class *mt_class;
+const struct class mt_class = {
+       .name = "mt",
+};
 
 static int __init mips_mt_init(void)
 {
-       struct class *mtc;
-
-       mtc = class_create("mt");
-       if (IS_ERR(mtc))
-               return PTR_ERR(mtc);
-
-       mt_class = mtc;
-
-       return 0;
+       return class_register(&mt_class);
 }
 
 subsys_initcall(mips_mt_init);
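
Since mt_class is now a const object rather than a pointer, the matching declaration changes shape as well (a sketch of the header side; users such as rtlx.c below switch to passing &mt_class):

	extern const struct class mt_class;	/* was: extern struct class *mt_class; */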
index 9bf60d7d44d3621c6e0ad3923c378b974eb9070d..d09ca77e624d76cdb15d4f65788ed7a47dfe8fc4 100644
@@ -18,6 +18,7 @@
 #include <asm/mipsmtregs.h>
 #include <asm/pm.h>
 #include <asm/pm-cps.h>
+#include <asm/regdef.h>
 #include <asm/smp-cps.h>
 #include <asm/uasm.h>
 
@@ -69,13 +70,6 @@ DEFINE_PER_CPU_ALIGNED(struct mips_static_suspend_state, cps_cpu_state);
 static struct uasm_label labels[32];
 static struct uasm_reloc relocs[32];
 
-enum mips_reg {
-       zero, at, v0, v1, a0, a1, a2, a3,
-       t0, t1, t2, t3, t4, t5, t6, t7,
-       s0, s1, s2, s3, s4, s5, s6, s7,
-       t8, t9, k0, k1, gp, sp, fp, ra,
-};
-
 bool cps_pm_support_state(enum cps_pm_state state)
 {
        return test_bit(state, state_support);
@@ -203,13 +197,13 @@ static void cps_gen_cache_routine(u32 **pp, struct uasm_label **pl,
                return;
 
        /* Load base address */
-       UASM_i_LA(pp, t0, (long)CKSEG0);
+       UASM_i_LA(pp, GPR_T0, (long)CKSEG0);
 
        /* Calculate end address */
        if (cache_size < 0x8000)
-               uasm_i_addiu(pp, t1, t0, cache_size);
+               uasm_i_addiu(pp, GPR_T1, GPR_T0, cache_size);
        else
-               UASM_i_LA(pp, t1, (long)(CKSEG0 + cache_size));
+               UASM_i_LA(pp, GPR_T1, (long)(CKSEG0 + cache_size));
 
        /* Start of cache op loop */
        uasm_build_label(pl, *pp, lbl);
@@ -217,19 +211,19 @@ static void cps_gen_cache_routine(u32 **pp, struct uasm_label **pl,
        /* Generate the cache ops */
        for (i = 0; i < unroll_lines; i++) {
                if (cpu_has_mips_r6) {
-                       uasm_i_cache(pp, op, 0, t0);
-                       uasm_i_addiu(pp, t0, t0, cache->linesz);
+                       uasm_i_cache(pp, op, 0, GPR_T0);
+                       uasm_i_addiu(pp, GPR_T0, GPR_T0, cache->linesz);
                } else {
-                       uasm_i_cache(pp, op, i * cache->linesz, t0);
+                       uasm_i_cache(pp, op, i * cache->linesz, GPR_T0);
                }
        }
 
        if (!cpu_has_mips_r6)
                /* Update the base address */
-               uasm_i_addiu(pp, t0, t0, unroll_lines * cache->linesz);
+               uasm_i_addiu(pp, GPR_T0, GPR_T0, unroll_lines * cache->linesz);
 
        /* Loop if we haven't reached the end address yet */
-       uasm_il_bne(pp, pr, t0, t1, lbl);
+       uasm_il_bne(pp, pr, GPR_T0, GPR_T1, lbl);
        uasm_i_nop(pp);
 }
 
@@ -275,25 +269,25 @@ static int cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
         */
 
        /* Preserve perf counter setup */
-       uasm_i_mfc0(pp, t2, 25, (perf_counter * 2) + 0); /* PerfCtlN */
-       uasm_i_mfc0(pp, t3, 25, (perf_counter * 2) + 1); /* PerfCntN */
+       uasm_i_mfc0(pp, GPR_T2, 25, (perf_counter * 2) + 0); /* PerfCtlN */
+       uasm_i_mfc0(pp, GPR_T3, 25, (perf_counter * 2) + 1); /* PerfCntN */
 
        /* Setup perf counter to count FSB full pipeline stalls */
-       uasm_i_addiu(pp, t0, zero, (perf_event << 5) | 0xf);
-       uasm_i_mtc0(pp, t0, 25, (perf_counter * 2) + 0); /* PerfCtlN */
+       uasm_i_addiu(pp, GPR_T0, GPR_ZERO, (perf_event << 5) | 0xf);
+       uasm_i_mtc0(pp, GPR_T0, 25, (perf_counter * 2) + 0); /* PerfCtlN */
        uasm_i_ehb(pp);
-       uasm_i_mtc0(pp, zero, 25, (perf_counter * 2) + 1); /* PerfCntN */
+       uasm_i_mtc0(pp, GPR_ZERO, 25, (perf_counter * 2) + 1); /* PerfCntN */
        uasm_i_ehb(pp);
 
        /* Base address for loads */
-       UASM_i_LA(pp, t0, (long)CKSEG0);
+       UASM_i_LA(pp, GPR_T0, (long)CKSEG0);
 
        /* Start of clear loop */
        uasm_build_label(pl, *pp, lbl);
 
        /* Perform some loads to fill the FSB */
        for (i = 0; i < num_loads; i++)
-               uasm_i_lw(pp, zero, i * line_size * line_stride, t0);
+               uasm_i_lw(pp, GPR_ZERO, i * line_size * line_stride, GPR_T0);
 
        /*
         * Invalidate the new D-cache entries so that the cache will need
@@ -301,9 +295,9 @@ static int cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
         */
        for (i = 0; i < num_loads; i++) {
                uasm_i_cache(pp, Hit_Invalidate_D,
-                            i * line_size * line_stride, t0);
+                            i * line_size * line_stride, GPR_T0);
                uasm_i_cache(pp, Hit_Writeback_Inv_SD,
-                            i * line_size * line_stride, t0);
+                            i * line_size * line_stride, GPR_T0);
        }
 
        /* Barrier ensuring previous cache invalidates are complete */
@@ -311,16 +305,16 @@ static int cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
        uasm_i_ehb(pp);
 
        /* Check whether the pipeline stalled due to the FSB being full */
-       uasm_i_mfc0(pp, t1, 25, (perf_counter * 2) + 1); /* PerfCntN */
+       uasm_i_mfc0(pp, GPR_T1, 25, (perf_counter * 2) + 1); /* PerfCntN */
 
        /* Loop if it didn't */
-       uasm_il_beqz(pp, pr, t1, lbl);
+       uasm_il_beqz(pp, pr, GPR_T1, lbl);
        uasm_i_nop(pp);
 
        /* Restore perf counter 1. The count may well now be wrong... */
-       uasm_i_mtc0(pp, t2, 25, (perf_counter * 2) + 0); /* PerfCtlN */
+       uasm_i_mtc0(pp, GPR_T2, 25, (perf_counter * 2) + 0); /* PerfCtlN */
        uasm_i_ehb(pp);
-       uasm_i_mtc0(pp, t3, 25, (perf_counter * 2) + 1); /* PerfCntN */
+       uasm_i_mtc0(pp, GPR_T3, 25, (perf_counter * 2) + 1); /* PerfCntN */
        uasm_i_ehb(pp);
 
        return 0;
@@ -330,12 +324,12 @@ static void cps_gen_set_top_bit(u32 **pp, struct uasm_label **pl,
                                struct uasm_reloc **pr,
                                unsigned r_addr, int lbl)
 {
-       uasm_i_lui(pp, t0, uasm_rel_hi(0x80000000));
+       uasm_i_lui(pp, GPR_T0, uasm_rel_hi(0x80000000));
        uasm_build_label(pl, *pp, lbl);
-       uasm_i_ll(pp, t1, 0, r_addr);
-       uasm_i_or(pp, t1, t1, t0);
-       uasm_i_sc(pp, t1, 0, r_addr);
-       uasm_il_beqz(pp, pr, t1, lbl);
+       uasm_i_ll(pp, GPR_T1, 0, r_addr);
+       uasm_i_or(pp, GPR_T1, GPR_T1, GPR_T0);
+       uasm_i_sc(pp, GPR_T1, 0, r_addr);
+       uasm_il_beqz(pp, pr, GPR_T1, lbl);
        uasm_i_nop(pp);
 }
 
@@ -344,9 +338,9 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        u32 *buf, *p;
-       const unsigned r_online = a0;
-       const unsigned r_nc_count = a1;
-       const unsigned r_pcohctl = t7;
+       const unsigned r_online = GPR_A0;
+       const unsigned r_nc_count = GPR_A1;
+       const unsigned r_pcohctl = GPR_T8;
        const unsigned max_instrs = 256;
        unsigned cpc_cmd;
        int err;
@@ -383,8 +377,8 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                 * with the return address placed in v0 to avoid clobbering
                 * the ra register before it is saved.
                 */
-               UASM_i_LA(&p, t0, (long)mips_cps_pm_save);
-               uasm_i_jalr(&p, v0, t0);
+               UASM_i_LA(&p, GPR_T0, (long)mips_cps_pm_save);
+               uasm_i_jalr(&p, GPR_V0, GPR_T0);
                uasm_i_nop(&p);
        }
 
@@ -399,11 +393,11 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                /* Increment ready_count */
                uasm_i_sync(&p, __SYNC_mb);
                uasm_build_label(&l, p, lbl_incready);
-               uasm_i_ll(&p, t1, 0, r_nc_count);
-               uasm_i_addiu(&p, t2, t1, 1);
-               uasm_i_sc(&p, t2, 0, r_nc_count);
-               uasm_il_beqz(&p, &r, t2, lbl_incready);
-               uasm_i_addiu(&p, t1, t1, 1);
+               uasm_i_ll(&p, GPR_T1, 0, r_nc_count);
+               uasm_i_addiu(&p, GPR_T2, GPR_T1, 1);
+               uasm_i_sc(&p, GPR_T2, 0, r_nc_count);
+               uasm_il_beqz(&p, &r, GPR_T2, lbl_incready);
+               uasm_i_addiu(&p, GPR_T1, GPR_T1, 1);
 
                /* Barrier ensuring all CPUs see the updated r_nc_count value */
                uasm_i_sync(&p, __SYNC_mb);
@@ -412,7 +406,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                 * If this is the last VPE to become ready for non-coherence
                 * then it should branch below.
                 */
-               uasm_il_beq(&p, &r, t1, r_online, lbl_disable_coherence);
+               uasm_il_beq(&p, &r, GPR_T1, r_online, lbl_disable_coherence);
                uasm_i_nop(&p);
 
                if (state < CPS_PM_POWER_GATED) {
@@ -422,13 +416,13 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                         * has been disabled before proceeding, which it will do
                         * by polling for the top bit of ready_count being set.
                         */
-                       uasm_i_addiu(&p, t1, zero, -1);
+                       uasm_i_addiu(&p, GPR_T1, GPR_ZERO, -1);
                        uasm_build_label(&l, p, lbl_poll_cont);
-                       uasm_i_lw(&p, t0, 0, r_nc_count);
-                       uasm_il_bltz(&p, &r, t0, lbl_secondary_cont);
+                       uasm_i_lw(&p, GPR_T0, 0, r_nc_count);
+                       uasm_il_bltz(&p, &r, GPR_T0, lbl_secondary_cont);
                        uasm_i_ehb(&p);
                        if (cpu_has_mipsmt)
-                               uasm_i_yield(&p, zero, t1);
+                               uasm_i_yield(&p, GPR_ZERO, GPR_T1);
                        uasm_il_b(&p, &r, lbl_poll_cont);
                        uasm_i_nop(&p);
                } else {
@@ -438,16 +432,16 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                         */
                        if (cpu_has_mipsmt) {
                                /* Halt the VPE via C0 tchalt register */
-                               uasm_i_addiu(&p, t0, zero, TCHALT_H);
-                               uasm_i_mtc0(&p, t0, 2, 4);
+                               uasm_i_addiu(&p, GPR_T0, GPR_ZERO, TCHALT_H);
+                               uasm_i_mtc0(&p, GPR_T0, 2, 4);
                        } else if (cpu_has_vp) {
                                /* Halt the VP via the CPC VP_STOP register */
                                unsigned int vpe_id;
 
                                vpe_id = cpu_vpe_id(&cpu_data[cpu]);
-                               uasm_i_addiu(&p, t0, zero, 1 << vpe_id);
-                               UASM_i_LA(&p, t1, (long)addr_cpc_cl_vp_stop());
-                               uasm_i_sw(&p, t0, 0, t1);
+                               uasm_i_addiu(&p, GPR_T0, GPR_ZERO, 1 << vpe_id);
+                               UASM_i_LA(&p, GPR_T1, (long)addr_cpc_cl_vp_stop());
+                               uasm_i_sw(&p, GPR_T0, 0, GPR_T1);
                        } else {
                                BUG();
                        }
@@ -482,9 +476,9 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                * defined by the interAptiv & proAptiv SUMs as ensuring that the
                *  operation resulting from the preceding store is complete.
                */
-               uasm_i_addiu(&p, t0, zero, 1 << cpu_core(&cpu_data[cpu]));
-               uasm_i_sw(&p, t0, 0, r_pcohctl);
-               uasm_i_lw(&p, t0, 0, r_pcohctl);
+               uasm_i_addiu(&p, GPR_T0, GPR_ZERO, 1 << cpu_core(&cpu_data[cpu]));
+               uasm_i_sw(&p, GPR_T0, 0, r_pcohctl);
+               uasm_i_lw(&p, GPR_T0, 0, r_pcohctl);
 
                /* Barrier to ensure write to coherence control is complete */
                uasm_i_sync(&p, __SYNC_full);
@@ -492,8 +486,8 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
        }
 
        /* Disable coherence */
-       uasm_i_sw(&p, zero, 0, r_pcohctl);
-       uasm_i_lw(&p, t0, 0, r_pcohctl);
+       uasm_i_sw(&p, GPR_ZERO, 0, r_pcohctl);
+       uasm_i_lw(&p, GPR_T0, 0, r_pcohctl);
 
        if (state >= CPS_PM_CLOCK_GATED) {
                err = cps_gen_flush_fsb(&p, &l, &r, &cpu_data[cpu],
@@ -515,9 +509,9 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                }
 
                /* Issue the CPC command */
-               UASM_i_LA(&p, t0, (long)addr_cpc_cl_cmd());
-               uasm_i_addiu(&p, t1, zero, cpc_cmd);
-               uasm_i_sw(&p, t1, 0, t0);
+               UASM_i_LA(&p, GPR_T0, (long)addr_cpc_cl_cmd());
+               uasm_i_addiu(&p, GPR_T1, GPR_ZERO, cpc_cmd);
+               uasm_i_sw(&p, GPR_T1, 0, GPR_T0);
 
                if (state == CPS_PM_POWER_GATED) {
                        /* If anything goes wrong just hang */
@@ -564,12 +558,12 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
         * will run this. The first will actually re-enable coherence & the
         * rest will just be performing a rather unusual nop.
         */
-       uasm_i_addiu(&p, t0, zero, mips_cm_revision() < CM_REV_CM3
+       uasm_i_addiu(&p, GPR_T0, GPR_ZERO, mips_cm_revision() < CM_REV_CM3
                                ? CM_GCR_Cx_COHERENCE_COHDOMAINEN
                                : CM3_GCR_Cx_COHERENCE_COHEN);
 
-       uasm_i_sw(&p, t0, 0, r_pcohctl);
-       uasm_i_lw(&p, t0, 0, r_pcohctl);
+       uasm_i_sw(&p, GPR_T0, 0, r_pcohctl);
+       uasm_i_lw(&p, GPR_T0, 0, r_pcohctl);
 
        /* Barrier to ensure write to coherence control is complete */
        uasm_i_sync(&p, __SYNC_full);
@@ -579,11 +573,11 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                /* Decrement ready_count */
                uasm_build_label(&l, p, lbl_decready);
                uasm_i_sync(&p, __SYNC_mb);
-               uasm_i_ll(&p, t1, 0, r_nc_count);
-               uasm_i_addiu(&p, t2, t1, -1);
-               uasm_i_sc(&p, t2, 0, r_nc_count);
-               uasm_il_beqz(&p, &r, t2, lbl_decready);
-               uasm_i_andi(&p, v0, t1, (1 << fls(smp_num_siblings)) - 1);
+               uasm_i_ll(&p, GPR_T1, 0, r_nc_count);
+               uasm_i_addiu(&p, GPR_T2, GPR_T1, -1);
+               uasm_i_sc(&p, GPR_T2, 0, r_nc_count);
+               uasm_il_beqz(&p, &r, GPR_T2, lbl_decready);
+               uasm_i_andi(&p, GPR_V0, GPR_T1, (1 << fls(smp_num_siblings)) - 1);
 
                /* Barrier ensuring all CPUs see the updated r_nc_count value */
                uasm_i_sync(&p, __SYNC_mb);
@@ -612,7 +606,7 @@ static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
        }
 
        /* The core is coherent, time to return to C code */
-       uasm_i_jr(&p, ra);
+       uasm_i_jr(&p, GPR_RA);
        uasm_i_nop(&p);
 
 gen_done:
index 38c6925a1beadbf290c4a059d2cbb50a68f44f2c..ff7535de42ca5b7e1f63ff0718bc65b0922e84e0 100644
@@ -95,11 +95,11 @@ int __init rtlx_module_init(void)
                atomic_set(&channel_wqs[i].in_open, 0);
                mutex_init(&channel_wqs[i].mutex);
 
-               dev = device_create(mt_class, NULL, MKDEV(major, i), NULL,
+               dev = device_create(&mt_class, NULL, MKDEV(major, i), NULL,
                                    "%s%d", RTLX_MODULE_NAME, i);
                if (IS_ERR(dev)) {
                        while (i--)
-                               device_destroy(mt_class, MKDEV(major, i));
+                               device_destroy(&mt_class, MKDEV(major, i));
 
                        err = PTR_ERR(dev);
                        goto out_chrdev;
@@ -127,7 +127,7 @@ int __init rtlx_module_init(void)
 
 out_class:
        for (i = 0; i < RTLX_CHANNELS; i++)
-               device_destroy(mt_class, MKDEV(major, i));
+               device_destroy(&mt_class, MKDEV(major, i));
 out_chrdev:
        unregister_chrdev(major, RTLX_MODULE_NAME);
 
@@ -139,7 +139,7 @@ void __exit rtlx_module_exit(void)
        int i;
 
        for (i = 0; i < RTLX_CHANNELS; i++)
-               device_destroy(mt_class, MKDEV(major, i));
+               device_destroy(&mt_class, MKDEV(major, i));
 
        unregister_chrdev(major, RTLX_MODULE_NAME);
 
index 9c30de1515976159e0210724a6e19313832a0fad..12a1a4ffb60211587186b1de584b1aadadcfc33e 100644
@@ -442,8 +442,6 @@ static void __init mips_reserve_vmcore(void)
 #endif
 }
 
-#ifdef CONFIG_KEXEC
-
 /* 64M alignment for crash kernel regions */
 #define CRASH_ALIGN    SZ_64M
 #define CRASH_ADDR_MAX SZ_512M
@@ -454,6 +452,9 @@ static void __init mips_parse_crashkernel(void)
        unsigned long long crash_size, crash_base;
        int ret;
 
+       if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
+               return;
+
        total_mem = memblock_phys_mem_size();
        ret = parse_crashkernel(boot_command_line, total_mem,
                                &crash_size, &crash_base,
@@ -489,6 +490,9 @@ static void __init request_crashkernel(struct resource *res)
 {
        int ret;
 
+       if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
+               return;
+
        if (crashk_res.start == crashk_res.end)
                return;
 
@@ -498,15 +502,6 @@ static void __init request_crashkernel(struct resource *res)
                        (unsigned long)(resource_size(&crashk_res) >> 20),
                        (unsigned long)(crashk_res.start  >> 20));
 }
-#else /* !defined(CONFIG_KEXEC)                */
-static void __init mips_parse_crashkernel(void)
-{
-}
-
-static void __init request_crashkernel(struct resource *res)
-{
-}
-#endif /* !defined(CONFIG_KEXEC)  */
 
 static void __init check_kernel_sections_mem(void)
 {
index f6c37d407f365fc3df4dbaf807a17550856a8c36..9cc087dd1c1940d3dc5d43eebf1846c73e7fb6cd 100644
@@ -7,6 +7,7 @@
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/io.h>
+#include <linux/memblock.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/hotplug.h>
 #include <linux/slab.h>
 #include <asm/mipsregs.h>
 #include <asm/pm-cps.h>
 #include <asm/r4kcache.h>
+#include <asm/regdef.h>
 #include <asm/smp.h>
 #include <asm/smp-cps.h>
 #include <asm/time.h>
 #include <asm/uasm.h>
 
+#define BEV_VEC_SIZE   0x500
+#define BEV_VEC_ALIGN  0x1000
+
+enum label_id {
+       label_not_nmi = 1,
+};
+
+UASM_L_LA(_not_nmi)
+
 static DECLARE_BITMAP(core_power, NR_CPUS);
+static uint32_t core_entry_reg;
+static phys_addr_t cps_vec_pa;
 
 struct core_boot_config *mips_cps_core_bootcfg;
 
@@ -34,10 +47,100 @@ static unsigned __init core_vpe_count(unsigned int cluster, unsigned core)
        return min(smp_max_threads, mips_cps_numvps(cluster, core));
 }
 
+static void __init *mips_cps_build_core_entry(void *addr)
+{
+       extern void (*nmi_handler)(void);
+       u32 *p = addr;
+       u32 val;
+       struct uasm_label labels[2];
+       struct uasm_reloc relocs[2];
+       struct uasm_label *l = labels;
+       struct uasm_reloc *r = relocs;
+
+       memset(labels, 0, sizeof(labels));
+       memset(relocs, 0, sizeof(relocs));
+
+       uasm_i_mfc0(&p, GPR_K0, C0_STATUS);
+       UASM_i_LA(&p, GPR_T9, ST0_NMI);
+       uasm_i_and(&p, GPR_K0, GPR_K0, GPR_T9);
+
+       uasm_il_bnez(&p, &r, GPR_K0, label_not_nmi);
+       uasm_i_nop(&p);
+       UASM_i_LA(&p, GPR_K0, (long)&nmi_handler);
+
+       uasm_l_not_nmi(&l, p);
+
+       val = CAUSEF_IV;
+       uasm_i_lui(&p, GPR_K0, val >> 16);
+       uasm_i_ori(&p, GPR_K0, GPR_K0, val & 0xffff);
+       uasm_i_mtc0(&p, GPR_K0, C0_CAUSE);
+       val = ST0_CU1 | ST0_CU0 | ST0_BEV | ST0_KX_IF_64;
+       uasm_i_lui(&p, GPR_K0, val >> 16);
+       uasm_i_ori(&p, GPR_K0, GPR_K0, val & 0xffff);
+       uasm_i_mtc0(&p, GPR_K0, C0_STATUS);
+       uasm_i_ehb(&p);
+       uasm_i_ori(&p, GPR_A0, 0, read_c0_config() & CONF_CM_CMASK);
+       UASM_i_LA(&p, GPR_A1, (long)mips_gcr_base);
+#if defined(KBUILD_64BIT_SYM32) || defined(CONFIG_32BIT)
+       UASM_i_LA(&p, GPR_T9, CKSEG1ADDR(__pa_symbol(mips_cps_core_boot)));
+#else
+       UASM_i_LA(&p, GPR_T9, TO_UNCAC(__pa_symbol(mips_cps_core_boot)));
+#endif
+       uasm_i_jr(&p, GPR_T9);
+       uasm_i_nop(&p);
+
+       uasm_resolve_relocs(relocs, labels);
+
+       return p;
+}
+
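For orientation, the emitted trampoline corresponds roughly to the following (a sketch; the LA expansions depend on the ABI):

	/*
	 *	...NMI check on Status & ST0_NMI...
	 *	lui/ori	k0, CAUSEF_IV
	 *	mtc0	k0, CP0_CAUSE
	 *	lui/ori	k0, ST0_CU1 | ST0_CU0 | ST0_BEV | ST0_KX_IF_64
	 *	mtc0	k0, CP0_STATUS
	 *	ehb
	 *	ori	a0, zero, <kseg0 CCA>
	 *	la	a1, <mips_gcr_base value>
	 *	la	t9, mips_cps_core_boot	# via its uncached address
	 *	jr	t9
	 *	 nop
	 */
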
+static int __init allocate_cps_vecs(void)
+{
+       /* Try to allocate in KSEG1 first */
+       cps_vec_pa = memblock_phys_alloc_range(BEV_VEC_SIZE, BEV_VEC_ALIGN,
+                                               0x0, CSEGX_SIZE - 1);
+
+       if (cps_vec_pa)
+               core_entry_reg = CKSEG1ADDR(cps_vec_pa) &
+                                       CM_GCR_Cx_RESET_BASE_BEVEXCBASE;
+
+       if (!cps_vec_pa && mips_cm_is64) {
+               cps_vec_pa = memblock_phys_alloc_range(BEV_VEC_SIZE, BEV_VEC_ALIGN,
+                                                       0x0, SZ_4G - 1);
+               if (cps_vec_pa)
+                       core_entry_reg = (cps_vec_pa & CM_GCR_Cx_RESET_BASE_BEVEXCBASE) |
+                                       CM_GCR_Cx_RESET_BASE_MODE;
+       }
+
+       if (!cps_vec_pa)
+               return -ENOMEM;
+
+       return 0;
+}
+
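The two fallbacks encode the reset vector differently (masks as used above); in brief:

	/*
	 * KSEG1-reachable block: reg = CKSEG1ADDR(pa) & BEVEXCBASE
	 * 64-bit CM, pa < 4 GiB: reg = (pa & BEVEXCBASE) | RESET_BASE_MODE
	 *
	 * boot_core() and cps_boot_secondary() later program this value
	 * via write_gcr_co_reset_base().
	 */
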
+static void __init setup_cps_vecs(void)
+{
+       void *cps_vec;
+
+       cps_vec = (void *)CKSEG1ADDR_OR_64BIT(cps_vec_pa);
+       mips_cps_build_core_entry(cps_vec);
+
+       memcpy(cps_vec + 0x200, &excep_tlbfill, 0x80);
+       memcpy(cps_vec + 0x280, &excep_xtlbfill, 0x80);
+       memcpy(cps_vec + 0x300, &excep_cache, 0x80);
+       memcpy(cps_vec + 0x380, &excep_genex, 0x80);
+       memcpy(cps_vec + 0x400, &excep_intex, 0x80);
+       memcpy(cps_vec + 0x480, &excep_ejtag, 0x80);
+
+       /* Make sure there is no stale prefetched data in the cache */
+       blast_inv_dcache_range(CKSEG0ADDR_OR_64BIT(cps_vec_pa), CKSEG0ADDR_OR_64BIT(cps_vec_pa) + BEV_VEC_SIZE);
+       bc_inv(CKSEG0ADDR_OR_64BIT(cps_vec_pa), BEV_VEC_SIZE);
+       __sync();
+}
+
 static void __init cps_smp_setup(void)
 {
        unsigned int nclusters, ncores, nvpes, core_vpes;
-       unsigned long core_entry;
        int cl, c, v;
 
        /* Detect & record VPE topology */
@@ -94,10 +197,11 @@ static void __init cps_smp_setup(void)
        /* Make core 0 coherent with everything */
        write_gcr_cl_coherence(0xff);
 
-       if (mips_cm_revision() >= CM_REV_CM3) {
-               core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
-               write_gcr_bev_base(core_entry);
-       }
+       if (allocate_cps_vecs())
+               pr_err("Failed to allocate CPS vectors\n");
+
+       if (core_entry_reg && mips_cm_revision() >= CM_REV_CM3)
+               write_gcr_bev_base(core_entry_reg);
 
 #ifdef CONFIG_MIPS_MT_FPAFF
        /* If we have an FPU, enroll ourselves in the FPU-full mask */
@@ -110,10 +214,14 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
 {
        unsigned ncores, core_vpes, c, cca;
        bool cca_unsuitable, cores_limited;
-       u32 *entry_code;
 
        mips_mt_set_cpuoptions();
 
+       if (!core_entry_reg) {
+               pr_err("core_entry address unsuitable, disabling smp-cps\n");
+               goto err_out;
+       }
+
        /* Detect whether the CCA is unsuited to multi-core SMP */
        cca = read_c0_config() & CONF_CM_CMASK;
        switch (cca) {
@@ -145,20 +253,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus)
                        (cca_unsuitable && cpu_has_dc_aliases) ? " & " : "",
                        cpu_has_dc_aliases ? "dcache aliasing" : "");
 
-       /*
-        * Patch the start of mips_cps_core_entry to provide:
-        *
-        * s0 = kseg0 CCA
-        */
-       entry_code = (u32 *)&mips_cps_core_entry;
-       uasm_i_addiu(&entry_code, 16, 0, cca);
-       UASM_i_LA(&entry_code, 17, (long)mips_gcr_base);
-       BUG_ON((void *)entry_code > (void *)&mips_cps_core_entry_patch_end);
-       blast_dcache_range((unsigned long)&mips_cps_core_entry,
-                          (unsigned long)entry_code);
-       bc_wback_inv((unsigned long)&mips_cps_core_entry,
-                    (void *)entry_code - (void *)&mips_cps_core_entry);
-       __sync();
+       setup_cps_vecs();
 
        /* Allocate core boot configuration structs */
        ncores = mips_cps_numcores(0);
@@ -213,7 +308,7 @@ static void boot_core(unsigned int core, unsigned int vpe_id)
        mips_cm_lock_other(0, core, 0, CM_GCR_Cx_OTHER_BLOCK_LOCAL);
 
        /* Set its reset vector */
-       write_gcr_co_reset_base(CKSEG1ADDR((unsigned long)mips_cps_core_entry));
+       write_gcr_co_reset_base(core_entry_reg);
 
        /* Ensure its coherency is disabled */
        write_gcr_co_coherence(0);
@@ -290,7 +385,6 @@ static int cps_boot_secondary(int cpu, struct task_struct *idle)
        unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]);
        struct core_boot_config *core_cfg = &mips_cps_core_bootcfg[core];
        struct vpe_boot_config *vpe_cfg = &core_cfg->vpe_config[vpe_id];
-       unsigned long core_entry;
        unsigned int remote;
        int err;
 
@@ -314,8 +408,7 @@ static int cps_boot_secondary(int cpu, struct task_struct *idle)
 
        if (cpu_has_vp) {
                mips_cm_lock_other(0, core, vpe_id, CM_GCR_Cx_OTHER_BLOCK_LOCAL);
-               core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
-               write_gcr_co_reset_base(core_entry);
+               write_gcr_co_reset_base(core_entry_reg);
                mips_cm_unlock_other();
        }
 
index a1c1cb5de91321468f338d41a01df2f40efaf293..dc29bd9656b081dc7eaf43170d3542253866c1de 100644
@@ -58,6 +58,7 @@
 #include <asm/module.h>
 #include <asm/msa.h>
 #include <asm/ptrace.h>
+#include <asm/regdef.h>
 #include <asm/sections.h>
 #include <asm/siginfo.h>
 #include <asm/tlbdebug.h>
@@ -2041,13 +2042,12 @@ void __init *set_except_vector(int n, void *addr)
                unsigned long jump_mask = ~((1 << 28) - 1);
 #endif
                u32 *buf = (u32 *)(ebase + 0x200);
-               unsigned int k0 = 26;
                if ((handler & jump_mask) == ((ebase + 0x200) & jump_mask)) {
                        uasm_i_j(&buf, handler & ~jump_mask);
                        uasm_i_nop(&buf);
                } else {
-                       UASM_i_LA(&buf, k0, handler);
-                       uasm_i_jr(&buf, k0);
+                       UASM_i_LA(&buf, GPR_K0, handler);
+                       uasm_i_jr(&buf, GPR_K0);
                        uasm_i_nop(&buf);
                }
                local_flush_icache_range(ebase + 0x200, (unsigned long)buf);
@@ -2299,7 +2299,7 @@ static const char panic_null_cerr[] =
 void set_uncached_handler(unsigned long offset, void *addr,
        unsigned long size)
 {
-       unsigned long uncached_ebase = CKSEG1ADDR(ebase);
+       unsigned long uncached_ebase = CKSEG1ADDR_OR_64BIT(__pa(ebase));
 
        if (!addr)
                panic(panic_null_cerr);
@@ -2351,10 +2351,13 @@ void __init trap_init(void)
                 * EVA is special though as it allows segments to be rearranged
                 * and to become uncached during cache error handling.
                 */
-               if (!IS_ENABLED(CONFIG_EVA) && !WARN_ON(ebase_pa >= 0x20000000))
+               if (!IS_ENABLED(CONFIG_EVA) && ebase_pa < 0x20000000)
                        ebase = CKSEG0ADDR(ebase_pa);
                else
                        ebase = (unsigned long)phys_to_virt(ebase_pa);
+               if (ebase_pa >= 0x20000000)
+                       pr_warn("ebase(%pa) should better be in KSeg0",
+                               &ebase_pa);
        }
 
        if (cpu_has_mmips) {
index 667bc75f6420315cc3933a6533c9fccf6be88304..84124ac2d2a5c8838e113023b5f20458d36da14b 100644
@@ -95,8 +95,8 @@ int vpe_run(struct vpe *v)
         * We don't pass the memsize here, so VPE programs need to be
         * compiled with DFLT_STACK_SIZE and DFLT_HEAP_SIZE defined.
         */
-       mttgpr(7, 0);
-       mttgpr(6, v->ntcs);
+       mttgpr($7, 0);
+       mttgpr($6, v->ntcs);
 
        /* set up VPE1 */
        /*
index 18e7a17d51158ee45901b4fda71f4996eb6d8a9a..ab57221fa4ddef542dde61284045de59cb9350af 100644
@@ -17,7 +17,7 @@ if VIRTUALIZATION
 
 config KVM
        tristate "Kernel-based Virtual Machine (KVM) support"
-       depends on HAVE_KVM
+       depends on CPU_SUPPORTS_VZ
        depends on MIPS_FP_SUPPORT
        select EXPORT_UASM
        select KVM_COMMON
@@ -26,6 +26,7 @@ config KVM
        select KVM_MMIO
        select KVM_GENERIC_MMU_NOTIFIER
        select KVM_GENERIC_HARDWARE_ENABLING
+       select HAVE_KVM_READONLY_MEM
        help
          Support for hosting Guest kernels.
 
index aceed14aa1f7fc509e5469c6f79043acfbf67741..ac8e074c6bb76e0fd7c84d2357906c710f919e17 100644
 
 #include <linux/kvm_host.h>
 #include <linux/log2.h>
+#include <asm/mipsregs.h>
 #include <asm/mmu_context.h>
 #include <asm/msa.h>
+#include <asm/regdef.h>
 #include <asm/setup.h>
 #include <asm/tlbex.h>
 #include <asm/uasm.h>
 
-/* Register names */
-#define ZERO           0
-#define AT             1
-#define V0             2
-#define V1             3
-#define A0             4
-#define A1             5
-
-#if _MIPS_SIM == _MIPS_SIM_ABI32
-#define T0             8
-#define T1             9
-#define T2             10
-#define T3             11
-#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
-
-#if _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32
-#define T0             12
-#define T1             13
-#define T2             14
-#define T3             15
-#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */
-
-#define S0             16
-#define S1             17
-#define T9             25
-#define K0             26
-#define K1             27
-#define GP             28
-#define SP             29
-#define RA             31
-
-/* Some CP0 registers */
-#define C0_PWBASE      5, 5
-#define C0_HWRENA      7, 0
-#define C0_BADVADDR    8, 0
-#define C0_BADINSTR    8, 1
-#define C0_BADINSTRP   8, 2
-#define C0_PGD         9, 7
-#define C0_ENTRYHI     10, 0
-#define C0_GUESTCTL1   10, 4
-#define C0_STATUS      12, 0
-#define C0_GUESTCTL0   12, 6
-#define C0_CAUSE       13, 0
-#define C0_EPC         14, 0
-#define C0_EBASE       15, 1
-#define C0_CONFIG5     16, 5
-#define C0_DDATA_LO    28, 3
-#define C0_ERROREPC    30, 0
-
 #define CALLFRAME_SIZ   32
 
-#ifdef CONFIG_64BIT
-#define ST0_KX_IF_64   ST0_KX
-#else
-#define ST0_KX_IF_64   0
-#endif
-
-static unsigned int scratch_vcpu[2] = { C0_DDATA_LO };
+static unsigned int scratch_vcpu[2] = { C0_DDATALO };
 static unsigned int scratch_tmp[2] = { C0_ERROREPC };
 
 enum label_id {
@@ -212,60 +159,60 @@ void *kvm_mips_build_vcpu_run(void *addr)
        unsigned int i;
 
        /*
-        * A0: vcpu
+        * GPR_A0: vcpu
         */
 
        /* k0/k1 not being used in host kernel context */
-       UASM_i_ADDIU(&p, K1, SP, -(int)sizeof(struct pt_regs));
+       UASM_i_ADDIU(&p, GPR_K1, GPR_SP, -(int)sizeof(struct pt_regs));
        for (i = 16; i < 32; ++i) {
                if (i == 24)
                        i = 28;
-               UASM_i_SW(&p, i, offsetof(struct pt_regs, regs[i]), K1);
+               UASM_i_SW(&p, i, offsetof(struct pt_regs, regs[i]), GPR_K1);
        }
 
        /* Save host status */
-       uasm_i_mfc0(&p, V0, C0_STATUS);
-       UASM_i_SW(&p, V0, offsetof(struct pt_regs, cp0_status), K1);
+       uasm_i_mfc0(&p, GPR_V0, C0_STATUS);
+       UASM_i_SW(&p, GPR_V0, offsetof(struct pt_regs, cp0_status), GPR_K1);
 
        /* Save scratch registers, will be used to store pointer to vcpu etc */
-       kvm_mips_build_save_scratch(&p, V1, K1);
+       kvm_mips_build_save_scratch(&p, GPR_V1, GPR_K1);
 
        /* VCPU scratch register has pointer to vcpu */
-       UASM_i_MTC0(&p, A0, scratch_vcpu[0], scratch_vcpu[1]);
+       UASM_i_MTC0(&p, GPR_A0, scratch_vcpu[0], scratch_vcpu[1]);
 
        /* Offset into vcpu->arch */
-       UASM_i_ADDIU(&p, K1, A0, offsetof(struct kvm_vcpu, arch));
+       UASM_i_ADDIU(&p, GPR_K1, GPR_A0, offsetof(struct kvm_vcpu, arch));
 
        /*
         * Save the host stack to VCPU, used for exception processing
         * when we exit from the Guest
         */
-       UASM_i_SW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1);
+       UASM_i_SW(&p, GPR_SP, offsetof(struct kvm_vcpu_arch, host_stack), GPR_K1);
 
        /* Save the kernel gp as well */
-       UASM_i_SW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1);
+       UASM_i_SW(&p, GPR_GP, offsetof(struct kvm_vcpu_arch, host_gp), GPR_K1);
 
        /*
         * Setup status register for running the guest in UM, interrupts
         * are disabled
         */
-       UASM_i_LA(&p, K0, ST0_EXL | KSU_USER | ST0_BEV | ST0_KX_IF_64);
-       uasm_i_mtc0(&p, K0, C0_STATUS);
+       UASM_i_LA(&p, GPR_K0, ST0_EXL | KSU_USER | ST0_BEV | ST0_KX_IF_64);
+       uasm_i_mtc0(&p, GPR_K0, C0_STATUS);
        uasm_i_ehb(&p);
 
        /* load up the new EBASE */
-       UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1);
-       build_set_exc_base(&p, K0);
+       UASM_i_LW(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, guest_ebase), GPR_K1);
+       build_set_exc_base(&p, GPR_K0);
 
        /*
         * Now that the new EBASE has been loaded, unset BEV, set
         * interrupt mask as it was but make sure that timer interrupts
         * are enabled
         */
-       uasm_i_addiu(&p, K0, ZERO, ST0_EXL | KSU_USER | ST0_IE | ST0_KX_IF_64);
-       uasm_i_andi(&p, V0, V0, ST0_IM);
-       uasm_i_or(&p, K0, K0, V0);
-       uasm_i_mtc0(&p, K0, C0_STATUS);
+       uasm_i_addiu(&p, GPR_K0, GPR_ZERO, ST0_EXL | KSU_USER | ST0_IE | ST0_KX_IF_64);
+       uasm_i_andi(&p, GPR_V0, GPR_V0, ST0_IM);
+       uasm_i_or(&p, GPR_K0, GPR_K0, GPR_V0);
+       uasm_i_mtc0(&p, GPR_K0, C0_STATUS);
        uasm_i_ehb(&p);
 
        p = kvm_mips_build_enter_guest(p);
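
The addiu/andi/or triple just above is a small bit-merge worth spelling out: CP0_Status is rebuilt to force EXL, user mode and IE for guest entry while keeping only the host's interrupt-mask field from the value saved earlier in v0 (the 64-bit KX bit, folded in via ST0_KX_IF_64, is omitted below). The same merge in plain C, with the architectural bit values restated so the sketch stands alone:

    #include <stdint.h>
    #include <stdio.h>

    #define ST0_IE   0x00000001 /* interrupt enable */
    #define ST0_EXL  0x00000002 /* exception level */
    #define KSU_USER 0x00000010 /* KSU field = user mode */
    #define ST0_IM   0x0000ff00 /* interrupt mask bits */

    int main(void)
    {
            uint32_t host_status = 0x1000ff01; /* example saved CP0_Status */
            uint32_t guest = (ST0_EXL | KSU_USER | ST0_IE)
                           | (host_status & ST0_IM);

            printf("guest CP0_Status = %#010x\n", guest);
            return 0;
    }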
@@ -296,15 +243,15 @@ static void *kvm_mips_build_enter_guest(void *addr)
        memset(relocs, 0, sizeof(relocs));
 
        /* Set Guest EPC */
-       UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1);
-       UASM_i_MTC0(&p, T0, C0_EPC);
+       UASM_i_LW(&p, GPR_T0, offsetof(struct kvm_vcpu_arch, pc), GPR_K1);
+       UASM_i_MTC0(&p, GPR_T0, C0_EPC);
 
        /* Save normal linux process pgd (VZ guarantees pgd_reg is set) */
        if (cpu_has_ldpte)
-               UASM_i_MFC0(&p, K0, C0_PWBASE);
+               UASM_i_MFC0(&p, GPR_K0, C0_PWBASE);
        else
-               UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg);
-       UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_pgd), K1);
+               UASM_i_MFC0(&p, GPR_K0, c0_kscratch(), pgd_reg);
+       UASM_i_SW(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, host_pgd), GPR_K1);
 
        /*
         * Set up KVM GPA pgd.
@@ -312,24 +259,24 @@ static void *kvm_mips_build_enter_guest(void *addr)
         * - call tlbmiss_handler_setup_pgd(mm->pgd)
         * - write mm->pgd into CP0_PWBase
         *
-        * We keep S0 pointing at struct kvm so we can load the ASID below.
+        * We keep GPR_S0 pointing at struct kvm so we can load the ASID below.
         */
-       UASM_i_LW(&p, S0, (int)offsetof(struct kvm_vcpu, kvm) -
-                         (int)offsetof(struct kvm_vcpu, arch), K1);
-       UASM_i_LW(&p, A0, offsetof(struct kvm, arch.gpa_mm.pgd), S0);
-       UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
-       uasm_i_jalr(&p, RA, T9);
+       UASM_i_LW(&p, GPR_S0, (int)offsetof(struct kvm_vcpu, kvm) -
+                         (int)offsetof(struct kvm_vcpu, arch), GPR_K1);
+       UASM_i_LW(&p, GPR_A0, offsetof(struct kvm, arch.gpa_mm.pgd), GPR_S0);
+       UASM_i_LA(&p, GPR_T9, (unsigned long)tlbmiss_handler_setup_pgd);
+       uasm_i_jalr(&p, GPR_RA, GPR_T9);
        /* delay slot */
        if (cpu_has_htw)
-               UASM_i_MTC0(&p, A0, C0_PWBASE);
+               UASM_i_MTC0(&p, GPR_A0, C0_PWBASE);
        else
                uasm_i_nop(&p);
 
        /* Set GM bit to setup eret to VZ guest context */
-       uasm_i_addiu(&p, V1, ZERO, 1);
-       uasm_i_mfc0(&p, K0, C0_GUESTCTL0);
-       uasm_i_ins(&p, K0, V1, MIPS_GCTL0_GM_SHIFT, 1);
-       uasm_i_mtc0(&p, K0, C0_GUESTCTL0);
+       uasm_i_addiu(&p, GPR_V1, GPR_ZERO, 1);
+       uasm_i_mfc0(&p, GPR_K0, C0_GUESTCTL0);
+       uasm_i_ins(&p, GPR_K0, GPR_V1, MIPS_GCTL0_GM_SHIFT, 1);
+       uasm_i_mtc0(&p, GPR_K0, C0_GUESTCTL0);
 
        if (cpu_has_guestid) {
                /*
@@ -338,13 +285,13 @@ static void *kvm_mips_build_enter_guest(void *addr)
                 */
 
                /* Get current GuestID */
-               uasm_i_mfc0(&p, T0, C0_GUESTCTL1);
+               uasm_i_mfc0(&p, GPR_T0, C0_GUESTCTL1);
                /* Set GuestCtl1.RID = GuestCtl1.ID */
-               uasm_i_ext(&p, T1, T0, MIPS_GCTL1_ID_SHIFT,
+               uasm_i_ext(&p, GPR_T1, GPR_T0, MIPS_GCTL1_ID_SHIFT,
                           MIPS_GCTL1_ID_WIDTH);
-               uasm_i_ins(&p, T0, T1, MIPS_GCTL1_RID_SHIFT,
+               uasm_i_ins(&p, GPR_T0, GPR_T1, MIPS_GCTL1_RID_SHIFT,
                           MIPS_GCTL1_RID_WIDTH);
-               uasm_i_mtc0(&p, T0, C0_GUESTCTL1);
+               uasm_i_mtc0(&p, GPR_T0, C0_GUESTCTL1);
 
                /* GuestID handles dealiasing so we don't need to touch ASID */
                goto skip_asid_restore;
@@ -353,65 +300,65 @@ static void *kvm_mips_build_enter_guest(void *addr)
        /* Root ASID Dealias (RAD) */
 
        /* Save host ASID */
-       UASM_i_MFC0(&p, K0, C0_ENTRYHI);
-       UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi),
-                 K1);
+       UASM_i_MFC0(&p, GPR_K0, C0_ENTRYHI);
+       UASM_i_SW(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, host_entryhi),
+                 GPR_K1);
 
        /* Set the root ASID for the Guest */
-       UASM_i_ADDIU(&p, T1, S0,
+       UASM_i_ADDIU(&p, GPR_T1, GPR_S0,
                     offsetof(struct kvm, arch.gpa_mm.context.asid));
 
        /* t1: contains the base of the ASID array, need to get the cpu id  */
        /* smp_processor_id */
-       uasm_i_lw(&p, T2, offsetof(struct thread_info, cpu), GP);
+       uasm_i_lw(&p, GPR_T2, offsetof(struct thread_info, cpu), GPR_GP);
        /* index the ASID array */
-       uasm_i_sll(&p, T2, T2, ilog2(sizeof(long)));
-       UASM_i_ADDU(&p, T3, T1, T2);
-       UASM_i_LW(&p, K0, 0, T3);
+       uasm_i_sll(&p, GPR_T2, GPR_T2, ilog2(sizeof(long)));
+       UASM_i_ADDU(&p, GPR_T3, GPR_T1, GPR_T2);
+       UASM_i_LW(&p, GPR_K0, 0, GPR_T3);
 #ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
        /*
         * reuse ASID array offset
         * cpuinfo_mips is a multiple of sizeof(long)
         */
-       uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/sizeof(long));
-       uasm_i_mul(&p, T2, T2, T3);
+       uasm_i_addiu(&p, GPR_T3, GPR_ZERO, sizeof(struct cpuinfo_mips)/sizeof(long));
+       uasm_i_mul(&p, GPR_T2, GPR_T2, GPR_T3);
 
-       UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask);
-       UASM_i_ADDU(&p, AT, AT, T2);
-       UASM_i_LW(&p, T2, uasm_rel_lo((long)&cpu_data[0].asid_mask), AT);
-       uasm_i_and(&p, K0, K0, T2);
+       UASM_i_LA_mostly(&p, GPR_AT, (long)&cpu_data[0].asid_mask);
+       UASM_i_ADDU(&p, GPR_AT, GPR_AT, GPR_T2);
+       UASM_i_LW(&p, GPR_T2, uasm_rel_lo((long)&cpu_data[0].asid_mask), GPR_AT);
+       uasm_i_and(&p, GPR_K0, GPR_K0, GPR_T2);
 #else
-       uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID);
+       uasm_i_andi(&p, GPR_K0, GPR_K0, MIPS_ENTRYHI_ASID);
 #endif
 
        /* Set up KVM VZ root ASID (!guestid) */
-       uasm_i_mtc0(&p, K0, C0_ENTRYHI);
+       uasm_i_mtc0(&p, GPR_K0, C0_ENTRYHI);
 skip_asid_restore:
        uasm_i_ehb(&p);
 
        /* Disable RDHWR access */
-       uasm_i_mtc0(&p, ZERO, C0_HWRENA);
+       uasm_i_mtc0(&p, GPR_ZERO, C0_HWRENA);
 
        /* load the guest context from VCPU and return */
        for (i = 1; i < 32; ++i) {
                /* Guest k0/k1 loaded later */
-               if (i == K0 || i == K1)
+               if (i == GPR_K0 || i == GPR_K1)
                        continue;
-               UASM_i_LW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1);
+               UASM_i_LW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), GPR_K1);
        }
 
 #ifndef CONFIG_CPU_MIPSR6
        /* Restore hi/lo */
-       UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, hi), K1);
-       uasm_i_mthi(&p, K0);
+       UASM_i_LW(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, hi), GPR_K1);
+       uasm_i_mthi(&p, GPR_K0);
 
-       UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, lo), K1);
-       uasm_i_mtlo(&p, K0);
+       UASM_i_LW(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, lo), GPR_K1);
+       uasm_i_mtlo(&p, GPR_K0);
 #endif
 
        /* Restore the guest's k0/k1 registers */
-       UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1);
-       UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1);
+       UASM_i_LW(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, gprs[GPR_K0]), GPR_K1);
+       UASM_i_LW(&p, GPR_K1, offsetof(struct kvm_vcpu_arch, gprs[GPR_K1]), GPR_K1);
 
        /* Jump to guest */
        uasm_i_eret(&p);
@@ -444,13 +391,13 @@ void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler)
        memset(relocs, 0, sizeof(relocs));
 
        /* Save guest k1 into scratch register */
-       UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]);
+       UASM_i_MTC0(&p, GPR_K1, scratch_tmp[0], scratch_tmp[1]);
 
        /* Get the VCPU pointer from the VCPU scratch register */
-       UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]);
+       UASM_i_MFC0(&p, GPR_K1, scratch_vcpu[0], scratch_vcpu[1]);
 
        /* Save guest k0 into VCPU structure */
-       UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1);
+       UASM_i_SW(&p, GPR_K0, offsetof(struct kvm_vcpu, arch.gprs[GPR_K0]), GPR_K1);
 
        /*
         * Some of the common tlbex code uses current_cpu_type(). For KVM we
@@ -459,13 +406,13 @@ void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler)
        preempt_disable();
 
 #ifdef CONFIG_CPU_LOONGSON64
-       UASM_i_MFC0(&p, K1, C0_PGD);
-       uasm_i_lddir(&p, K0, K1, 3);  /* global page dir */
+       UASM_i_MFC0(&p, GPR_K1, C0_PGD);
+       uasm_i_lddir(&p, GPR_K0, GPR_K1, 3);  /* global page dir */
 #ifndef __PAGETABLE_PMD_FOLDED
-       uasm_i_lddir(&p, K1, K0, 1);  /* middle page dir */
+       uasm_i_lddir(&p, GPR_K1, GPR_K0, 1);  /* middle page dir */
 #endif
-       uasm_i_ldpte(&p, K1, 0);      /* even */
-       uasm_i_ldpte(&p, K1, 1);      /* odd */
+       uasm_i_ldpte(&p, GPR_K1, 0);      /* even */
+       uasm_i_ldpte(&p, GPR_K1, 1);      /* odd */
        uasm_i_tlbwr(&p);
 #else
        /*
@@ -480,27 +427,27 @@ void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler)
         */
 
 #ifdef CONFIG_64BIT
-       build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */
+       build_get_pmde64(&p, &l, &r, GPR_K0, GPR_K1); /* get pmd in GPR_K1 */
 #else
-       build_get_pgde32(&p, K0, K1); /* get pgd in K1 */
+       build_get_pgde32(&p, GPR_K0, GPR_K1); /* get pgd in GPR_K1 */
 #endif
 
        /* we don't support huge pages yet */
 
-       build_get_ptep(&p, K0, K1);
-       build_update_entries(&p, K0, K1);
+       build_get_ptep(&p, GPR_K0, GPR_K1);
+       build_update_entries(&p, GPR_K0, GPR_K1);
        build_tlb_write_entry(&p, &l, &r, tlb_random);
 #endif
 
        preempt_enable();
 
        /* Get the VCPU pointer from the VCPU scratch register again */
-       UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]);
+       UASM_i_MFC0(&p, GPR_K1, scratch_vcpu[0], scratch_vcpu[1]);
 
        /* Restore the guest's k0/k1 registers */
-       UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1);
+       UASM_i_LW(&p, GPR_K0, offsetof(struct kvm_vcpu, arch.gprs[GPR_K0]), GPR_K1);
        uasm_i_ehb(&p);
-       UASM_i_MFC0(&p, K1, scratch_tmp[0], scratch_tmp[1]);
+       UASM_i_MFC0(&p, GPR_K1, scratch_tmp[0], scratch_tmp[1]);
 
        /* Jump to guest */
        uasm_i_eret(&p);
@@ -530,14 +477,14 @@ void *kvm_mips_build_exception(void *addr, void *handler)
        memset(relocs, 0, sizeof(relocs));
 
        /* Save guest k1 into scratch register */
-       UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]);
+       UASM_i_MTC0(&p, GPR_K1, scratch_tmp[0], scratch_tmp[1]);
 
        /* Get the VCPU pointer from the VCPU scratch register */
-       UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]);
-       UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch));
+       UASM_i_MFC0(&p, GPR_K1, scratch_vcpu[0], scratch_vcpu[1]);
+       UASM_i_ADDIU(&p, GPR_K1, GPR_K1, offsetof(struct kvm_vcpu, arch));
 
        /* Save guest k0 into VCPU structure */
-       UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1);
+       UASM_i_SW(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, gprs[GPR_K0]), GPR_K1);
 
        /* Branch to the common handler */
        uasm_il_b(&p, &r, label_exit_common);
@@ -585,85 +532,85 @@ void *kvm_mips_build_exit(void *addr)
        /* Start saving Guest context to VCPU */
        for (i = 0; i < 32; ++i) {
                /* Guest k0/k1 saved later */
-               if (i == K0 || i == K1)
+               if (i == GPR_K0 || i == GPR_K1)
                        continue;
-               UASM_i_SW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1);
+               UASM_i_SW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), GPR_K1);
        }
 
 #ifndef CONFIG_CPU_MIPSR6
        /* We need to save hi/lo and restore them on the way out */
-       uasm_i_mfhi(&p, T0);
-       UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, hi), K1);
+       uasm_i_mfhi(&p, GPR_T0);
+       UASM_i_SW(&p, GPR_T0, offsetof(struct kvm_vcpu_arch, hi), GPR_K1);
 
-       uasm_i_mflo(&p, T0);
-       UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, lo), K1);
+       uasm_i_mflo(&p, GPR_T0);
+       UASM_i_SW(&p, GPR_T0, offsetof(struct kvm_vcpu_arch, lo), GPR_K1);
 #endif
 
        /* Finally save guest k1 to VCPU */
        uasm_i_ehb(&p);
-       UASM_i_MFC0(&p, T0, scratch_tmp[0], scratch_tmp[1]);
-       UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1);
+       UASM_i_MFC0(&p, GPR_T0, scratch_tmp[0], scratch_tmp[1]);
+       UASM_i_SW(&p, GPR_T0, offsetof(struct kvm_vcpu_arch, gprs[GPR_K1]), GPR_K1);
 
        /* Now that context has been saved, we can use other registers */
 
        /* Restore vcpu */
-       UASM_i_MFC0(&p, S0, scratch_vcpu[0], scratch_vcpu[1]);
+       UASM_i_MFC0(&p, GPR_S0, scratch_vcpu[0], scratch_vcpu[1]);
 
        /*
         * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process
         * the exception
         */
-       UASM_i_MFC0(&p, K0, C0_EPC);
-       UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, pc), K1);
+       UASM_i_MFC0(&p, GPR_K0, C0_EPC);
+       UASM_i_SW(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, pc), GPR_K1);
 
-       UASM_i_MFC0(&p, K0, C0_BADVADDR);
-       UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_badvaddr),
-                 K1);
+       UASM_i_MFC0(&p, GPR_K0, C0_BADVADDR);
+       UASM_i_SW(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, host_cp0_badvaddr),
+                 GPR_K1);
 
-       uasm_i_mfc0(&p, K0, C0_CAUSE);
-       uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1);
+       uasm_i_mfc0(&p, GPR_K0, C0_CAUSE);
+       uasm_i_sw(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), GPR_K1);
 
        if (cpu_has_badinstr) {
-               uasm_i_mfc0(&p, K0, C0_BADINSTR);
-               uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch,
-                                          host_cp0_badinstr), K1);
+               uasm_i_mfc0(&p, GPR_K0, C0_BADINSTR);
+               uasm_i_sw(&p, GPR_K0, offsetof(struct kvm_vcpu_arch,
+                                          host_cp0_badinstr), GPR_K1);
        }
 
        if (cpu_has_badinstrp) {
-               uasm_i_mfc0(&p, K0, C0_BADINSTRP);
-               uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch,
-                                          host_cp0_badinstrp), K1);
+               uasm_i_mfc0(&p, GPR_K0, C0_BADINSTRP);
+               uasm_i_sw(&p, GPR_K0, offsetof(struct kvm_vcpu_arch,
+                                          host_cp0_badinstrp), GPR_K1);
        }
 
        /* Now restore the host state just enough to run the handlers */
 
        /* Switch EBASE to the one used by Linux */
        /* load up the host EBASE */
-       uasm_i_mfc0(&p, V0, C0_STATUS);
+       uasm_i_mfc0(&p, GPR_V0, C0_STATUS);
 
-       uasm_i_lui(&p, AT, ST0_BEV >> 16);
-       uasm_i_or(&p, K0, V0, AT);
+       uasm_i_lui(&p, GPR_AT, ST0_BEV >> 16);
+       uasm_i_or(&p, GPR_K0, GPR_V0, GPR_AT);
 
-       uasm_i_mtc0(&p, K0, C0_STATUS);
+       uasm_i_mtc0(&p, GPR_K0, C0_STATUS);
        uasm_i_ehb(&p);
 
-       UASM_i_LA_mostly(&p, K0, (long)&ebase);
-       UASM_i_LW(&p, K0, uasm_rel_lo((long)&ebase), K0);
-       build_set_exc_base(&p, K0);
+       UASM_i_LA_mostly(&p, GPR_K0, (long)&ebase);
+       UASM_i_LW(&p, GPR_K0, uasm_rel_lo((long)&ebase), GPR_K0);
+       build_set_exc_base(&p, GPR_K0);
 
        if (raw_cpu_has_fpu) {
                /*
                 * If FPU is enabled, save FCR31 and clear it so that later
                 * ctc1's don't trigger FPE for pending exceptions.
                 */
-               uasm_i_lui(&p, AT, ST0_CU1 >> 16);
-               uasm_i_and(&p, V1, V0, AT);
-               uasm_il_beqz(&p, &r, V1, label_fpu_1);
+               uasm_i_lui(&p, GPR_AT, ST0_CU1 >> 16);
+               uasm_i_and(&p, GPR_V1, GPR_V0, GPR_AT);
+               uasm_il_beqz(&p, &r, GPR_V1, label_fpu_1);
                 uasm_i_nop(&p);
-               uasm_i_cfc1(&p, T0, 31);
-               uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.fcr31),
-                         K1);
-               uasm_i_ctc1(&p, ZERO, 31);
+               uasm_i_cfc1(&p, GPR_T0, 31);
+               uasm_i_sw(&p, GPR_T0, offsetof(struct kvm_vcpu_arch, fpu.fcr31),
+                         GPR_K1);
+               uasm_i_ctc1(&p, GPR_ZERO, 31);
                uasm_l_fpu_1(&l, p);
        }
 
@@ -672,22 +619,22 @@ void *kvm_mips_build_exit(void *addr)
                 * If MSA is enabled, save MSACSR and clear it so that later
                 * instructions don't trigger MSAFPE for pending exceptions.
                 */
-               uasm_i_mfc0(&p, T0, C0_CONFIG5);
-               uasm_i_ext(&p, T0, T0, 27, 1); /* MIPS_CONF5_MSAEN */
-               uasm_il_beqz(&p, &r, T0, label_msa_1);
+               uasm_i_mfc0(&p, GPR_T0, C0_CONFIG5);
+               uasm_i_ext(&p, GPR_T0, GPR_T0, 27, 1); /* MIPS_CONF5_MSAEN */
+               uasm_il_beqz(&p, &r, GPR_T0, label_msa_1);
                 uasm_i_nop(&p);
-               uasm_i_cfcmsa(&p, T0, MSA_CSR);
-               uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.msacsr),
-                         K1);
-               uasm_i_ctcmsa(&p, MSA_CSR, ZERO);
+               uasm_i_cfcmsa(&p, GPR_T0, MSA_CSR);
+               uasm_i_sw(&p, GPR_T0, offsetof(struct kvm_vcpu_arch, fpu.msacsr),
+                         GPR_K1);
+               uasm_i_ctcmsa(&p, MSA_CSR, GPR_ZERO);
                uasm_l_msa_1(&l, p);
        }
 
        /* Restore host ASID */
        if (!cpu_has_guestid) {
-               UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi),
-                         K1);
-               UASM_i_MTC0(&p, K0, C0_ENTRYHI);
+               UASM_i_LW(&p, GPR_K0, offsetof(struct kvm_vcpu_arch, host_entryhi),
+                         GPR_K1);
+               UASM_i_MTC0(&p, GPR_K0, C0_ENTRYHI);
        }
 
        /*
@@ -696,56 +643,56 @@ void *kvm_mips_build_exit(void *addr)
         * - call tlbmiss_handler_setup_pgd(mm->pgd)
         * - write mm->pgd into CP0_PWBase
         */
-       UASM_i_LW(&p, A0,
-                 offsetof(struct kvm_vcpu_arch, host_pgd), K1);
-       UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
-       uasm_i_jalr(&p, RA, T9);
+       UASM_i_LW(&p, GPR_A0,
+                 offsetof(struct kvm_vcpu_arch, host_pgd), GPR_K1);
+       UASM_i_LA(&p, GPR_T9, (unsigned long)tlbmiss_handler_setup_pgd);
+       uasm_i_jalr(&p, GPR_RA, GPR_T9);
        /* delay slot */
        if (cpu_has_htw)
-               UASM_i_MTC0(&p, A0, C0_PWBASE);
+               UASM_i_MTC0(&p, GPR_A0, C0_PWBASE);
        else
                uasm_i_nop(&p);
 
        /* Clear GM bit so we don't enter guest mode when EXL is cleared */
-       uasm_i_mfc0(&p, K0, C0_GUESTCTL0);
-       uasm_i_ins(&p, K0, ZERO, MIPS_GCTL0_GM_SHIFT, 1);
-       uasm_i_mtc0(&p, K0, C0_GUESTCTL0);
+       uasm_i_mfc0(&p, GPR_K0, C0_GUESTCTL0);
+       uasm_i_ins(&p, GPR_K0, GPR_ZERO, MIPS_GCTL0_GM_SHIFT, 1);
+       uasm_i_mtc0(&p, GPR_K0, C0_GUESTCTL0);
 
        /* Save GuestCtl0 so we can access GExcCode after CPU migration */
-       uasm_i_sw(&p, K0,
-                 offsetof(struct kvm_vcpu_arch, host_cp0_guestctl0), K1);
+       uasm_i_sw(&p, GPR_K0,
+                 offsetof(struct kvm_vcpu_arch, host_cp0_guestctl0), GPR_K1);
 
        if (cpu_has_guestid) {
                /*
                 * Clear root mode GuestID, so that root TLB operations use the
                 * root GuestID in the root TLB.
                 */
-               uasm_i_mfc0(&p, T0, C0_GUESTCTL1);
+               uasm_i_mfc0(&p, GPR_T0, C0_GUESTCTL1);
                /* Set GuestCtl1.RID = MIPS_GCTL1_ROOT_GUESTID (i.e. 0) */
-               uasm_i_ins(&p, T0, ZERO, MIPS_GCTL1_RID_SHIFT,
+               uasm_i_ins(&p, GPR_T0, GPR_ZERO, MIPS_GCTL1_RID_SHIFT,
                           MIPS_GCTL1_RID_WIDTH);
-               uasm_i_mtc0(&p, T0, C0_GUESTCTL1);
+               uasm_i_mtc0(&p, GPR_T0, C0_GUESTCTL1);
        }
 
        /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
-       uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE));
-       uasm_i_and(&p, V0, V0, AT);
-       uasm_i_lui(&p, AT, ST0_CU0 >> 16);
-       uasm_i_or(&p, V0, V0, AT);
+       uasm_i_addiu(&p, GPR_AT, GPR_ZERO, ~(ST0_EXL | KSU_USER | ST0_IE));
+       uasm_i_and(&p, GPR_V0, GPR_V0, GPR_AT);
+       uasm_i_lui(&p, GPR_AT, ST0_CU0 >> 16);
+       uasm_i_or(&p, GPR_V0, GPR_V0, GPR_AT);
 #ifdef CONFIG_64BIT
-       uasm_i_ori(&p, V0, V0, ST0_SX | ST0_UX);
+       uasm_i_ori(&p, GPR_V0, GPR_V0, ST0_SX | ST0_UX);
 #endif
-       uasm_i_mtc0(&p, V0, C0_STATUS);
+       uasm_i_mtc0(&p, GPR_V0, C0_STATUS);
        uasm_i_ehb(&p);
 
-       /* Load up host GP */
-       UASM_i_LW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1);
+       /* Load up host GPR_GP */
+       UASM_i_LW(&p, GPR_GP, offsetof(struct kvm_vcpu_arch, host_gp), GPR_K1);
 
        /* Need a stack before we can jump to "C" */
-       UASM_i_LW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1);
+       UASM_i_LW(&p, GPR_SP, offsetof(struct kvm_vcpu_arch, host_stack), GPR_K1);
 
        /* Saved host state */
-       UASM_i_ADDIU(&p, SP, SP, -(int)sizeof(struct pt_regs));
+       UASM_i_ADDIU(&p, GPR_SP, GPR_SP, -(int)sizeof(struct pt_regs));
 
        /*
         * XXXKYMA do we need to load the host ASID, maybe not because the
@@ -753,12 +700,12 @@ void *kvm_mips_build_exit(void *addr)
         */
 
        /* Restore host scratch registers, as we'll have clobbered them */
-       kvm_mips_build_restore_scratch(&p, K0, SP);
+       kvm_mips_build_restore_scratch(&p, GPR_K0, GPR_SP);
 
        /* Restore RDHWR access */
-       UASM_i_LA_mostly(&p, K0, (long)&hwrena);
-       uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0);
-       uasm_i_mtc0(&p, K0, C0_HWRENA);
+       UASM_i_LA_mostly(&p, GPR_K0, (long)&hwrena);
+       uasm_i_lw(&p, GPR_K0, uasm_rel_lo((long)&hwrena), GPR_K0);
+       uasm_i_mtc0(&p, GPR_K0, C0_HWRENA);
 
        /* Jump to handler */
        /*
@@ -766,10 +713,10 @@ void *kvm_mips_build_exit(void *addr)
         * Now jump to the kvm_mips_handle_exit() to see if we can deal
         * with this in the kernel
         */
-       uasm_i_move(&p, A0, S0);
-       UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit);
-       uasm_i_jalr(&p, RA, T9);
-        UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ);
+       uasm_i_move(&p, GPR_A0, GPR_S0);
+       UASM_i_LA(&p, GPR_T9, (unsigned long)kvm_mips_handle_exit);
+       uasm_i_jalr(&p, GPR_RA, GPR_T9);
+        UASM_i_ADDIU(&p, GPR_SP, GPR_SP, -CALLFRAME_SIZ);
 
        uasm_resolve_relocs(relocs, labels);
 
@@ -799,7 +746,7 @@ static void *kvm_mips_build_ret_from_exit(void *addr)
        memset(relocs, 0, sizeof(relocs));
 
        /* Return from handler Make sure interrupts are disabled */
-       uasm_i_di(&p, ZERO);
+       uasm_i_di(&p, GPR_ZERO);
        uasm_i_ehb(&p);
 
        /*
@@ -808,15 +755,15 @@ static void *kvm_mips_build_ret_from_exit(void *addr)
         * guest, reload k1
         */
 
-       uasm_i_move(&p, K1, S0);
-       UASM_i_ADDIU(&p, K1, K1, offsetof(struct kvm_vcpu, arch));
+       uasm_i_move(&p, GPR_K1, GPR_S0);
+       UASM_i_ADDIU(&p, GPR_K1, GPR_K1, offsetof(struct kvm_vcpu, arch));
 
        /*
         * Check return value, should tell us if we are returning to the
         * host (handle I/O etc)or resuming the guest
         */
-       uasm_i_andi(&p, T0, V0, RESUME_HOST);
-       uasm_il_bnez(&p, &r, T0, label_return_to_host);
+       uasm_i_andi(&p, GPR_T0, GPR_V0, RESUME_HOST);
+       uasm_il_bnez(&p, &r, GPR_T0, label_return_to_host);
         uasm_i_nop(&p);
 
        p = kvm_mips_build_ret_to_guest(p);
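
The RESUME_HOST test above and the arithmetic shift by 2 in the ret-to-host path below are two halves of one convention: the exit handler's return value carries resume flags in its low bits and the actual error code above them, so masking selects guest-versus-host resumption and an arithmetic shift recovers the (possibly negative) error. A plain-C sketch of that packing; the value of RESUME_FLAG_HOST here is an assumption inferred from the andi test, not quoted from the headers:

    #include <stdio.h>

    #define RESUME_FLAG_HOST 2 /* assumed encoding of RESUME_HOST */

    /* Pack err above the two flag bits, the inverse of the sra-by-2
       that ret_to_host performs. */
    static int pack(int err, int flags)
    {
            return (err << 2) | flags;
    }

    int main(void)
    {
            int ret = pack(5, RESUME_FLAG_HOST);

            if (ret & RESUME_FLAG_HOST)
                    printf("return to host, err = %d\n", ret >> 2);
            return 0;
    }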
@@ -843,24 +790,24 @@ static void *kvm_mips_build_ret_to_guest(void *addr)
        u32 *p = addr;
 
        /* Put the saved pointer to vcpu (s0) back into the scratch register */
-       UASM_i_MTC0(&p, S0, scratch_vcpu[0], scratch_vcpu[1]);
+       UASM_i_MTC0(&p, GPR_S0, scratch_vcpu[0], scratch_vcpu[1]);
 
        /* Load up the Guest EBASE to minimize the window where BEV is set */
-       UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1);
+       UASM_i_LW(&p, GPR_T0, offsetof(struct kvm_vcpu_arch, guest_ebase), GPR_K1);
 
        /* Switch EBASE back to the one used by KVM */
-       uasm_i_mfc0(&p, V1, C0_STATUS);
-       uasm_i_lui(&p, AT, ST0_BEV >> 16);
-       uasm_i_or(&p, K0, V1, AT);
-       uasm_i_mtc0(&p, K0, C0_STATUS);
+       uasm_i_mfc0(&p, GPR_V1, C0_STATUS);
+       uasm_i_lui(&p, GPR_AT, ST0_BEV >> 16);
+       uasm_i_or(&p, GPR_K0, GPR_V1, GPR_AT);
+       uasm_i_mtc0(&p, GPR_K0, C0_STATUS);
        uasm_i_ehb(&p);
-       build_set_exc_base(&p, T0);
+       build_set_exc_base(&p, GPR_T0);
 
        /* Setup status register for running guest in UM */
-       uasm_i_ori(&p, V1, V1, ST0_EXL | KSU_USER | ST0_IE);
-       UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX | ST0_SX | ST0_UX));
-       uasm_i_and(&p, V1, V1, AT);
-       uasm_i_mtc0(&p, V1, C0_STATUS);
+       uasm_i_ori(&p, GPR_V1, GPR_V1, ST0_EXL | KSU_USER | ST0_IE);
+       UASM_i_LA(&p, GPR_AT, ~(ST0_CU0 | ST0_MX | ST0_SX | ST0_UX));
+       uasm_i_and(&p, GPR_V1, GPR_V1, GPR_AT);
+       uasm_i_mtc0(&p, GPR_V1, C0_STATUS);
        uasm_i_ehb(&p);
 
        p = kvm_mips_build_enter_guest(p);
@@ -884,31 +831,31 @@ static void *kvm_mips_build_ret_to_host(void *addr)
        unsigned int i;
 
        /* EBASE is already pointing to Linux */
-       UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, host_stack), K1);
-       UASM_i_ADDIU(&p, K1, K1, -(int)sizeof(struct pt_regs));
+       UASM_i_LW(&p, GPR_K1, offsetof(struct kvm_vcpu_arch, host_stack), GPR_K1);
+       UASM_i_ADDIU(&p, GPR_K1, GPR_K1, -(int)sizeof(struct pt_regs));
 
        /*
         * r2/v0 is the return code, shift it down by 2 (arithmetic)
         * to recover the err code
         */
-       uasm_i_sra(&p, K0, V0, 2);
-       uasm_i_move(&p, V0, K0);
+       uasm_i_sra(&p, GPR_K0, GPR_V0, 2);
+       uasm_i_move(&p, GPR_V0, GPR_K0);
 
        /* Load context saved on the host stack */
        for (i = 16; i < 31; ++i) {
                if (i == 24)
                        i = 28;
-               UASM_i_LW(&p, i, offsetof(struct pt_regs, regs[i]), K1);
+               UASM_i_LW(&p, i, offsetof(struct pt_regs, regs[i]), GPR_K1);
        }
 
        /* Restore RDHWR access */
-       UASM_i_LA_mostly(&p, K0, (long)&hwrena);
-       uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0);
-       uasm_i_mtc0(&p, K0, C0_HWRENA);
+       UASM_i_LA_mostly(&p, GPR_K0, (long)&hwrena);
+       uasm_i_lw(&p, GPR_K0, uasm_rel_lo((long)&hwrena), GPR_K0);
+       uasm_i_mtc0(&p, GPR_K0, C0_HWRENA);
 
-       /* Restore RA, which is the address we will return to */
-       UASM_i_LW(&p, RA, offsetof(struct pt_regs, regs[RA]), K1);
-       uasm_i_jr(&p, RA);
+       /* Restore GPR_RA, which is the address we will return to */
+       UASM_i_LW(&p, GPR_RA, offsetof(struct pt_regs, regs[GPR_RA]), GPR_K1);
+       uasm_i_jr(&p, GPR_RA);
         uasm_i_nop(&p);
 
        return p;
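
The bulk of this file's diff (likely arch/mips/kvm/entry.c) is mechanical, but what it deletes is worth noting: the per-file register tables were ABI-sensitive, with T0..T3 mapping to different GPR numbers under o32 than under n64/n32, which is exactly what the removed #if _MIPS_SIM blocks encoded and what asm/regdef.h's GPR_T0..GPR_T3 now encode in one place. A plain-C illustration of the numbering difference, not kernel code:

    #include <stdio.h>

    enum mips_abi { ABI_O32, ABI_N64 };

    /* o32 places t0..t3 at $8..$11; n64/n32 move them to $12..$15
       because a4..a7 occupy $8..$11. */
    static int gpr_t0(enum mips_abi abi)
    {
            return abi == ABI_O32 ? 8 : 12;
    }

    int main(void)
    {
            printf("t0 is $%d under o32 but $%d under n64\n",
                   gpr_t0(ABI_O32), gpr_t0(ABI_N64));
            return 0;
    }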
index d3b4459d0fe85edbd2767629db175bcd619a72af..1df237bd4a72bda77230d2b1720d55185fbeb9c6 100644
@@ -24,6 +24,7 @@
 #include <asm/bootinfo.h>
 #include <asm/mipsregs.h>
 #include <asm/mmu_context.h>
+#include <asm/regdef.h>
 #include <asm/cpu.h>
 
 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
 
 #include <asm/uasm.h>
 
-/* Registers used in the assembled routines. */
-#define ZERO 0
-#define AT 2
-#define A0 4
-#define A1 5
-#define A2 6
-#define T0 8
-#define T1 9
-#define T2 10
-#define T3 11
-#define T9 25
-#define RA 31
-
 /* Handle labels (which must be positive integers). */
 enum label_id {
        label_clear_nopref = 1,
@@ -106,16 +94,16 @@ pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
            IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) &&
            r4k_daddiu_bug()) {
                if (off > 0x7fff) {
-                       uasm_i_lui(buf, T9, uasm_rel_hi(off));
-                       uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+                       uasm_i_lui(buf, GPR_T9, uasm_rel_hi(off));
+                       uasm_i_addiu(buf, GPR_T9, GPR_T9, uasm_rel_lo(off));
                } else
-                       uasm_i_addiu(buf, T9, ZERO, off);
-               uasm_i_daddu(buf, reg1, reg2, T9);
+                       uasm_i_addiu(buf, GPR_T9, GPR_ZERO, off);
+               uasm_i_daddu(buf, reg1, reg2, GPR_T9);
        } else {
                if (off > 0x7fff) {
-                       uasm_i_lui(buf, T9, uasm_rel_hi(off));
-                       uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
-                       UASM_i_ADDU(buf, reg1, reg2, T9);
+                       uasm_i_lui(buf, GPR_T9, uasm_rel_hi(off));
+                       uasm_i_addiu(buf, GPR_T9, GPR_T9, uasm_rel_lo(off));
+                       UASM_i_ADDU(buf, reg1, reg2, GPR_T9);
                } else
                        UASM_i_ADDIU(buf, reg1, reg2, off);
        }
@@ -233,9 +221,9 @@ static void set_prefetch_parameters(void)
 static void build_clear_store(u32 **buf, int off)
 {
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
-               uasm_i_sd(buf, ZERO, off, A0);
+               uasm_i_sd(buf, GPR_ZERO, off, GPR_A0);
        } else {
-               uasm_i_sw(buf, ZERO, off, A0);
+               uasm_i_sw(buf, GPR_ZERO, off, GPR_A0);
        }
 }
 
@@ -246,10 +234,10 @@ static inline void build_clear_pref(u32 **buf, int off)
 
        if (pref_bias_clear_store) {
                _uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
-                           A0);
+                           GPR_A0);
        } else if (cache_line_size == (half_clear_loop_size << 1)) {
                if (cpu_has_cache_cdex_s) {
-                       uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+                       uasm_i_cache(buf, Create_Dirty_Excl_SD, off, GPR_A0);
                } else if (cpu_has_cache_cdex_p) {
                        if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
                            cpu_is_r4600_v1_x()) {
@@ -261,9 +249,9 @@ static inline void build_clear_pref(u32 **buf, int off)
 
                        if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
                            cpu_is_r4600_v2_x())
-                               uasm_i_lw(buf, ZERO, ZERO, AT);
+                               uasm_i_lw(buf, GPR_ZERO, GPR_ZERO, GPR_AT);
 
-                       uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+                       uasm_i_cache(buf, Create_Dirty_Excl_D, off, GPR_A0);
                }
        }
 }
@@ -301,12 +289,12 @@ void build_clear_page(void)
 
        off = PAGE_SIZE - pref_bias_clear_store;
        if (off > 0xffff || !pref_bias_clear_store)
-               pg_addiu(&buf, A2, A0, off);
+               pg_addiu(&buf, GPR_A2, GPR_A0, off);
        else
-               uasm_i_ori(&buf, A2, A0, off);
+               uasm_i_ori(&buf, GPR_A2, GPR_A0, off);
 
        if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
-               uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));
+               uasm_i_lui(&buf, GPR_AT, uasm_rel_hi(0xa0000000));
 
        off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
                                * cache_line_size : 0;
@@ -320,36 +308,36 @@ void build_clear_page(void)
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < half_clear_loop_size);
-       pg_addiu(&buf, A0, A0, 2 * off);
+       pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off);
        off = -off;
        do {
                build_clear_pref(&buf, off);
                if (off == -clear_word_size)
-                       uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
+                       uasm_il_bne(&buf, &r, GPR_A0, GPR_A2, label_clear_pref);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < 0);
 
        if (pref_bias_clear_store) {
-               pg_addiu(&buf, A2, A0, pref_bias_clear_store);
+               pg_addiu(&buf, GPR_A2, GPR_A0, pref_bias_clear_store);
                uasm_l_clear_nopref(&l, buf);
                off = 0;
                do {
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < half_clear_loop_size);
-               pg_addiu(&buf, A0, A0, 2 * off);
+               pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off);
                off = -off;
                do {
                        if (off == -clear_word_size)
-                               uasm_il_bne(&buf, &r, A0, A2,
+                               uasm_il_bne(&buf, &r, GPR_A0, GPR_A2,
                                            label_clear_nopref);
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < 0);
        }
 
-       uasm_i_jr(&buf, RA);
+       uasm_i_jr(&buf, GPR_RA);
        uasm_i_nop(&buf);
 
        BUG_ON(buf > &__clear_page_end);
@@ -369,18 +357,18 @@ void build_clear_page(void)
 static void build_copy_load(u32 **buf, int reg, int off)
 {
        if (cpu_has_64bit_gp_regs) {
-               uasm_i_ld(buf, reg, off, A1);
+               uasm_i_ld(buf, reg, off, GPR_A1);
        } else {
-               uasm_i_lw(buf, reg, off, A1);
+               uasm_i_lw(buf, reg, off, GPR_A1);
        }
 }
 
 static void build_copy_store(u32 **buf, int reg, int off)
 {
        if (cpu_has_64bit_gp_regs) {
-               uasm_i_sd(buf, reg, off, A0);
+               uasm_i_sd(buf, reg, off, GPR_A0);
        } else {
-               uasm_i_sw(buf, reg, off, A0);
+               uasm_i_sw(buf, reg, off, GPR_A0);
        }
 }
 
@@ -390,7 +378,7 @@ static inline void build_copy_load_pref(u32 **buf, int off)
                return;
 
        if (pref_bias_copy_load)
-               _uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
+               _uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, GPR_A1);
 }
 
 static inline void build_copy_store_pref(u32 **buf, int off)
@@ -400,10 +388,10 @@ static inline void build_copy_store_pref(u32 **buf, int off)
 
        if (pref_bias_copy_store) {
                _uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
-                           A0);
+                           GPR_A0);
        } else if (cache_line_size == (half_copy_loop_size << 1)) {
                if (cpu_has_cache_cdex_s) {
-                       uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+                       uasm_i_cache(buf, Create_Dirty_Excl_SD, off, GPR_A0);
                } else if (cpu_has_cache_cdex_p) {
                        if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
                            cpu_is_r4600_v1_x()) {
@@ -415,9 +403,9 @@ static inline void build_copy_store_pref(u32 **buf, int off)
 
                        if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
                            cpu_is_r4600_v2_x())
-                               uasm_i_lw(buf, ZERO, ZERO, AT);
+                               uasm_i_lw(buf, GPR_ZERO, GPR_ZERO, GPR_AT);
 
-                       uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+                       uasm_i_cache(buf, Create_Dirty_Excl_D, off, GPR_A0);
                }
        }
 }
@@ -454,12 +442,12 @@ void build_copy_page(void)
 
        off = PAGE_SIZE - pref_bias_copy_load;
        if (off > 0xffff || !pref_bias_copy_load)
-               pg_addiu(&buf, A2, A0, off);
+               pg_addiu(&buf, GPR_A2, GPR_A0, off);
        else
-               uasm_i_ori(&buf, A2, A0, off);
+               uasm_i_ori(&buf, GPR_A2, GPR_A0, off);
 
        if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
-               uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));
+               uasm_i_lui(&buf, GPR_AT, uasm_rel_hi(0xa0000000));
 
        off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
                                cache_line_size : 0;
@@ -476,126 +464,126 @@ void build_copy_page(void)
        uasm_l_copy_pref_both(&l, buf);
        do {
                build_copy_load_pref(&buf, off);
-               build_copy_load(&buf, T0, off);
+               build_copy_load(&buf, GPR_T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
-               build_copy_load(&buf, T1, off + copy_word_size);
+               build_copy_load(&buf, GPR_T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
-               build_copy_load(&buf, T2, off + 2 * copy_word_size);
+               build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
-               build_copy_load(&buf, T3, off + 3 * copy_word_size);
+               build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
-               build_copy_store(&buf, T0, off);
+               build_copy_store(&buf, GPR_T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
-               build_copy_store(&buf, T1, off + copy_word_size);
+               build_copy_store(&buf, GPR_T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
-               build_copy_store(&buf, T2, off + 2 * copy_word_size);
+               build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
-               build_copy_store(&buf, T3, off + 3 * copy_word_size);
+               build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < half_copy_loop_size);
-       pg_addiu(&buf, A1, A1, 2 * off);
-       pg_addiu(&buf, A0, A0, 2 * off);
+       pg_addiu(&buf, GPR_A1, GPR_A1, 2 * off);
+       pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off);
        off = -off;
        do {
                build_copy_load_pref(&buf, off);
-               build_copy_load(&buf, T0, off);
+               build_copy_load(&buf, GPR_T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
-               build_copy_load(&buf, T1, off + copy_word_size);
+               build_copy_load(&buf, GPR_T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
-               build_copy_load(&buf, T2, off + 2 * copy_word_size);
+               build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
-               build_copy_load(&buf, T3, off + 3 * copy_word_size);
+               build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
-               build_copy_store(&buf, T0, off);
+               build_copy_store(&buf, GPR_T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
-               build_copy_store(&buf, T1, off + copy_word_size);
+               build_copy_store(&buf, GPR_T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
-               build_copy_store(&buf, T2, off + 2 * copy_word_size);
+               build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
                if (off == -(4 * copy_word_size))
-                       uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
-               build_copy_store(&buf, T3, off + 3 * copy_word_size);
+                       uasm_il_bne(&buf, &r, GPR_A2, GPR_A0, label_copy_pref_both);
+               build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < 0);
 
        if (pref_bias_copy_load - pref_bias_copy_store) {
-               pg_addiu(&buf, A2, A0,
+               pg_addiu(&buf, GPR_A2, GPR_A0,
                         pref_bias_copy_load - pref_bias_copy_store);
                uasm_l_copy_pref_store(&l, buf);
                off = 0;
                do {
-                       build_copy_load(&buf, T0, off);
-                       build_copy_load(&buf, T1, off + copy_word_size);
-                       build_copy_load(&buf, T2, off + 2 * copy_word_size);
-                       build_copy_load(&buf, T3, off + 3 * copy_word_size);
+                       build_copy_load(&buf, GPR_T0, off);
+                       build_copy_load(&buf, GPR_T1, off + copy_word_size);
+                       build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
+                       build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
-                       build_copy_store(&buf, T0, off);
+                       build_copy_store(&buf, GPR_T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
-                       build_copy_store(&buf, T1, off + copy_word_size);
+                       build_copy_store(&buf, GPR_T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
-                       build_copy_store(&buf, T2, off + 2 * copy_word_size);
+                       build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
-                       build_copy_store(&buf, T3, off + 3 * copy_word_size);
+                       build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
-               pg_addiu(&buf, A1, A1, 2 * off);
-               pg_addiu(&buf, A0, A0, 2 * off);
+               pg_addiu(&buf, GPR_A1, GPR_A1, 2 * off);
+               pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off);
                off = -off;
                do {
-                       build_copy_load(&buf, T0, off);
-                       build_copy_load(&buf, T1, off + copy_word_size);
-                       build_copy_load(&buf, T2, off + 2 * copy_word_size);
-                       build_copy_load(&buf, T3, off + 3 * copy_word_size);
+                       build_copy_load(&buf, GPR_T0, off);
+                       build_copy_load(&buf, GPR_T1, off + copy_word_size);
+                       build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
+                       build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
-                       build_copy_store(&buf, T0, off);
+                       build_copy_store(&buf, GPR_T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
-                       build_copy_store(&buf, T1, off + copy_word_size);
+                       build_copy_store(&buf, GPR_T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
-                       build_copy_store(&buf, T2, off + 2 * copy_word_size);
+                       build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        if (off == -(4 * copy_word_size))
-                               uasm_il_bne(&buf, &r, A2, A0,
+                               uasm_il_bne(&buf, &r, GPR_A2, GPR_A0,
                                            label_copy_pref_store);
-                       build_copy_store(&buf, T3, off + 3 * copy_word_size);
+                       build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }
 
        if (pref_bias_copy_store) {
-               pg_addiu(&buf, A2, A0, pref_bias_copy_store);
+               pg_addiu(&buf, GPR_A2, GPR_A0, pref_bias_copy_store);
                uasm_l_copy_nopref(&l, buf);
                off = 0;
                do {
-                       build_copy_load(&buf, T0, off);
-                       build_copy_load(&buf, T1, off + copy_word_size);
-                       build_copy_load(&buf, T2, off + 2 * copy_word_size);
-                       build_copy_load(&buf, T3, off + 3 * copy_word_size);
-                       build_copy_store(&buf, T0, off);
-                       build_copy_store(&buf, T1, off + copy_word_size);
-                       build_copy_store(&buf, T2, off + 2 * copy_word_size);
-                       build_copy_store(&buf, T3, off + 3 * copy_word_size);
+                       build_copy_load(&buf, GPR_T0, off);
+                       build_copy_load(&buf, GPR_T1, off + copy_word_size);
+                       build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
+                       build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
+                       build_copy_store(&buf, GPR_T0, off);
+                       build_copy_store(&buf, GPR_T1, off + copy_word_size);
+                       build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
+                       build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
-               pg_addiu(&buf, A1, A1, 2 * off);
-               pg_addiu(&buf, A0, A0, 2 * off);
+               pg_addiu(&buf, GPR_A1, GPR_A1, 2 * off);
+               pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off);
                off = -off;
                do {
-                       build_copy_load(&buf, T0, off);
-                       build_copy_load(&buf, T1, off + copy_word_size);
-                       build_copy_load(&buf, T2, off + 2 * copy_word_size);
-                       build_copy_load(&buf, T3, off + 3 * copy_word_size);
-                       build_copy_store(&buf, T0, off);
-                       build_copy_store(&buf, T1, off + copy_word_size);
-                       build_copy_store(&buf, T2, off + 2 * copy_word_size);
+                       build_copy_load(&buf, GPR_T0, off);
+                       build_copy_load(&buf, GPR_T1, off + copy_word_size);
+                       build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
+                       build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
+                       build_copy_store(&buf, GPR_T0, off);
+                       build_copy_store(&buf, GPR_T1, off + copy_word_size);
+                       build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
                        if (off == -(4 * copy_word_size))
-                               uasm_il_bne(&buf, &r, A2, A0,
+                               uasm_il_bne(&buf, &r, GPR_A2, GPR_A0,
                                            label_copy_nopref);
-                       build_copy_store(&buf, T3, off + 3 * copy_word_size);
+                       build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }
 
-       uasm_i_jr(&buf, RA);
+       uasm_i_jr(&buf, GPR_RA);
        uasm_i_nop(&buf);
 
        BUG_ON(buf > &__copy_page_end);
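
Renamed register arguments aside, pg_addiu() in this file exists because ADDIU's immediate is 16 bits signed: page offsets above 0x7fff must be split into a LUI of the high half plus an ADDIU of the low half, with the high half rounded up so the sign-extended low half corrects it, the same split uasm_rel_hi()/uasm_rel_lo() perform. A self-contained check of that arithmetic:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static int32_t rel_hi(int32_t off)
    {
            /* round up when the low half will sign-extend negative */
            return (off + 0x8000) >> 16;
    }

    static int32_t rel_lo(int32_t off)
    {
            return (int16_t)off; /* addiu sign-extends its immediate */
    }

    int main(void)
    {
            int32_t off = 0x9000; /* > 0x7fff: one addiu cannot encode it */

            assert((rel_hi(off) << 16) + rel_lo(off) == off);
            printf("lui %#x ; addiu %#x reassemble %#x\n",
                   rel_hi(off), rel_lo(off) & 0xffff, off);
            return 0;
    }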
index 4017fa0e2f68d2edb315e06bc0011184edf40349..69ea54bdc0c36182ddb0c8e289a7e2ee573182c6 100644
@@ -32,7 +32,9 @@
 
 #include <asm/cacheflush.h>
 #include <asm/cpu-type.h>
+#include <asm/mipsregs.h>
 #include <asm/mmu_context.h>
+#include <asm/regdef.h>
 #include <asm/uasm.h>
 #include <asm/setup.h>
 #include <asm/tlbex.h>
@@ -276,27 +278,6 @@ static inline void dump_handler(const char *symbol, const void *start, const voi
        pr_debug("\tEND(%s)\n", symbol);
 }
 
-/* The only general purpose registers allowed in TLB handlers. */
-#define K0             26
-#define K1             27
-
-/* Some CP0 registers */
-#define C0_INDEX       0, 0
-#define C0_ENTRYLO0    2, 0
-#define C0_TCBIND      2, 2
-#define C0_ENTRYLO1    3, 0
-#define C0_CONTEXT     4, 0
-#define C0_PAGEMASK    5, 0
-#define C0_PWBASE      5, 5
-#define C0_PWFIELD     5, 6
-#define C0_PWSIZE      5, 7
-#define C0_PWCTL       6, 6
-#define C0_BADVADDR    8, 0
-#define C0_PGD         9, 7
-#define C0_ENTRYHI     10, 0
-#define C0_EPC         14, 0
-#define C0_XCONTEXT    20, 0
-
 #ifdef CONFIG_64BIT
 # define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_XCONTEXT)
 #else
@@ -356,30 +337,30 @@ static struct work_registers build_get_work_registers(u32 **p)
        if (scratch_reg >= 0) {
                /* Save in CPU local C0_KScratch? */
                UASM_i_MTC0(p, 1, c0_kscratch(), scratch_reg);
-               r.r1 = K0;
-               r.r2 = K1;
-               r.r3 = 1;
+               r.r1 = GPR_K0;
+               r.r2 = GPR_K1;
+               r.r3 = GPR_AT;
                return r;
        }
 
        if (num_possible_cpus() > 1) {
                /* Get smp_processor_id */
-               UASM_i_CPUID_MFC0(p, K0, SMP_CPUID_REG);
-               UASM_i_SRL_SAFE(p, K0, K0, SMP_CPUID_REGSHIFT);
+               UASM_i_CPUID_MFC0(p, GPR_K0, SMP_CPUID_REG);
+               UASM_i_SRL_SAFE(p, GPR_K0, GPR_K0, SMP_CPUID_REGSHIFT);
 
-               /* handler_reg_save index in K0 */
-               UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save)));
+               /* handler_reg_save index in GPR_K0 */
+               UASM_i_SLL(p, GPR_K0, GPR_K0, ilog2(sizeof(struct tlb_reg_save)));
 
-               UASM_i_LA(p, K1, (long)&handler_reg_save);
-               UASM_i_ADDU(p, K0, K0, K1);
+               UASM_i_LA(p, GPR_K1, (long)&handler_reg_save);
+               UASM_i_ADDU(p, GPR_K0, GPR_K0, GPR_K1);
        } else {
-               UASM_i_LA(p, K0, (long)&handler_reg_save);
+               UASM_i_LA(p, GPR_K0, (long)&handler_reg_save);
        }
-       /* K0 now points to save area, save $1 and $2  */
-       UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0);
-       UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0);
+       /* GPR_K0 now points to save area, save $1 and $2  */
+       UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), GPR_K0);
+       UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), GPR_K0);
 
-       r.r1 = K1;
+       r.r1 = GPR_K1;
        r.r2 = 1;
        r.r3 = 2;
        return r;
@@ -392,9 +373,9 @@ static void build_restore_work_registers(u32 **p)
                UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
                return;
        }
-       /* K0 already points to save area, restore $1 and $2  */
-       UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0);
-       UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0);
+       /* GPR_K0 already points to save area, restore $1 and $2  */
+       UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), GPR_K0);
+       UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), GPR_K0);
 }
 
 #ifndef CONFIG_MIPS_PGD_C0_CONTEXT
@@ -413,22 +394,22 @@ static void build_r3000_tlb_refill_handler(void)
        memset(tlb_handler, 0, sizeof(tlb_handler));
        p = tlb_handler;
 
-       uasm_i_mfc0(&p, K0, C0_BADVADDR);
-       uasm_i_lui(&p, K1, uasm_rel_hi(pgdc)); /* cp0 delay */
-       uasm_i_lw(&p, K1, uasm_rel_lo(pgdc), K1);
-       uasm_i_srl(&p, K0, K0, 22); /* load delay */
-       uasm_i_sll(&p, K0, K0, 2);
-       uasm_i_addu(&p, K1, K1, K0);
-       uasm_i_mfc0(&p, K0, C0_CONTEXT);
-       uasm_i_lw(&p, K1, 0, K1); /* cp0 delay */
-       uasm_i_andi(&p, K0, K0, 0xffc); /* load delay */
-       uasm_i_addu(&p, K1, K1, K0);
-       uasm_i_lw(&p, K0, 0, K1);
+       uasm_i_mfc0(&p, GPR_K0, C0_BADVADDR);
+       uasm_i_lui(&p, GPR_K1, uasm_rel_hi(pgdc)); /* cp0 delay */
+       uasm_i_lw(&p, GPR_K1, uasm_rel_lo(pgdc), GPR_K1);
+       uasm_i_srl(&p, GPR_K0, GPR_K0, 22); /* load delay */
+       uasm_i_sll(&p, GPR_K0, GPR_K0, 2);
+       uasm_i_addu(&p, GPR_K1, GPR_K1, GPR_K0);
+       uasm_i_mfc0(&p, GPR_K0, C0_CONTEXT);
+       uasm_i_lw(&p, GPR_K1, 0, GPR_K1); /* cp0 delay */
+       uasm_i_andi(&p, GPR_K0, GPR_K0, 0xffc); /* load delay */
+       uasm_i_addu(&p, GPR_K1, GPR_K1, GPR_K0);
+       uasm_i_lw(&p, GPR_K0, 0, GPR_K1);
        uasm_i_nop(&p); /* load delay */
-       uasm_i_mtc0(&p, K0, C0_ENTRYLO0);
-       uasm_i_mfc0(&p, K1, C0_EPC); /* cp0 delay */
+       uasm_i_mtc0(&p, GPR_K0, C0_ENTRYLO0);
+       uasm_i_mfc0(&p, GPR_K1, C0_EPC); /* cp0 delay */
        uasm_i_tlbwr(&p); /* cp0 delay */
-       uasm_i_jr(&p, K1);
+       uasm_i_jr(&p, GPR_K1);
        uasm_i_rfe(&p); /* branch delay */
 
        if (p > tlb_handler + 32)
@@ -1276,11 +1257,11 @@ static void build_r4000_tlb_refill_handler(void)
        memset(final_handler, 0, sizeof(final_handler));
 
        if (IS_ENABLED(CONFIG_64BIT) && (scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) {
-               htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1,
+               htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, GPR_K0, GPR_K1,
                                                          scratch_reg);
                vmalloc_mode = refill_scratch;
        } else {
-               htlb_info.huge_pte = K0;
+               htlb_info.huge_pte = GPR_K0;
                htlb_info.restore_scratch = 0;
                htlb_info.need_reload_pte = true;
                vmalloc_mode = refill_noscratch;
@@ -1290,29 +1271,29 @@ static void build_r4000_tlb_refill_handler(void)
                if (bcm1250_m3_war()) {
                        unsigned int segbits = 44;
 
-                       uasm_i_dmfc0(&p, K0, C0_BADVADDR);
-                       uasm_i_dmfc0(&p, K1, C0_ENTRYHI);
-                       uasm_i_xor(&p, K0, K0, K1);
-                       uasm_i_dsrl_safe(&p, K1, K0, 62);
-                       uasm_i_dsrl_safe(&p, K0, K0, 12 + 1);
-                       uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits);
-                       uasm_i_or(&p, K0, K0, K1);
-                       uasm_il_bnez(&p, &r, K0, label_leave);
+                       uasm_i_dmfc0(&p, GPR_K0, C0_BADVADDR);
+                       uasm_i_dmfc0(&p, GPR_K1, C0_ENTRYHI);
+                       uasm_i_xor(&p, GPR_K0, GPR_K0, GPR_K1);
+                       uasm_i_dsrl_safe(&p, GPR_K1, GPR_K0, 62);
+                       uasm_i_dsrl_safe(&p, GPR_K0, GPR_K0, 12 + 1);
+                       uasm_i_dsll_safe(&p, GPR_K0, GPR_K0, 64 + 12 + 1 - segbits);
+                       uasm_i_or(&p, GPR_K0, GPR_K0, GPR_K1);
+                       uasm_il_bnez(&p, &r, GPR_K0, label_leave);
                        /* No need for uasm_i_nop */
                }
 
 #ifdef CONFIG_64BIT
-               build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */
+               build_get_pmde64(&p, &l, &r, GPR_K0, GPR_K1); /* get pmd in GPR_K1 */
 #else
-               build_get_pgde32(&p, K0, K1); /* get pgd in K1 */
+               build_get_pgde32(&p, GPR_K0, GPR_K1); /* get pgd in GPR_K1 */
 #endif
 
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
-               build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update);
+               build_is_huge_pte(&p, &r, GPR_K0, GPR_K1, label_tlb_huge_update);
 #endif
 
-               build_get_ptep(&p, K0, K1);
-               build_update_entries(&p, K0, K1);
+               build_get_ptep(&p, GPR_K0, GPR_K1);
+               build_update_entries(&p, GPR_K0, GPR_K1);
                build_tlb_write_entry(&p, &l, &r, tlb_random);
                uasm_l_leave(&l, p);
                uasm_i_eret(&p); /* return from trap */
@@ -1320,14 +1301,14 @@ static void build_r4000_tlb_refill_handler(void)
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
        uasm_l_tlb_huge_update(&l, p);
        if (htlb_info.need_reload_pte)
-               UASM_i_LW(&p, htlb_info.huge_pte, 0, K1);
-       build_huge_update_entries(&p, htlb_info.huge_pte, K1);
-       build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random,
+               UASM_i_LW(&p, htlb_info.huge_pte, 0, GPR_K1);
+       build_huge_update_entries(&p, htlb_info.huge_pte, GPR_K1);
+       build_huge_tlb_write_entry(&p, &l, &r, GPR_K0, tlb_random,
                                   htlb_info.restore_scratch);
 #endif
 
 #ifdef CONFIG_64BIT
-       build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode);
+       build_get_pgd_vmalloc64(&p, &l, &r, GPR_K0, GPR_K1, vmalloc_mode);
 #endif
 
        /*
@@ -1500,34 +1481,35 @@ static void build_loongson3_tlb_refill_handler(void)
        memset(tlb_handler, 0, sizeof(tlb_handler));
 
        if (check_for_high_segbits) {
-               uasm_i_dmfc0(&p, K0, C0_BADVADDR);
-               uasm_i_dsrl_safe(&p, K1, K0, PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3);
-               uasm_il_beqz(&p, &r, K1, label_vmalloc);
+               uasm_i_dmfc0(&p, GPR_K0, C0_BADVADDR);
+               uasm_i_dsrl_safe(&p, GPR_K1, GPR_K0,
+                               PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3);
+               uasm_il_beqz(&p, &r, GPR_K1, label_vmalloc);
                uasm_i_nop(&p);
 
-               uasm_il_bgez(&p, &r, K0, label_large_segbits_fault);
+               uasm_il_bgez(&p, &r, GPR_K0, label_large_segbits_fault);
                uasm_i_nop(&p);
                uasm_l_vmalloc(&l, p);
        }
 
-       uasm_i_dmfc0(&p, K1, C0_PGD);
+       uasm_i_dmfc0(&p, GPR_K1, C0_PGD);
 
-       uasm_i_lddir(&p, K0, K1, 3);  /* global page dir */
+       uasm_i_lddir(&p, GPR_K0, GPR_K1, 3);  /* global page dir */
 #ifndef __PAGETABLE_PMD_FOLDED
-       uasm_i_lddir(&p, K1, K0, 1);  /* middle page dir */
+       uasm_i_lddir(&p, GPR_K1, GPR_K0, 1);  /* middle page dir */
 #endif
-       uasm_i_ldpte(&p, K1, 0);      /* even */
-       uasm_i_ldpte(&p, K1, 1);      /* odd */
+       uasm_i_ldpte(&p, GPR_K1, 0);      /* even */
+       uasm_i_ldpte(&p, GPR_K1, 1);      /* odd */
        uasm_i_tlbwr(&p);
 
        /* restore page mask */
        if (PM_DEFAULT_MASK >> 16) {
-               uasm_i_lui(&p, K0, PM_DEFAULT_MASK >> 16);
-               uasm_i_ori(&p, K0, K0, PM_DEFAULT_MASK & 0xffff);
-               uasm_i_mtc0(&p, K0, C0_PAGEMASK);
+               uasm_i_lui(&p, GPR_K0, PM_DEFAULT_MASK >> 16);
+               uasm_i_ori(&p, GPR_K0, GPR_K0, PM_DEFAULT_MASK & 0xffff);
+               uasm_i_mtc0(&p, GPR_K0, C0_PAGEMASK);
        } else if (PM_DEFAULT_MASK) {
-               uasm_i_ori(&p, K0, 0, PM_DEFAULT_MASK);
-               uasm_i_mtc0(&p, K0, C0_PAGEMASK);
+               uasm_i_ori(&p, GPR_K0, 0, PM_DEFAULT_MASK);
+               uasm_i_mtc0(&p, GPR_K0, C0_PAGEMASK);
        } else {
                uasm_i_mtc0(&p, 0, C0_PAGEMASK);
        }
@@ -1536,8 +1518,8 @@ static void build_loongson3_tlb_refill_handler(void)
 
        if (check_for_high_segbits) {
                uasm_l_large_segbits_fault(&l, p);
-               UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0);
-               uasm_i_jr(&p, K1);
+               UASM_i_LA(&p, GPR_K1, (unsigned long)tlb_do_page_fault_0);
+               uasm_i_jr(&p, GPR_K1);
                uasm_i_nop(&p);
        }
 
@@ -1903,11 +1885,11 @@ static void build_r3000_tlb_load_handler(void)
        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));
 
-       build_r3000_tlbchange_handler_head(&p, K0, K1);
-       build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl);
+       build_r3000_tlbchange_handler_head(&p, GPR_K0, GPR_K1);
+       build_pte_present(&p, &r, GPR_K0, GPR_K1, -1, label_nopage_tlbl);
        uasm_i_nop(&p); /* load delay */
-       build_make_valid(&p, &r, K0, K1, -1);
-       build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
+       build_make_valid(&p, &r, GPR_K0, GPR_K1, -1);
+       build_r3000_tlb_reload_write(&p, &l, &r, GPR_K0, GPR_K1);
 
        uasm_l_nopage_tlbl(&l, p);
        uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
@@ -1933,11 +1915,11 @@ static void build_r3000_tlb_store_handler(void)
        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));
 
-       build_r3000_tlbchange_handler_head(&p, K0, K1);
-       build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs);
+       build_r3000_tlbchange_handler_head(&p, GPR_K0, GPR_K1);
+       build_pte_writable(&p, &r, GPR_K0, GPR_K1, -1, label_nopage_tlbs);
        uasm_i_nop(&p); /* load delay */
-       build_make_write(&p, &r, K0, K1, -1);
-       build_r3000_tlb_reload_write(&p, &l, &r, K0, K1);
+       build_make_write(&p, &r, GPR_K0, GPR_K1, -1);
+       build_r3000_tlb_reload_write(&p, &l, &r, GPR_K0, GPR_K1);
 
        uasm_l_nopage_tlbs(&l, p);
        uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
@@ -1963,11 +1945,11 @@ static void build_r3000_tlb_modify_handler(void)
        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));
 
-       build_r3000_tlbchange_handler_head(&p, K0, K1);
-       build_pte_modifiable(&p, &r, K0, K1,  -1, label_nopage_tlbm);
+       build_r3000_tlbchange_handler_head(&p, GPR_K0, GPR_K1);
+       build_pte_modifiable(&p, &r, GPR_K0, GPR_K1,  -1, label_nopage_tlbm);
        uasm_i_nop(&p); /* load delay */
-       build_make_write(&p, &r, K0, K1, -1);
-       build_r3000_pte_reload_tlbwi(&p, K0, K1);
+       build_make_write(&p, &r, GPR_K0, GPR_K1, -1);
+       build_r3000_pte_reload_tlbwi(&p, GPR_K0, GPR_K1);
 
        uasm_l_nopage_tlbm(&l, p);
        uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
@@ -2083,14 +2065,14 @@ static void build_r4000_tlb_load_handler(void)
        if (bcm1250_m3_war()) {
                unsigned int segbits = 44;
 
-               uasm_i_dmfc0(&p, K0, C0_BADVADDR);
-               uasm_i_dmfc0(&p, K1, C0_ENTRYHI);
-               uasm_i_xor(&p, K0, K0, K1);
-               uasm_i_dsrl_safe(&p, K1, K0, 62);
-               uasm_i_dsrl_safe(&p, K0, K0, 12 + 1);
-               uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits);
-               uasm_i_or(&p, K0, K0, K1);
-               uasm_il_bnez(&p, &r, K0, label_leave);
+               uasm_i_dmfc0(&p, GPR_K0, C0_BADVADDR);
+               uasm_i_dmfc0(&p, GPR_K1, C0_ENTRYHI);
+               uasm_i_xor(&p, GPR_K0, GPR_K0, GPR_K1);
+               uasm_i_dsrl_safe(&p, GPR_K1, GPR_K0, 62);
+               uasm_i_dsrl_safe(&p, GPR_K0, GPR_K0, 12 + 1);
+               uasm_i_dsll_safe(&p, GPR_K0, GPR_K0, 64 + 12 + 1 - segbits);
+               uasm_i_or(&p, GPR_K0, GPR_K0, GPR_K1);
+               uasm_il_bnez(&p, &r, GPR_K0, label_leave);
                /* No need for uasm_i_nop */
        }
 
@@ -2233,9 +2215,9 @@ static void build_r4000_tlb_load_handler(void)
        build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
        if ((unsigned long)tlb_do_page_fault_0 & 1) {
-               uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_0));
-               uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_0));
-               uasm_i_jr(&p, K0);
+               uasm_i_lui(&p, GPR_K0, uasm_rel_hi((long)tlb_do_page_fault_0));
+               uasm_i_addiu(&p, GPR_K0, GPR_K0, uasm_rel_lo((long)tlb_do_page_fault_0));
+               uasm_i_jr(&p, GPR_K0);
        } else
 #endif
        uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff);
@@ -2289,9 +2271,9 @@ static void build_r4000_tlb_store_handler(void)
        build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
        if ((unsigned long)tlb_do_page_fault_1 & 1) {
-               uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1));
-               uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1));
-               uasm_i_jr(&p, K0);
+               uasm_i_lui(&p, GPR_K0, uasm_rel_hi((long)tlb_do_page_fault_1));
+               uasm_i_addiu(&p, GPR_K0, GPR_K0, uasm_rel_lo((long)tlb_do_page_fault_1));
+               uasm_i_jr(&p, GPR_K0);
        } else
 #endif
        uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
@@ -2346,9 +2328,9 @@ static void build_r4000_tlb_modify_handler(void)
        build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
        if ((unsigned long)tlb_do_page_fault_1 & 1) {
-               uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1));
-               uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1));
-               uasm_i_jr(&p, K0);
+               uasm_i_lui(&p, GPR_K0, uasm_rel_hi((long)tlb_do_page_fault_1));
+               uasm_i_addiu(&p, GPR_K0, GPR_K0, uasm_rel_lo((long)tlb_do_page_fault_1));
+               uasm_i_jr(&p, GPR_K0);
        } else
 #endif
        uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff);
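
For context on the tlbex.c hunks above: the file-local K0/K1 defines (and a few
bare register numbers, such as the r.r3 = 1 that becomes GPR_AT) are replaced
with shared GPR_* names, so the synthesized-handler code reads as registers
rather than magic integers. As a rough sketch of what those names denote -- the
real definitions live in the MIPS regdef header; the enum below is illustrative
of the conventional MIPS GPR numbering, not copied from the tree:

        enum {
                GPR_ZERO = 0,   /* $0: hardwired zero */
                GPR_AT   = 1,   /* $1: assembler temporary */
                GPR_K0   = 26,  /* $26: kernel scratch 0 */
                GPR_K1   = 27,  /* $27: kernel scratch 1 */
        };
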
diff --git a/arch/mips/mobileye/Makefile b/arch/mips/mobileye/Makefile
new file mode 100644 (file)
index 0000000..315c06b
--- /dev/null
@@ -0,0 +1 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
diff --git a/arch/mips/mobileye/Platform b/arch/mips/mobileye/Platform
new file mode 100644 (file)
index 0000000..c69f811
--- /dev/null
@@ -0,0 +1,15 @@
+#
+# Copyright (C) 2016 Imagination Technologies
+# Author: Paul Burton <paul.burton@mips.com>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation;  either version 2 of the  License, or (at your
+# option) any later version.
+#
+
+load-$(CONFIG_MACH_EYEQ5)      = 0xa800000808000000
+all-$(CONFIG_MACH_EYEQ5)       += vmlinux.gz.itb
+
+its-y                                  := vmlinux.its.S
+its-$(CONFIG_FIT_IMAGE_FDT_EPM5)       += board-epm5.its.S
diff --git a/arch/mips/mobileye/board-epm5.its.S b/arch/mips/mobileye/board-epm5.its.S
new file mode 100644 (file)
index 0000000..08e8c4f
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/ {
+       images {
+               fdt-mobileye-epm5 {
+                       description = "Mobileeye MP5 Device Tree";
+                       data = /incbin/("boot/dts/mobileye/eyeq5-epm5.dtb");
+                       type = "flat_dt";
+                       arch = "mips";
+                       compression = "none";
+                       hash {
+                               algo = "sha1";
+                       };
+               };
+       };
+
+       configurations {
+               default = "conf-1";
+               conf-1 {
+                       description = "Mobileye EPM5 Linux kernel";
+                       kernel = "kernel";
+                       fdt = "fdt-mobileye-epm5";
+               };
+       };
+};
diff --git a/arch/mips/mobileye/vmlinux.its.S b/arch/mips/mobileye/vmlinux.its.S
new file mode 100644 (file)
index 0000000..3e25467
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/dts-v1/;
+
+/ {
+       description = KERNEL_NAME;
+       #address-cells = <ADDR_CELLS>;
+
+       images {
+               kernel {
+                       description = KERNEL_NAME;
+                       data = /incbin/(VMLINUX_BINARY);
+                       type = "kernel";
+                       arch = "mips";
+                       os = "linux";
+                       compression = VMLINUX_COMPRESSION;
+                       load = /bits/ ADDR_BITS <VMLINUX_LOAD_ADDRESS>;
+                       entry = /bits/ ADDR_BITS <VMLINUX_ENTRY_ADDRESS>;
+                       hash {
+                               algo = "sha1";
+                       };
+               };
+       };
+
+       configurations {
+               default = "conf-default";
+
+               conf-default {
+                       description = "Generic Linux kernel";
+                       kernel = "kernel";
+               };
+       };
+};
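
These .its.S files are not finished device trees: they are fed through the C
preprocessor at build time, and the ALL-CAPS tokens (KERNEL_NAME,
VMLINUX_BINARY, ADDR_BITS, ...) arrive as -D defines from the MIPS makefiles.
Assuming the EyeQ5 load address given in the Platform file above, the load
property would expand to something like:

        load = /bits/ 64 <0xa800000808000000>;
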
index 09a4ce53424fdeed7193ca50cd5c2b550a6070c1..6a6c4f58f7f43cf85e2479672ab051dcfd0f4ef6 100644 (file)
@@ -9,7 +9,7 @@
 
 int pcibios_plat_dev_init(struct pci_dev *dev)
 {
-       return PCIBIOS_SUCCESSFUL;
+       return 0;
 }
 
 int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
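
No functional change intended in this hunk: PCIBIOS_SUCCESSFUL is defined as 0,
so returning plain 0 merely drops a PCIBIOS-namespace constant from a platform
hook whose callers only test for non-zero. For reference, from
include/linux/pci.h:

        #define PCIBIOS_SUCCESSFUL              0x00
        #define PCIBIOS_DEVICE_NOT_FOUND        0x86
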
index 13009666204f94a4c0eaf608af6ca59b5f41c137..8bcc136976dc40efecb0107f7a0be1e0b0ea558e 100644 (file)
@@ -7,17 +7,8 @@
 #include <linux/of_pci.h>
 #include <linux/pci.h>
 
-int (*ltq_pci_plat_arch_init)(struct pci_dev *dev) = NULL;
-int (*ltq_pci_plat_dev_init)(struct pci_dev *dev) = NULL;
-
 int pcibios_plat_dev_init(struct pci_dev *dev)
 {
-       if (ltq_pci_plat_arch_init)
-               return ltq_pci_plat_arch_init(dev);
-
-       if (ltq_pci_plat_dev_init)
-               return ltq_pci_plat_dev_init(dev);
-
        return 0;
 }
 
index f7802f100401ca3d537c540e8df28c1124597390..37087f4137eee8f4e24ab723ea85298496f83638 100644 (file)
@@ -60,7 +60,7 @@ static int mkaddr(struct pci_bus *bus, unsigned int devfn, int where,
 {
        if (bus->parent == NULL &&
            devfn >= PCI_DEVFN(TX4927_PCIC_MAX_DEVNU, 0))
-               return -1;
+               return PCIBIOS_DEVICE_NOT_FOUND;
        __raw_writel(((bus->number & 0xff) << 0x10)
                     | ((devfn & 0xff) << 0x08) | (where & 0xfc)
                     | (bus->parent ? 1 : 0),
@@ -69,7 +69,7 @@ static int mkaddr(struct pci_bus *bus, unsigned int devfn, int where,
        __raw_writel((__raw_readl(&pcicptr->pcistatus) & 0x0000ffff)
                     | (PCI_STATUS_REC_MASTER_ABORT << 16),
                     &pcicptr->pcistatus);
-       return 0;
+       return PCIBIOS_SUCCESSFUL;
 }
 
 static int check_abort(struct tx4927_pcic_reg __iomem *pcicptr)
@@ -140,10 +140,12 @@ static int tx4927_pci_config_read(struct pci_bus *bus, unsigned int devfn,
                                  int where, int size, u32 *val)
 {
        struct tx4927_pcic_reg __iomem *pcicptr = pci_bus_to_pcicptr(bus);
+       int ret;
 
-       if (mkaddr(bus, devfn, where, pcicptr)) {
-               *val = 0xffffffff;
-               return -1;
+       ret = mkaddr(bus, devfn, where, pcicptr);
+       if (ret != PCIBIOS_SUCCESSFUL) {
+               PCI_SET_ERROR_RESPONSE(val);
+               return ret;
        }
        switch (size) {
        case 1:
@@ -162,9 +164,11 @@ static int tx4927_pci_config_write(struct pci_bus *bus, unsigned int devfn,
                                   int where, int size, u32 val)
 {
        struct tx4927_pcic_reg __iomem *pcicptr = pci_bus_to_pcicptr(bus);
+       int ret;
 
-       if (mkaddr(bus, devfn, where, pcicptr))
-               return -1;
+       ret = mkaddr(bus, devfn, where, pcicptr);
+       if (ret != PCIBIOS_SUCCESSFUL)
+               return ret;
        switch (size) {
        case 1:
                icd_writeb(val, where & 3, pcicptr);
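
PCI config-space accessors are expected to return PCIBIOS_* codes and, on a
failed read, to hand back an all-ones value so callers see the classic
0xffffffff "no device here" pattern. That is what PCI_SET_ERROR_RESPONSE()
provides; for a u32 read it is roughly equivalent to:

        *val = 0xffffffff;      /* reads-as-all-ones marks the error */
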
index fc503679a93db49c6cd0faf5999e8075e167f08a..54094f6e033e25a135149a0be3f1e8e3ec07d926 100644 (file)
@@ -6,11 +6,16 @@
  * Copyright (C) 2013 John Crispin <john@phrozen.org>
 */
 
-#include <linux/platform_device.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
 #include <linux/timer.h>
-#include <linux/of_gpio.h>
-#include <linux/clk.h>
+#include <linux/types.h>
 
 #include <asm/mach-ralink/ralink_regs.h>
 
index 81c9f0a8880b0e5ff5fbc605dd3eef9886f2e51e..a3cdcb2899410bf4a37de8891261dd5bbae56222 100644 (file)
@@ -12,7 +12,7 @@
 #include <asm/sgi/mc.h>
 #include <asm/sgi/ip22.h>
 
-static struct bus_type gio_bus_type;
+static const struct bus_type gio_bus_type;
 
 static struct {
        const char *name;
@@ -378,7 +378,7 @@ static void ip22_check_gio(int slotno, unsigned long addr, int irq)
                printk(KERN_INFO "GIO: slot %d : Empty\n", slotno);
 }
 
-static struct bus_type gio_bus_type = {
+static const struct bus_type gio_bus_type = {
        .name      = "gio",
        .dev_groups = gio_dev_groups,
        .match     = gio_bus_match,
index 408db45efdc8a4dd840b62753f6e1457c687c18c..af5333986900812904116e34da3ceadef4b17eec 100644 (file)
@@ -535,13 +535,14 @@ static const struct file_operations sbprof_tb_fops = {
        .llseek         = default_llseek,
 };
 
-static struct class *tb_class;
+static const struct class tb_class = {
+       .name = "sb_tracebuffer",
+};
 static struct device *tb_dev;
 
 static int __init sbprof_tb_init(void)
 {
        struct device *dev;
-       struct class *tbc;
        int err;
 
        if (register_chrdev(SBPROF_TB_MAJOR, DEVNAME, &sbprof_tb_fops)) {
@@ -550,15 +551,11 @@ static int __init sbprof_tb_init(void)
                return -EIO;
        }
 
-       tbc = class_create("sb_tracebuffer");
-       if (IS_ERR(tbc)) {
-               err = PTR_ERR(tbc);
+       err = class_register(&tb_class);
+       if (err)
                goto out_chrdev;
-       }
-
-       tb_class = tbc;
 
-       dev = device_create(tbc, NULL, MKDEV(SBPROF_TB_MAJOR, 0), NULL, "tb");
+       dev = device_create(&tb_class, NULL, MKDEV(SBPROF_TB_MAJOR, 0), NULL, "tb");
        if (IS_ERR(dev)) {
                err = PTR_ERR(dev);
                goto out_class;
@@ -573,7 +570,7 @@ static int __init sbprof_tb_init(void)
        return 0;
 
 out_class:
-       class_destroy(tb_class);
+       class_unregister(&tb_class);
 out_chrdev:
        unregister_chrdev(SBPROF_TB_MAJOR, DEVNAME);
 
@@ -582,9 +579,9 @@ out_chrdev:
 
 static void __exit sbprof_tb_cleanup(void)
 {
-       device_destroy(tb_class, MKDEV(SBPROF_TB_MAJOR, 0));
+       device_destroy(&tb_class, MKDEV(SBPROF_TB_MAJOR, 0));
        unregister_chrdev(SBPROF_TB_MAJOR, DEVNAME);
-       class_destroy(tb_class);
+       class_unregister(&tb_class);
 }
 
 module_init(sbprof_tb_init);
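
This follows the tree-wide move away from class_create() toward statically
defined classes registered with class_register(): the runtime allocation and
its error path disappear, and the class object can be const. Schematically,
the conversion is:

        /* before: allocated at runtime, may fail, needs class_destroy() */
        tbc = class_create("sb_tracebuffer");

        /* after: a compile-time object, registered and unregistered in place */
        static const struct class tb_class = { .name = "sb_tracebuffer" };
        err = class_register(&tb_class);
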
index b098a3c76ae9e143a90e5b8677297842260a51d6..1e67fecd466ec060c3dcaa22da2a5ac074f1081a 100644 (file)
@@ -762,7 +762,7 @@ void __init txx9_aclc_init(unsigned long baseaddr, int irq,
 {
 }
 
-static struct bus_type txx9_sramc_subsys = {
+static const struct bus_type txx9_sramc_subsys = {
        .name = "txx9_sram",
        .dev_name = "txx9_sram",
 };
index 79d3039b29f1fbcb5401765743426cea65f84452..9c0e6eaeb005cdbf0d768f6f47a38f31d32b3974 100644 (file)
@@ -2,6 +2,7 @@
 config NIOS2
        def_bool y
        select ARCH_32BIT_OFF_T
+       select ARCH_HAS_CPU_CACHE_ALIASING
        select ARCH_HAS_DMA_PREP_COHERENT
        select ARCH_HAS_SYNC_DMA_FOR_CPU
        select ARCH_HAS_SYNC_DMA_FOR_DEVICE
diff --git a/arch/nios2/include/asm/cachetype.h b/arch/nios2/include/asm/cachetype.h
new file mode 100644 (file)
index 0000000..eb9c416
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_NIOS2_CACHETYPE_H
+#define __ASM_NIOS2_CACHETYPE_H
+
+#include <asm/page.h>
+#include <asm/cache.h>
+
+#define cpu_dcache_is_aliasing()       (NIOS2_DCACHE_SIZE > PAGE_SIZE)
+
+#endif
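
cpu_dcache_is_aliasing() backs the new ARCH_HAS_CPU_CACHE_ALIASING
infrastructure. A virtually indexed cache can alias once its index bits extend
past the page offset, i.e. when the size per way exceeds the page size; the
comparison against the full NIOS2_DCACHE_SIZE suggests a direct-mapped dcache,
where total size and way size coincide. A worked example under those
assumptions:

        /* 32 KiB direct-mapped dcache with 4 KiB pages:
         * 32768 / 4096 = 8 cache colors, so one physical page can sit
         * at 8 different cache indices -> the cache is aliasing */
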
index 5144506dfa693270523e52e2ae95674c311fa3fc..d052dfcbe8d3a0c54c95d845415fce1e4388caab 100644 (file)
@@ -178,6 +178,8 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
        *ptep = pteval;
 }
 
+#define PFN_PTE_SHIFT          0
+
 static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
                pte_t *ptep, pte_t pte, unsigned int nr)
 {
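
PFN_PTE_SHIFT tells the generic page-table helpers how to step the PFN between
consecutive PTEs, and nios2 keeps the PFN in the low bits of the PTE, hence 0.
The generic pte_next_pfn(), for instance, advances an entry essentially as:

        return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
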
index 052d27fbea156f9f26d400b7803f3652b1788326..ee29c4c8d7c1c5d3d553fa37388a4510f0fa0b9b 100644 (file)
@@ -8,6 +8,7 @@ config PARISC
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_SYSCALL_TRACEPOINTS
        select ARCH_WANT_FRAME_POINTERS
+       select ARCH_HAS_CPU_CACHE_ALIASING
        select ARCH_HAS_DMA_ALLOC if PA11
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_STRICT_KERNEL_RWX
index 5937d5edaba1eac5a0c4e4b055c3e77fcbe3bf62..000a28e1c5e8d43aba8ed5185e86c654cf5ad5a0 100644 (file)
         * version takes two arguments: a src and destination register.
         * However, the source and destination registers can not be
         * the same register.
+        *
+        * We use add,l to avoid clobbering the C/B bits in the PSW.
         */
 
        .macro  tophys  grvirt, grphys
-       ldil    L%(__PAGE_OFFSET), \grphys
-       sub     \grvirt, \grphys, \grphys
+       ldil    L%(-__PAGE_OFFSET), \grphys
+       addl    \grvirt, \grphys, \grphys
        .endm
-       
+
        .macro  tovirt  grphys, grvirt
        ldil    L%(__PAGE_OFFSET), \grvirt
-       add     \grphys, \grvirt, \grvirt
+       addl    \grphys, \grvirt, \grvirt
        .endm
 
        .macro  tophys_r1  gr
-       ldil    L%(__PAGE_OFFSET), %r1
-       sub     \gr, %r1, \gr
+       ldil    L%(-__PAGE_OFFSET), %r1
+       addl    \gr, %r1, \gr
        .endm
-       
+
        .macro  tovirt_r1  gr
        ldil    L%(__PAGE_OFFSET), %r1
-       add     \gr, %r1, \gr
+       addl    \gr, %r1, \gr
        .endm
 
        .macro delay value
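
The rewritten macros use the identity virt - __PAGE_OFFSET ==
virt + (-__PAGE_OFFSET): ldil loads the high part of the negated offset, so
the subtraction becomes an addition, and the add,l ("add logical") form is
chosen because, per the comment above, it does not modify the PSW carry/borrow
(C/B) bits the way plain add/sub do. In C terms the whole transformation is:

        phys = virt + (0 - __PAGE_OFFSET);      /* == virt - __PAGE_OFFSET */
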
diff --git a/arch/parisc/include/asm/cachetype.h b/arch/parisc/include/asm/cachetype.h
new file mode 100644 (file)
index 0000000..e0868a1
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PARISC_CACHETYPE_H
+#define __ASM_PARISC_CACHETYPE_H
+
+#include <linux/types.h>
+
+#define cpu_dcache_is_aliasing()       true
+
+#endif
index 3c43baca7b397ddd4b4b09c671066e9b9cca81e1..2aceebcd695c8057d14e8cf58f45e230a9f16d38 100644 (file)
@@ -40,7 +40,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 "      addc            %0, %5, %0\n"
 "      addc            %0, %3, %0\n"
 "1:    ldws,ma         4(%1), %3\n"
-"      addib,<         0, %2, 1b\n"
+"      addib,>         -1, %2, 1b\n"
 "      addc            %0, %3, %0\n"
 "\n"
 "      extru           %0, 31, 16, %4\n"
@@ -126,6 +126,7 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
        ** Try to keep 4 registers with "live" values ahead of the ALU.
        */
 
+"      depdi           0, 31, 32, %0\n"/* clear upper half of incoming checksum */
 "      ldd,ma          8(%1), %4\n"    /* get 1st saddr word */
 "      ldd,ma          8(%2), %5\n"    /* get 1st daddr word */
 "      add             %4, %0, %0\n"
@@ -137,8 +138,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 "      add,dc          %3, %0, %0\n"  /* fold in proto+len | carry bit */
 "      extrd,u         %0, 31, 32, %4\n"/* copy upper half down */
 "      depdi           0, 31, 32, %0\n"/* clear upper half */
-"      add             %4, %0, %0\n"   /* fold into 32-bits */
-"      addc            0, %0, %0\n"    /* add carry */
+"      add,dc          %4, %0, %0\n"   /* fold into 32-bits, plus carry */
+"      addc            0, %0, %0\n"    /* add final carry */
 
 #else
 
@@ -163,7 +164,8 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 "      ldw,ma          4(%2), %7\n"    /* 4th daddr */
 "      addc            %6, %0, %0\n"
 "      addc            %7, %0, %0\n"
-"      addc            %3, %0, %0\n"   /* fold in proto+len, catch carry */
+"      addc            %3, %0, %0\n"   /* fold in proto+len */
+"      addc            0, %0, %0\n"    /* add carry */
 
 #endif
        : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len),
index 4de3b391d81263f233a066adda68210c32bdab2e..7ddd7f4333670ada397dc2e2ebcb3326731ad294 100644 (file)
@@ -61,7 +61,7 @@ parisc_get_drvdata(struct parisc_device *d)
        return dev_get_drvdata(&d->dev);
 }
 
-extern struct bus_type parisc_bus_type;
+extern const struct bus_type parisc_bus_type;
 
 int iosapic_serial_irq(struct parisc_device *dev);
 
index c7ff339732ba5a762eac90e1b3072aef45c58318..ac19d685e4a5a0b3f065e07190a129d280bc751e 100644 (file)
@@ -618,7 +618,7 @@ static struct attribute *parisc_device_attrs[] = {
 };
 ATTRIBUTE_GROUPS(parisc_device);
 
-struct bus_type parisc_bus_type = {
+const struct bus_type parisc_bus_type = {
        .name = "parisc",
        .match = parisc_generic_match,
        .uevent = parisc_uevent,
index 2f81bfd4f15e17bc0b0ec3fd1b524e571c2924ed..dff66be65d2900a322197f729afe545c5097fc10 100644 (file)
@@ -498,7 +498,7 @@ asmlinkage void do_cpu_irq_mask(struct pt_regs *regs)
 
        old_regs = set_irq_regs(regs);
        local_irq_disable();
-       irq_enter();
+       irq_enter_rcu();
 
        eirr_val = mfctl(23) & cpu_eiem & per_cpu(local_ack_eiem, cpu);
        if (!eirr_val)
@@ -533,7 +533,7 @@ asmlinkage void do_cpu_irq_mask(struct pt_regs *regs)
 #endif /* CONFIG_IRQSTACKS */
 
  out:
-       irq_exit();
+       irq_exit_rcu();
        set_irq_regs(old_regs);
        return;
 
index c520e551a165258609cba5e068037493bd7e57a8..71e596ca5a86b04b78059718187943cbbff735f5 100644 (file)
@@ -169,6 +169,7 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
 static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 {
        unsigned long saddr = regs->ior;
+       unsigned long shift, temp1;
        __u64 val = 0;
        ASM_EXCEPTIONTABLE_VAR(ret);
 
@@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 
 #ifdef CONFIG_64BIT
        __asm__ __volatile__  (
-"      depd,z  %3,60,3,%%r19\n"                /* r19=(ofs&7)*8 */
-"      mtsp    %4, %%sr1\n"
-"      depd    %%r0,63,3,%3\n"
-"1:    ldd     0(%%sr1,%3),%0\n"
-"2:    ldd     8(%%sr1,%3),%%r20\n"
-"      subi    64,%%r19,%%r19\n"
-"      mtsar   %%r19\n"
-"      shrpd   %0,%%r20,%%sar,%0\n"
+"      depd,z  %2,60,3,%3\n"           /* shift=(ofs&7)*8 */
+"      mtsp    %5, %%sr1\n"
+"      depd    %%r0,63,3,%2\n"
+"1:    ldd     0(%%sr1,%2),%0\n"
+"2:    ldd     8(%%sr1,%2),%4\n"
+"      subi    64,%3,%3\n"
+"      mtsar   %3\n"
+"      shrpd   %0,%4,%%sar,%0\n"
 "3:    \n"
        ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
        ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
-       : "=r" (val), "+r" (ret)
-       : "0" (val), "r" (saddr), "r" (regs->isr)
-       : "r19", "r20" );
+       : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
+       : "r" (regs->isr) );
 #else
-    {
-       unsigned long shift, temp1;
        __asm__ __volatile__  (
-"      zdep    %2,29,2,%3\n"           /* r19=(ofs&3)*8 */
+"      zdep    %2,29,2,%3\n"           /* shift=(ofs&3)*8 */
 "      mtsp    %5, %%sr1\n"
 "      dep     %%r0,31,2,%2\n"
 "1:    ldw     0(%%sr1,%2),%0\n"
@@ -214,7 +212,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
        ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
        : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
        : "r" (regs->isr) );
-    }
 #endif
 
        DPRINTF("val = 0x%llx\n", val);
@@ -399,6 +396,13 @@ void handle_unaligned(struct pt_regs *regs)
 
                if (!unaligned_enabled)
                        goto force_sigbus;
+       } else {
+               static DEFINE_RATELIMIT_STATE(kernel_ratelimit, 5 * HZ, 5);
+               if (!(current->thread.flags & PARISC_UAC_NOPRINT) &&
+                       __ratelimit(&kernel_ratelimit))
+                       pr_warn("Kernel: unaligned access to " RFMT " in %pS "
+                                       "(iir " RFMT ")\n",
+                               regs->ior, (void *)regs->iaoq[0], regs->iir);
        }
 
        /* handle modification - OK, it's ugly, see the instruction manual */
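
Two independent improvements in this file: the 64-bit emulate_ldd() asm stops
hard-coding %r19/%r20 as clobbers and instead hands the two temporaries to the
compiler as early-clobber outputs ("=&r"), and unaligned accesses from kernel
mode now emit a rate-limited warning. The ratelimit state above allows at most
5 messages per 5-second window; the stock usage pattern is:

        static DEFINE_RATELIMIT_STATE(rs, 5 * HZ, 5);   /* interval, burst */

        if (__ratelimit(&rs))
                pr_warn("...\n");
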
index 63d339c81c141fdc1aa14fe189db192057f03683..e3a3a19b966b013d5bf52b9d0d47c8c3abe1d4e6 100644 (file)
@@ -15,7 +15,7 @@
  *     Double Floating-point Square Root
  *
  *  External Interfaces:
- *     dbl_fsqrt(srcptr,nullptr,dstptr,status)
+ *     dbl_fsqrt(srcptr,_nullptr,dstptr,status)
  *
  *  Internal Interfaces:
  *
@@ -37,7 +37,7 @@
 unsigned int
 dbl_fsqrt(
            dbl_floating_point *srcptr,
-           unsigned int *nullptr,
+           unsigned int *_nullptr,
            dbl_floating_point *dstptr,
            unsigned int *status)
 {
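
This and the following math-emu hunks are one mechanical rename: the unused
second parameter of these entry points happened to be named nullptr, which is
a keyword in C23, so it becomes _nullptr -- presumably to keep the code
building as toolchains move to newer C standards. No functional change; the
point in miniature:

        int f(unsigned int *nullptr);   /* rejected once nullptr is a keyword */
        int f(unsigned int *_nullptr);  /* accepted under any standard */
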
index 0530e6127797471faff378604abdaa9c1a6a8485..61e489704c86b831a341ffdb24d2f7e5d05ce857 100644 (file)
@@ -16,8 +16,8 @@
  *     Double Floating-point to Single Floating-point
  *
  *  External Interfaces:
- *     dbl_to_sgl_fcnvff(srcptr,nullptr,dstptr,status)
- *     sgl_to_dbl_fcnvff(srcptr,nullptr,dstptr,status)
+ *     dbl_to_sgl_fcnvff(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_dbl_fcnvff(srcptr,_nullptr,dstptr,status)
  *
  *  Internal Interfaces:
  *
@@ -40,7 +40,7 @@
 int
 sgl_to_dbl_fcnvff(
            sgl_floating_point *srcptr,
-           unsigned int *nullptr,
+           unsigned int *_nullptr,
            dbl_floating_point *dstptr,
            unsigned int *status)
 {
@@ -127,7 +127,7 @@ sgl_to_dbl_fcnvff(
 int
 dbl_to_sgl_fcnvff(
                    dbl_floating_point *srcptr,
-                   unsigned int *nullptr,
+                   unsigned int *_nullptr,
                    sgl_floating_point *dstptr,
                    unsigned int *status)
 {
index c971618a6f3ca82965b33aaa516912a1060be787..c31790ceecca03e38b683fbf3136c1289fb066c4 100644 (file)
  *     Floating-point to Unsigned Fixed-point Converts
  *
  *  External Interfaces:
- *     dbl_to_dbl_fcnvfu(srcptr,nullptr,dstptr,status)
- *     dbl_to_sgl_fcnvfu(srcptr,nullptr,dstptr,status)
- *     sgl_to_dbl_fcnvfu(srcptr,nullptr,dstptr,status)
- *     sgl_to_sgl_fcnvfu(srcptr,nullptr,dstptr,status)
+ *     dbl_to_dbl_fcnvfu(srcptr,_nullptr,dstptr,status)
+ *     dbl_to_sgl_fcnvfu(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_dbl_fcnvfu(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_sgl_fcnvfu(srcptr,_nullptr,dstptr,status)
  *
  *  Internal Interfaces:
  *
@@ -45,7 +45,7 @@
 int
 sgl_to_sgl_fcnvfu(
                        sgl_floating_point *srcptr,
-                       unsigned int *nullptr,
+                       unsigned int *_nullptr,
                        unsigned int *dstptr,
                        unsigned int *status)
 {
@@ -166,7 +166,7 @@ sgl_to_sgl_fcnvfu(
 int
 sgl_to_dbl_fcnvfu(
                    sgl_floating_point *srcptr,
-                   unsigned int *nullptr,
+                   unsigned int *_nullptr,
                    dbl_unsigned *dstptr,
                    unsigned int *status)
 {
@@ -285,7 +285,7 @@ sgl_to_dbl_fcnvfu(
  */
 /*ARGSUSED*/
 int
-dbl_to_sgl_fcnvfu (dbl_floating_point * srcptr, unsigned int *nullptr,
+dbl_to_sgl_fcnvfu (dbl_floating_point * srcptr, unsigned int *_nullptr,
                   unsigned int *dstptr, unsigned int *status)
 {
        register unsigned int srcp1, srcp2, result;
@@ -408,7 +408,7 @@ dbl_to_sgl_fcnvfu (dbl_floating_point * srcptr, unsigned int *nullptr,
  */
 /*ARGSUSED*/
 int
-dbl_to_dbl_fcnvfu (dbl_floating_point * srcptr, unsigned int *nullptr,
+dbl_to_dbl_fcnvfu (dbl_floating_point * srcptr, unsigned int *_nullptr,
                   dbl_unsigned * dstptr, unsigned int *status)
 {
        register int src_exponent;
index 5b657f852578186a35a4c574242e2151893c20ef..2cf1daf3b7ad6e4a003ddd606bea563717dc2808 100644 (file)
  *     Floating-point to Unsigned Fixed-point Converts with Truncation
  *
  *  External Interfaces:
- *     dbl_to_dbl_fcnvfut(srcptr,nullptr,dstptr,status)
- *     dbl_to_sgl_fcnvfut(srcptr,nullptr,dstptr,status)
- *     sgl_to_dbl_fcnvfut(srcptr,nullptr,dstptr,status)
- *     sgl_to_sgl_fcnvfut(srcptr,nullptr,dstptr,status)
+ *     dbl_to_dbl_fcnvfut(srcptr,_nullptr,dstptr,status)
+ *     dbl_to_sgl_fcnvfut(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_dbl_fcnvfut(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_sgl_fcnvfut(srcptr,_nullptr,dstptr,status)
  *
  *  Internal Interfaces:
  *
@@ -44,7 +44,7 @@
  */
 /*ARGSUSED*/
 int
-sgl_to_sgl_fcnvfut (sgl_floating_point * srcptr, unsigned int *nullptr,
+sgl_to_sgl_fcnvfut (sgl_floating_point * srcptr, unsigned int *_nullptr,
                    unsigned int *dstptr, unsigned int *status)
 {
        register unsigned int src, result;
@@ -113,7 +113,7 @@ sgl_to_sgl_fcnvfut (sgl_floating_point * srcptr, unsigned int *nullptr,
  */
 /*ARGSUSED*/
 int
-sgl_to_dbl_fcnvfut (sgl_floating_point * srcptr, unsigned int *nullptr,
+sgl_to_dbl_fcnvfut (sgl_floating_point * srcptr, unsigned int *_nullptr,
                    dbl_unsigned * dstptr, unsigned int *status)
 {
        register int src_exponent;
@@ -183,7 +183,7 @@ sgl_to_dbl_fcnvfut (sgl_floating_point * srcptr, unsigned int *nullptr,
  */
 /*ARGSUSED*/
 int
-dbl_to_sgl_fcnvfut (dbl_floating_point * srcptr, unsigned int *nullptr,
+dbl_to_sgl_fcnvfut (dbl_floating_point * srcptr, unsigned int *_nullptr,
                    unsigned int *dstptr, unsigned int *status)
 {
        register unsigned int srcp1, srcp2, result;
@@ -252,7 +252,7 @@ dbl_to_sgl_fcnvfut (dbl_floating_point * srcptr, unsigned int *nullptr,
  */
 /*ARGSUSED*/
 int
-dbl_to_dbl_fcnvfut (dbl_floating_point * srcptr, unsigned int *nullptr,
+dbl_to_dbl_fcnvfut (dbl_floating_point * srcptr, unsigned int *_nullptr,
                    dbl_unsigned * dstptr, unsigned int *status)
 {
        register int src_exponent;
index 5e153078d8035a3d4c13ae0e5b961bd897f7bc31..99bd61479452222a53c3a02c039dc058eaf180b5 100644 (file)
  *     Double Floating-point to Double Fixed-point 
  *
  *  External Interfaces:
- *     dbl_to_dbl_fcnvfx(srcptr,nullptr,dstptr,status)
- *     dbl_to_sgl_fcnvfx(srcptr,nullptr,dstptr,status)
- *     sgl_to_dbl_fcnvfx(srcptr,nullptr,dstptr,status)
- *     sgl_to_sgl_fcnvfx(srcptr,nullptr,dstptr,status)
+ *     dbl_to_dbl_fcnvfx(srcptr,_nullptr,dstptr,status)
+ *     dbl_to_sgl_fcnvfx(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_dbl_fcnvfx(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_sgl_fcnvfx(srcptr,_nullptr,dstptr,status)
  *
  *  Internal Interfaces:
  *
@@ -44,7 +44,7 @@
 int
 sgl_to_sgl_fcnvfx(
                    sgl_floating_point *srcptr,
-                   sgl_floating_point *nullptr,
+                   sgl_floating_point *_nullptr,
                    int *dstptr,
                    sgl_floating_point *status)
 {
@@ -141,7 +141,7 @@ sgl_to_sgl_fcnvfx(
 int
 sgl_to_dbl_fcnvfx(
                sgl_floating_point *srcptr,
-               unsigned int *nullptr,
+               unsigned int *_nullptr,
                dbl_integer *dstptr,
                unsigned int *status)
 {
@@ -262,7 +262,7 @@ sgl_to_dbl_fcnvfx(
 int
 dbl_to_sgl_fcnvfx(
                    dbl_floating_point *srcptr,
-                   unsigned int *nullptr,
+                   unsigned int *_nullptr,
                    int *dstptr,
                    unsigned int *status)
 {
@@ -373,7 +373,7 @@ dbl_to_sgl_fcnvfx(
 int
 dbl_to_dbl_fcnvfx(
                    dbl_floating_point *srcptr,
-                   unsigned int *nullptr,
+                   unsigned int *_nullptr,
                    dbl_integer *dstptr,
                    unsigned int *status)
 {
index ebec31e40d01a4a867117df42009562f3fe63def..3b7cc62257d08a570daa8ac910f515f4e266040a 100644 (file)
  *     Double Floating-point to Double Fixed-point /w truncated result
  *
  *  External Interfaces:
- *     dbl_to_dbl_fcnvfxt(srcptr,nullptr,dstptr,status)
- *     dbl_to_sgl_fcnvfxt(srcptr,nullptr,dstptr,status)
- *     sgl_to_dbl_fcnvfxt(srcptr,nullptr,dstptr,status)
- *     sgl_to_sgl_fcnvfxt(srcptr,nullptr,dstptr,status)
+ *     dbl_to_dbl_fcnvfxt(srcptr,_nullptr,dstptr,status)
+ *     dbl_to_sgl_fcnvfxt(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_dbl_fcnvfxt(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_sgl_fcnvfxt(srcptr,_nullptr,dstptr,status)
  *
  *  Internal Interfaces:
  *
@@ -45,7 +45,7 @@
 int
 sgl_to_sgl_fcnvfxt(
                    sgl_floating_point *srcptr,
-                   unsigned int *nullptr,
+                   unsigned int *_nullptr,
                    int *dstptr,
                    unsigned int *status)
 {
@@ -109,7 +109,7 @@ sgl_to_sgl_fcnvfxt(
 int
 sgl_to_dbl_fcnvfxt(
                    sgl_floating_point *srcptr,
-                   unsigned int *nullptr,
+                   unsigned int *_nullptr,
                    dbl_integer *dstptr,
                    unsigned int *status)
 {
@@ -183,7 +183,7 @@ sgl_to_dbl_fcnvfxt(
 int
 dbl_to_sgl_fcnvfxt(
                        dbl_floating_point *srcptr,
-                       unsigned int *nullptr,
+                       unsigned int *_nullptr,
                        int *dstptr,
                        unsigned int *status)
 {
@@ -248,7 +248,7 @@ dbl_to_sgl_fcnvfxt(
 int
 dbl_to_dbl_fcnvfxt(
                        dbl_floating_point *srcptr,
-                       unsigned int *nullptr,
+                       unsigned int *_nullptr,
                        dbl_integer *dstptr,
                        unsigned int *status)
 {
index c54978a0ace194dafbff4b9f38dfd2778e2743cd..c166feb570454b24545a86eb6c620bebdba0c5b9 100644 (file)
  *     Fixed point to Floating-point Converts
  *
  *  External Interfaces:
- *     dbl_to_dbl_fcnvuf(srcptr,nullptr,dstptr,status)
- *     dbl_to_sgl_fcnvuf(srcptr,nullptr,dstptr,status)
- *     sgl_to_dbl_fcnvuf(srcptr,nullptr,dstptr,status)
- *     sgl_to_sgl_fcnvuf(srcptr,nullptr,dstptr,status)
+ *     dbl_to_dbl_fcnvuf(srcptr,_nullptr,dstptr,status)
+ *     dbl_to_sgl_fcnvuf(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_dbl_fcnvuf(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_sgl_fcnvuf(srcptr,_nullptr,dstptr,status)
  *
  *  Internal Interfaces:
  *
@@ -45,7 +45,7 @@
 int
 sgl_to_sgl_fcnvuf(
                        unsigned int *srcptr,
-                       unsigned int *nullptr,
+                       unsigned int *_nullptr,
                        sgl_floating_point *dstptr,
                        unsigned int *status)
 {
@@ -104,7 +104,7 @@ sgl_to_sgl_fcnvuf(
 int
 sgl_to_dbl_fcnvuf(
                        unsigned int *srcptr,
-                       unsigned int *nullptr,
+                       unsigned int *_nullptr,
                        dbl_floating_point *dstptr,
                        unsigned int *status)
 {
@@ -145,7 +145,7 @@ sgl_to_dbl_fcnvuf(
 int
 dbl_to_sgl_fcnvuf(
                        dbl_unsigned *srcptr,
-                       unsigned int *nullptr,
+                       unsigned int *_nullptr,
                        sgl_floating_point *dstptr,
                        unsigned int *status)
 {
@@ -227,7 +227,7 @@ dbl_to_sgl_fcnvuf(
 int
 dbl_to_dbl_fcnvuf(
                    dbl_unsigned *srcptr,
-                   unsigned int *nullptr,
+                   unsigned int *_nullptr,
                    dbl_floating_point *dstptr,
                    unsigned int *status)
 {
index 69401797146b906cddee6bb31be45c322ae91011..11bc1e8a13aa9a8ee095008ffd51376c93ecab6b 100644 (file)
  *     Double Fixed-point to Double Floating-point 
  *
  *  External Interfaces:
- *     dbl_to_dbl_fcnvxf(srcptr,nullptr,dstptr,status)
- *     dbl_to_sgl_fcnvxf(srcptr,nullptr,dstptr,status)
- *     sgl_to_dbl_fcnvxf(srcptr,nullptr,dstptr,status)
- *     sgl_to_sgl_fcnvxf(srcptr,nullptr,dstptr,status)
+ *     dbl_to_dbl_fcnvxf(srcptr,_nullptr,dstptr,status)
+ *     dbl_to_sgl_fcnvxf(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_dbl_fcnvxf(srcptr,_nullptr,dstptr,status)
+ *     sgl_to_sgl_fcnvxf(srcptr,_nullptr,dstptr,status)
  *
  *  Internal Interfaces:
  *
@@ -44,7 +44,7 @@
 int
 sgl_to_sgl_fcnvxf(
                    int *srcptr,
-                   unsigned int *nullptr,
+                   unsigned int *_nullptr,
                    sgl_floating_point *dstptr,
                    unsigned int *status)
 {
@@ -115,7 +115,7 @@ sgl_to_sgl_fcnvxf(
 int
 sgl_to_dbl_fcnvxf(
                    int *srcptr,
-                   unsigned int *nullptr,
+                   unsigned int *_nullptr,
                    dbl_floating_point *dstptr,
                    unsigned int *status)
 {
@@ -166,7 +166,7 @@ sgl_to_dbl_fcnvxf(
 int
 dbl_to_sgl_fcnvxf(
                        dbl_integer *srcptr,
-                       unsigned int *nullptr,
+                       unsigned int *_nullptr,
                        sgl_floating_point *dstptr,
                        unsigned int *status)
 {
@@ -271,7 +271,7 @@ dbl_to_sgl_fcnvxf(
 int
 dbl_to_dbl_fcnvxf(
                    dbl_integer *srcptr,
-                   unsigned int *nullptr,
+                   unsigned int *_nullptr,
                    dbl_floating_point *dstptr,
                    unsigned int *status)
 {
index 0b0e8493e08a7044a1cd1058b08535391bc9d551..825d89650c2d86bc1d8237ae2e2169a25de59b71 100644 (file)
@@ -14,8 +14,8 @@
  *     Quad Floating-point Round to Integer (returns unimplemented)
  *
  *  External Interfaces:
- *     dbl_frnd(srcptr,nullptr,dstptr,status)
- *     sgl_frnd(srcptr,nullptr,dstptr,status)
+ *     dbl_frnd(srcptr,_nullptr,dstptr,status)
+ *     sgl_frnd(srcptr,_nullptr,dstptr,status)
  *
  * END_DESC
 */
@@ -33,7 +33,7 @@
 /*ARGSUSED*/
 int
 sgl_frnd(sgl_floating_point *srcptr,
-       unsigned int *nullptr,
+       unsigned int *_nullptr,
        sgl_floating_point *dstptr,
        unsigned int *status)
 {
@@ -138,7 +138,7 @@ sgl_frnd(sgl_floating_point *srcptr,
 int
 dbl_frnd(
        dbl_floating_point *srcptr,
-       unsigned int *nullptr,
+       unsigned int *_nullptr,
        dbl_floating_point *dstptr,
        unsigned int *status)
 {
index bd6a84f468d8f9d90daedf107da4b1d4c44862fb..8e9e023e7b2ee080f76a22c5d98dd4009639389c 100644 (file)
@@ -15,7 +15,7 @@
  *     Single Floating-point Square Root
  *
  *  External Interfaces:
- *     sgl_fsqrt(srcptr,nullptr,dstptr,status)
+ *     sgl_fsqrt(srcptr,_nullptr,dstptr,status)
  *
  *  Internal Interfaces:
  *
@@ -37,7 +37,7 @@
 unsigned int
 sgl_fsqrt(
     sgl_floating_point *srcptr,
-    unsigned int *nullptr,
+    unsigned int *_nullptr,
     sgl_floating_point *dstptr,
     unsigned int *status)
 {
index a91cb070ca4a6e08f19ddd717c7eb6fb2938a6a0..a68b9e637eda04e343a99b5c5bc23ad692d0dba9 100644 (file)
@@ -333,7 +333,6 @@ config PANIC_TIMEOUT
 config COMPAT
        bool "Enable support for 32bit binaries"
        depends on PPC64
-       depends on !CC_IS_CLANG || CLANG_VERSION >= 120000
        default y if !CPU_LITTLE_ENDIAN
        select ARCH_WANT_OLD_COMPAT_IPC
        select COMPAT_OLD_SIGACTION
@@ -608,6 +607,11 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
 config ARCH_SUPPORTS_KEXEC
        def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP)
 
+config ARCH_SELECTS_KEXEC
+       def_bool y
+       depends on KEXEC
+       select CRASH_DUMP
+
 config ARCH_SUPPORTS_KEXEC_FILE
        def_bool PPC64
 
@@ -618,6 +622,7 @@ config ARCH_SELECTS_KEXEC_FILE
        def_bool y
        depends on KEXEC_FILE
        select KEXEC_ELF
+       select CRASH_DUMP
        select HAVE_IMA_KEXEC if IMA
 
 config PPC64_BIG_ENDIAN_ELF_ABI_V2
@@ -690,7 +695,6 @@ config ARCH_SELECTS_CRASH_DUMP
 config FA_DUMP
        bool "Firmware-assisted dump"
        depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
-       select CRASH_CORE
        select CRASH_DUMP
        help
          A robust mechanism to get reliable kernel crash dump with
index 051247027da0bafa728f98e2e858aef0b05d8fdf..65261cbe5bfdbf79c90414a5ba3a158ae58abafe 100644 (file)
@@ -144,11 +144,11 @@ CFLAGS-$(CONFIG_PPC64)    += $(call cc-option,-mno-pointers-to-nested-functions)
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mlong-double-128)
 
 # Clang unconditionally reserves r2 on ppc32 and does not support the flag
-# https://bugs.llvm.org/show_bug.cgi?id=39555
+# https://llvm.org/pr39555
 CFLAGS-$(CONFIG_PPC32) := $(call cc-option, -ffixed-r2)
 
 # Clang doesn't support -mmultiple / -mno-multiple
-# https://bugs.llvm.org/show_bug.cgi?id=39556
+# https://llvm.org/pr39556
 CFLAGS-$(CONFIG_PPC32) += $(call cc-option, $(MULTIPLEWORD))
 
 CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata)
@@ -219,18 +219,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 # often slow when they are implemented at all
 KBUILD_CFLAGS          += $(call cc-option,-mno-string)
 
-cpu-as-$(CONFIG_ALTIVEC)       += $(call as-option,-Wa$(comma)-maltivec)
-
-# When using '-many -mpower4' gas will first try and find a matching power4
-# mnemonic and failing that it will allow any valid mnemonic that GAS knows
-# about. GCC will pass -many to GAS when assembling, clang does not.
-# LLVM IAS doesn't understand either flag: https://github.com/ClangBuiltLinux/linux/issues/675
-# but LLVM IAS only supports ISA >= 2.06 for Book3S 64 anyway...
-cpu-as-$(CONFIG_PPC_BOOK3S_64) += $(call as-option,-Wa$(comma)-mpower4) $(call as-option,-Wa$(comma)-many)
-
-KBUILD_AFLAGS += $(cpu-as-y)
-KBUILD_CFLAGS += $(cpu-as-y)
-
 KBUILD_AFLAGS += $(aflags-y)
 KBUILD_CFLAGS += $(cflags-y)
 
@@ -314,6 +302,26 @@ ppc32_allmodconfig:
        $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \
                -f $(srctree)/Makefile allmodconfig
 
+generated_configs += ppc40x_allmodconfig
+ppc40x_allmodconfig:
+       $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/40x.config \
+               -f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc44x_allmodconfig
+ppc44x_allmodconfig:
+       $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/44x.config \
+               -f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc8xx_allmodconfig
+ppc8xx_allmodconfig:
+       $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/8xx.config \
+               -f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc85xx_allmodconfig
+ppc85xx_allmodconfig:
+       $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-32bit.config \
+               -f $(srctree)/Makefile allmodconfig
+
 generated_configs += ppc_defconfig
 ppc_defconfig:
        $(call merge_into_defconfig,book3s_32.config,)
index 267d6524caac47d5897fec42fa1873d0740bc21a..d07796fdf91aa729610b248c7f722b7834568aeb 100644 (file)
@@ -112,8 +112,11 @@ static void *simple_realloc(void *ptr, unsigned long size)
                return ptr;
 
        new = simple_malloc(size);
-       memcpy(new, ptr, p->size);
-       simple_free(ptr);
+       if (new) {
+               memcpy(new, ptr, p->size);
+               simple_free(ptr);
+       }
+
        return new;
 }
 
diff --git a/arch/powerpc/configs/40x.config b/arch/powerpc/configs/40x.config
new file mode 100644 (file)
index 0000000..82a9d58
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_PPC64=n
+CONFIG_40x=y
diff --git a/arch/powerpc/configs/44x.config b/arch/powerpc/configs/44x.config
new file mode 100644 (file)
index 0000000..79b7b19
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_PPC64=n
+CONFIG_44x=y
index 6b8894d727a26a9b21144188ef8c7ccd62d21174..a85310bcb1fdd442ed4a2e616ee86f1a786c8db3 100644 (file)
@@ -1,3 +1,4 @@
+CONFIG_PPC64=n
 CONFIG_HIGHMEM=y
 CONFIG_KEXEC=y
 CONFIG_PPC_85xx=y
diff --git a/arch/powerpc/configs/8xx.config b/arch/powerpc/configs/8xx.config
new file mode 100644 (file)
index 0000000..7eb3ffb
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_PPC64=n
+CONFIG_PPC_8xx=y
index aa8bb0208bcc8f05cf4d1cbe3df49853c48a351a..2b175ddf82f0bc61c3d811a528aae13eb79ce19a 100644 (file)
@@ -24,7 +24,6 @@ CONFIG_PS3_VRAM=m
 CONFIG_PS3_LPM=m
 # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
 CONFIG_KEXEC=y
-# CONFIG_PPC64_BIG_ENDIAN_ELF_ABI_V2 is not set
 CONFIG_PPC_4K_PAGES=y
 CONFIG_SCHED_SMT=y
 CONFIG_PM=y
index 6fc2248ca561668f8200135a68caccc5d452aa42..1e201b7ae2fc6076027d108f96db2f79e8ac880e 100644 (file)
@@ -137,4 +137,24 @@ config CRYPTO_POLY1305_P10
          - Power10 or later
          - Little-endian
 
+config CRYPTO_DEV_VMX
+        bool "Support for VMX cryptographic acceleration instructions"
+        depends on PPC64 && VSX
+        help
+          Support for VMX cryptographic acceleration instructions.
+
+config CRYPTO_DEV_VMX_ENCRYPT
+       tristate "Encryption acceleration support on P8 CPU"
+       depends on CRYPTO_DEV_VMX
+       select CRYPTO_AES
+       select CRYPTO_CBC
+       select CRYPTO_CTR
+       select CRYPTO_GHASH
+       select CRYPTO_XTS
+       default m
+       help
+         Support for VMX cryptographic acceleration instructions on Power8 CPU.
+         This module supports acceleration for AES and GHASH in hardware. If you
+         choose 'M' here, this module will be called vmx-crypto.
+
 endmenu
index ebdac1b9eb9af30b63ddc4303808c298c47113f2..fca0e9739869668381c122b64c9fa8433b2137d9 100644 (file)
@@ -16,6 +16,7 @@ obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o
 obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
 obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o
 obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o
+obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
 
 aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
 md5-ppc-y := md5-asm.o md5-glue.o
@@ -27,14 +28,29 @@ crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
 aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
 chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o
 poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o
+vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
+
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+override flavour := linux-ppc64le
+else
+ifdef CONFIG_PPC64_ELF_ABI_V2
+override flavour := linux-ppc64-elfv2
+else
+override flavour := linux-ppc64
+endif
+endif
 
 quiet_cmd_perl = PERL    $@
-      cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@
+      cmd_perl = $(PERL) $< $(flavour) > $@
 
-targets += aesp10-ppc.S ghashp10-ppc.S
+targets += aesp10-ppc.S ghashp10-ppc.S aesp8-ppc.S ghashp8-ppc.S
 
 $(obj)/aesp10-ppc.S $(obj)/ghashp10-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
        $(call if_changed,perl)
 
+$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
+       $(call if_changed,perl)
+
 OBJECT_FILES_NON_STANDARD_aesp10-ppc.o := y
 OBJECT_FILES_NON_STANDARD_ghashp10-ppc.o := y
+OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y
index 6e70ae51163189449205a54dde22ea966df655de..faf3e3b4e4b2be4567739b9ea3b8534e3f784ebd 100644 (file)
@@ -269,8 +269,6 @@ int hash__create_section_mapping(unsigned long start, unsigned long end,
                                 int nid, pgprot_t prot);
 int hash__remove_section_mapping(unsigned long start, unsigned long end);
 
-void hash__kernel_map_pages(struct page *page, int numpages, int enable);
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
index 2fce3498b000e29541c33138fc625dfe7b95d7ab..ced7ee8b42fc354b48cc9d03ebbd295433024246 100644 (file)
@@ -45,9 +45,9 @@ static inline int hugepd_ok(hugepd_t hpd)
 /*
  * This should never get called
  */
-static inline int get_hugepd_cache_index(int index)
+static __always_inline int get_hugepd_cache_index(int index)
 {
-       BUG();
+       BUILD_BUG();
 }
 
 #endif /* CONFIG_HUGETLB_PAGE */
index 927d585652bc75c2e9d798ed439e5274dc7327ef..fac5615e6bc5756857a5f980c31ca14331479522 100644 (file)
@@ -1027,16 +1027,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
 }
 #endif
 
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
-static inline void __kernel_map_pages(struct page *page, int numpages, int enable)
-{
-       if (radix_enabled())
-               radix__kernel_map_pages(page, numpages, enable);
-       else
-               hash__kernel_map_pages(page, numpages, enable);
-}
-#endif
-
 static inline pte_t pmd_pte(pmd_t pmd)
 {
        return __pte_raw(pmd_raw(pmd));
@@ -1157,20 +1147,6 @@ pud_hugepage_update(struct mm_struct *mm, unsigned long addr, pud_t *pudp,
        return pud_val(*pudp);
 }
 
-/*
- * returns true for pmd migration entries, THP, devmap, hugetlb
- * But compile time dependent on THP config
- */
-static inline int pmd_large(pmd_t pmd)
-{
-       return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
-}
-
-static inline int pud_large(pud_t pud)
-{
-       return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
-}
-
 /*
  * For radix we should always find H_PAGE_HASHPTE zero. Hence
  * the below will work for radix too
@@ -1451,18 +1427,16 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va
 }
 
 /*
- * Like pmd_huge() and pmd_large(), but works regardless of config options
+ * Like pmd_huge(), but works regardless of config options
  */
-#define pmd_is_leaf pmd_is_leaf
-#define pmd_leaf pmd_is_leaf
-static inline bool pmd_is_leaf(pmd_t pmd)
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
 {
        return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
 }
 
-#define pud_is_leaf pud_is_leaf
-#define pud_leaf pud_is_leaf
-static inline bool pud_is_leaf(pud_t pud)
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
 {
        return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
 }
index 357e23a403d3413108e9084f07514c643d720774..8f55ff74bb680e4a21a40e022035528ab271b777 100644 (file)
@@ -362,8 +362,6 @@ int radix__create_section_mapping(unsigned long start, unsigned long end,
 int radix__remove_section_mapping(unsigned long start, unsigned long end);
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-void radix__kernel_map_pages(struct page *page, int numpages, int enable);
-
 #ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
 #define vmemmap_can_optimize vmemmap_can_optimize
 bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap);
index 8765d5158324e510c253505a1fd9aa9fe392199b..07a204d210344204a00a0d793077e868a4d664df 100644 (file)
@@ -454,6 +454,9 @@ static inline void cpu_feature_keys_init(void) { }
            CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \
            CPU_FTR_DAWR | CPU_FTR_DAWR1 | \
            CPU_FTR_DEXCR_NPHIE)
+
+#define CPU_FTRS_POWER11       CPU_FTRS_POWER10
+
 #define CPU_FTRS_CELL  (CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
            CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -542,19 +545,20 @@ enum {
 #define CPU_FTRS_DT_CPU_BASE   (~0ul)
 #endif
 
+/* pseries may disable DBELL with ibm,pi-features */
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define CPU_FTRS_ALWAYS \
-           (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \
-            CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER9 & \
-            CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \
+           (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & ~CPU_FTR_DBELL & \
+            CPU_FTRS_POWER7 & CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & \
+            CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \
             CPU_FTRS_POWER10 & CPU_FTRS_DT_CPU_BASE)
 #else
 #define CPU_FTRS_ALWAYS                \
            (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
             CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
             CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
-            ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & CPU_FTRS_POWER9 & \
-            CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \
+            ~CPU_FTR_HVMODE & ~CPU_FTR_DBELL & CPU_FTRS_POSSIBLE & \
+            CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \
             CPU_FTRS_POWER10 & CPU_FTRS_DT_CPU_BASE)
 #endif /* CONFIG_CPU_LITTLE_ENDIAN */
 #endif
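
Masking CPU_FTR_DBELL out of CPU_FTRS_ALWAYS matters because the
ALWAYS/POSSIBLE masks drive compile-time folding of feature tests; once
pseries can revoke doorbells at boot via ibm,pi-features (see the prom.c
hunk below), the test must stay a runtime check. A simplified sketch of the
folding logic; the real helper lives in asm/cpu_has_feature.h and adds a
static-key fast path on top:

    static inline bool cpu_has_feature_sketch(unsigned long feature)
    {
            if (CPU_FTRS_ALWAYS & feature)          /* on every possible CPU */
                    return true;
            if (!(CPU_FTRS_POSSIBLE & feature))     /* on no possible CPU */
                    return false;
            return !!(cur_cpu_spec->cpu_features & feature);  /* runtime */
    }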
index 6f33253a364ac0f8d375fc2aaa5831a387322073..46fe406f461ce13930d583eb647dc6d9f8998ff0 100644 (file)
@@ -48,7 +48,7 @@
 
 struct platform_driver;
 
-extern struct bus_type ibmebus_bus_type;
+extern const struct bus_type ibmebus_bus_type;
 
 int ibmebus_register_driver(struct platform_driver *drv);
 void ibmebus_unregister_driver(struct platform_driver *drv);
index a4196ab1d0167cea664b345dbbcbf8b145d43da6..7b610864b3645eeee706a9be4e91ba8c782558f5 100644 (file)
@@ -97,7 +97,7 @@ DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
 
 static inline bool is_implicit_soft_masked(struct pt_regs *regs)
 {
-       if (regs->msr & MSR_PR)
+       if (user_mode(regs))
                return false;
 
        if (regs->nip >= (unsigned long)__end_soft_masked)
index d31a5ec1550d4b051dfce4c1680b3d62f43af3a7..1862f94335ee89c7c6bff8758b81ac380f506630 100644 (file)
@@ -22,6 +22,7 @@ struct pci_host_bridge;
 struct machdep_calls {
        const char      *name;
        const char      *compatible;
+       const char * const *compatibles;
 #ifdef CONFIG_PPC64
 #ifdef CONFIG_PM
        void            (*iommu_restore)(void);
index 3a07c62973aab6d7625d50051066ce2cc0929b3f..9203ff6acbf6811e56892cfc1cad6ee4acd794e8 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 
-extern struct bus_type macio_bus_type;
+extern const struct bus_type macio_bus_type;
 
 /* MacIO device driver is defined later */
 struct macio_driver;
@@ -126,7 +126,7 @@ static inline struct pci_dev *macio_get_pci_dev(struct macio_dev *mdev)
 struct macio_driver
 {
        int     (*probe)(struct macio_dev* dev, const struct of_device_id *match);
-       int     (*remove)(struct macio_dev* dev);
+       void    (*remove)(struct macio_dev *dev);
 
        int     (*suspend)(struct macio_dev* dev, pm_message_t state);
        int     (*resume)(struct macio_dev* dev);
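
With remove() returning void, a macio driver can no longer report failure
from teardown; the device is going away regardless. A sketch of an adapted
driver, with hypothetical names:

    static void example_macio_remove(struct macio_dev *mdev)
    {
            /* release resources only; there is no error code to return */
    }

    static struct macio_driver example_macio_driver = {
            .remove = example_macio_remove,
    };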
index 24241995f7406e7f30b3b0b2b8537fb3f59e85f1..3b72c7ed24cfdeec454af137d99d01c7f718f044 100644 (file)
 #define MMU_FTRS_POWER8                MMU_FTRS_POWER6
 #define MMU_FTRS_POWER9                MMU_FTRS_POWER6
 #define MMU_FTRS_POWER10       MMU_FTRS_POWER6
+#define MMU_FTRS_POWER11       MMU_FTRS_POWER6
 #define MMU_FTRS_CELL          MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
                                MMU_FTR_CI_LARGE_PAGE
 #define MMU_FTRS_PA6T          MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
index 58353c5bd3fba6e2b1549e05a6b4b8928b1c92af..0c03a98986cdea583ca5235d6e8a24c301e4a323 100644 (file)
@@ -336,7 +336,7 @@ struct mpic
 #endif
 };
 
-extern struct bus_type mpic_subsys;
+extern const struct bus_type mpic_subsys;
 
 /*
  * MPIC flags (passed to mpic_alloc)
index e667d455ecb418a5a96e13464512b0506defeb3e..1d58da9467396f268a1aac8b1d9461e570a97569 100644 (file)
@@ -163,9 +163,7 @@ struct paca_struct {
        u64 kstack;                     /* Saved Kernel stack addr */
        u64 saved_r1;                   /* r1 save for RTAS calls or PM or EE=0 */
        u64 saved_msr;                  /* MSR saved here by enter_rtas */
-#ifdef CONFIG_PPC64
        u64 exit_save_r1;               /* Syscall/interrupt R1 save */
-#endif
 #ifdef CONFIG_PPC_BOOK3E_64
        u16 trap_save;                  /* Used when bad stack is encountered */
 #endif
@@ -214,8 +212,6 @@ struct paca_struct {
        /* Non-maskable exceptions that are not performance critical */
        u64 exnmi[EX_SIZE];     /* used for system reset (nmi) */
        u64 exmc[EX_SIZE];      /* used for machine checks */
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
        /* Exclusive stacks for system reset and machine check exception. */
        void *nmi_emergency_sp;
        void *mc_emergency_sp;
index 9224f23065fff999768278a219542c37010e7325..239709a2f68eccd525f1d27370b43e6a8ed6edb7 100644 (file)
@@ -41,6 +41,8 @@ struct mm_struct;
 
 #ifndef __ASSEMBLY__
 
+#define PFN_PTE_SHIFT          PTE_RPN_SHIFT
+
 void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
                pte_t pte, unsigned int nr);
 #define set_ptes set_ptes
@@ -99,10 +101,6 @@ void poking_init(void);
 extern unsigned long ioremap_bot;
 extern const pgprot_t protection_map[16];
 
-#ifndef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_large(pmd)         0
-#endif
-
 /* can we use this in kvm */
 unsigned long vmalloc_to_phys(void *vmalloc_addr);
 
@@ -180,30 +178,6 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p)
 }
 #endif
 
-#ifndef pmd_is_leaf
-#define pmd_is_leaf pmd_is_leaf
-static inline bool pmd_is_leaf(pmd_t pmd)
-{
-       return false;
-}
-#endif
-
-#ifndef pud_is_leaf
-#define pud_is_leaf pud_is_leaf
-static inline bool pud_is_leaf(pud_t pud)
-{
-       return false;
-}
-#endif
-
-#ifndef p4d_is_leaf
-#define p4d_is_leaf p4d_is_leaf
-static inline bool p4d_is_leaf(p4d_t p4d)
-{
-       return false;
-}
-#endif
-
 #define pmd_pgtable pmd_pgtable
 static inline pgtable_t pmd_pgtable(pmd_t pmd)
 {
index e7792aa135105ab00f7a563c0520b093845eb330..1d1018c1e4820ce62f9d0526f8860c81e1057a9e 100644 (file)
 
 #ifdef CONFIG_PPC64_ELF_ABI_V2
 #define STK_GOT                24
-#define __STK_PARAM(i) (32 + ((i)-3)*8)
+#define STK_PARAM_AREA 32
 #else
 #define STK_GOT                40
-#define __STK_PARAM(i) (48 + ((i)-3)*8)
+#define STK_PARAM_AREA 48
 #endif
+
+#define __STK_PARAM(i) (STK_PARAM_AREA + ((i)-3)*8)
 #define STK_PARAM(i)   __STK_PARAM(__REG_##i)
 
 #ifdef CONFIG_PPC64_ELF_ABI_V2
@@ -506,7 +508,25 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
  */
 #define DCBT_BOOK3S_STOP_ALL_STREAM_IDS(scratch)       \
        lis     scratch,0x60000000@h;                   \
-       dcbt    0,scratch,0b01010
+       .machine push;                                  \
+       .machine power4;                                        \
+       dcbt    0,scratch,0b01010;                      \
+       .machine pop;
+
+#define DCBT_SETUP_STREAMS(from, from_parms, to, to_parms, scratch)    \
+       lis     scratch,0x8000; /* GO=1 */                              \
+       clrldi  scratch,scratch,32;                                     \
+       .machine push;                                                  \
+       .machine power4;                                                \
+       /* setup read stream 0 */                                       \
+       dcbt    0,from,0b01000;         /* addr from */                 \
+       dcbt    0,from_parms,0b01010;   /* length and depth from */     \
+       /* setup write stream 1 */                                      \
+       dcbtst  0,to,0b01000;           /* addr to */                   \
+       dcbtst  0,to_parms,0b01010;     /* length and depth to */       \
+       eieio;                                                          \
+       dcbt    0,scratch,0b01010;      /* all streams GO */            \
+       .machine pop;
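
DCBT_SETUP_STREAMS consolidates the prefetch-stream setup sequence that the
copy routines further down used to open-code, and the .machine push/pop
bracketing is needed because newer binutils reject the hinted dcbt/dcbtst
forms unless a CPU that implements them is selected. A hypothetical call
site, mirroring the copypage_power7 conversion below:

            ori     r10,r7,1        /* stream=1 in the "to" parms */
            DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8) /* r8 is scratch for GO */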
 
 /*
  * toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
index bb47af9054a9545f54bf5080a14bde235ef29a30..d3d1aea009b46b59135fc71c77b3b51576d1e997 100644 (file)
 #define PVR_HX_C2000   0x0066
 #define PVR_POWER9     0x004E
 #define PVR_POWER10    0x0080
+#define PVR_POWER11    0x0082
 #define PVR_BE         0x0070
 #define PVR_PA6T       0x0090
 
 #define PVR_ARCH_207   0x0f000004
 #define PVR_ARCH_300   0x0f000005
 #define PVR_ARCH_31    0x0f000006
+#define PVR_ARCH_31_P11        0x0f000007
 
 /* Macros for setting and retrieving special purpose registers */
 #ifndef __ASSEMBLY__
index a21f529c43d96b548690aeff283820ce07a6e4a1..9893d2001b6801db5b53d802454162d75748cbcf 100644 (file)
 
 #ifndef __ASSEMBLY__
 /* Performance Monitor Registers */
-#define mfpmr(rn)      ({unsigned int rval; \
-                       asm volatile("mfpmr %0," __stringify(rn) \
-                                    : "=r" (rval)); rval;})
-#define mtpmr(rn, v)   asm volatile("mtpmr " __stringify(rn) ",%0" : : "r" (v))
+static __always_inline unsigned int mfpmr(unsigned int rn)
+{
+       unsigned int rval;
+
+       asm (".machine push; "
+            ".machine e300; "
+            "mfpmr %[rval], %[rn];"
+            ".machine pop;"
+            : [rval] "=r" (rval) : [rn] "i" (rn));
+
+       return rval;
+}
+
+static __always_inline void mtpmr(unsigned int rn, unsigned int val)
+{
+       asm (".machine push; "
+            ".machine e300; "
+            "mtpmr %[rn], %[val];"
+            ".machine pop;"
+            : : [rn] "i" (rn), [val] "r" (val));
+}
 #endif /* __ASSEMBLY__ */
 
 /* Freescale Book E Performance Monitor APU Registers */
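
Turning the macros into __always_inline functions keeps rn foldable to the
immediate required by the "i" constraint, while the .machine e300 bracketing
satisfies newer binutils that reject mfpmr/mtpmr for the default target CPU.
A hedged usage sketch, borrowing PMRN_PMC0 from the register list this
header already provides:

    u32 count = mfpmr(PMRN_PMC0);   /* read performance monitor counter 0 */

    mtpmr(PMRN_PMC0, 0);            /* and reset it */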
index 7ebc807aa8cc85bc8ca6ac6a0c9ef13fa9de4fe6..9a025b776a4b3758a0cb35204687dea807987f55 100644 (file)
@@ -8,6 +8,7 @@
 #define SET_MEMORY_X   3
 #define SET_MEMORY_NP  4       /* Set memory non present */
 #define SET_MEMORY_P   5       /* Set memory present */
+#define SET_MEMORY_ROX 6
 
 int change_memory_attr(unsigned long addr, int numpages, long action);
 
@@ -41,4 +42,10 @@ static inline int set_memory_p(unsigned long addr, int numpages)
        return change_memory_attr(addr, numpages, SET_MEMORY_P);
 }
 
+static inline int set_memory_rox(unsigned long addr, int numpages)
+{
+       return change_memory_attr(addr, numpages, SET_MEMORY_ROX);
+}
+#define set_memory_rox set_memory_rox
+
 #endif
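
SET_MEMORY_ROX lets a caller flip a range to read-only-executable in a
single page-table walk rather than chaining set_memory_ro() and
set_memory_x(). A minimal sketch of the call pattern; the kprobes hunk
further down is the in-tree user:

    unsigned long addr = (unsigned long)page_address(page);
    int err = set_memory_rox(addr, 1);      /* RO + X in one pass */

    if (err)
            return err;     /* the helper can fail, so propagate it */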
index aaaa576d0e154edc270a4dd9ddd8bd6bb877ee4a..b77927ccb0ab00aae57cc95eac1158739dd3ce5e 100644 (file)
@@ -27,6 +27,7 @@
 
 extern int boot_cpuid;
 extern int boot_cpu_hwid; /* PPC64 only */
+extern int boot_core_hwid;
 extern int spinning_secondaries;
 extern u32 *cpu_to_phys_id;
 extern bool coregroup_enabled;
index aee25e3ebf96017a108fefa46ab95761901ccabc..fc933807ddc840ffa065fcbf3153b87d3357efbc 100644 (file)
@@ -48,6 +48,10 @@ static inline void disable_kernel_fp(void)
 #else
 static inline void save_fpu(struct task_struct *t) { }
 static inline void flush_fp_to_thread(struct task_struct *t) { }
+static inline void enable_kernel_fp(void)
+{
+       BUILD_BUG();
+}
 #endif
 
 #ifdef CONFIG_ALTIVEC
index b3de6102a90779739a598d9784ab9b55ab6e1ee0..1ca7d4c4b90dbf49cb7e002376fc5e73ad65a9ba 100644 (file)
@@ -19,6 +19,8 @@
 
 #include <linux/pagemap.h>
 
+static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
+                                         unsigned long address);
 #define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
 
 #define tlb_flush tlb_flush
index 82cc2c6704e6e94dde79e5fe98331ffb13e04681..d9ac3a4f46e1f86b0d0ec3ae6068de49f731217b 100644 (file)
@@ -267,6 +267,7 @@ TRACE_EVENT_FN(opal_exit,
 );
 #endif
 
+#ifdef CONFIG_PPC_64S_HASH_MMU
 TRACE_EVENT(hash_fault,
 
            TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap),
@@ -286,7 +287,7 @@ TRACE_EVENT(hash_fault,
            TP_printk("hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx",
                      __entry->addr, __entry->access, __entry->trap)
 );
-
+#endif
 
 TRACE_EVENT(tlbie,
 
index cc9b787627adbc2104fd9edf3e3f84f8456f0cfa..6faf2a9317552eeed67b5f942bc0a363a58db9a9 100644 (file)
@@ -39,7 +39,7 @@
  */
 #define VIO_CMO_MIN_ENT 1562624
 
-extern struct bus_type vio_bus_type;
+extern const struct bus_type vio_bus_type;
 
 struct iommu_table;
 
index 4c69ece52a31e5ef874e4694fc94b2b0f6ec42d5..59ed89890c902bca0011611733f2cc219795bb20 100644 (file)
@@ -7,14 +7,14 @@
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
 
 #define arch_vmap_pud_supported arch_vmap_pud_supported
-static inline bool arch_vmap_pud_supported(pgprot_t prot)
+static __always_inline bool arch_vmap_pud_supported(pgprot_t prot)
 {
        /* HPT does not cope with large pages in the vmalloc area */
        return radix_enabled();
 }
 
 #define arch_vmap_pmd_supported arch_vmap_pmd_supported
-static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+static __always_inline bool arch_vmap_pmd_supported(pgprot_t prot)
 {
        return radix_enabled();
 }
index 9f18fa090f1f1d08179cba6f39f7b832bbd7b95b..1691297a766a9c1a4df9384c4ff02ecd8ce21b92 100644 (file)
@@ -28,7 +28,6 @@
 #define __KVM_HAVE_PPC_SMT
 #define __KVM_HAVE_IRQCHIP
 #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_GUEST_DEBUG
 
 /* Not always available, but if it is, this is the correct offset.  */
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq {
 #define KVM_XIVE_TIMA_PAGE_OFFSET      0
 #define KVM_XIVE_ESB_PAGE_OFFSET       4
 
+/* for KVM_PPC_GET_PVINFO */
+
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
+
+struct kvm_ppc_pvinfo {
+       /* out */
+       __u32 flags;
+       __u32 hcall[4];
+       __u8  pad[108];
+};
+
+/* for KVM_PPC_GET_SMMU_INFO */
+#define KVM_PPC_PAGE_SIZES_MAX_SZ      8
+
+struct kvm_ppc_one_page_size {
+       __u32 page_shift;       /* Page shift (or 0) */
+       __u32 pte_enc;          /* Encoding in the HPTE (>>12) */
+};
+
+struct kvm_ppc_one_seg_page_size {
+       __u32 page_shift;       /* Base page shift of segment (or 0) */
+       __u32 slb_enc;          /* SLB encoding for BookS */
+       struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+#define KVM_PPC_PAGE_SIZES_REAL                0x00000001
+#define KVM_PPC_1T_SEGMENTS            0x00000002
+#define KVM_PPC_NO_HASH                        0x00000004
+
+struct kvm_ppc_smmu_info {
+       __u64 flags;
+       __u32 slb_size;
+       __u16 data_keys;        /* # storage keys supported for data */
+       __u16 instr_keys;       /* # storage keys supported for instructions */
+       struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+       __u64 flags;
+       __u32 shift;
+       __u32 pad;
+};
+
 #endif /* __LINUX_KVM_POWERPC_H */
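
These definitions land here presumably relocated from the generic uapi
header, so existing userspace keeps compiling unchanged. A hedged userspace
sketch of the pvinfo query, with error handling trimmed and vm_fd assumed to
be an open VM file descriptor:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    struct kvm_ppc_pvinfo pvinfo;

    if (ioctl(vm_fd, KVM_PPC_GET_PVINFO, &pvinfo) == 0 &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE))
            ;       /* the guest may use the ev_idle hcall */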
index 2919433be35574eb26e687b637c19e2529e88055..d3282fbea4f2f5c3733068e950a050cb97d70bb9 100644 (file)
@@ -55,7 +55,6 @@ CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
 endif
 
 KCSAN_SANITIZE_early_32.o := n
-KCSAN_SANITIZE_early_64.o := n
 KCSAN_SANITIZE_cputable.o := n
 KCSAN_SANITIZE_btext.o := n
 KCSAN_SANITIZE_paca.o := n
index 9f14d95b8b32fd38ce180b9abe082a42b1f51cfd..f029755f9e69af897dbf2bd456dd335e99e3ac04 100644 (file)
@@ -246,9 +246,7 @@ int main(void)
        OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
        OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
        OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default);
-#ifdef CONFIG_PPC64
        OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1);
-#endif
 #ifdef CONFIG_PPC_BOOK3E_64
        OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
 #endif
index 3ff9757df4c0723008b655830fbed89b4e078e57..98d4274a1b6bf935239c50e6e12c63b2ba03cac3 100644 (file)
@@ -60,6 +60,9 @@
                                 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
                                 PPC_FEATURE2_VEC_CRYPTO)
 
+#define COMMON_USER_POWER11    COMMON_USER_POWER10
+#define COMMON_USER2_POWER11   COMMON_USER2_POWER10
+
 static struct cpu_spec cpu_specs[] __initdata = {
        {       /* PPC970 */
                .pvr_mask               = 0xffff0000,
@@ -281,6 +284,20 @@ static struct cpu_spec cpu_specs[] __initdata = {
                .cpu_restore            = __restore_cpu_power10,
                .platform               = "power10",
        },
+       {       /* 3.1-compliant processor, i.e. Power11 "architected" mode */
+               .pvr_mask               = 0xffffffff,
+               .pvr_value              = 0x0f000007,
+               .cpu_name               = "Power11 (architected)",
+               .cpu_features           = CPU_FTRS_POWER11,
+               .cpu_user_features      = COMMON_USER_POWER11,
+               .cpu_user_features2     = COMMON_USER2_POWER11,
+               .mmu_features           = MMU_FTRS_POWER11,
+               .icache_bsize           = 128,
+               .dcache_bsize           = 128,
+               .cpu_setup              = __setup_cpu_power10,
+               .cpu_restore            = __restore_cpu_power10,
+               .platform               = "power11",
+       },
        {       /* Power7 */
                .pvr_mask               = 0xffff0000,
                .pvr_value              = 0x003f0000,
@@ -451,6 +468,23 @@ static struct cpu_spec cpu_specs[] __initdata = {
                .machine_check_early    = __machine_check_early_realmode_p10,
                .platform               = "power10",
        },
+       {       /* Power11 */
+               .pvr_mask               = 0xffff0000,
+               .pvr_value              = 0x00820000,
+               .cpu_name               = "Power11 (raw)",
+               .cpu_features           = CPU_FTRS_POWER11,
+               .cpu_user_features      = COMMON_USER_POWER11,
+               .cpu_user_features2     = COMMON_USER2_POWER11,
+               .mmu_features           = MMU_FTRS_POWER11,
+               .icache_bsize           = 128,
+               .dcache_bsize           = 128,
+               .num_pmcs               = 6,
+               .pmc_type               = PPC_PMC_IBM,
+               .cpu_setup              = __setup_cpu_power10,
+               .cpu_restore            = __restore_cpu_power10,
+               .machine_check_early    = __machine_check_early_realmode_p10,
+               .platform               = "power11",
+       },
        {       /* Cell Broadband Engine */
                .pvr_mask               = 0xffff0000,
                .pvr_value              = 0x00700000,
index c3fb9fdf5bd782e121571efd679e812a4540b2a5..af4263594eb2c93c5b1d1240acf4aae161c24edd 100644 (file)
@@ -458,6 +458,14 @@ static int __init feat_enable_mce_power10(struct dt_cpu_feature *f)
        return 1;
 }
 
+static int __init feat_enable_mce_power11(struct dt_cpu_feature *f)
+{
+       cur_cpu_spec->platform = "power11";
+       cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10;
+
+       return 1;
+}
+
 static int __init feat_enable_tm(struct dt_cpu_feature *f)
 {
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -648,8 +656,10 @@ static struct dt_cpu_feature_match __initdata
        {"pc-relative-addressing", feat_enable, 0},
        {"machine-check-power9", feat_enable_mce_power9, 0},
        {"machine-check-power10", feat_enable_mce_power10, 0},
+       {"machine-check-power11", feat_enable_mce_power11, 0},
        {"performance-monitor-power9", feat_enable_pmu_power9, 0},
        {"performance-monitor-power10", feat_enable_pmu_power10, 0},
+       {"performance-monitor-power11", feat_enable_pmu_power10, 0},
        {"event-based-branch-v3", feat_enable, 0},
        {"random-number-generator", feat_enable, 0},
        {"system-call-vectored", feat_disable, 0},
index 6f7d4edaa0bc1518c432eb2bfd98064fb37e0ecb..7504ceec5c58c782754113dc1e476d1b22dd8ccb 100644 (file)
@@ -284,15 +284,14 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
 void __do_IRQ(struct pt_regs *regs)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
-       void *cursp, *irqsp, *sirqsp;
+       void *cursp, *irqsp;
 
        /* Switch to the irq stack to handle this */
        cursp = (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
        irqsp = hardirq_ctx[raw_smp_processor_id()];
-       sirqsp = softirq_ctx[raw_smp_processor_id()];
 
        /* Already there ? If not switch stack and call */
-       if (unlikely(cursp == irqsp || cursp == sirqsp))
+       if (unlikely(cursp == irqsp))
                __do_irq(regs, current_stack_pointer);
        else
                call_do_irq(regs, irqsp);
index b20ee72e873a1b881283ded2c8ffef94acf187f7..bbca90a5e2ec07a59903b87bf2d50f6a194235fd 100644 (file)
@@ -134,10 +134,16 @@ void *alloc_insn_page(void)
        if (!page)
                return NULL;
 
-       if (strict_module_rwx_enabled())
-               set_memory_rox((unsigned long)page, 1);
+       if (strict_module_rwx_enabled()) {
+               int err = set_memory_rox((unsigned long)page, 1);
 
+               if (err)
+                       goto error;
+       }
        return page;
+error:
+       module_memfree(page);
+       return NULL;
 }
 
 int arch_prepare_kprobe(struct kprobe *p)
index 0b5878c3125b1cd67ff3f44cedf83d514478c262..1dc32a05815612b4fb45f64f163f6eea94438fe5 100644 (file)
@@ -151,6 +151,9 @@ static void __init move_device_tree(void)
  * pa-features property is missing, or a 1/0 to indicate if the feature
  * is supported/not supported.  Note that the bit numbers are
  * big-endian to match the definition in PAPR.
+ * Note: the 'clear' flag clears the feature if the bit is set in the
+ * ibm,pa/pi-features property; it does not set the feature if the
+ * bit is clear.
  */
 struct ibm_feature {
        unsigned long   cpu_features;   /* CPU_FTR_xxx bit */
@@ -159,7 +162,7 @@ struct ibm_feature {
        unsigned int    cpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */
        unsigned char   pabyte;         /* byte number in ibm,pa/pi-features */
        unsigned char   pabit;          /* bit number (big-endian) */
-       unsigned char   invert;         /* if 1, pa bit set => clear feature */
+       unsigned char   clear;          /* if 1, pa bit set => clear feature */
 };
 
 static struct ibm_feature ibm_pa_features[] __initdata = {
@@ -193,6 +196,7 @@ static struct ibm_feature ibm_pa_features[] __initdata = {
  */
 static struct ibm_feature ibm_pi_features[] __initdata = {
        { .pabyte = 0, .pabit = 3, .mmu_features  = MMU_FTR_NX_DSI },
+       { .pabyte = 0, .pabit = 4, .cpu_features  = CPU_FTR_DBELL, .clear = 1 },
 };
 
 static void __init scan_features(unsigned long node, const unsigned char *ftrs,
@@ -220,12 +224,12 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs,
                if (fp->pabyte >= ftrs[0])
                        continue;
                bit = (ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1;
-               if (bit ^ fp->invert) {
+               if (bit && !fp->clear) {
                        cur_cpu_spec->cpu_features |= fp->cpu_features;
                        cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
                        cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2;
                        cur_cpu_spec->mmu_features |= fp->mmu_features;
-               } else {
+               } else if (bit == fp->clear) {
                        cur_cpu_spec->cpu_features &= ~fp->cpu_features;
                        cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
                        cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2;
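
Renaming 'invert' to 'clear' changes semantics, not just the name: a
clear-type entry can only remove a feature, never grant one. The behaviour
of the new conditionals, read straight from the hunk above:

    /*
     *   bit  clear   action
     *    1     0     set the feature
     *    0     0     clear the feature
     *    1     1     clear the feature   (the CPU_FTR_DBELL entry below)
     *    0     1     leave it untouched  (old 'invert' would have set it)
     */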
@@ -368,13 +372,31 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
        if (found < 0)
                return 0;
 
-       DBG("boot cpu: logical %d physical %d\n", found,
-           be32_to_cpu(intserv[found_thread]));
        boot_cpuid = found;
 
        if (IS_ENABLED(CONFIG_PPC64))
                boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
 
+       if (nr_cpu_ids % nthreads != 0) {
+               set_nr_cpu_ids(ALIGN(nr_cpu_ids, nthreads));
+               pr_warn("nr_cpu_ids was not a multiple of threads_per_core, adjusted to %d\n",
+                       nr_cpu_ids);
+       }
+
+       if (boot_cpuid >= nr_cpu_ids) {
+               // Remember boot core for smp_setup_cpu_maps()
+               boot_core_hwid = be32_to_cpu(intserv[0]);
+
+               pr_warn("Boot CPU %d (core hwid %d) >= nr_cpu_ids, adjusted boot CPU to %d\n",
+                       boot_cpuid, boot_core_hwid, found_thread);
+
+               // Adjust boot CPU to appear on logical core 0
+               boot_cpuid = found_thread;
+       }
+
+       DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
+           be32_to_cpu(intserv[found_thread]));
+
        /*
         * PAPR defines "logical" PVR values for cpus that
         * meet various levels of the architecture:
index e67effdba85cc03096132223ad9c4d7dbc698f58..0ef358285337412f14dcf073282bca863dc88949 100644 (file)
@@ -947,7 +947,7 @@ struct option_vector7 {
 } __packed;
 
 struct ibm_arch_vec {
-       struct { __be32 mask, val; } pvrs[14];
+       struct { __be32 mask, val; } pvrs[16];
 
        u8 num_vectors;
 
@@ -1007,6 +1007,14 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
                        .mask = cpu_to_be32(0xffff0000), /* POWER10 */
                        .val  = cpu_to_be32(0x00800000),
                },
+               {
+                       .mask = cpu_to_be32(0xffff0000), /* POWER11 */
+                       .val  = cpu_to_be32(0x00820000),
+               },
+               {
+                       .mask = cpu_to_be32(0xffffffff), /* P11 compliant */
+                       .val  = cpu_to_be32(0x0f000007),
+               },
                {
                        .mask = cpu_to_be32(0xffffffff), /* all 3.1-compliant */
                        .val  = cpu_to_be32(0x0f000006),
index f9af305d9579dc0c359f84cfc3fee9dbd3e1fb80..9e0efb657f3937a3550d7625ba8558e49cdc8a07 100644 (file)
@@ -32,8 +32,10 @@ bool is_ppc_secureboot_enabled(void)
        if (enabled)
                goto out;
 
-       if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot))
+       node = of_find_node_by_path("/");
+       if (!of_property_read_u32(node, "ibm,secure-boot", &secureboot))
                enabled = (secureboot > 1);
+       of_node_put(node);
 
 out:
        pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled");
@@ -54,8 +56,10 @@ bool is_ppc_trustedboot_enabled(void)
        if (enabled)
                goto out;
 
-       if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot))
+       node = of_find_node_by_path("/");
+       if (!of_property_read_u32(node, "ibm,trusted-boot", &trustedboot))
                enabled = (trustedboot > 0);
+       of_node_put(node);
 
 out:
        pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled");
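
Both hunks swap the bare of_root global for a counted reference, the
refcount-safe idiom the DT core now prefers. A sketch of the pattern with
illustrative variable names:

    struct device_node *root = of_find_node_by_path("/");  /* takes a ref */
    u32 val;
    bool enabled = false;

    if (root && !of_property_read_u32(root, "ibm,secure-boot", &val))
            enabled = (val > 1);
    of_node_put(root);      /* of_node_put(NULL) is a harmless no-op */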
index 9b142b9d5187b29297bd498fb3d7f3e781269685..2add292da49432ee8db5ff5bad9b70871d7f4f48 100644 (file)
@@ -85,6 +85,7 @@ EXPORT_SYMBOL(machine_id);
 
 int boot_cpuid = -1;
 EXPORT_SYMBOL_GPL(boot_cpuid);
+int __initdata boot_core_hwid = -1;
 
 #ifdef CONFIG_PPC64
 int boot_cpu_hwid = -1;
@@ -109,7 +110,7 @@ int ppc_do_canonicalize_irqs;
 EXPORT_SYMBOL(ppc_do_canonicalize_irqs);
 #endif
 
-#ifdef CONFIG_CRASH_CORE
+#ifdef CONFIG_VMCORE_INFO
 /* This keeps a track of which one is the crashing cpu. */
 int crashing_cpu = -1;
 #endif
@@ -411,6 +412,25 @@ static void __init cpu_init_thread_core_maps(int tpc)
 
 u32 *cpu_to_phys_id = NULL;
 
+static int assign_threads(unsigned int cpu, unsigned int nthreads, bool present,
+                         const __be32 *hw_ids)
+{
+       for (int i = 0; i < nthreads && cpu < nr_cpu_ids; i++) {
+               __be32 hwid;
+
+               hwid = be32_to_cpu(hw_ids[i]);
+
+               DBG("    thread %d -> cpu %d (hard id %d)\n", i, cpu, hwid);
+
+               set_cpu_present(cpu, present);
+               set_cpu_possible(cpu, true);
+               cpu_to_phys_id[cpu] = hwid;
+               cpu++;
+       }
+
+       return cpu;
+}
+
 /**
  * setup_cpu_maps - initialize the following cpu maps:
  *                  cpu_possible_mask
@@ -446,7 +466,7 @@ void __init smp_setup_cpu_maps(void)
        for_each_node_by_type(dn, "cpu") {
                const __be32 *intserv;
                __be32 cpu_be;
-               int j, len;
+               int len;
 
                DBG("  * %pOF...\n", dn);
 
@@ -468,27 +488,31 @@ void __init smp_setup_cpu_maps(void)
 
                nthreads = len / sizeof(int);
 
-               for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
-                       bool avail;
-
-                       DBG("    thread %d -> cpu %d (hard id %d)\n",
-                           j, cpu, be32_to_cpu(intserv[j]));
+               bool avail = of_device_is_available(dn);
+               if (!avail)
+                       avail = !of_property_match_string(dn,
+                                       "enable-method", "spin-table");
 
-                       avail = of_device_is_available(dn);
-                       if (!avail)
-                               avail = !of_property_match_string(dn,
-                                               "enable-method", "spin-table");
-
-                       set_cpu_present(cpu, avail);
-                       set_cpu_possible(cpu, true);
-                       cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]);
-                       cpu++;
-               }
+               if (boot_core_hwid >= 0) {
+                       if (cpu == 0) {
+                               pr_info("Skipping CPU node %pOF to allow for boot core.\n", dn);
+                               cpu = nthreads;
+                               continue;
+                       }
 
-               if (cpu >= nr_cpu_ids) {
+                       if (be32_to_cpu(intserv[0]) == boot_core_hwid) {
+                               pr_info("Renumbered boot core %pOF to logical 0\n", dn);
+                               assign_threads(0, nthreads, avail, intserv);
+                               of_node_put(dn);
+                               break;
+                       }
+               } else if (cpu >= nr_cpu_ids) {
                        of_node_put(dn);
                        break;
                }
+
+               if (cpu < nr_cpu_ids)
+                       cpu = assign_threads(cpu, nthreads, avail, intserv);
        }
 
        /* If no SMT supported, nthreads is forced to 1 */
@@ -616,6 +640,8 @@ static __init void probe_machine(void)
                DBG("  %s ...\n", machine_id->name);
                if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible))
                        continue;
+               if (machine_id->compatibles && !of_machine_compatible_match(machine_id->compatibles))
+                       continue;
                memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls));
                if (ppc_md.probe && !ppc_md.probe())
                        continue;
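
The new compatibles field lets a single machdep description claim several
board strings instead of registering near-duplicate entries. A hypothetical
platform using it:

    static const char * const example_compatibles[] = {
            "vendor,board-rev-a",
            "vendor,board-rev-b",
            NULL,   /* of_machine_compatible_match() stops at the terminator */
    };

    define_machine(example_board) {
            .name           = "ExampleBoard",
            .compatibles    = example_compatibles,
    };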
index 77fedb190c936c77eb6dcdf872721caac82b7368..f6f868e817e636f7c1327feb13935ecff49d57d5 100644 (file)
@@ -31,7 +31,7 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
        user_exit_irqoff();
 
        BUG_ON(regs_is_unrecoverable(regs));
-       BUG_ON(!(regs->msr & MSR_PR));
+       BUG_ON(!user_mode(regs));
        BUG_ON(arch_irq_disabled_regs(regs));
 
 #ifdef CONFIG_PPC_PKEY
index 11e062b47d3f80681cb0362f85cfa7ca7fcda7f6..f23430adb68ad7fecb70a31e1443e8a02d75f824 100644 (file)
@@ -404,7 +404,7 @@ noinstr void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
                return;
        if (!(regs->msr & MSR_HV))
                return;
-       if (regs->msr & MSR_PR)
+       if (user_mode(regs))
                return;
 
        /*
@@ -1510,7 +1510,7 @@ static void do_program_check(struct pt_regs *regs)
                if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
                        bugaddr += PAGE_OFFSET;
 
-               if (!(regs->msr & MSR_PR) &&  /* not user-mode */
+               if (!user_mode(regs) &&
                    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
                        regs_add_return_ip(regs, 4);
                        return;
index 0c2abe7f99087495098d37be69041ef698819dc2..91e96f5168b753ea8e358c8820ccfdc368bb8004 100644 (file)
@@ -8,6 +8,7 @@ obj-y                           += core.o crash.o core_$(BITS).o
 obj-$(CONFIG_PPC32)            += relocate_32.o
 
 obj-$(CONFIG_KEXEC_FILE)       += file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o
+obj-$(CONFIG_VMCORE_INFO)      += vmcore_info.o
 
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_core_$(BITS).o := n
index 27fa9098a5b74be8a55b7ad8a51830674628dbd4..3ff4411ed49671baf2a6834d611d20346622f57b 100644 (file)
@@ -53,34 +53,6 @@ void machine_kexec_cleanup(struct kimage *image)
 {
 }
 
-void arch_crash_save_vmcoreinfo(void)
-{
-
-#ifdef CONFIG_NUMA
-       VMCOREINFO_SYMBOL(node_data);
-       VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
-#ifndef CONFIG_NUMA
-       VMCOREINFO_SYMBOL(contig_page_data);
-#endif
-#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
-       VMCOREINFO_SYMBOL(vmemmap_list);
-       VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
-       VMCOREINFO_SYMBOL(mmu_psize_defs);
-       VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
-       VMCOREINFO_OFFSET(vmemmap_backing, list);
-       VMCOREINFO_OFFSET(vmemmap_backing, phys);
-       VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
-       VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
-       VMCOREINFO_OFFSET(mmu_psize_def, shift);
-#endif
-       VMCOREINFO_SYMBOL(cur_cpu_spec);
-       VMCOREINFO_OFFSET(cpu_spec, cpu_features);
-       VMCOREINFO_OFFSET(cpu_spec, mmu_features);
-       vmcoreinfo_append_str("NUMBER(RADIX_MMU)=%d\n", early_radix_enabled());
-       vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
-}
-
 /*
  * Do not allocate memory (or fail in any way) in machine_kexec().
  * We are past the point of no return, committed to rebooting now.
index fb3e12f1521441fd9de9c84c3863a719dcb6c62b..33b780049aaf9331e798d9a660d963e583ab1a68 100644 (file)
@@ -385,14 +385,16 @@ int add_opal_mem_range(struct crash_mem **mem_ranges)
 int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
 {
        int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;
+       struct device_node *root = of_find_node_by_path("/");
        const __be32 *prop;
 
-       prop = of_get_property(of_root, "reserved-ranges", &len);
+       prop = of_get_property(root, "reserved-ranges", &len);
+       n_mem_addr_cells = of_n_addr_cells(root);
+       n_mem_size_cells = of_n_size_cells(root);
+       of_node_put(root);
        if (!prop)
                return 0;
 
-       n_mem_addr_cells = of_n_addr_cells(of_root);
-       n_mem_size_cells = of_n_size_cells(of_root);
        cells = n_mem_addr_cells + n_mem_size_cells;
 
        /* Each reserved range is an (address,size) pair */
index d9f0dd9b34ffbfcb9cc6274bed12dfa3886ebef1..104c9911f40611960b8d6b077553920f4cfb9d66 100644 (file)
@@ -8,6 +8,7 @@
  *             Author: Suzuki Poulose <suzuki@in.ibm.com>
  */
 
+#include <linux/objtool.h>
 #include <asm/reg.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
@@ -349,6 +350,7 @@ write_utlb:
        cmpwi   r10, PPC47x_TLB0_4K
        bne     0f
        li      r10, 0x1000                     /* r10 = 4k */
+       ANNOTATE_INTRA_FUNCTION_CALL
        bl      1f
 
 0:
diff --git a/arch/powerpc/kexec/vmcore_info.c b/arch/powerpc/kexec/vmcore_info.c
new file mode 100644 (file)
index 0000000..2b65d2a
--- /dev/null
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/vmcore_info.h>
+#include <asm/pgalloc.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+
+#ifdef CONFIG_NUMA
+       VMCOREINFO_SYMBOL(node_data);
+       VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifndef CONFIG_NUMA
+       VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+       VMCOREINFO_SYMBOL(vmemmap_list);
+       VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
+       VMCOREINFO_SYMBOL(mmu_psize_defs);
+       VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
+       VMCOREINFO_OFFSET(vmemmap_backing, list);
+       VMCOREINFO_OFFSET(vmemmap_backing, phys);
+       VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
+       VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
+       VMCOREINFO_OFFSET(mmu_psize_def, shift);
+#endif
+       VMCOREINFO_SYMBOL(cur_cpu_spec);
+       VMCOREINFO_OFFSET(cpu_spec, cpu_features);
+       VMCOREINFO_OFFSET(cpu_spec, mmu_features);
+       vmcoreinfo_append_str("NUMBER(RADIX_MMU)=%d\n", early_radix_enabled());
+       vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+}
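
Carving the function out into vmcore_info.c pairs with the kexec Makefile
hunk above: the vmcoreinfo note is now built under CONFIG_VMCORE_INFO rather
than being dragged in with kexec proper. Each helper appends one
self-describing line to the note that tools such as makedumpfile read from
/proc/vmcore, roughly:

    VMCOREINFO_SYMBOL(vmemmap_list);          /* "SYMBOL(vmemmap_list)=<hex address>" */
    VMCOREINFO_OFFSET(mmu_psize_def, shift);  /* "OFFSET(mmu_psize_def.shift)=<byte offset>" */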
index 074263429faf2e49b516fb1ef1ac3544b902febe..dbfdc126bf1440b463ea509ed9f832617c4e35f3 100644 (file)
@@ -22,7 +22,6 @@ config KVM
        select KVM_COMMON
        select HAVE_KVM_VCPU_ASYNC_IOCTL
        select KVM_VFIO
-       select IRQ_BYPASS_MANAGER
        select HAVE_KVM_IRQ_BYPASS
 
 config KVM_BOOK3S_HANDLER
index 4a1abb9f7c05818fda0c2504a4263e7794311935..408d98f8a51479364de60bfd3e8d0e3f5902d0c3 100644 (file)
@@ -503,7 +503,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
        for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
                if (!pmd_present(*p))
                        continue;
-               if (pmd_is_leaf(*p)) {
+               if (pmd_leaf(*p)) {
                        if (full) {
                                pmd_clear(p);
                        } else {
@@ -532,7 +532,7 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
        for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
                if (!pud_present(*p))
                        continue;
-               if (pud_is_leaf(*p)) {
+               if (pud_leaf(*p)) {
                        pud_clear(p);
                } else {
                        pmd_t *pmd;
@@ -635,12 +635,12 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
                new_pud = pud_alloc_one(kvm->mm, gpa);
 
        pmd = NULL;
-       if (pud && pud_present(*pud) && !pud_is_leaf(*pud))
+       if (pud && pud_present(*pud) && !pud_leaf(*pud))
                pmd = pmd_offset(pud, gpa);
        else if (level <= 1)
                new_pmd = kvmppc_pmd_alloc();
 
-       if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+       if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_leaf(*pmd)))
                new_ptep = kvmppc_pte_alloc();
 
        /* Check if we might have been invalidated; let the guest retry if so */
@@ -658,7 +658,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
                new_pud = NULL;
        }
        pud = pud_offset(p4d, gpa);
-       if (pud_is_leaf(*pud)) {
+       if (pud_leaf(*pud)) {
                unsigned long hgpa = gpa & PUD_MASK;
 
                /* Check if we raced and someone else has set the same thing */
@@ -709,7 +709,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
                new_pmd = NULL;
        }
        pmd = pmd_offset(pud, gpa);
-       if (pmd_is_leaf(*pmd)) {
+       if (pmd_leaf(*pmd)) {
                unsigned long lgpa = gpa & PMD_MASK;
 
                /* Check if we raced and someone else has set the same thing */
index 0b921704da45eb6b718cac8f031c5d0c45176746..8e86eb577eb8e1462bc04e46fd23416df2c455a3 100644 (file)
@@ -444,6 +444,7 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
                        guest_pcr_bit = PCR_ARCH_300;
                        break;
                case PVR_ARCH_31:
+               case PVR_ARCH_31_P11:
                        guest_pcr_bit = PCR_ARCH_31;
                        break;
                default:
index 5c375ec1a3c6084be948ccc41e803f8e6da14dc2..05f5220960c63bfccf930ccb868648fc9361a204 100644 (file)
@@ -55,7 +55,7 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
        hr->dawrx1 = vcpu->arch.dawrx1;
 }
 
-/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
+/* Use noinline_for_stack due to https://llvm.org/pr49610 */
 static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
 {
        unsigned long *addr = (unsigned long *) regs;
index 23407fbd73c9346e05113db812c6897e2a33e7e7..d32abe7fe6ab79ea81a4583126b036a24e01d4b0 100644 (file)
@@ -2538,9 +2538,8 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
                vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry);
 }
 
-int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
 {
        if (kvm->arch.kvm_ops->create_vm_debugfs)
                kvm->arch.kvm_ops->create_vm_debugfs(kvm);
-       return 0;
 }
index 6eac63e79a89957ae98337f407b33db3f7beff93..0ab65eeb93ee3a73346a2d9e7b4e5617f6b40ed3 100644 (file)
@@ -76,7 +76,7 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
 obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
 
 obj-$(CONFIG_ALTIVEC)  += xor_vmx.o xor_vmx_glue.o
-CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
+CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec)
 # Enable <altivec.h>
 CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include)
 
index a783973f1215b526d136c49254102520ffb67a7b..07e7cec4d135fd5648ab9a0d7d93827440ebb2ac 100644 (file)
@@ -27,17 +27,7 @@ _GLOBAL(copypage_power7)
 #endif
        ori     r10,r7,1        /* stream=1 */
 
-       lis     r8,0x8000       /* GO=1 */
-       clrldi  r8,r8,32
-
-       /* setup read stream 0  */
-       dcbt    0,r4,0b01000    /* addr from */
-       dcbt    0,r7,0b01010   /* length and depth from */
-       /* setup write stream 1 */
-       dcbtst  0,r9,0b01000   /* addr to */
-       dcbtst  0,r10,0b01010  /* length and depth to */
-       eieio
-       dcbt    0,r8,0b01010    /* all streams GO */
+       DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8)
 
 #ifdef CONFIG_ALTIVEC
        mflr    r0
index ac41053c3a5af09549a80db401ea5e830dc7c8fe..8474c682a17849a29a3dfcb3af6544fc1e8909c6 100644 (file)
@@ -298,17 +298,7 @@ err1;      stb     r0,0(r3)
        or      r7,r7,r0
        ori     r10,r7,1        /* stream=1 */
 
-       lis     r8,0x8000       /* GO=1 */
-       clrldi  r8,r8,32
-
-       /* setup read stream 0 */
-       dcbt    0,r6,0b01000   /* addr from */
-       dcbt    0,r7,0b01010   /* length and depth from */
-       /* setup write stream 1 */
-       dcbtst  0,r9,0b01000   /* addr to */
-       dcbtst  0,r10,0b01010  /* length and depth to */
-       eieio
-       dcbt    0,r8,0b01010    /* all streams GO */
+       DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
 
        beq     cr1,.Lunwind_stack_nonvmx_copy
 
index 9398b2b746c4dacad1e1fc24c85f5f8fa25b22b1..b7c5e7fca8b9f5fb2fc46e7acb9d817c74b4ac24 100644 (file)
@@ -244,15 +244,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
        or      r7,r7,r0
        ori     r10,r7,1        /* stream=1 */
 
-       lis     r8,0x8000       /* GO=1 */
-       clrldi  r8,r8,32
-
-       dcbt    0,r6,0b01000
-       dcbt    0,r7,0b01010
-       dcbtst  0,r9,0b01000
-       dcbtst  0,r10,0b01010
-       eieio
-       dcbt    0,r8,0b01010    /* GO */
+       DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
 
        beq     cr1,.Lunwind_stack_nonvmx_copy
 
index 5766180f5380a6f2b8f90c4f31de3019991bf65f..e65f3fb68d06ba34becf08157e63697fe1561050 100644 (file)
@@ -1429,7 +1429,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
                        return 1;
 
                case 18:        /* rfid, scary */
-                       if (regs->msr & MSR_PR)
+                       if (user_mode(regs))
                                goto priv;
                        op->type = RFI;
                        return 0;
@@ -1742,13 +1742,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
                        return 1;
 #endif
                case 83:        /* mfmsr */
-                       if (regs->msr & MSR_PR)
+                       if (user_mode(regs))
                                goto priv;
                        op->type = MFMSR;
                        op->reg = rd;
                        return 0;
                case 146:       /* mtmsr */
-                       if (regs->msr & MSR_PR)
+                       if (user_mode(regs))
                                goto priv;
                        op->type = MTMSR;
                        op->reg = rd;
@@ -1756,7 +1756,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
                        return 0;
 #ifdef CONFIG_PPC64
                case 178:       /* mtmsrd */
-                       if (regs->msr & MSR_PR)
+                       if (user_mode(regs))
                                goto priv;
                        op->type = MTMSR;
                        op->reg = rd;
@@ -3437,14 +3437,14 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
                 * stored in the thread_struct.  If the instruction is in
                 * the kernel, we must not touch the state in the thread_struct.
                 */
-               if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+               if (!user_mode(regs) && !(regs->msr & MSR_FP))
                        return 0;
                err = do_fp_load(op, ea, regs, cross_endian);
                break;
 #endif
 #ifdef CONFIG_ALTIVEC
        case LOAD_VMX:
-               if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+               if (!user_mode(regs) && !(regs->msr & MSR_VEC))
                        return 0;
                err = do_vec_load(op->reg, ea, size, regs, cross_endian);
                break;
@@ -3459,7 +3459,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
                 */
                if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
                        msrbit = MSR_VEC;
-               if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+               if (!user_mode(regs) && !(regs->msr & msrbit))
                        return 0;
                err = do_vsx_load(op, ea, regs, cross_endian);
                break;
@@ -3495,8 +3495,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
                }
 #endif
                if ((op->type & UPDATE) && size == sizeof(long) &&
-                   op->reg == 1 && op->update_reg == 1 &&
-                   !(regs->msr & MSR_PR) &&
+                   op->reg == 1 && op->update_reg == 1 && !user_mode(regs) &&
                    ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) {
                        err = handle_stack_update(ea, regs);
                        break;
@@ -3508,14 +3507,14 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
 
 #ifdef CONFIG_PPC_FPU
        case STORE_FP:
-               if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+               if (!user_mode(regs) && !(regs->msr & MSR_FP))
                        return 0;
                err = do_fp_store(op, ea, regs, cross_endian);
                break;
 #endif
 #ifdef CONFIG_ALTIVEC
        case STORE_VMX:
-               if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+               if (!user_mode(regs) && !(regs->msr & MSR_VEC))
                        return 0;
                err = do_vec_store(op->reg, ea, size, regs, cross_endian);
                break;
@@ -3530,7 +3529,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
                 */
                if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
                        msrbit = MSR_VEC;
-               if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+               if (!user_mode(regs) && !(regs->msr & msrbit))
                        return 0;
                err = do_vsx_store(op, ea, regs, cross_endian);
                break;
index c0fabe6c5a12d719f55162698bbace9d38e11f54..15d6f3ea717878707cbdcc4119a1c29d8b072be0 100644 (file)
@@ -59,16 +59,13 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 
        rflags = htab_convert_pte_flags(new_pmd, flags);
 
-#if 0
-       if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+       /*
+        * THPs are only supported on platforms that can do mixed page size
+        * segments (MPSS) and all such platforms have coherent icache. Hence we
+        * don't need to do lazy icache flush (hash_page_do_lazy_icache()) on
+        * noexecute fault.
+        */
 
-               /*
-                * No CPU has hugepages but lacks no execute, so we
-                * don't need to worry about that case
-                */
-               rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
-       }
-#endif
        /*
         * Find the slot index details for this ea, using base page size.
         */
index 0626a25b0d728be1a33aa36012327f1ddc71374f..01c3b4b652410904a06fb45721233ee84d6fbe3b 100644 (file)
@@ -2172,7 +2172,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
                                     mmu_kernel_ssize, 0);
 }
 
-void hash__kernel_map_pages(struct page *page, int numpages, int enable)
+int hash__kernel_map_pages(struct page *page, int numpages, int enable)
 {
        unsigned long flags, vaddr, lmi;
        int i;
@@ -2189,6 +2189,7 @@ void hash__kernel_map_pages(struct page *page, int numpages, int enable)
                        kernel_unmap_linear_page(vaddr, lmi);
        }
        local_irq_restore(flags);
+       return 0;
 }
 #endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */
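
Returning int lets a failed linear-map update propagate instead of being
dropped on the floor, and it matches the radix side, whose open-coded helper
is deleted below in favour of the generic set_memory path. A sketch of that
replacement path, under the assumption that the common code now calls
set_memory_p()/set_memory_np() directly; the function name here is
illustrative:

    static int map_debug_pages(struct page *page, int numpages, int enable)
    {
            unsigned long addr = (unsigned long)page_address(page);

            return enable ? set_memory_p(addr, numpages)
                          : set_memory_np(addr, numpages);
    }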
 
index 3438ab72c346b8c09ed2340ca3792f6b85c5b172..83823db3488b98deac4fda1b324dddbb10ec5947 100644 (file)
@@ -113,7 +113,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 
        WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
        assert_spin_locked(pmd_lockptr(mm, pmdp));
-       WARN_ON(!(pmd_large(pmd)));
+       WARN_ON(!(pmd_leaf(pmd)));
 #endif
        trace_hugepage_set_pmd(addr, pmd_val(pmd));
        return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
@@ -130,7 +130,7 @@ void set_pud_at(struct mm_struct *mm, unsigned long addr,
 
        WARN_ON(pte_hw_valid(pud_pte(*pudp)));
        assert_spin_locked(pud_lockptr(mm, pudp));
-       WARN_ON(!(pud_large(pud)));
+       WARN_ON(!(pud_leaf(pud)));
 #endif
        trace_hugepage_set_pud(addr, pud_val(pud));
        return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud));
index c6a4ac766b2bf95801533ff917130407be646cf2..15e88f1439ec207e762217a1f09f2f9ffa582b6a 100644 (file)
@@ -204,14 +204,14 @@ static void radix__change_memory_range(unsigned long start, unsigned long end,
                pudp = pud_alloc(&init_mm, p4dp, idx);
                if (!pudp)
                        continue;
-               if (pud_is_leaf(*pudp)) {
+               if (pud_leaf(*pudp)) {
                        ptep = (pte_t *)pudp;
                        goto update_the_pte;
                }
                pmdp = pmd_alloc(&init_mm, pudp, idx);
                if (!pmdp)
                        continue;
-               if (pmd_is_leaf(*pmdp)) {
+               if (pmd_leaf(*pmdp)) {
                        ptep = pmdp_ptep(pmdp);
                        goto update_the_pte;
                }
@@ -767,7 +767,7 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
                if (!pmd_present(*pmd))
                        continue;
 
-               if (pmd_is_leaf(*pmd)) {
+               if (pmd_leaf(*pmd)) {
                        if (IS_ALIGNED(addr, PMD_SIZE) &&
                            IS_ALIGNED(next, PMD_SIZE)) {
                                if (!direct)
@@ -807,7 +807,7 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
                if (!pud_present(*pud))
                        continue;
 
-               if (pud_is_leaf(*pud)) {
+               if (pud_leaf(*pud)) {
                        if (!IS_ALIGNED(addr, PUD_SIZE) ||
                            !IS_ALIGNED(next, PUD_SIZE)) {
                                WARN_ONCE(1, "%s: unaligned range\n", __func__);
@@ -845,7 +845,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct,
                if (!p4d_present(*p4d))
                        continue;
 
-               if (p4d_is_leaf(*p4d)) {
+               if (p4d_leaf(*p4d)) {
                        if (!IS_ALIGNED(addr, P4D_SIZE) ||
                            !IS_ALIGNED(next, P4D_SIZE)) {
                                WARN_ONCE(1, "%s: unaligned range\n", __func__);
@@ -924,7 +924,7 @@ bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
 int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
                                unsigned long addr, unsigned long next)
 {
-       int large = pmd_large(*pmdp);
+       int large = pmd_leaf(*pmdp);
 
        if (large)
                vmemmap_verify(pmdp_ptep(pmdp), node, addr, next);
@@ -1339,20 +1339,6 @@ void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
 #endif
 #endif
 
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
-void radix__kernel_map_pages(struct page *page, int numpages, int enable)
-{
-       unsigned long addr;
-
-       addr = (unsigned long)page_address(page);
-
-       if (enable)
-               set_memory_p(addr, numpages);
-       else
-               set_memory_np(addr, numpages);
-}
-#endif
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
@@ -1554,7 +1540,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 
 int pud_clear_huge(pud_t *pud)
 {
-       if (pud_is_leaf(*pud)) {
+       if (pud_leaf(*pud)) {
                pud_clear(pud);
                return 1;
        }
@@ -1601,7 +1587,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
 
 int pmd_clear_huge(pmd_t *pmd)
 {
-       if (pmd_is_leaf(*pmd)) {
+       if (pmd_leaf(*pmd)) {
                pmd_clear(pmd);
                return 1;
        }
index fde7790277f75045576e706909a21e89e4111658..c110ab8fa8a35e8a2ad49b64636a13eca65a1b58 100644 (file)
@@ -393,17 +393,17 @@ static const __be32 *of_get_usable_memory(struct device_node *dn)
 int walk_drmem_lmbs(struct device_node *dn, void *data,
                    int (*func)(struct drmem_lmb *, const __be32 **, void *))
 {
+       struct device_node *root = of_find_node_by_path("/");
        const __be32 *prop, *usm;
        int ret = -ENODEV;
 
-       if (!of_root)
+       if (!root)
                return ret;
 
        /* Get the address & size cells */
-       of_node_get(of_root);
-       n_root_addr_cells = of_n_addr_cells(of_root);
-       n_root_size_cells = of_n_size_cells(of_root);
-       of_node_put(of_root);
+       n_root_addr_cells = of_n_addr_cells(root);
+       n_root_size_cells = of_n_size_cells(root);
+       of_node_put(root);
 
        if (init_drmem_lmb_size(dn))
                return ret;
index 0a540b37aab62c2b111d702aa35e769636d2f634..594a4b7b2ca2469f190c5b4d6fc62e57da2d3dbd 100644 (file)
@@ -226,7 +226,7 @@ static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
                return 0;
        m = phys_to_virt(gpage_freearray[--nr_gpages]);
        gpage_freearray[nr_gpages] = 0;
-       list_add(&m->list, &huge_boot_pages);
+       list_add(&m->list, &huge_boot_pages[0]);
        m->hstate = hstate;
        return 1;
 }
@@ -614,8 +614,6 @@ void __init gigantic_hugetlb_cma_reserve(void)
                 */
                order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT;
 
-       if (order) {
-               VM_WARN_ON(order <= MAX_PAGE_ORDER);
+       if (order)
                hugetlb_cma_reserve(order);
-       }
 }
index 72341b9fb5521ff6032d6b3998bb972714e6a1d0..8e84bc214d133c0b29cae908e048747cd5b2993b 100644 (file)
@@ -171,12 +171,6 @@ static inline void mmu_mark_rodata_ro(void) { }
 void __init mmu_mapin_immr(void);
 #endif
 
-#ifdef CONFIG_DEBUG_WX
-void ptdump_check_wx(void);
-#else
-static inline void ptdump_check_wx(void) { }
-#endif
-
 static inline bool debug_pagealloc_enabled_or_kfence(void)
 {
        return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled();
@@ -186,3 +180,5 @@ static inline bool debug_pagealloc_enabled_or_kfence(void)
 int create_section_mapping(unsigned long start, unsigned long end,
                           int nid, pgprot_t prot);
 #endif
+
+int hash__kernel_map_pages(struct page *page, int numpages, int enable);
index b4f2786a7d2b0be0ad3f064a89d79aad68999bdd..cdff129abb14466ede3907d834045ca13822b8b3 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/delay.h>
 #include <linux/memblock.h>
 #include <linux/libfdt.h>
-#include <linux/crash_core.h>
+#include <linux/crash_reserve.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <asm/cacheflush.h>
@@ -173,7 +173,7 @@ static __init bool overlaps_region(const void *fdt, u32 start,
 
 static void __init get_crash_kernel(void *fdt, unsigned long size)
 {
-#ifdef CONFIG_CRASH_CORE
+#ifdef CONFIG_CRASH_RESERVE
        unsigned long long crash_size, crash_base;
        int ret;
 
index f6c4ace3b22197e990f47d3124f5ac800459f21d..a490724e84adbf74517bb965e7690657e205fa92 100644 (file)
@@ -1111,7 +1111,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 
 static void __init find_possible_nodes(void)
 {
-       struct device_node *rtas;
+       struct device_node *rtas, *root;
        const __be32 *domains = NULL;
        int prop_length, max_nodes;
        u32 i;
@@ -1132,10 +1132,12 @@ static void __init find_possible_nodes(void)
         * If the LPAR is migratable, new nodes might be activated after an LPM,
         * so we should consider the max number in that case.
         */
-       if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
+       root = of_find_node_by_path("/");
+       if (!of_get_property(root, "ibm,migratable-partition", NULL))
                domains = of_get_property(rtas,
                                          "ibm,current-associativity-domains",
                                          &prop_length);
+       of_node_put(root);
        if (!domains) {
                domains = of_get_property(rtas, "ibm,max-associativity-domains",
                                        &prop_length);
index 6163e484bc6d4d5075385613a8788d18ce532cfb..ac22bf28086fac5c5973d29190a6adfe5caa50b8 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
+#include <mm/mmu_decl.h>
 
 static pte_basic_t pte_update_delta(pte_t *ptep, unsigned long addr,
                                    unsigned long old, unsigned long new)
@@ -38,6 +39,10 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
                /* Don't clear DIRTY bit */
                pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO);
                break;
+       case SET_MEMORY_ROX:
+               /* Don't clear DIRTY bit */
+               pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_ROX);
+               break;
        case SET_MEMORY_RW:
                pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW);
                break;
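
The new SET_MEMORY_ROX action drops write permission while keeping execute, in a single PTE update instead of back-to-back RO and X changes. A hedged sketch of a wrapper built on the change_memory_attr() helper patched below (the wrapper name is illustrative):

	/* Illustrative wrapper over the action handled above. */
	static inline int set_memory_rox_sketch(unsigned long addr, int numpages)
	{
		return change_memory_attr(addr, numpages, SET_MEMORY_ROX);
	}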
@@ -97,3 +102,26 @@ int change_memory_attr(unsigned long addr, int numpages, long action)
        return apply_to_existing_page_range(&init_mm, start, size,
                                            change_page_attr, (void *)action);
 }
+
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+#ifdef CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+       int err;
+       unsigned long addr = (unsigned long)page_address(page);
+
+       if (PageHighMem(page))
+               return;
+
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
+               err = hash__kernel_map_pages(page, numpages, enable);
+       else if (enable)
+               err = set_memory_p(addr, numpages);
+       else
+               err = set_memory_np(addr, numpages);
+
+       if (err)
+               panic("%s: changing memory protections failed\n", __func__);
+}
+#endif
+#endif
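
This replaces the separate 32-bit, hash and radix copies of __kernel_map_pages() with one implementation: the hash MMU maps the linear range with HPT entries rather than kernel page tables, so it keeps a dedicated hash__kernel_map_pages() path, and everything else unmaps/remaps via set_memory_np()/set_memory_p(). For context, this hook is what DEBUG_PAGEALLOC invokes around page allocation and free, roughly (illustrative, not part of this patch):

	/* Illustrative caller: unmap freed pages to trap use-after-free,
	 * map them back on allocation. */
	static void debug_pagealloc_toggle_sketch(struct page *page, int n, bool alloc)
	{
		if (!debug_pagealloc_enabled_static())
			return;
		__kernel_map_pages(page, n, alloc ? 1 : 0);
	}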
index a04ae4449a0257ffcefcfa935e22ace25d128314..9e7ba9c3851fa59476ae48b59c88a91dd568fb3c 100644 (file)
@@ -220,10 +220,7 @@ void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
                        break;
                ptep++;
                addr += PAGE_SIZE;
-               /*
-                * increment the pfn.
-                */
-               pte = pfn_pte(pte_pfn(pte) + 1, pte_pgprot((pte)));
+               pte = pte_next_pfn(pte);
        }
 }
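
pte_next_pfn() replaces the open-coded rebuild of the PTE with pfn + 1. A sketch of a generic fallback, assuming the architecture does not override it (this mirrors the removed open-coded form):

	/* Generic-fallback sketch: same PTE, advanced one page frame. */
	#ifndef pte_next_pfn
	static inline pte_t pte_next_pfn(pte_t pte)
	{
		return pfn_pte(pte_pfn(pte) + 1, pte_pgprot(pte));
	}
	#endif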
 
@@ -413,7 +410,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
        if (p4d_none(p4d))
                return NULL;
 
-       if (p4d_is_leaf(p4d)) {
+       if (p4d_leaf(p4d)) {
                ret_pte = (pte_t *)p4dp;
                goto out;
        }
@@ -435,7 +432,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
        if (pud_none(pud))
                return NULL;
 
-       if (pud_is_leaf(pud)) {
+       if (pud_leaf(pud)) {
                ret_pte = (pte_t *)pudp;
                goto out;
        }
@@ -474,7 +471,7 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
                goto out;
        }
 
-       if (pmd_is_leaf(pmd)) {
+       if (pmd_leaf(pmd)) {
                ret_pte = (pte_t *)pmdp;
                goto out;
        }
index 5c02fd08d61eff04253b5069a852f4da907348fe..face94977cb2fb406711b03b15486d6b64b90a46 100644 (file)
@@ -153,7 +153,6 @@ void mark_rodata_ro(void)
 
        if (v_block_mapped((unsigned long)_stext + 1)) {
                mmu_mark_rodata_ro();
-               ptdump_check_wx();
                return;
        }
 
@@ -166,23 +165,5 @@ void mark_rodata_ro(void)
                   PFN_DOWN((unsigned long)_stext);
 
        set_memory_ro((unsigned long)_stext, numpages);
-
-       // mark_initmem_nx() should have already run by now
-       ptdump_check_wx();
 }
 #endif
-
-#if defined(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) && defined(CONFIG_DEBUG_PAGEALLOC)
-void __kernel_map_pages(struct page *page, int numpages, int enable)
-{
-       unsigned long addr = (unsigned long)page_address(page);
-
-       if (PageHighMem(page))
-               return;
-
-       if (enable)
-               set_memory_p(addr, numpages);
-       else
-               set_memory_np(addr, numpages);
-}
-#endif /* CONFIG_DEBUG_PAGEALLOC */
index 5ac1fd30341bb29704bedabd63e43cc105e3f285..9b99113cb51a8f3df10c5ec103fa52fbb8ba8737 100644 (file)
@@ -100,7 +100,7 @@ EXPORT_SYMBOL(__pte_frag_size_shift);
 /* 4 level page table */
 struct page *p4d_page(p4d_t p4d)
 {
-       if (p4d_is_leaf(p4d)) {
+       if (p4d_leaf(p4d)) {
                if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
                        VM_WARN_ON(!p4d_huge(p4d));
                return pte_page(p4d_pte(p4d));
@@ -111,7 +111,7 @@ struct page *p4d_page(p4d_t p4d)
 
 struct page *pud_page(pud_t pud)
 {
-       if (pud_is_leaf(pud)) {
+       if (pud_leaf(pud)) {
                if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
                        VM_WARN_ON(!pud_huge(pud));
                return pte_page(pud_pte(pud));
@@ -125,14 +125,14 @@ struct page *pud_page(pud_t pud)
  */
 struct page *pmd_page(pmd_t pmd)
 {
-       if (pmd_is_leaf(pmd)) {
+       if (pmd_leaf(pmd)) {
                /*
                 * vmalloc_to_page may be called on any vmap address (not only
                 * vmalloc), and it uses pmd_page() etc., when huge vmap is
                 * enabled so these checks can't be used.
                 */
                if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
-                       VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
+                       VM_WARN_ON(!(pmd_leaf(pmd) || pmd_huge(pmd)));
                return pte_page(pmd_pte(pmd));
        }
        return virt_to_page(pmd_page_vaddr(pmd));
@@ -150,9 +150,6 @@ void mark_rodata_ro(void)
                radix__mark_rodata_ro();
        else
                hash__mark_rodata_ro();
-
-       // mark_initmem_nx() should have already run by now
-       ptdump_check_wx();
 }
 
 void mark_initmem_nx(void)
index 2313053fe679ed34f12c75c738b809dc115529ab..9dc239967b77f79d8e47b8c3cc476d307d4b17a3 100644 (file)
@@ -184,13 +184,14 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
 {
        pte_t pte = __pte(st->current_flags);
 
-       if (!IS_ENABLED(CONFIG_DEBUG_WX) || !st->check_wx)
+       if (!st->check_wx)
                return;
 
        if (!pte_write(pte) || !pte_exec(pte))
                return;
 
-       WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
+       WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX),
+                 "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
                  (void *)st->start_address, (void *)st->start_address);
 
        st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
@@ -326,8 +327,7 @@ static void __init build_pgtable_complete_mask(void)
                                pg_level[i].mask |= pg_level[i].flag[j].mask;
 }
 
-#ifdef CONFIG_DEBUG_WX
-void ptdump_check_wx(void)
+bool ptdump_check_wx(void)
 {
        struct pg_state st = {
                .seq = NULL,
@@ -343,15 +343,22 @@ void ptdump_check_wx(void)
                }
        };
 
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !mmu_has_feature(MMU_FTR_KERNEL_RO))
+               return true;
+
        ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 
-       if (st.wx_pages)
+       if (st.wx_pages) {
                pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
                        st.wx_pages);
-       else
+
+               return false;
+       } else {
                pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+
+               return true;
+       }
 }
-#endif
 
 static int __init ptdump_init(void)
 {
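
ptdump_check_wx() now returns whether the check passed instead of being compiled out without CONFIG_DEBUG_WX, so a caller can act on the result. A hedged sketch of such a caller (the function name is illustrative):

	/* Illustrative caller: the arch check runs unconditionally; what to
	 * do about a failure stays a policy decision at the call site. */
	static void check_wx_sketch(void)
	{
		if (!ptdump_check_wx() && IS_ENABLED(CONFIG_DEBUG_WX))
			pr_err("W+X mappings found; see log above\n");
	}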
index b7ff680cde9649a36350743d2ffe771564f4225b..6b5f8a94e7d890be4f8dc7fe8a4eac63125e30cf 100644 (file)
@@ -256,7 +256,7 @@ static bool regs_sipr(struct pt_regs *regs)
 
 static inline u32 perf_flags_from_msr(struct pt_regs *regs)
 {
-       if (regs->msr & MSR_PR)
+       if (user_mode(regs))
                return PERF_RECORD_MISC_USER;
        if ((regs->msr & MSR_HV) && freeze_events_kernel != MMCR0_FCHV)
                return PERF_RECORD_MISC_HYPERVISOR;
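
user_mode() is the canonical spelling of the MSR_PR test; on powerpc it expands to the same bit check. A paraphrase of the existing helper from asm/ptrace.h:

	/* Paraphrased existing helper: true when the interrupted context
	 * was userspace, i.e. MSR[PR] was set. */
	#define user_mode(regs) (((regs)->msr & MSR_PR) != 0)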
@@ -2593,6 +2593,8 @@ static int __init init_ppc64_pmu(void)
                return 0;
        else if (!init_power10_pmu())
                return 0;
+       else if (!init_power11_pmu())
+               return 0;
        else if (!init_ppc970_pmu())
                return 0;
        else
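
init_power11_pmu() joins the fall-through chain: each initialiser returns -ENODEV unless the PVR matches its CPU family, so the first match wins and later entries are never tried. The pattern reduced to its essentials (illustrative):

	/* Illustrative shape of the chain: first family to claim the PVR wins. */
	static int __init init_pmu_chain_sketch(void)
	{
		if (!init_power10_pmu())
			return 0;
		if (!init_power11_pmu())	/* new in this series */
			return 0;
		return init_generic_compat_pmu();	/* last resort */
	}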
index 27f18119fda17474bb883a8e03f894239680ee1a..241551d1282f80e9a258c58d849af9443f585d39 100644 (file)
@@ -695,6 +695,20 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index,
 
        ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
                        virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+       /*
+        * A return value of 'H_PARAMETER' with detail_rc set to
+        * 'GEN_BUF_TOO_SMALL' indicates that the current buffer size cannot
+        * accommodate all of the information, and that a partial buffer was
+        * returned. Since this function only accesses the data for a given
+        * starting index, the required count can be read from the first
+        * entry without needing the whole buffer.
+        * Hence the hcall is treated as failed only when the return value is
+        * neither H_SUCCESS nor H_PARAMETER with detail_rc GEN_BUF_TOO_SMALL (0x1B).
+        */
+       if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B)
+               ret = 0;
+
        if (ret) {
                pr_devel("hcall failed: 0x%lx\n", ret);
                goto out;
@@ -759,6 +773,7 @@ static int h_gpci_event_init(struct perf_event *event)
 {
        u64 count;
        u8 length;
+       unsigned long ret;
 
        /* Not our event */
        if (event->attr.type != event->pmu->type)
@@ -789,13 +804,23 @@ static int h_gpci_event_init(struct perf_event *event)
        }
 
        /* check if the request works... */
-       if (single_gpci_request(event_get_request(event),
+       ret = single_gpci_request(event_get_request(event),
                                event_get_starting_index(event),
                                event_get_secondary_index(event),
                                event_get_counter_info_version(event),
                                event_get_offset(event),
                                length,
-                               &count)) {
+                               &count);
+
+       /*
+        * A return value of H_AUTHORITY means the partition is not permitted
+        * to retrieve performance information; the "Enable Performance
+        * Information Collection" option must be set first.
+        */
+       if (ret == H_AUTHORITY)
+               return -EPERM;
+
+       if (ret) {
                pr_devel("gpci hcall failed\n");
                return -EINVAL;
        }
index 4c18b5504326d3f8270b6f1f219d26badb81686b..a70ac471a5a5d6b8b36b8dbb068e375b5989a609 100644 (file)
@@ -10,4 +10,5 @@ int __init init_power7_pmu(void);
 int __init init_power8_pmu(void);
 int __init init_power9_pmu(void);
 int __init init_power10_pmu(void);
+int __init init_power11_pmu(void);
 int __init init_generic_compat_pmu(void);
index 9b5133e361a76b773d0ab1a2941be4dccc582ea0..62a68b6b2d4b12e5f7efadc5e85c27e0afdc6634 100644 (file)
@@ -634,3 +634,30 @@ int __init init_power10_pmu(void)
 
        return 0;
 }
+
+static struct power_pmu power11_pmu;
+
+int __init init_power11_pmu(void)
+{
+       unsigned int pvr;
+       int rc;
+
+       pvr = mfspr(SPRN_PVR);
+       if (PVR_VER(pvr) != PVR_POWER11)
+               return -ENODEV;
+
+       /* Set the PERF_REG_EXTENDED_MASK here */
+       PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
+
+       power11_pmu = power10_pmu;
+       power11_pmu.name = "Power11";
+
+       rc = register_power_pmu(&power11_pmu);
+       if (rc)
+               return rc;
+
+       /* Tell userspace that EBB is supported */
+       cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+       return 0;
+}
index e454e9d2eff17b575a113e8422494a9e4e17230f..294ab27285889adf42c5206ba0fb5560c195f98b 100644 (file)
@@ -59,16 +59,13 @@ static const char * const board[] __initconst = {
 
 static int __init ppc40x_probe(void)
 {
-       if (of_device_compatible_match(of_root, board)) {
-               pci_set_flags(PCI_REASSIGN_ALL_RSRC);
-               return 1;
-       }
-
-       return 0;
+       pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+       return 1;
 }
 
 define_machine(ppc40x_simple) {
        .name = "PowerPC 40x Platform",
+       .compatibles = board,
        .probe = ppc40x_probe,
        .progress = udbg_progress,
        .init_IRQ = uic_init_tree,
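
The boilerplate probe functions that only called of_device_compatible_match(of_root, board) are replaced by a .compatibles field in define_machine(); the generic probe code can then do the match itself, with proper root-node refcounting. A hedged sketch of what the generic side plausibly looks like, using the of_machine_compatible_match() helper seen later in this series:

	/* Illustrative generic probe: match .compatibles before .probe(). */
	static int __init machine_matches_sketch(const struct machdep_calls *md)
	{
		if (md->compatibles && !of_machine_compatible_match(md->compatibles))
			return 0;
		return md->probe ? md->probe() : 1;
	}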
index bf0188dcb9184d2fdeb694d7eaca3bc22394483b..a5001d32f978d72e11183739f77462b5904cdb27 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/kthread.h>
 #include <linux/leds.h>
 #include <linux/i2c.h>
index 0d58ab257cd93acf22c7a548d67b2258e1d2fddc..d4fa6c302ccfb0ae77ed9b222878f049cc29d9e3 100644 (file)
@@ -32,9 +32,6 @@ static const char * const board[] __initconst = {
  */
 static int __init mpc512x_generic_probe(void)
 {
-       if (!of_device_compatible_match(of_root, board))
-               return 0;
-
        mpc512x_init_early();
 
        return 1;
@@ -42,6 +39,7 @@ static int __init mpc512x_generic_probe(void)
 
 define_machine(mpc512x_generic) {
        .name                   = "MPC512x generic",
+       .compatibles            = board,
        .probe                  = mpc512x_generic_probe,
        .init                   = mpc512x_init,
        .setup_arch             = mpc512x_setup_arch,
index aa82e6b437f31a5c6c9a34c4355660e8e3ced5b8..37a67120f257ca6013d5fce43f2840181ad0a945 100644 (file)
@@ -195,8 +195,10 @@ static void __init efika_setup_arch(void)
 
 static int __init efika_probe(void)
 {
-       const char *model = of_get_property(of_root, "model", NULL);
+       struct device_node *root = of_find_node_by_path("/");
+       const char *model = of_get_property(root, "model", NULL);
 
+       of_node_put(root);
        if (model == NULL)
                return 0;
        if (strcmp(model, "EFIKA5K2"))
index 0fd67b3ffc3e83999cfc46387a0d7aed5513d3e9..0a161d82a3a8772f26fedc1fef183f811d53f13a 100644 (file)
@@ -172,17 +172,9 @@ static const char * const board[] __initconst = {
        NULL,
 };
 
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init lite5200_probe(void)
-{
-       return of_device_compatible_match(of_root, board);
-}
-
 define_machine(lite5200) {
        .name           = "lite5200",
-       .probe          = lite5200_probe,
+       .compatibles    = board,
        .setup_arch     = lite5200_setup_arch,
        .discover_phbs  = mpc52xx_setup_pci,
        .init           = mpc52xx_declare_of_platform_devices,
index f1e85e86f5e57620621db9b81e9b0f793649e77b..7e0e4c34a40be17e8cf4061faaab001c293c04f1 100644 (file)
@@ -59,17 +59,9 @@ static const char *board[] __initdata = {
        NULL
 };
 
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc5200_simple_probe(void)
-{
-       return of_device_compatible_match(of_root, board);
-}
-
 define_machine(mpc5200_simple_platform) {
        .name           = "mpc5200-simple-platform",
-       .probe          = mpc5200_simple_probe,
+       .compatibles    = board,
        .setup_arch     = mpc5200_simple_setup_arch,
        .discover_phbs  = mpc52xx_setup_pci,
        .init           = mpc52xx_declare_of_platform_devices,
index 534bb227480d25dd09a81166a818467db6db1ba2..63b6d213726a6d6ee7ca0589dcadad8de1866f93 100644 (file)
@@ -34,19 +34,11 @@ static const char *board[] __initdata = {
        NULL
 };
 
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc830x_rdb_probe(void)
-{
-       return of_device_compatible_match(of_root, board);
-}
-
 machine_device_initcall(mpc830x_rdb, mpc83xx_declare_of_platform_devices);
 
 define_machine(mpc830x_rdb) {
        .name                   = "MPC830x RDB",
-       .probe                  = mpc830x_rdb_probe,
+       .compatibles            = board,
        .setup_arch             = mpc830x_rdb_setup_arch,
        .discover_phbs          = mpc83xx_setup_pci,
        .init_IRQ               = mpc83xx_ipic_init_IRQ,
index 7b901ab3b864632b79e2185d6d0f40f7fa923cec..5c39966762e4264d2ef91b2c4ef75fdf2c2c5d65 100644 (file)
@@ -34,19 +34,11 @@ static const char *board[] __initdata = {
        NULL
 };
 
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc831x_rdb_probe(void)
-{
-       return of_device_compatible_match(of_root, board);
-}
-
 machine_device_initcall(mpc831x_rdb, mpc83xx_declare_of_platform_devices);
 
 define_machine(mpc831x_rdb) {
        .name                   = "MPC831x RDB",
-       .probe                  = mpc831x_rdb_probe,
+       .compatibles            = board,
        .setup_arch             = mpc831x_rdb_setup_arch,
        .discover_phbs          = mpc83xx_setup_pci,
        .init_IRQ               = mpc83xx_ipic_init_IRQ,
index 39e78018dd0b7fe993b5e91f61f40a9187c1673f..45823e14793311d679530ce72d93b6e204c70992 100644 (file)
@@ -61,17 +61,9 @@ static const char * const board[] __initconst = {
        NULL
 };
 
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc837x_rdb_probe(void)
-{
-       return of_device_compatible_match(of_root, board);
-}
-
 define_machine(mpc837x_rdb) {
        .name                   = "MPC837x RDB/WLAN",
-       .probe                  = mpc837x_rdb_probe,
+       .compatibles            = board,
        .setup_arch             = mpc837x_rdb_setup_arch,
        .discover_phbs          = mpc83xx_setup_pci,
        .init_IRQ               = mpc83xx_ipic_init_IRQ,
index c9664e46b03d701901107870db4ddc612b381c72..99bd4355f28e60a209c4c3c1367c599ec3295842 100644 (file)
@@ -206,7 +206,8 @@ static int mpc83xx_suspend_enter(suspend_state_t state)
                out_be32(&pmc_regs->config1,
                         in_be32(&pmc_regs->config1) | PMCCR1_POWER_OFF);
 
-               enable_kernel_fp();
+               if (IS_ENABLED(CONFIG_PPC_FPU))
+                       enable_kernel_fp();
 
                mpc83xx_enter_deep_sleep(immrbase);
 
index 2eb62bff86d48f69ff4612f8e7e46c8d642da5df..3ad8096fcf16aa7419477d161cd9b89b2e5a2690 100644 (file)
@@ -19,7 +19,7 @@
 #include "mpc85xx.h"
 #include "smp.h"
 
-void __init bsc913x_qds_pic_init(void)
+static void __init bsc913x_qds_pic_init(void)
 {
        struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
          MPIC_SINGLE_DEST_CPU,
index 161f006cb3bb5d129b3b0e3757a043a554bd5a9f..dcd358c28201ff86e688a5ee51f6dfa4b0209f8d 100644 (file)
@@ -15,7 +15,7 @@
 
 #include "mpc85xx.h"
 
-void __init bsc913x_rdb_pic_init(void)
+static void __init bsc913x_rdb_pic_init(void)
 {
        struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
          MPIC_SINGLE_DEST_CPU,
index 645fcca77cde56c3773175dbd947c0c2f635f0ab..c44400e95f5514a9307ce4d00f3663e241215bec 100644 (file)
@@ -149,7 +149,7 @@ static int __init corenet_generic_probe(void)
        extern struct smp_ops_t smp_85xx_ops;
 #endif
 
-       if (of_device_compatible_match(of_root, boards))
+       if (of_machine_compatible_match(boards))
                return 1;
 
        /* Check if we're running under the Freescale hypervisor */
index 9c3b44a1952ec4263c8e5250048bbe0511c4fe3f..477852f1a7268f817c17f37bf4e32b46457ba0ec 100644 (file)
@@ -38,7 +38,7 @@
 
 void __iomem *imp3a_regs;
 
-void __init ge_imp3a_pic_init(void)
+static void __init ge_imp3a_pic_init(void)
 {
        struct mpic *mpic;
        struct device_node *np;
index 751395cbf022270f27f77c24e2bec403878ac379..34ce21f42623f525d901c3269d5b00f2ebac19fd 100644 (file)
@@ -114,7 +114,7 @@ static int gpio_halt_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int gpio_halt_remove(struct platform_device *pdev)
+static void gpio_halt_remove(struct platform_device *pdev)
 {
        free_irq(halt_irq, pdev);
        cancel_work_sync(&gpio_halt_wq);
@@ -124,8 +124,6 @@ static int gpio_halt_remove(struct platform_device *pdev)
 
        gpiod_put(halt_gpio);
        halt_gpio = NULL;
-
-       return 0;
 }
 
 static const struct of_device_id gpio_halt_match[] = {
@@ -145,7 +143,7 @@ static struct platform_driver gpio_halt_driver = {
                .of_match_table = gpio_halt_match,
        },
        .probe          = gpio_halt_probe,
-       .remove         = gpio_halt_remove,
+       .remove_new     = gpio_halt_remove,
 };
 
 module_platform_driver(gpio_halt_driver);
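
gpio_halt_remove() is switched to the void-returning remove callback, registered through .remove_new; the int return value was always ignored by the driver core, so the void prototype only makes that explicit. The conversion pattern, with hypothetical names:

	#include <linux/platform_device.h>

	/* Hypothetical driver showing the .remove_new conversion. */
	static void example_remove(struct platform_device *pdev)
	{
		/* same teardown as before, minus the meaningless return */
	}

	static struct platform_driver example_driver = {
		.driver		= { .name = "example" },
		.remove_new	= example_remove,
	};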
index 6be1b9809db6b5c47aa68da49263ae7c4deac13c..f74d446c53f085c6521ecef889a24935ae55f09d 100644 (file)
@@ -112,17 +112,9 @@ static const char * const board[] __initconst = {
        NULL
 };
 
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init tqm85xx_probe(void)
-{
-       return of_device_compatible_match(of_root, board);
-}
-
 define_machine(tqm85xx) {
        .name                   = "TQM85xx",
-       .probe                  = tqm85xx_probe,
+       .compatibles            = board,
        .setup_arch             = tqm85xx_setup_arch,
        .init_IRQ               = tqm85xx_pic_init,
        .show_cpuinfo           = tqm85xx_show_cpuinfo,
index 6c6e714a7521a4f4d8e9968536134541942ee1b6..2c8dc08869124c57ed646996b28dee4e60e76aa4 100644 (file)
@@ -25,7 +25,7 @@
 
 extern void __flush_disable_L1(void);
 
-void amigaone_show_cpuinfo(struct seq_file *m)
+static void amigaone_show_cpuinfo(struct seq_file *m)
 {
        seq_printf(m, "vendor\t\t: Eyetech Ltd.\n");
 }
@@ -65,7 +65,7 @@ static int __init amigaone_add_bridge(struct device_node *dev)
        return 0;
 }
 
-void __init amigaone_setup_arch(void)
+static void __init amigaone_setup_arch(void)
 {
        if (ppc_md.progress)
                ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0);
@@ -83,7 +83,7 @@ static void __init amigaone_discover_phbs(void)
        BUG_ON(phb != 0);
 }
 
-void __init amigaone_init_IRQ(void)
+static void __init amigaone_init_IRQ(void)
 {
        struct device_node *pic, *np = NULL;
        const unsigned long *prop = NULL;
@@ -123,7 +123,7 @@ static int __init request_isa_regions(void)
 }
 machine_device_initcall(amigaone, request_isa_regions);
 
-void __noreturn amigaone_restart(char *cmd)
+static void __noreturn amigaone_restart(char *cmd)
 {
        local_irq_disable();
 
index 9c10aac40c7b11ec8812a1f2243ee91801868d88..e265f026eee2a98a813a9263f97215df59a6f101 100644 (file)
@@ -99,9 +99,6 @@ static void __init linkstation_init_IRQ(void)
        mpic_init(mpic);
 }
 
-extern void avr_uart_configure(void);
-extern void avr_uart_send(const char);
-
 static void __noreturn linkstation_restart(char *cmd)
 {
        local_irq_disable();
index 5ad12023e56280171238e13cf5136f67df1cc0bc..ebc258fa4858d023bd528d6015e64a2df465166c 100644 (file)
@@ -156,4 +156,7 @@ int mpc10x_disable_store_gathering(struct pci_controller *hose);
 /* For MPC107 boards that use the built-in openpic */
 void mpc10x_set_openpic(void);
 
+void avr_uart_configure(void);
+void avr_uart_send(const char c);
+
 #endif /* __PPC_KERNEL_MPC10X_H */
index fd130fe7a65acd35b8fe9f908e5529b8218055b4..4e983af3294922c415140a1f4101c3f5001303ef 100644 (file)
@@ -260,7 +260,7 @@ out:
 }
 
 
-static int gpio_mdio_remove(struct platform_device *dev)
+static void gpio_mdio_remove(struct platform_device *dev)
 {
        struct mii_bus *bus = dev_get_drvdata(&dev->dev);
 
@@ -271,8 +271,6 @@ static int gpio_mdio_remove(struct platform_device *dev)
        kfree(bus->priv);
        bus->priv = NULL;
        mdiobus_free(bus);
-
-       return 0;
 }
 
 static const struct of_device_id gpio_mdio_match[] =
@@ -287,7 +285,7 @@ MODULE_DEVICE_TABLE(of, gpio_mdio_match);
 static struct platform_driver gpio_mdio_driver =
 {
        .probe          = gpio_mdio_probe,
-       .remove         = gpio_mdio_remove,
+       .remove_new     = gpio_mdio_remove,
        .driver = {
                .name = "gpio-mdio-bitbang",
                .of_match_table = gpio_mdio_match,
index f27d314147373294b6f476980014275edbdaa32c..60f990a336c470224475af73b1ae258d375994bd 100644 (file)
@@ -270,16 +270,18 @@ static int __init pas_add_bridge(struct device_node *dev)
 
 void __init pas_pci_init(void)
 {
+       struct device_node *root = of_find_node_by_path("/");
        struct device_node *np;
        int res;
 
        pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS);
 
-       np = of_find_compatible_node(of_root, NULL, "pasemi,rootbus");
+       np = of_find_compatible_node(root, NULL, "pasemi,rootbus");
        if (np) {
                res = pas_add_bridge(np);
                of_node_put(np);
        }
+       of_node_put(root);
 }
 
 void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset)
index 8bdae0caf21e5f14c4b0d38fe7cd15697ad9c811..84f101ec53a96f2ef41ca8081919f55ebf04a796 100644 (file)
@@ -2,7 +2,7 @@
 config PPC_PMAC
        bool "Apple PowerMac based machines"
        depends on PPC_BOOK3S && CPU_BIG_ENDIAN
-       select ADB_CUDA if POWER_RESET && PPC32
+       select ADB_CUDA if POWER_RESET && ADB
        select MPIC
        select FORCE_PCI
        select PPC_INDIRECT_PCI if PPC32
index 81c9fbae88b140253c9551e451b8a71645e00953..2cc257f75c50f66dbce3e3dd73ca6b69dd87bab4 100644 (file)
@@ -2333,7 +2333,6 @@ static struct pmac_mb_def pmac_mb_defs[] = {
                PMAC_TYPE_POWERMAC_G5,          g5_features,
                0,
        },
-#ifdef CONFIG_PPC64
        {       "PowerMac7,3",                  "PowerMac G5",
                PMAC_TYPE_POWERMAC_G5,          g5_features,
                0,
@@ -2359,7 +2358,6 @@ static struct pmac_mb_def pmac_mb_defs[] = {
                0,
        },
 #endif /* CONFIG_PPC64 */
-#endif /* CONFIG_PPC64 */
 };
 
 /*
index bb7657115f1d276372d53b6edea9be4b89c7ce4b..c9a9b759cc928b931a0ac18f7ed7469a6ecd52b5 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
 #include <linux/of.h>
 
 #include <asm/page.h>
index b66b06efcef1e1001f27ad556ba3880678dee947..24f04f20d3e85c58a167fd6c639e72f09677f34a 100644 (file)
@@ -425,12 +425,11 @@ static int opal_prd_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int opal_prd_remove(struct platform_device *pdev)
+static void opal_prd_remove(struct platform_device *pdev)
 {
        misc_deregister(&opal_prd_dev);
        opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb);
        opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2);
-       return 0;
 }
 
 static const struct of_device_id opal_prd_match[] = {
@@ -444,7 +443,7 @@ static struct platform_driver opal_prd_driver = {
                .of_match_table = opal_prd_match,
        },
        .probe  = opal_prd_probe,
-       .remove = opal_prd_remove,
+       .remove_new = opal_prd_remove,
 };
 
 module_platform_driver(opal_prd_driver);
index 509e30ad01bb430319bf8ebc6d5ffc9bb9146669..e8ab3d6b03bd28425de8a89f12854ea2b227d5ec 100644 (file)
@@ -9,6 +9,7 @@
 
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
 
 #define lv1call .long 0x44000022; extsw r3, r3
 
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
+       stdu    r1, -STACK_FRAME_MIN_SIZE(r1);  \
        li      r11, API_NUMBER;                \
        lv1call;                                \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE;   \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
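
Each lv1 hypercall wrapper used to bump the stack pointer with a bare stdu of a data register, leaving its saved output pointers below r1 without a real stack frame. The reworked macros save LR in its ABI slot, LRSAVE(r1), and explicitly allocate STACK_FRAME_MIN_SIZE plus the pointer-save area around the lv1call. A comment-only sketch of the resulting layout for a one-output wrapper, using ELFv2 values (STACK_FRAME_MIN_SIZE is 32 there; 112 on ELFv1):

	/*
	 * After "std r4, -8(r1); stdu r1, -STACK_FRAME_MIN_SIZE-8(r1)":
	 *
	 *   old_r1 + 16   caller's LR save slot, written via LRSAVE(r1)
	 *   old_r1 -  8   saved output pointer (r4)
	 *   old_r1 - 40   new r1; 0(r1) holds the back chain to old_r1
	 *
	 * The saved pointer now sits inside an allocated frame instead of
	 * below the stack pointer, where the hypercall could clobber it.
	 */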
 
@@ -38,18 +41,19 @@ _GLOBAL(_##API_NAME)                                \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
-       stdu    r3, -8(r1);                     \
+       std     r3, -8(r1);                     \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-8(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 8;                      \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+8; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -57,21 +61,22 @@ _GLOBAL(_##API_NAME)                                \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r3, -8(r1);                     \
-       stdu    r4, -16(r1);                    \
+       std     r4, -16(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-16(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 16;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+16; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
        std     r5, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -79,16 +84,17 @@ _GLOBAL(_##API_NAME)                                \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r3, -8(r1);                     \
        std     r4, -16(r1);                    \
-       stdu    r5, -24(r1);                    \
+       std     r5, -24(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-24(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 24;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+24; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -96,7 +102,7 @@ _GLOBAL(_##API_NAME)                         \
        ld      r11, -24(r1);                   \
        std     r6, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -104,7 +110,7 @@ _GLOBAL(_##API_NAME)                                \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r3, -8(r1);                     \
        std     r4, -16(r1);                    \
@@ -112,12 +118,13 @@ _GLOBAL(_##API_NAME)                              \
        std     r6, -32(r1);                    \
        std     r7, -40(r1);                    \
        std     r8, -48(r1);                    \
-       stdu    r9, -56(r1);                    \
+       std     r9, -56(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-56(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 56;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+56; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -133,7 +140,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -56(r1);                   \
        std     r10, 0(r11);                    \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -141,18 +148,19 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
-       stdu    r4, -8(r1);                     \
+       std     r4, -8(r1);                     \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-8(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 8;                      \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+8; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -160,21 +168,22 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r4, -8(r1);                     \
-       stdu    r5, -16(r1);                    \
+       std     r5, -16(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-16(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 16;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+16; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
        std     r5, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -182,16 +191,17 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r4, -8(r1);                     \
        std     r5, -16(r1);                    \
-       stdu    r6, -24(r1);                    \
+       std     r6, -24(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-24(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 24;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+24; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -199,7 +209,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -24(r1);                   \
        std     r6, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -207,17 +217,18 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r4, -8(r1);                     \
        std     r5, -16(r1);                    \
        std     r6, -24(r1);                    \
-       stdu    r7, -32(r1);                    \
+       std     r7, -32(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-32(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 32;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+32; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -227,7 +238,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -32(r1);                   \
        std     r7, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -235,18 +246,19 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r4, -8(r1);                     \
        std     r5, -16(r1);                    \
        std     r6, -24(r1);                    \
        std     r7, -32(r1);                    \
-       stdu    r8, -40(r1);                    \
+       std     r8, -40(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-40(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 40;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+40; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -258,7 +270,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -40(r1);                   \
        std     r8, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -266,19 +278,20 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r4, -8(r1);                     \
        std     r5, -16(r1);                    \
        std     r6, -24(r1);                    \
        std     r7, -32(r1);                    \
        std     r8, -40(r1);                    \
-       stdu    r9, -48(r1);                    \
+       std     r9, -48(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-48(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 48;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+48; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -292,7 +305,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -48(r1);                   \
        std     r9, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -300,7 +313,7 @@ _GLOBAL(_##API_NAME)                                \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r4, -8(r1);                     \
        std     r5, -16(r1);                    \
@@ -308,12 +321,13 @@ _GLOBAL(_##API_NAME)                              \
        std     r7, -32(r1);                    \
        std     r8, -40(r1);                    \
        std     r9, -48(r1);                    \
-       stdu    r10, -56(r1);                   \
+       std     r10, -56(r1);                   \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-56(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 56;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+56; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -329,7 +343,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -56(r1);                   \
        std     r10, 0(r11);                    \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -337,18 +351,19 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
-       stdu    r5, -8(r1);                     \
+       std     r5, -8(r1);                     \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-8(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 8;                      \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+8; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -356,21 +371,22 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r5, -8(r1);                     \
-       stdu    r6, -16(r1);                    \
+       std     r6, -16(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-16(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 16;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+16; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
        std     r5, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -378,16 +394,17 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r5, -8(r1);                     \
        std     r6, -16(r1);                    \
-       stdu    r7, -24(r1);                    \
+       std     r7, -24(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-24(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 24;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+24; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -395,7 +412,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -24(r1);                   \
        std     r6, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -403,17 +420,18 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r5, -8(r1);                     \
        std     r6, -16(r1);                    \
        std     r7, -24(r1);                    \
-       stdu    r8, -32(r1);                    \
+       std     r8, -32(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-32(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 32;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+32;\
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -423,7 +441,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -32(r1);                   \
        std     r7, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -431,18 +449,19 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r5, -8(r1);                     \
        std     r6, -16(r1);                    \
        std     r7, -24(r1);                    \
        std     r8, -32(r1);                    \
-       stdu    r9, -40(r1);                    \
+       std     r9, -40(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-40(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 40;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+40; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -454,7 +473,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -40(r1);                   \
        std     r8, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -462,18 +481,19 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
-       stdu    r6, -8(r1);                     \
+       std     r6, -8(r1);                     \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-8(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 8;                      \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+8; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -481,21 +501,22 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r6, -8(r1);                     \
-       stdu    r7, -16(r1);                    \
+       std     r7, -16(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-16(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 16;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+16; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
        std     r5, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -503,16 +524,17 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r6, -8(r1);                     \
        std     r7, -16(r1);                    \
-       stdu    r8, -24(r1);                    \
+       std     r8, -24(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-24(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 24;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+24; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -520,7 +542,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -24(r1);                   \
        std     r6, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -528,18 +550,19 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
-       stdu    r7, -8(r1);                     \
+       std     r7, -8(r1);                     \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-8(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 8;                      \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+8; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -547,21 +570,22 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r7, -8(r1);                     \
-       stdu    r8, -16(r1);                    \
+       std     r8, -16(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-16(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 16;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+16; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
        std     r5, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -569,16 +593,17 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r7, -8(r1);                     \
        std     r8, -16(r1);                    \
-       stdu    r9, -24(r1);                    \
+       std     r9, -24(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-24(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 24;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+24; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -586,7 +611,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -24(r1);                   \
        std     r6, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -594,18 +619,19 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
-       stdu    r8, -8(r1);                     \
+       std     r8, -8(r1);                     \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-8(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 8;                      \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+8; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -613,21 +639,22 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r8, -8(r1);                     \
-       stdu    r9, -16(r1);                    \
+       std     r9, -16(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-16(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 16;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+16; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
        std     r5, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -635,16 +662,17 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r8, -8(r1);                     \
        std     r9, -16(r1);                    \
-       stdu    r10, -24(r1);                   \
+       std     r10, -24(r1);                   \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-24(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 24;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+24; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
@@ -652,7 +680,7 @@ _GLOBAL(_##API_NAME)                                \
        ld      r11, -24(r1);                   \
        std     r6, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -660,18 +688,19 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
-       stdu    r9, -8(r1);                     \
+       std     r9, -8(r1);                     \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-8(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 8;                      \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+8; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -679,21 +708,22 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r9, -8(r1);                     \
-       stdu    r10, -16(r1);                   \
+       std     r10, -16(r1);                   \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-16(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 16;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+16; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
        std     r5, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -701,23 +731,24 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
        std     r9, -8(r1);                     \
-       stdu    r10, -16(r1);                   \
+       std     r10, -16(r1);                   \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-16(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 16;                     \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+16; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
        ld      r11, -16(r1);                   \
        std     r5, 0(r11);                     \
-       ld      r11, 48+8*8(r1);                \
+       ld      r11, STK_PARAM_AREA+8*8(r1);    \
        std     r6, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -725,18 +756,19 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
-       stdu    r10, -8(r1);                    \
+       std     r10, -8(r1);                    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE-8(r1); \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       addi    r1, r1, 8;                      \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE+8; \
        ld      r11, -8(r1);                    \
        std     r4, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -744,27 +776,29 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
                                                \
-       std     r10, 48+8*7(r1);                \
+       std     r10, STK_PARAM_AREA+8*7(r1);    \
+       stdu    r1, -STACK_FRAME_MIN_SIZE(r1);  \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       ld      r11, 48+8*7(r1);                \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE;   \
+       ld      r11, STK_PARAM_AREA+8*7(r1);    \
        std     r4, 0(r11);                     \
-       ld      r11, 48+8*8(r1);                \
+       ld      r11, STK_PARAM_AREA+8*8(r1);    \
        std     r5, 0(r11);                     \
-       ld      r11, 48+8*9(r1);                \
+       ld      r11, STK_PARAM_AREA+8*9(r1);    \
        std     r6, 0(r11);                     \
-       ld      r11, 48+8*10(r1);               \
+       ld      r11, STK_PARAM_AREA+8*10(r1);   \
        std     r7, 0(r11);                     \
-       ld      r11, 48+8*11(r1);               \
+       ld      r11, STK_PARAM_AREA+8*11(r1);   \
        std     r8, 0(r11);                     \
-       ld      r11, 48+8*12(r1);               \
+       ld      r11, STK_PARAM_AREA+8*12(r1);   \
        std     r9, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
 
@@ -772,15 +806,17 @@ _GLOBAL(_##API_NAME)                              \
 _GLOBAL(_##API_NAME)                           \
                                                \
        mflr    r0;                             \
-       std     r0, 16(r1);                     \
+       std     r0, LRSAVE(r1);                 \
+       stdu    r1, -STACK_FRAME_MIN_SIZE(r1);  \
                                                \
        li      r11, API_NUMBER;                \
        lv1call;                                \
                                                \
-       ld      r11, 48+8*8(r1);                \
+       addi    r1, r1, STACK_FRAME_MIN_SIZE;   \
+       ld      r11, STK_PARAM_AREA+8*8(r1);    \
        std     r4, 0(r11);                     \
                                                \
-       ld      r0, 16(r1);                     \
+       ld      r0, LRSAVE(r1);                 \
        mtlr    r0;                             \
        blr
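
The hunks above rework the PS3 lv1 hypercall veneers: instead of spilling result pointers below the stack pointer and bumping r1 by only 8/16/24 bytes, each macro now opens a proper minimum stack frame around lv1call, and the hard-coded offsets (16 for the LR save, 48 for the parameter save area) give way to the LRSAVE, STACK_FRAME_MIN_SIZE and STK_PARAM_AREA constants so one source serves both ABIs. For orientation, a sketch of the frame header those constants describe, assuming ELFv2 values (illustrative, not from the patch; ELFv1 keeps the LR save at 16 but uses a 112-byte minimum frame with the parameter area at 48, the old literals):

	/* ELFv2 minimum caller frame, as the offsets above assume. */
	struct ppc64_min_frame {
		unsigned long back_chain;	/* 0(r1): caller's r1, written by stdu */
		unsigned long cr_save;		/* 8(r1) */
		unsigned long lr_save;		/* 16(r1) == LRSAVE */
		unsigned long toc_save;		/* 24(r1) */
	};
	_Static_assert(sizeof(struct ppc64_min_frame) == 32,
		       "ELFv2's STACK_FRAME_MIN_SIZE is four doublewords");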
 
index 998e3aff2457246a4299d140d183b14b6d69bed8..b401282727a445aff0e0593182f855bfe6e09e91 100644
@@ -55,7 +55,7 @@ static struct device ibmebus_bus_device = { /* fake "parent" device */
        .init_name = "ibmebus",
 };
 
-struct bus_type ibmebus_bus_type;
+const struct bus_type ibmebus_bus_type;
 
 /* These devices will automatically be added to the bus during init */
 static const struct of_device_id ibmebus_matches[] __initconst = {
@@ -432,7 +432,7 @@ static int ibmebus_bus_modalias(const struct device *dev, struct kobj_uevent_env
        return of_device_uevent_modalias(dev, env);
 }
 
-struct bus_type ibmebus_bus_type = {
+const struct bus_type ibmebus_bus_type = {
        .name      = "ibmebus",
        .uevent    = ibmebus_bus_modalias,
        .bus_groups = ibmbus_bus_groups,
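
With the driver core now taking bus_type pointers as const throughout, ibmebus_bus_type can live in rodata. A minimal sketch of the pattern with made-up names (nothing here is from the patch itself):

	static const struct bus_type demo_bus = {
		.name = "demo",
	};

	static int __init demo_bus_init(void)
	{
		/* bus_register() accepts a const pointer since the
		 * driver-core constification, so the definition can be const. */
		return bus_register(&demo_bus);
	}
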
index 1c151d77e74b3485593ab586ce15e95dc7fe816f..f73c4d1c26af9822b12fcb9df19f25892933ab38 100644
@@ -346,9 +346,13 @@ static int read_rtas_lpar_name(struct seq_file *m)
  */
 static int read_dt_lpar_name(struct seq_file *m)
 {
+       struct device_node *root = of_find_node_by_path("/");
        const char *name;
+       int ret;
 
-       if (of_property_read_string(of_root, "ibm,partition-name", &name))
+       ret = of_property_read_string(root, "ibm,partition-name", &name);
+       of_node_put(root);
+       if (ret)
                return -ENOENT;
 
        seq_printf(m, "partition_name=%s\n", name);
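
This is part of the effort to stop dereferencing of_root directly: take a counted reference on the root node and drop it once the property has been read. The same idiom as a standalone sketch (error handling trimmed; of_find_node_by_path() may return NULL, which both callees tolerate):

	static int read_root_string(const char *prop, const char **val)
	{
		struct device_node *root = of_find_node_by_path("/"); /* takes a ref */
		int ret = of_property_read_string(root, prop, val);

		of_node_put(root);	/* balance the reference */
		return ret;
	}
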
index 423ee1d5bd9440d3e437e763aca682ac1cd24943..6dfb55b52d363de342883b3caef9e19a446d38b9 100644
@@ -26,6 +26,7 @@ static int query_token, change_token;
 #define RTAS_CHANGE_MSI_FN     3
 #define RTAS_CHANGE_MSIX_FN    4
 #define RTAS_CHANGE_32MSI_FN   5
+#define RTAS_CHANGE_32MSIX_FN  6
 
 /* RTAS Helpers */
 
@@ -41,7 +42,7 @@ static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs)
        seq_num = 1;
        do {
                if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN ||
-                   func == RTAS_CHANGE_32MSI_FN)
+                   func == RTAS_CHANGE_32MSI_FN || func == RTAS_CHANGE_32MSIX_FN)
                        rc = rtas_call(change_token, 6, 4, rtas_ret, addr,
                                        BUID_HI(buid), BUID_LO(buid),
                                        func, num_irqs, seq_num);
@@ -406,8 +407,12 @@ again:
 
                if (use_32bit_msi_hack && rc > 0)
                        rtas_hack_32bit_msi_gen2(pdev);
-       } else
-               rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec);
+       } else {
+               if (pdev->no_64bit_msi)
+                       rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSIX_FN, nvec);
+               else
+                       rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec);
+       }
 
        if (rc != nvec) {
                if (nvec != nvec_in) {
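
The new RTAS_CHANGE_32MSIX_FN gives the MSI-X path the same 32-bit fallback the plain-MSI path already had for devices flagged no_64bit_msi; only the func argument passed to rtas_change_msi() differs. Condensed (a restatement of the hunk, not new behaviour):

	u32 func = pdev->no_64bit_msi ? RTAS_CHANGE_32MSIX_FN
				      : RTAS_CHANGE_MSIX_FN;

	rc = rtas_change_msi(pdn, func, nvec);
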
index 526c621b098bece600fdfe72de1c4f53697d33f5..eea2041b270b546477a34e3836662969d6dd25eb 100644
@@ -101,10 +101,12 @@ retry:
                esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * max_esi_attrs);
 
                temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL);
-               if (temp_buf)
+               if (temp_buf) {
                        buf = temp_buf;
-               else
-                       return -ENOMEM;
+               } else {
+                       ret = -ENOMEM;
+                       goto out_buf;
+               }
 
                goto retry;
        }
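
The retry loop above used to leak the buffer on allocation failure: krealloc() leaves the original allocation untouched when it fails, so the old pointer still has to be freed on the error path. The general shape of the fix, with generic names rather than the patch's:

	void *tmp = krealloc(buf, new_size, GFP_KERNEL);
	if (!tmp) {
		kfree(buf);	/* the old buffer is still valid and still ours */
		return -ENOMEM;
	}
	buf = tmp;
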
index 1a53e048ceb768f175237361018d6177092b4220..c233f9db039ba4a15a625c7b9593d62181a5356f 100644
@@ -1521,7 +1521,7 @@ err:      kfree(p);
        return rc;
 }
 
-static int papr_scm_remove(struct platform_device *pdev)
+static void papr_scm_remove(struct platform_device *pdev)
 {
        struct papr_scm_priv *p = platform_get_drvdata(pdev);
 
@@ -1538,8 +1538,6 @@ static int papr_scm_remove(struct platform_device *pdev)
        pdev->archdata.priv = NULL;
        kfree(p->bus_desc.provider_name);
        kfree(p);
-
-       return 0;
 }
 
 static const struct of_device_id papr_scm_match[] = {
@@ -1550,7 +1548,7 @@ static const struct of_device_id papr_scm_match[] = {
 
 static struct platform_driver papr_scm_driver = {
        .probe = papr_scm_probe,
-       .remove = papr_scm_remove,
+       .remove_new = papr_scm_remove,
        .driver = {
                .name = "papr_scm",
                .of_match_table = papr_scm_match,
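
papr_scm joins the tree-wide platform-driver conversion: remove callbacks that cannot usefully fail return void and are hooked up through .remove_new during the transition period. The pattern in isolation (the "demo" names are illustrative):

	static int demo_probe(struct platform_device *pdev)
	{
		return 0;	/* setup elided */
	}

	static void demo_remove(struct platform_device *pdev)
	{
		/* teardown that cannot fail; nothing to return */
	}

	static struct platform_driver demo_driver = {
		.probe      = demo_probe,
		.remove_new = demo_remove,
		.driver     = { .name = "demo" },
	};
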
index ecea85c74c43fa9f697dadaa7857589cb25e191a..284a6fa04b0c27b8b0345986eff654a1ea4058db 100644
@@ -1029,9 +1029,11 @@ static void __init pseries_add_hw_description(void)
                return;
        }
 
-       if (of_property_read_bool(of_root, "ibm,powervm-partition") ||
-           of_property_read_bool(of_root, "ibm,fw-net-version"))
+       dn = of_find_node_by_path("/");
+       if (of_property_read_bool(dn, "ibm,powervm-partition") ||
+           of_property_read_bool(dn, "ibm,fw-net-version"))
                seq_buf_printf(&ppc_hw_desc, "hv:phyp ");
+       of_node_put(dn);
 }
 
 /*
@@ -1091,7 +1093,11 @@ static void pseries_power_off(void)
 
 static int __init pSeries_probe(void)
 {
-       if (!of_node_is_type(of_root, "chrp"))
+       struct device_node *root = of_find_node_by_path("/");
+       bool ret = of_node_is_type(root, "chrp");
+
+       of_node_put(root);
+       if (!ret)
                return 0;
 
        /* Cell blades firmware claims to be chrp while it's not. Until this
index 2dc9cbc4bcd8fea42fc12e19a93880dcde3edafb..90ff85c879bfe9bbc602fbc146a50fa5d933424a 100644
@@ -991,18 +991,6 @@ static DEVICE_ATTR_RO(cmo_allocated);
 static DEVICE_ATTR_RW(cmo_desired);
 static DEVICE_ATTR_RW(cmo_allocs_failed);
 
-static struct attribute *vio_cmo_dev_attrs[] = {
-       &dev_attr_name.attr,
-       &dev_attr_devspec.attr,
-       &dev_attr_modalias.attr,
-       &dev_attr_cmo_entitled.attr,
-       &dev_attr_cmo_allocated.attr,
-       &dev_attr_cmo_desired.attr,
-       &dev_attr_cmo_allocs_failed.attr,
-       NULL,
-};
-ATTRIBUTE_GROUPS(vio_cmo_dev);
-
 /* sysfs bus functions and data structures for CMO */
 
 #define viobus_cmo_rd_attr(name)                                        \
@@ -1062,11 +1050,7 @@ static struct attribute *vio_bus_attrs[] = {
 };
 ATTRIBUTE_GROUPS(vio_bus);
 
-static void __init vio_cmo_sysfs_init(void)
-{
-       vio_bus_type.dev_groups = vio_cmo_dev_groups;
-       vio_bus_type.bus_groups = vio_bus_groups;
-}
+static void __init vio_cmo_sysfs_init(void) { }
 #else /* CONFIG_PPC_SMLPAR */
 int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
 void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
@@ -1584,14 +1568,6 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(modalias);
 
-static struct attribute *vio_dev_attrs[] = {
-       &dev_attr_name.attr,
-       &dev_attr_devspec.attr,
-       &dev_attr_modalias.attr,
-       NULL,
-};
-ATTRIBUTE_GROUPS(vio_dev);
-
 void vio_unregister_device(struct vio_dev *viodev)
 {
        device_unregister(&viodev->dev);
@@ -1626,7 +1602,39 @@ static int vio_hotplug(const struct device *dev, struct kobj_uevent_env *env)
        return 0;
 }
 
-struct bus_type vio_bus_type = {
+#ifdef CONFIG_PPC_SMLPAR
+static struct attribute *vio_cmo_dev_attrs[] = {
+       &dev_attr_name.attr,
+       &dev_attr_devspec.attr,
+       &dev_attr_modalias.attr,
+       &dev_attr_cmo_entitled.attr,
+       &dev_attr_cmo_allocated.attr,
+       &dev_attr_cmo_desired.attr,
+       &dev_attr_cmo_allocs_failed.attr,
+       NULL,
+};
+ATTRIBUTE_GROUPS(vio_cmo_dev);
+
+const struct bus_type vio_bus_type = {
+       .name = "vio",
+       .dev_groups = vio_cmo_dev_groups,
+       .bus_groups = vio_bus_groups,
+       .uevent = vio_hotplug,
+       .match = vio_bus_match,
+       .probe = vio_bus_probe,
+       .remove = vio_bus_remove,
+       .shutdown = vio_bus_shutdown,
+};
+#else /* CONFIG_PPC_SMLPAR */
+static struct attribute *vio_dev_attrs[] = {
+       &dev_attr_name.attr,
+       &dev_attr_devspec.attr,
+       &dev_attr_modalias.attr,
+       NULL,
+};
+ATTRIBUTE_GROUPS(vio_dev);
+
+const struct bus_type vio_bus_type = {
        .name = "vio",
        .dev_groups = vio_dev_groups,
        .uevent = vio_hotplug,
@@ -1635,6 +1643,7 @@ struct bus_type vio_bus_type = {
        .remove = vio_bus_remove,
        .shutdown = vio_bus_shutdown,
 };
+#endif /* CONFIG_PPC_SMLPAR */
 
 /**
  * vio_get_attribute: - get attribute for virtual device
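
The reshuffle above removes the runtime writes that vio_cmo_sysfs_init() used to do, picking one of two const vio_bus_type definitions at build time instead; that is what allows the bus_type to move to rodata. The underlying idiom (illustrative names):

	static struct attribute *demo_dev_attrs[] = {
		&dev_attr_name.attr,
		NULL,
	};
	ATTRIBUTE_GROUPS(demo_dev);		/* generates demo_dev_groups */

	static const struct bus_type demo_bus = {
		.name       = "demo",
		.dev_groups = demo_dev_groups,	/* fixed at compile time */
	};
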
index 558ec68d768e65cdaa8266b751b8d65cd285ae02..8e6c84df4ca10c1f26ec4e1693227e0999230d4a 100644
@@ -320,7 +320,7 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data)
        return ret;
 }
 
-static int fsl_of_msi_remove(struct platform_device *ofdev)
+static void fsl_of_msi_remove(struct platform_device *ofdev)
 {
        struct fsl_msi *msi = platform_get_drvdata(ofdev);
        int virq, i;
@@ -343,8 +343,6 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
        if ((msi->feature & FSL_PIC_IP_MASK) != FSL_PIC_IP_VMPIC)
                iounmap(msi->msi_regs);
        kfree(msi);
-
-       return 0;
 }
 
 static struct lock_class_key fsl_msi_irq_class;
@@ -603,7 +601,7 @@ static struct platform_driver fsl_of_msi_driver = {
                .of_match_table = fsl_of_msi_ids,
        },
        .probe = fsl_of_msi_probe,
-       .remove = fsl_of_msi_remove,
+       .remove_new = fsl_of_msi_remove,
 };
 
 static __init int fsl_of_msi_init(void)
index dabbdd356664cdf5bf1f7eb3d07a9c54795ab721..d94cf36b0f65865cb56cabc7d4498dc3eb287a02 100644
@@ -49,7 +49,7 @@
 #define DBG(fmt...)
 #endif
 
-struct bus_type mpic_subsys = {
+const struct bus_type mpic_subsys = {
        .name = "mpic",
        .dev_name = "mpic",
 };
index fcf8d1516210fb672c5a2f0e52e7d07b49da78fe..737f97fd67d72e6d396e8002d3c915e7b007da8a 100644
@@ -173,7 +173,7 @@ out:
        return rc;
 }
 
-static int pmi_of_remove(struct platform_device *dev)
+static void pmi_of_remove(struct platform_device *dev)
 {
        struct pmi_handler *handler, *tmp;
 
@@ -189,13 +189,11 @@ static int pmi_of_remove(struct platform_device *dev)
 
        kfree(data);
        data = NULL;
-
-       return 0;
 }
 
 static struct platform_driver pmi_of_platform_driver = {
        .probe          = pmi_of_probe,
-       .remove         = pmi_of_remove,
+       .remove_new     = pmi_of_remove,
        .driver = {
                .name = "pmi",
                .of_match_table = pmi_match,
index b3b94cd37713738c580824053bf8daca5c8bffc4..d79d6633f33363123b136d973787544b2c989a51 100644
@@ -643,10 +643,8 @@ static int xmon_core(struct pt_regs *regs, volatile int fromipi)
                        touch_nmi_watchdog();
                } else {
                        cmd = 1;
-#ifdef CONFIG_SMP
                        if (xmon_batch)
                                cmd = batch_cmds(regs);
-#endif
                        if (!locked_down && cmd)
                                cmd = cmds(regs);
                        if (locked_down || cmd != 0) {
@@ -1820,8 +1818,8 @@ static void print_bug_trap(struct pt_regs *regs)
        const struct bug_entry *bug;
        unsigned long addr;
 
-       if (regs->msr & MSR_PR)
-               return;         /* not in kernel */
+       if (user_mode(regs))
+               return;
        addr = regs->nip;       /* address of trap instruction */
        if (!is_kernel_addr(addr))
                return;
@@ -3342,7 +3340,7 @@ static void show_pte(unsigned long addr)
                return;
        }
 
-       if (p4d_is_leaf(*p4dp)) {
+       if (p4d_leaf(*p4dp)) {
                format_pte(p4dp, p4d_val(*p4dp));
                return;
        }
@@ -3356,7 +3354,7 @@ static void show_pte(unsigned long addr)
                return;
        }
 
-       if (pud_is_leaf(*pudp)) {
+       if (pud_leaf(*pudp)) {
                format_pte(pudp, pud_val(*pudp));
                return;
        }
@@ -3370,7 +3368,7 @@ static void show_pte(unsigned long addr)
                return;
        }
 
-       if (pmd_is_leaf(*pmdp)) {
+       if (pmd_leaf(*pmdp)) {
                format_pte(pmdp, pmd_val(*pmdp));
                return;
        }
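
Besides swapping the open-coded MSR_PR test for user_mode(regs) (which on powerpc expands to the same MSR_PR check), xmon picks up the tree-wide rename of pmd_is_leaf()/pud_is_leaf()/p4d_is_leaf() to the generic pmd_leaf()/pud_leaf()/p4d_leaf(). Architectures with no huge entries at a given level need not define these; linux/pgtable.h supplies fallbacks along these lines (a sketch of the convention, not the exact header text):

	#ifndef pmd_leaf
	#define pmd_leaf(pmd)	false	/* no leaf (huge) PMD entries */
	#endif
	#ifndef pud_leaf
	#define pud_leaf(pud)	false
	#endif
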
index b50896734a91b76ffa1c3684d7276e54c4e9ae6d..92b1dbf55176fa1ec989a6b6567e3418a496ed13 100644
@@ -175,8 +175,6 @@ config RISCV
 
 config CLANG_SUPPORTS_DYNAMIC_FTRACE
        def_bool CC_IS_CLANG
-       # https://github.com/llvm/llvm-project/commit/6ab8927931851bb42b2c93a00801dc499d7d9b1e
-       depends on CLANG_VERSION >= 130000
        # https://github.com/ClangBuiltLinux/linux/issues/1817
        depends on AS_IS_GNU || (AS_IS_LLVM && (LD_IS_LLD || LD_VERSION >= 23600))
 
@@ -313,7 +311,7 @@ config AS_HAS_INSN
        def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero)
 
 config AS_HAS_OPTION_ARCH
-       # https://reviews.llvm.org/D123515
+       # https://github.com/llvm/llvm-project/commit/9e8ed3403c191ab9c4903e8eeb8f732ff8a43cb4
        def_bool y
        depends on $(as-instr, .option arch$(comma) +m)
 
@@ -767,7 +765,7 @@ config ARCH_SUPPORTS_CRASH_DUMP
        def_bool y
 
 config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
-       def_bool CRASH_CORE
+       def_bool CRASH_RESERVE
 
 config COMPAT
        bool "Kernel support for 32-bit U-mode"
similarity index 78%
rename from arch/riscv/include/asm/crash_core.h
rename to arch/riscv/include/asm/crash_reserve.h
index e1874b23feaf116d90b8ee9950ca270f00d5d038..013962e63587f3a06e53a508dade442b7f64e4ce 100644
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _RISCV_CRASH_CORE_H
-#define _RISCV_CRASH_CORE_H
+#ifndef _RISCV_CRASH_RESERVE_H
+#define _RISCV_CRASH_RESERVE_H
 
 #define CRASH_ALIGN                    PMD_SIZE
 
index 15055f9df4daa1e4250c8a37c64193bf5c943ee3..1276d7d9ca8b66264d1b13ff2189d9ca2e341c01 100644
 #endif
 #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 
-/*
- * Clang prior to 13 had "mcount" instead of "_mcount":
- * https://reviews.llvm.org/D98881
- */
-#if defined(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 130000
-#define MCOUNT_NAME _mcount
-#else
-#define MCOUNT_NAME mcount
-#endif
-
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #ifndef __ASSEMBLY__
 
@@ -30,7 +20,7 @@ extern void *return_address(unsigned int level);
 
 #define ftrace_return_address(n) return_address(n)
 
-void MCOUNT_NAME(void);
+void _mcount(void);
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
        return addr;
@@ -80,7 +70,7 @@ struct dyn_arch_ftrace {
  * both auipc and jalr at the same time.
  */
 
-#define MCOUNT_ADDR            ((unsigned long)MCOUNT_NAME)
+#define MCOUNT_ADDR            ((unsigned long)_mcount)
 #define JALR_SIGN_MASK         (0x00000800)
 #define JALR_OFFSET_MASK       (0x00000fff)
 #define AUIPC_OFFSET_MASK      (0xfffff000)
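
With the kernel's minimum LLVM version now past the release that taught clang to emit calls to _mcount on RISC-V (older clang used plain "mcount"), the MCOUNT_NAME indirection is dead and the header names the symbol directly. What -pg instrumentation conceptually produces (a sketch, not real compiler output):

	extern void _mcount(void);

	void traced_function(void)
	{
		_mcount();	/* call inserted in the prologue by -pg */
		/* ... function body ... */
	}
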
index b99bd66107a69038c835ead6b77725aaeaf882c3..221a5c1ee287e63156126deec3d19cefbe85affe 100644
@@ -190,7 +190,7 @@ static inline int pud_bad(pud_t pud)
 }
 
 #define pud_leaf       pud_leaf
-static inline int pud_leaf(pud_t pud)
+static inline bool pud_leaf(pud_t pud)
 {
        return pud_present(pud) && (pud_val(pud) & _PAGE_LEAF);
 }
index 6066822e7396fa5078a546356a3a6f6605470712..20242402fc11baa76b435be4f3872487f17d961d 100644
@@ -241,7 +241,7 @@ static inline int pmd_bad(pmd_t pmd)
 }
 
 #define pmd_leaf       pmd_leaf
-static inline int pmd_leaf(pmd_t pmd)
+static inline bool pmd_leaf(pmd_t pmd)
 {
        return pmd_present(pmd) && (pmd_val(pmd) & _PAGE_LEAF);
 }
@@ -527,6 +527,8 @@ static inline void __set_pte_at(pte_t *ptep, pte_t pteval)
        set_pte(ptep, pteval);
 }
 
+#define PFN_PTE_SHIFT          _PAGE_PFN_SHIFT
+
 static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
                pte_t *ptep, pte_t pteval, unsigned int nr)
 {
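
Defining PFN_PTE_SHIFT opts RISC-V into the generic set_ptes() batching, which steps a PTE to the next page with plain arithmetic. The generic helper presumably reduces to something like this (a sketch of the mm convention):

	static inline pte_t pte_next_pfn_sketch(pte_t pte)
	{
		/* advance one page: bump the PFN field in place */
		return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
	}
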
diff --git a/arch/riscv/include/asm/ptdump.h b/arch/riscv/include/asm/ptdump.h
deleted file mode 100644
index 3c9ea6d..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2019 SiFive
- */
-
-#ifndef _ASM_RISCV_PTDUMP_H
-#define _ASM_RISCV_PTDUMP_H
-
-void ptdump_check_wx(void);
-
-#ifdef CONFIG_DEBUG_WX
-static inline void debug_checkwx(void)
-{
-       ptdump_check_wx();
-}
-#else
-static inline void debug_checkwx(void)
-{
-}
-#endif
-
-#endif /* _ASM_RISCV_PTDUMP_H */
index 7499e88a947c086c5f569e98a899f50d098a8335..b1c503c2959c34ee4fb7f7c7a4b3a0b6c3dda240 100644
@@ -16,7 +16,6 @@
 #include <asm/ptrace.h>
 
 #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -166,6 +165,8 @@ enum KVM_RISCV_ISA_EXT_ID {
        KVM_RISCV_ISA_EXT_ZVFH,
        KVM_RISCV_ISA_EXT_ZVFHMIN,
        KVM_RISCV_ISA_EXT_ZFA,
+       KVM_RISCV_ISA_EXT_ZTSO,
+       KVM_RISCV_ISA_EXT_ZACAS,
        KVM_RISCV_ISA_EXT_MAX,
 };
 
index 604d6bf7e47672e9b01902f6fa497aeb4e102ee5..5e591f831638f2a18aa3008edb400c6f2f0c71ca 100644
@@ -94,7 +94,7 @@ obj-$(CONFIG_KGDB)            += kgdb.o
 obj-$(CONFIG_KEXEC_CORE)       += kexec_relocate.o crash_save_regs.o machine_kexec.o
 obj-$(CONFIG_KEXEC_FILE)       += elf_kexec.o machine_kexec_file.o
 obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
-obj-$(CONFIG_CRASH_CORE)       += crash_core.o
+obj-$(CONFIG_VMCORE_INFO)      += vmcore_info.o
 
 obj-$(CONFIG_JUMP_LABEL)       += jump_label.o
 
index 5bd1ec3341fe9cd4ea1053122ba1e556b9ab50f0..54260c16f9912aa81a738d783863cb97030aac8e 100644
@@ -117,6 +117,7 @@ static int elf_find_pbase(struct kimage *image, unsigned long kernel_len,
        return ret;
 }
 
+#ifdef CONFIG_CRASH_DUMP
 static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
 {
        unsigned int *nr_ranges = arg;
@@ -189,6 +190,7 @@ static char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
        cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
        return cmdline_ptr;
 }
+#endif
 
 static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
                            unsigned long kernel_len, char *initrd,
@@ -196,12 +198,11 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
                            unsigned long cmdline_len)
 {
        int ret;
+       void *fdt;
        unsigned long old_kernel_pbase = ULONG_MAX;
        unsigned long new_kernel_pbase = 0UL;
        unsigned long initrd_pbase = 0UL;
-       unsigned long headers_sz;
        unsigned long kernel_start;
-       void *fdt, *headers;
        struct elfhdr ehdr;
        struct kexec_buf kbuf;
        struct kexec_elf_info elf_info;
@@ -227,8 +228,11 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
        kbuf.buf_min = new_kernel_pbase + kernel_len;
        kbuf.buf_max = ULONG_MAX;
 
+#ifdef CONFIG_CRASH_DUMP
        /* Add elfcorehdr */
        if (image->type == KEXEC_TYPE_CRASH) {
+               void *headers;
+               unsigned long headers_sz;
                ret = prepare_elf_headers(&headers, &headers_sz);
                if (ret) {
                        pr_err("Preparing elf core header failed\n");
@@ -264,6 +268,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
                }
                cmdline = modified_cmdline;
        }
+#endif
 
 #ifdef CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY
        /* Add purgatory to the image */
index d7ec69ac6910c6ea0dadcbf7757511cc2f5f2914..3a42f6287909d094dca45d5ab2fbb012621e18aa 100644
@@ -50,8 +50,8 @@
 
 SYM_TYPED_FUNC_START(ftrace_stub)
 #ifdef CONFIG_DYNAMIC_FTRACE
-       .global MCOUNT_NAME
-       .set    MCOUNT_NAME, ftrace_stub
+       .global _mcount
+       .set    _mcount, ftrace_stub
 #endif
        ret
 SYM_FUNC_END(ftrace_stub)
@@ -80,7 +80,7 @@ SYM_FUNC_END(return_to_handler)
 #endif
 
 #ifndef CONFIG_DYNAMIC_FTRACE
-SYM_FUNC_START(MCOUNT_NAME)
+SYM_FUNC_START(_mcount)
        la      t4, ftrace_stub
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        la      t0, ftrace_graph_return
@@ -126,6 +126,6 @@ SYM_FUNC_START(MCOUNT_NAME)
        jalr    t5
        RESTORE_ABI_STATE
        ret
-SYM_FUNC_END(MCOUNT_NAME)
+SYM_FUNC_END(_mcount)
 #endif
-EXPORT_SYMBOL(MCOUNT_NAME)
+EXPORT_SYMBOL(_mcount)
similarity index 88%
rename from arch/riscv/kernel/crash_core.c
rename to arch/riscv/kernel/vmcore_info.c
index 8706736fd4e2dca53d096d1caa5117da9fc08873..6d7a22522d630978299394e19973f9bfb83ce1ab 100644
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
 #include <linux/pagemap.h>
 
 void arch_crash_save_vmcoreinfo(void)
@@ -8,7 +8,6 @@ void arch_crash_save_vmcoreinfo(void)
        VMCOREINFO_NUMBER(phys_ram_base);
 
        vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET);
-       vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
        vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
 #ifdef CONFIG_MMU
        VMCOREINFO_NUMBER(VA_BITS);
index d490db94385883eb3e74048c799cea3f6a07e8cf..26d1727f0550d3dbf8cf55ce9e1d3ca1e2d41fc4 100644
@@ -24,6 +24,7 @@ config KVM
        select HAVE_KVM_IRQ_ROUTING
        select HAVE_KVM_MSI
        select HAVE_KVM_VCPU_ASYNC_IOCTL
+       select HAVE_KVM_READONLY_MEM
        select KVM_COMMON
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
        select KVM_GENERIC_HARDWARE_ENABLING
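
This pairs with dropping __KVM_HAVE_READONLY_MEM from the uapi header earlier: the capability becomes a Kconfig select that common KVM code can test at compile time. Roughly how such a select gets consumed (a sketch, not the exact kvm_main.c code):

	static u32 supported_mem_flags(void)
	{
		u32 flags = KVM_MEM_LOG_DIRTY_PAGES;

		if (IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM))
			flags |= KVM_MEM_READONLY;	/* arch opted in via Kconfig */
		return flags;
	}
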
index 7a6abed41bc170b9545662c3c9387b109ca2db3f..ee7215f4071f52186c4aa2295fbb385cae0f7a0f 100644
@@ -7,6 +7,8 @@
 #include <linux/bitops.h>
 #include <linux/kvm_host.h>
 
+#include <asm/cpufeature.h>
+
 #define INSN_OPCODE_MASK       0x007c
 #define INSN_OPCODE_SHIFT      2
 #define INSN_OPCODE_SYSTEM     28
@@ -213,9 +215,20 @@ struct csr_func {
                    unsigned long wr_mask);
 };
 
+static int seed_csr_rmw(struct kvm_vcpu *vcpu, unsigned int csr_num,
+                       unsigned long *val, unsigned long new_val,
+                       unsigned long wr_mask)
+{
+       if (!riscv_isa_extension_available(vcpu->arch.isa, ZKR))
+               return KVM_INSN_ILLEGAL_TRAP;
+
+       return KVM_INSN_EXIT_TO_USER_SPACE;
+}
+
 static const struct csr_func csr_funcs[] = {
        KVM_RISCV_VCPU_AIA_CSR_FUNCS
        KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS
+       { .base = CSR_SEED, .count = 1, .func = seed_csr_rmw },
 };
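
seed_csr_rmw() slots into the table-driven CSR dispatch above: a guest access to the seed CSR either raises an illegal-instruction trap (when Zkr is not exposed to the guest) or is forwarded to userspace. The surrounding scan works roughly like this (simplified sketch; the fallback for unhandled CSRs is assumed):

	static int csr_dispatch(struct kvm_vcpu *vcpu, unsigned int csr_num,
				unsigned long *val, unsigned long new_val,
				unsigned long wr_mask)
	{
		size_t i;

		for (i = 0; i < ARRAY_SIZE(csr_funcs); i++) {
			const struct csr_func *f = &csr_funcs[i];

			if (csr_num >= f->base && csr_num < f->base + f->count)
				return f->func(vcpu, csr_num, val, new_val, wr_mask);
		}
		return KVM_INSN_ILLEGAL_TRAP;	/* assumed default */
	}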
 
 /**
index 5f7355e960084b4a4a17ea294e92352f7a70da60..f4a6124d25c939ecdf5dc631d8c7aa69a2684621 100644
@@ -40,6 +40,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
        KVM_ISA_EXT_ARR(SVINVAL),
        KVM_ISA_EXT_ARR(SVNAPOT),
        KVM_ISA_EXT_ARR(SVPBMT),
+       KVM_ISA_EXT_ARR(ZACAS),
        KVM_ISA_EXT_ARR(ZBA),
        KVM_ISA_EXT_ARR(ZBB),
        KVM_ISA_EXT_ARR(ZBC),
@@ -66,6 +67,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
        KVM_ISA_EXT_ARR(ZKSED),
        KVM_ISA_EXT_ARR(ZKSH),
        KVM_ISA_EXT_ARR(ZKT),
+       KVM_ISA_EXT_ARR(ZTSO),
        KVM_ISA_EXT_ARR(ZVBB),
        KVM_ISA_EXT_ARR(ZVBC),
        KVM_ISA_EXT_ARR(ZVFH),
@@ -117,6 +119,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
        case KVM_RISCV_ISA_EXT_SSTC:
        case KVM_RISCV_ISA_EXT_SVINVAL:
        case KVM_RISCV_ISA_EXT_SVNAPOT:
+       case KVM_RISCV_ISA_EXT_ZACAS:
        case KVM_RISCV_ISA_EXT_ZBA:
        case KVM_RISCV_ISA_EXT_ZBB:
        case KVM_RISCV_ISA_EXT_ZBC:
@@ -141,6 +144,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
        case KVM_RISCV_ISA_EXT_ZKSED:
        case KVM_RISCV_ISA_EXT_ZKSH:
        case KVM_RISCV_ISA_EXT_ZKT:
+       case KVM_RISCV_ISA_EXT_ZTSO:
        case KVM_RISCV_ISA_EXT_ZVBB:
        case KVM_RISCV_ISA_EXT_ZVBC:
        case KVM_RISCV_ISA_EXT_ZVFH:
index fa34cf55037bd37ad0b8d3bb3b67f6f91d243f58..b5ffb2ef54ad2244ae59598782c3ad3307b5ffe5 100644
@@ -29,7 +29,6 @@
 #include <asm/io.h>
 #include <asm/numa.h>
 #include <asm/pgtable.h>
-#include <asm/ptdump.h>
 #include <asm/sections.h>
 #include <asm/soc.h>
 #include <asm/tlbflush.h>
@@ -723,8 +722,6 @@ void mark_rodata_ro(void)
        if (IS_ENABLED(CONFIG_64BIT))
                set_kernel_memory(lm_alias(__start_rodata), lm_alias(_data),
                                  set_memory_ro);
-
-       debug_checkwx();
 }
 #else
 static __init pgprot_t pgprot_from_va(uintptr_t va)
@@ -1358,7 +1355,7 @@ static void __init arch_reserve_crashkernel(void)
        bool high = false;
        int ret;
 
-       if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+       if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
                return;
 
        ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
index 657c27bc07a7694edbb70795c4b2bd102b8780d0..1289cc6d3700cde8d68f022612d3e4229dd768b6 100644
@@ -9,7 +9,6 @@
 #include <linux/seq_file.h>
 #include <linux/ptdump.h>
 
-#include <asm/ptdump.h>
 #include <linux/pgtable.h>
 #include <asm/kasan.h>
 
@@ -336,7 +335,7 @@ static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
        ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL);
 }
 
-void ptdump_check_wx(void)
+bool ptdump_check_wx(void)
 {
        struct pg_state st = {
                .seq = NULL,
@@ -357,11 +356,16 @@ void ptdump_check_wx(void)
 
        ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
 
-       if (st.wx_pages)
+       if (st.wx_pages) {
                pr_warn("Checked W+X mappings: failed, %lu W+X pages found\n",
                        st.wx_pages);
-       else
+
+               return false;
+       } else {
                pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+
+               return true;
+       }
 }
 
 static int ptdump_show(struct seq_file *m, void *v)
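
ptdump_check_wx() now tells its caller whether the W+X scan passed instead of only logging, which is what let the per-arch asm/ptdump.h wrappers (both deleted above) collapse into one generic helper. Presumably consumed along these lines (sketch):

	static inline void debug_checkwx_sketch(void)
	{
		if (IS_ENABLED(CONFIG_DEBUG_WX))
			WARN_ON_ONCE(!ptdump_check_wx());	/* shout if W+X pages exist */
	}
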
index 9e52461f35cb955c51362b0f4440c4de2cf1cf81..367bf5bc4a5bc5353fe2c7060de487a870258d53 100644
@@ -113,6 +113,7 @@ config S390
        select ARCH_INLINE_WRITE_UNLOCK_BH
        select ARCH_INLINE_WRITE_UNLOCK_IRQ
        select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+       select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
        select ARCH_STACKWALK
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC
@@ -194,7 +195,6 @@ config S390
        select HAVE_KPROBES
        select HAVE_KPROBES_ON_FTRACE
        select HAVE_KRETPROBES
-       select HAVE_KVM
        select HAVE_LIVEPATCH
        select HAVE_MEMBLOCK_PHYS_MAP
        select HAVE_MOD_ARCH_SPECIFIC
index e3a4500a5a75714370285f46f8c5a97518b6a31d..09b10bb6e4d0700fac2b00526a575c723f092ca7 100644
@@ -333,7 +333,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e
                        }
                        pte = boot_pte_alloc();
                        pmd_populate(&init_mm, pmd, pte);
-               } else if (pmd_large(*pmd)) {
+               } else if (pmd_leaf(*pmd)) {
                        continue;
                }
                pgtable_pte_populate(pmd, addr, next, mode);
@@ -366,7 +366,7 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e
                        }
                        pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
                        pud_populate(&init_mm, pud, pmd);
-               } else if (pud_large(*pud)) {
+               } else if (pud_leaf(*pud)) {
                        continue;
                }
                pgtable_pmd_populate(pud, addr, next, mode);
index 5a82b08f03cd3e9602311a04ed06b6e52acde3e2..621f23d5ae30a6a05e39e3bf54df1a3b80011f68 100644
@@ -9,7 +9,7 @@
 #ifndef __ASSEMBLY__
 
 #ifdef CONFIG_CC_IS_CLANG
-/* https://bugs.llvm.org/show_bug.cgi?id=41424 */
+/* https://llvm.org/pr41424 */
 #define ftrace_return_address(n) 0UL
 #else
 #define ftrace_return_address(n) __builtin_return_address(n)
index 502d655fe6ae6650dd24fded14215f903c0311e3..7b84ef6dc4b6d49a53d092fd115eb2bcae00f391 100644
@@ -23,9 +23,9 @@ unsigned long *crst_table_alloc(struct mm_struct *);
 void crst_table_free(struct mm_struct *, unsigned long *);
 
 unsigned long *page_table_alloc(struct mm_struct *);
-struct page *page_table_alloc_pgste(struct mm_struct *mm);
+struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm);
 void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_pgste(struct page *page);
+void page_table_free_pgste(struct ptdesc *ptdesc);
 extern int page_table_allocate_pgste;
 
 static inline void crst_table_init(unsigned long *crst, unsigned long entry)
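
The pgste allocator trades struct page for struct ptdesc here, following the mm-wide typed page-table descriptor conversion. The matching generic helpers in use (illustrative; the gfp/order values are examples):

	struct ptdesc *pt = pagetable_alloc(GFP_KERNEL, 0);	/* one table page */

	if (pt) {
		void *table = ptdesc_address(pt);	/* kernel address of the table */
		/* ... populate the table ... */
		pagetable_free(pt);
	}
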
index 1299b56e43f6f9ff52854ae826188a7480c892d9..60950e7a25f585b5b2d0f0bf8ce0fff9c91b475c 100644
@@ -705,23 +705,23 @@ static inline int pud_none(pud_t pud)
        return pud_val(pud) == _REGION3_ENTRY_EMPTY;
 }
 
-#define pud_leaf       pud_large
-static inline int pud_large(pud_t pud)
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
 {
        if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
                return 0;
        return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
 }
 
-#define pmd_leaf       pmd_large
-static inline int pmd_large(pmd_t pmd)
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
 {
        return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
 }
 
 static inline int pmd_bad(pmd_t pmd)
 {
-       if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd))
+       if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_leaf(pmd))
                return 1;
        return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
 }
@@ -730,7 +730,7 @@ static inline int pud_bad(pud_t pud)
 {
        unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
 
-       if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud))
+       if (type > _REGION_ENTRY_TYPE_R3 || pud_leaf(pud))
                return 1;
        if (type < _REGION_ENTRY_TYPE_R3)
                return 0;
@@ -820,8 +820,8 @@ static inline int pte_protnone(pte_t pte)
 
 static inline int pmd_protnone(pmd_t pmd)
 {
-       /* pmd_large(pmd) implies pmd_present(pmd) */
-       return pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
+       /* pmd_leaf(pmd) implies pmd_present(pmd) */
+       return pmd_leaf(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_READ);
 }
 #endif
 
@@ -1316,6 +1316,8 @@ pgprot_t pgprot_writecombine(pgprot_t prot);
 #define pgprot_writethrough    pgprot_writethrough
 pgprot_t pgprot_writethrough(pgprot_t prot);
 
+#define PFN_PTE_SHIFT          PAGE_SHIFT
+
 /*
  * Set multiple PTEs to consecutive pages with a single call.  All PTEs
  * are within the same folio, PMD and VMA.
@@ -1383,7 +1385,7 @@ static inline unsigned long pmd_deref(pmd_t pmd)
        unsigned long origin_mask;
 
        origin_mask = _SEGMENT_ENTRY_ORIGIN;
-       if (pmd_large(pmd))
+       if (pmd_leaf(pmd))
                origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
        return (unsigned long)__va(pmd_val(pmd) & origin_mask);
 }
@@ -1398,7 +1400,7 @@ static inline unsigned long pud_deref(pud_t pud)
        unsigned long origin_mask;
 
        origin_mask = _REGION_ENTRY_ORIGIN;
-       if (pud_large(pud))
+       if (pud_leaf(pud))
                origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
        return (unsigned long)__va(pud_val(pud) & origin_mask);
 }
diff --git a/arch/s390/include/asm/ptdump.h b/arch/s390/include/asm/ptdump.h
deleted file mode 100644 (file)
index f960b28..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _ASM_S390_PTDUMP_H
-#define _ASM_S390_PTDUMP_H
-
-void ptdump_check_wx(void);
-
-static inline void debug_checkwx(void)
-{
-       if (IS_ENABLED(CONFIG_DEBUG_WX))
-               ptdump_check_wx();
-}
-
-#endif /* _ASM_S390_PTDUMP_H */
index d1455a601adcad03a6bd7ec2f467cec47ed55bd8..e95b2c8081eb8ec4f7017aea1f4007aee8e1b7ce 100644
@@ -25,8 +25,9 @@
 void __tlb_remove_table(void *_table);
 static inline void tlb_flush(struct mmu_gather *tlb);
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-                                         struct encoded_page *page,
-                                         int page_size);
+               struct page *page, bool delay_rmap, int page_size);
+static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
+               struct page *page, unsigned int nr_pages, bool delay_rmap);
 
 #define tlb_flush tlb_flush
 #define pte_free_tlb pte_free_tlb
@@ -42,14 +43,29 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  *
- * s390 doesn't delay rmap removal, so there is nothing encoded in
- * the page pointer.
+ * s390 doesn't delay rmap removal.
  */
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-                                         struct encoded_page *page,
-                                         int page_size)
+               struct page *page, bool delay_rmap, int page_size)
 {
-       free_page_and_swap_cache(encoded_page_ptr(page));
+       VM_WARN_ON_ONCE(delay_rmap);
+
+       free_page_and_swap_cache(page);
+       return false;
+}
+
+static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
+               struct page *page, unsigned int nr_pages, bool delay_rmap)
+{
+       struct encoded_page *encoded_pages[] = {
+               encode_page(page, ENCODED_PAGE_BIT_NR_PAGES_NEXT),
+               encode_nr_pages(nr_pages),
+       };
+
+       VM_WARN_ON_ONCE(delay_rmap);
+       VM_WARN_ON_ONCE(page_folio(page) != page_folio(page + nr_pages - 1));
+
+       free_pages_and_swap_cache(encoded_pages, ARRAY_SIZE(encoded_pages));
        return false;
 }
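
s390 frees pages immediately rather than batching, so its __tlb_remove_folio_pages() builds the two-slot encoded_page array on the spot: the flag bit on the first entry says the next slot carries a count, letting one free_pages_and_swap_cache() call release nr_pages contiguous pages of a folio. The decode side of that convention, as a sketch (the decode helper names are assumed from the mm half of the series):

	struct page *page = encoded_page_ptr(enc[0]);
	unsigned int nr = 1;

	if (encoded_page_flags(enc[0]) & ENCODED_PAGE_BIT_NR_PAGES_NEXT)
		nr = encoded_nr_pages(enc[1]);	/* second slot held a count */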
 
index abe926d43cbe0a06342f8c53f202cdd707ea1693..05eaf6db3ad4cba4269b1ce36563096eab236b1d 100644
 #include <linux/types.h>
 
 #define __KVM_S390
-#define __KVM_HAVE_GUEST_DEBUG
+
+struct kvm_s390_skeys {
+       __u64 start_gfn;
+       __u64 count;
+       __u64 skeydata_addr;
+       __u32 flags;
+       __u32 reserved[9];
+};
+
+#define KVM_S390_CMMA_PEEK (1 << 0)
+
+/**
+ * kvm_s390_cmma_log - Used for CMMA migration.
+ *
+ * Used both for input and output.
+ *
+ * @start_gfn: Guest page number to start from.
+ * @count: Size of the result buffer.
+ * @flags: Control operation mode via KVM_S390_CMMA_* flags
+ * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
+ *             pages are still remaining.
+ * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
+ *        in the PGSTE.
+ * @values: Pointer to the values buffer.
+ *
+ * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
+ */
+struct kvm_s390_cmma_log {
+       __u64 start_gfn;
+       __u32 count;
+       __u32 flags;
+       union {
+               __u64 remaining;
+               __u64 mask;
+       };
+       __u64 values;
+};
+
+#define KVM_S390_RESET_POR       1
+#define KVM_S390_RESET_CLEAR     2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT  8
+#define KVM_S390_RESET_IPL       16
+
+/* for KVM_S390_MEM_OP */
+struct kvm_s390_mem_op {
+       /* in */
+       __u64 gaddr;            /* the guest address */
+       __u64 flags;            /* flags */
+       __u32 size;             /* amount of bytes */
+       __u32 op;               /* type of operation */
+       __u64 buf;              /* buffer in userspace */
+       union {
+               struct {
+                       __u8 ar;        /* the access register number */
+                       __u8 key;       /* access key, ignored if flag unset */
+                       __u8 pad1[6];   /* ignored */
+                       __u64 old_addr; /* ignored if cmpxchg flag unset */
+               };
+               __u32 sida_offset; /* offset into the sida */
+               __u8 reserved[32]; /* ignored */
+       };
+};
+/* types for kvm_s390_mem_op->op */
+#define KVM_S390_MEMOP_LOGICAL_READ    0
+#define KVM_S390_MEMOP_LOGICAL_WRITE   1
+#define KVM_S390_MEMOP_SIDA_READ       2
+#define KVM_S390_MEMOP_SIDA_WRITE      3
+#define KVM_S390_MEMOP_ABSOLUTE_READ   4
+#define KVM_S390_MEMOP_ABSOLUTE_WRITE  5
+#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG        6
+
+/* flags for kvm_s390_mem_op->flags */
+#define KVM_S390_MEMOP_F_CHECK_ONLY            (1ULL << 0)
+#define KVM_S390_MEMOP_F_INJECT_EXCEPTION      (1ULL << 1)
+#define KVM_S390_MEMOP_F_SKEY_PROTECTION       (1ULL << 2)
+
+/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
+#define KVM_S390_MEMOP_EXTENSION_CAP_BASE      (1 << 0)
+#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG   (1 << 1)
+
+struct kvm_s390_psw {
+       __u64 mask;
+       __u64 addr;
+};
+
+/* valid values for type in kvm_s390_interrupt */
+#define KVM_S390_SIGP_STOP             0xfffe0000u
+#define KVM_S390_PROGRAM_INT           0xfffe0001u
+#define KVM_S390_SIGP_SET_PREFIX       0xfffe0002u
+#define KVM_S390_RESTART               0xfffe0003u
+#define KVM_S390_INT_PFAULT_INIT       0xfffe0004u
+#define KVM_S390_INT_PFAULT_DONE       0xfffe0005u
+#define KVM_S390_MCHK                  0xfffe1000u
+#define KVM_S390_INT_CLOCK_COMP                0xffff1004u
+#define KVM_S390_INT_CPU_TIMER         0xffff1005u
+#define KVM_S390_INT_VIRTIO            0xffff2603u
+#define KVM_S390_INT_SERVICE           0xffff2401u
+#define KVM_S390_INT_EMERGENCY         0xffff1201u
+#define KVM_S390_INT_EXTERNAL_CALL     0xffff1202u
+/* Anything below 0xfffe0000u is taken by INT_IO */
+#define KVM_S390_INT_IO(ai,cssid,ssid,schid)   \
+       (((schid)) |                           \
+        ((ssid) << 16) |                      \
+        ((cssid) << 18) |                     \
+        ((ai) << 26))
+#define KVM_S390_INT_IO_MIN            0x00000000u
+#define KVM_S390_INT_IO_MAX            0xfffdffffu
+#define KVM_S390_INT_IO_AI_MASK                0x04000000u
+
+
+struct kvm_s390_interrupt {
+       __u32 type;
+       __u32 parm;
+       __u64 parm64;
+};
+
+struct kvm_s390_io_info {
+       __u16 subchannel_id;
+       __u16 subchannel_nr;
+       __u32 io_int_parm;
+       __u32 io_int_word;
+};
+
+struct kvm_s390_ext_info {
+       __u32 ext_params;
+       __u32 pad;
+       __u64 ext_params2;
+};
+
+struct kvm_s390_pgm_info {
+       __u64 trans_exc_code;
+       __u64 mon_code;
+       __u64 per_address;
+       __u32 data_exc_code;
+       __u16 code;
+       __u16 mon_class_nr;
+       __u8 per_code;
+       __u8 per_atmid;
+       __u8 exc_access_id;
+       __u8 per_access_id;
+       __u8 op_access_id;
+#define KVM_S390_PGM_FLAGS_ILC_VALID   0x01
+#define KVM_S390_PGM_FLAGS_ILC_0       0x02
+#define KVM_S390_PGM_FLAGS_ILC_1       0x04
+#define KVM_S390_PGM_FLAGS_ILC_MASK    0x06
+#define KVM_S390_PGM_FLAGS_NO_REWIND   0x08
+       __u8 flags;
+       __u8 pad[2];
+};
+
+struct kvm_s390_prefix_info {
+       __u32 address;
+};
+
+struct kvm_s390_extcall_info {
+       __u16 code;
+};
+
+struct kvm_s390_emerg_info {
+       __u16 code;
+};
+
+#define KVM_S390_STOP_FLAG_STORE_STATUS        0x01
+struct kvm_s390_stop_info {
+       __u32 flags;
+};
+
+struct kvm_s390_mchk_info {
+       __u64 cr14;
+       __u64 mcic;
+       __u64 failing_storage_address;
+       __u32 ext_damage_code;
+       __u32 pad;
+       __u8 fixed_logout[16];
+};
+
+struct kvm_s390_irq {
+       __u64 type;
+       union {
+               struct kvm_s390_io_info io;
+               struct kvm_s390_ext_info ext;
+               struct kvm_s390_pgm_info pgm;
+               struct kvm_s390_emerg_info emerg;
+               struct kvm_s390_extcall_info extcall;
+               struct kvm_s390_prefix_info prefix;
+               struct kvm_s390_stop_info stop;
+               struct kvm_s390_mchk_info mchk;
+               char reserved[64];
+       } u;
+};
+
+struct kvm_s390_irq_state {
+       __u64 buf;
+       __u32 flags;        /* will stay unused for compatibility reasons */
+       __u32 len;
+       __u32 reserved[4];  /* will stay unused for compatibility reasons */
+};
+
+struct kvm_s390_ucas_mapping {
+       __u64 user_addr;
+       __u64 vcpu_addr;
+       __u64 length;
+};
+
+struct kvm_s390_pv_sec_parm {
+       __u64 origin;
+       __u64 length;
+};
+
+struct kvm_s390_pv_unp {
+       __u64 addr;
+       __u64 size;
+       __u64 tweak;
+};
+
+enum pv_cmd_dmp_id {
+       KVM_PV_DUMP_INIT,
+       KVM_PV_DUMP_CONFIG_STOR_STATE,
+       KVM_PV_DUMP_COMPLETE,
+       KVM_PV_DUMP_CPU,
+};
+
+struct kvm_s390_pv_dmp {
+       __u64 subcmd;
+       __u64 buff_addr;
+       __u64 buff_len;
+       __u64 gaddr;            /* For dump storage state */
+       __u64 reserved[4];
+};
+
+enum pv_cmd_info_id {
+       KVM_PV_INFO_VM,
+       KVM_PV_INFO_DUMP,
+};
+
+struct kvm_s390_pv_info_dump {
+       __u64 dump_cpu_buffer_len;
+       __u64 dump_config_mem_buffer_per_1m;
+       __u64 dump_config_finalize_len;
+};
+
+struct kvm_s390_pv_info_vm {
+       __u64 inst_calls_list[4];
+       __u64 max_cpus;
+       __u64 max_guests;
+       __u64 max_guest_addr;
+       __u64 feature_indication;
+};
+
+struct kvm_s390_pv_info_header {
+       __u32 id;
+       __u32 len_max;
+       __u32 len_written;
+       __u32 reserved;
+};
+
+struct kvm_s390_pv_info {
+       struct kvm_s390_pv_info_header header;
+       union {
+               struct kvm_s390_pv_info_dump dump;
+               struct kvm_s390_pv_info_vm vm;
+       };
+};
+
+enum pv_cmd_id {
+       KVM_PV_ENABLE,
+       KVM_PV_DISABLE,
+       KVM_PV_SET_SEC_PARMS,
+       KVM_PV_UNPACK,
+       KVM_PV_VERIFY,
+       KVM_PV_PREP_RESET,
+       KVM_PV_UNSHARE_ALL,
+       KVM_PV_INFO,
+       KVM_PV_DUMP,
+       KVM_PV_ASYNC_CLEANUP_PREPARE,
+       KVM_PV_ASYNC_CLEANUP_PERFORM,
+};
+
+struct kvm_pv_cmd {
+       __u32 cmd;      /* Command to be executed */
+       __u16 rc;       /* Ultravisor return code */
+       __u16 rrc;      /* Ultravisor return reason code */
+       __u64 data;     /* Data or address */
+       __u32 flags;    /* flags for future extensions. Must be 0 for now */
+       __u32 reserved[3];
+};
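+
+/*
+ * Sketch only: how the PV structures above are driven from userspace via
+ * the existing KVM_S390_PV_COMMAND vm ioctl. The flow follows the
+ * documented API; both helpers are illustrative, not from this patch.
+ *
+ *     // Enable protected virtualization for a VM.
+ *     static int pv_enable(int vm_fd)
+ *     {
+ *             struct kvm_pv_cmd cmd = {
+ *                     .cmd = KVM_PV_ENABLE,   // flags must be 0, data unused
+ *             };
+ *
+ *             // On failure, cmd.rc/cmd.rrc carry the Ultravisor codes.
+ *             return ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
+ *     }
+ *
+ *     // Query dump parameters via KVM_PV_INFO.
+ *     static int pv_query_dump_info(int vm_fd, struct kvm_s390_pv_info *info)
+ *     {
+ *             struct kvm_pv_cmd cmd = {
+ *                     .cmd  = KVM_PV_INFO,
+ *                     .data = (__u64)(unsigned long)info,
+ *             };
+ *
+ *             info->header.id = KVM_PV_INFO_DUMP;
+ *             info->header.len_max = sizeof(*info);
+ *             return ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
+ *     }
+ */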
+
+struct kvm_s390_zpci_op {
+       /* in */
+       __u32 fh;               /* target device */
+       __u8  op;               /* operation to perform */
+       __u8  pad[3];
+       union {
+               /* for KVM_S390_ZPCIOP_REG_AEN */
+               struct {
+                       __u64 ibv;      /* Guest addr of interrupt bit vector */
+                       __u64 sb;       /* Guest addr of summary bit */
+                       __u32 flags;
+                       __u32 noi;      /* Number of interrupts */
+                       __u8 isc;       /* Guest interrupt subclass */
+                       __u8 sbo;       /* Offset of guest summary bit vector */
+                       __u16 pad;
+               } reg_aen;
+               __u64 reserved[8];
+       } u;
+};
+
+/* types for kvm_s390_zpci_op->op */
+#define KVM_S390_ZPCIOP_REG_AEN                0
+#define KVM_S390_ZPCIOP_DEREG_AEN      1
+
+/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
+#define KVM_S390_ZPCIOP_REGAEN_HOST    (1 << 0)
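+
+/*
+ * Illustrative caller for the zPCI operation above; KVM_S390_ZPCI_OP is
+ * the existing vm ioctl that consumes struct kvm_s390_zpci_op, the helper
+ * itself is hypothetical.
+ *
+ *     // Register adapter event notification for the passthrough device
+ *     // identified by function handle fh.
+ *     static int zpci_reg_aen(int vm_fd, __u32 fh, __u64 ibv_gaddr,
+ *                             __u64 sb_gaddr, __u32 noi, __u8 isc)
+ *     {
+ *             struct kvm_s390_zpci_op op = {
+ *                     .fh = fh,
+ *                     .op = KVM_S390_ZPCIOP_REG_AEN,
+ *                     .u.reg_aen = {
+ *                             .ibv   = ibv_gaddr,
+ *                             .sb    = sb_gaddr,
+ *                             .noi   = noi,
+ *                             .isc   = isc,
+ *                             .flags = KVM_S390_ZPCIOP_REGAEN_HOST,
+ *                     },
+ *             };
+ *
+ *             return ioctl(vm_fd, KVM_S390_ZPCI_OP, &op);
+ *     }
+ */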
 
 /* Device control API: s390-specific devices */
 #define KVM_DEV_FLIC_GET_ALL_IRQS      1
index 7a562b4199c81b2b2bcb30f4efe0a84e59075a99..fa029d0dc28ff90567a47ec2ff264198c1342890 100644 (file)
@@ -64,6 +64,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
 obj-$(CONFIG_FUNCTION_TRACER)  += mcount.o
 obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
 obj-$(CONFIG_KEXEC_CORE)       += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_VMCORE_INFO)      += vmcore_info.o
 obj-$(CONFIG_UPROBES)          += uprobes.o
 obj-$(CONFIG_JUMP_LABEL)       += jump_label.o
 
index 9da6fa30c44749fd7f4b044f5d211f7cd25d2b33..4d364de4379921c852de6d98fcea95e82ed53743 100644 (file)
@@ -40,8 +40,10 @@ static int kexec_file_add_kernel_elf(struct kimage *image,
                buf.bufsz = phdr->p_filesz;
 
                buf.mem = ALIGN(phdr->p_paddr, phdr->p_align);
+#ifdef CONFIG_CRASH_DUMP
                if (image->type == KEXEC_TYPE_CRASH)
                        buf.mem += crashk_res.start;
+#endif
                buf.memsz = phdr->p_memsz;
                data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz;
 
index af23eff5774dba339beeb69c34565f92edd87568..a32ce8bea745cfde0a4eea3ff57ac0572714f504 100644 (file)
@@ -24,8 +24,10 @@ static int kexec_file_add_kernel_image(struct kimage *image,
        buf.bufsz = image->kernel_buf_len;
 
        buf.mem = 0;
+#ifdef CONFIG_CRASH_DUMP
        if (image->type == KEXEC_TYPE_CRASH)
                buf.mem += crashk_res.start;
+#endif
        buf.memsz = buf.bufsz;
 
        data->kernel_buf = image->kernel_buf;
index c5d0c1cf984bb8e2bb0014bc4f695a72e34afd5f..3aee98efc37420b20410c72710119d4b4759aceb 100644 (file)
@@ -210,21 +210,6 @@ void machine_kexec_cleanup(struct kimage *image)
 {
 }
 
-void arch_crash_save_vmcoreinfo(void)
-{
-       struct lowcore *abs_lc;
-
-       VMCOREINFO_SYMBOL(lowcore_ptr);
-       VMCOREINFO_SYMBOL(high_memory);
-       VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
-       vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
-       vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
-       vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
-       abs_lc = get_abs_lowcore();
-       abs_lc->vmcore_info = paddr_vmcoreinfo_note();
-       put_abs_lowcore(abs_lc);
-}
-
 void machine_shutdown(void)
 {
 }
index 8d207b82d9feddcd0d12f650e73c4f500bacfc84..c2bac14dd668ae39d7fd9f2aa6092618464f3fd4 100644 (file)
@@ -105,6 +105,7 @@ static int kexec_file_update_purgatory(struct kimage *image,
        if (ret)
                return ret;
 
+#ifdef CONFIG_CRASH_DUMP
        if (image->type == KEXEC_TYPE_CRASH) {
                u64 crash_size;
 
@@ -121,6 +122,7 @@ static int kexec_file_update_purgatory(struct kimage *image,
                                                     sizeof(crash_size),
                                                     false);
        }
+#endif
        return ret;
 }
 
@@ -134,8 +136,10 @@ static int kexec_file_add_purgatory(struct kimage *image,
 
        data->memsz = ALIGN(data->memsz, PAGE_SIZE);
        buf.mem = data->memsz;
+#ifdef CONFIG_CRASH_DUMP
        if (image->type == KEXEC_TYPE_CRASH)
                buf.mem += crashk_res.start;
+#endif
 
        ret = kexec_load_purgatory(image, &buf);
        if (ret)
@@ -158,8 +162,10 @@ static int kexec_file_add_initrd(struct kimage *image,
 
        data->memsz = ALIGN(data->memsz, PAGE_SIZE);
        buf.mem = data->memsz;
+#ifdef CONFIG_CRASH_DUMP
        if (image->type == KEXEC_TYPE_CRASH)
                buf.mem += crashk_res.start;
+#endif
        buf.memsz = buf.bufsz;
 
        data->parm->initrd_start = data->memsz;
@@ -223,8 +229,10 @@ static int kexec_file_add_ipl_report(struct kimage *image,
                data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
        *lc_ipl_parmblock_ptr = (__u32)buf.mem;
 
+#ifdef CONFIG_CRASH_DUMP
        if (image->type == KEXEC_TYPE_CRASH)
                buf.mem += crashk_res.start;
+#endif
 
        ret = kexec_add_buffer(&buf);
 out:
@@ -268,10 +276,12 @@ void *kexec_file_add_components(struct kimage *image,
        memcpy(data.parm->command_line, image->cmdline_buf,
               image->cmdline_buf_len);
 
+#ifdef CONFIG_CRASH_DUMP
        if (image->type == KEXEC_TYPE_CRASH) {
                data.parm->oldmem_base = crashk_res.start;
                data.parm->oldmem_size = crashk_res.end - crashk_res.start + 1;
        }
+#endif
 
        if (image->initrd_buf) {
                ret = kexec_file_add_initrd(image, &data);
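
[ The hunks above apply one mechanical change: crash-kernel handling is fenced off so the file still builds when CONFIG_CRASH_DUMP is unset and crashk_res is therefore undefined. A condensed sketch of the pattern, with a hypothetical function not taken from the patch:

#ifdef CONFIG_CRASH_DUMP
/* Crash kernels load relative to the reserved crashkernel window. */
static unsigned long kbuf_base(struct kimage *image, unsigned long mem)
{
	if (image->type == KEXEC_TYPE_CRASH)
		mem += crashk_res.start;
	return mem;
}
#else
static unsigned long kbuf_base(struct kimage *image, unsigned long mem)
{
	return mem;	/* crashk_res does not exist in this config */
}
#endif
]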
diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c
new file mode 100644 (file)
index 0000000..d296dfc
--- /dev/null
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/vmcore_info.h>
+#include <asm/abs_lowcore.h>
+#include <linux/mm.h>
+#include <asm/setup.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+       struct lowcore *abs_lc;
+
+       VMCOREINFO_SYMBOL(lowcore_ptr);
+       VMCOREINFO_SYMBOL(high_memory);
+       VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
+       vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
+       vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
+       vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+       abs_lc = get_abs_lowcore();
+       abs_lc->vmcore_info = paddr_vmcoreinfo_note();
+       put_abs_lowcore(abs_lc);
+}
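
[ Note: arch_crash_save_vmcoreinfo() moves verbatim from machine_kexec.c into the new vmcore_info.c, which the Makefile hunk above builds under CONFIG_VMCORE_INFO rather than CONFIG_KEXEC_CORE, so the vmcoreinfo note stays available even when kexec support is compiled out. ]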
index 72e9b7dcdf7d977a14a1ea7f9d39a06c36e4b97f..cae908d645501ef7eb4edbe87b8431f6499370a4 100644 (file)
@@ -19,7 +19,6 @@ if VIRTUALIZATION
 config KVM
        def_tristate y
        prompt "Kernel-based Virtual Machine (KVM) support"
-       depends on HAVE_KVM
        select HAVE_KVM_CPU_RELAX_INTERCEPT
        select HAVE_KVM_VCPU_ASYNC_IOCTL
        select KVM_ASYNC_PF
index 3c65b8258ae67ad4b28589494e663034e69dffdd..2a32438e09cebaa698a8935c4aec03bf1f2cbc58 100644 (file)
@@ -102,7 +102,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
                    parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
                        return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-               if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
+               if (!kvm_is_gpa_in_memslot(vcpu->kvm, parm.token_addr))
                        return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
                vcpu->arch.pfault_token = parm.token_addr;
index ee863566910b1576179e4c41548c44456be73c65..5bf3d94e9ddaa98857eb2ecdfb670e14f79d22ab 100644 (file)
@@ -665,7 +665,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
        case ASCE_TYPE_REGION1: {
                union region1_table_entry rfte;
 
-               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+               if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
                        return PGM_ADDRESSING;
                if (deref_table(vcpu->kvm, ptr, &rfte.val))
                        return -EFAULT;
@@ -683,7 +683,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
        case ASCE_TYPE_REGION2: {
                union region2_table_entry rste;
 
-               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+               if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
                        return PGM_ADDRESSING;
                if (deref_table(vcpu->kvm, ptr, &rste.val))
                        return -EFAULT;
@@ -701,7 +701,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
        case ASCE_TYPE_REGION3: {
                union region3_table_entry rtte;
 
-               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+               if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
                        return PGM_ADDRESSING;
                if (deref_table(vcpu->kvm, ptr, &rtte.val))
                        return -EFAULT;
@@ -729,7 +729,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
        case ASCE_TYPE_SEGMENT: {
                union segment_table_entry ste;
 
-               if (kvm_is_error_gpa(vcpu->kvm, ptr))
+               if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
                        return PGM_ADDRESSING;
                if (deref_table(vcpu->kvm, ptr, &ste.val))
                        return -EFAULT;
@@ -749,7 +749,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
                ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
        }
        }
-       if (kvm_is_error_gpa(vcpu->kvm, ptr))
+       if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
                return PGM_ADDRESSING;
        if (deref_table(vcpu->kvm, ptr, &pte.val))
                return -EFAULT;
@@ -771,7 +771,7 @@ absolute_address:
                *prot = PROT_TYPE_IEP;
                return PGM_PROTECTION;
        }
-       if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+       if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
                return PGM_ADDRESSING;
        *gpa = raddr.addr;
        return 0;
@@ -958,7 +958,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
                                return rc;
                } else {
                        gpa = kvm_s390_real_to_abs(vcpu, ga);
-                       if (kvm_is_error_gpa(vcpu->kvm, gpa)) {
+                       if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
                                rc = PGM_ADDRESSING;
                                prot = PROT_NONE;
                        }
index dc721d50a942f8e5090ca63e04160455b5327d6d..4f0e7f61edf788bf94e10b9265f71436790bdf73 100644 (file)
@@ -1031,7 +1031,7 @@ static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu)
                return 0;
        }
        ext = fi->srv_signal;
-       /* only clear the event bit */
+       /* only clear the event bits */
        fi->srv_signal.ext_params &= ~SCCB_EVENT_PENDING;
        clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
        spin_unlock(&fi->lock);
@@ -1041,7 +1041,7 @@ static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu)
        trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
                                         ext.ext_params, 0);
 
-       return write_sclp(vcpu, SCCB_EVENT_PENDING);
+       return write_sclp(vcpu, ext.ext_params & SCCB_EVENT_PENDING);
 }
 
 static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
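
[ The second hunk above is the functional part of the fix: the SCLP write must forward only the event bits that were actually pending in the saved ext.ext_params, not the full SCCB_EVENT_PENDING mask unconditionally. ]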
index b11bb8e780a107206c5fe8867199bdcaf6e33a3b..5147b943a864a641fcf5d87fed39eb33f4590bea 100644 (file)
@@ -2878,7 +2878,7 @@ static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
 
        srcu_idx = srcu_read_lock(&kvm->srcu);
 
-       if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+       if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
                r = PGM_ADDRESSING;
                goto out_unlock;
        }
@@ -2940,7 +2940,7 @@ static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *m
 
        srcu_idx = srcu_read_lock(&kvm->srcu);
 
-       if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+       if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
                r = PGM_ADDRESSING;
                goto out_unlock;
        }
@@ -3153,7 +3153,7 @@ static int kvm_s390_apxa_installed(void)
  */
 static void kvm_s390_set_crycb_format(struct kvm *kvm)
 {
-       kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
+       kvm->arch.crypto.crycbd = virt_to_phys(kvm->arch.crypto.crycb);
 
        /* Clear the CRYCB format bits - i.e., set format 0 by default */
        kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
index f875a404a0a02555d5875128fafedcfe54d5b4d6..1be19cc9d73c19cf35b3df5dd9f7b430cdf8c3b7 100644 (file)
@@ -149,7 +149,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
         * first page, since address is 8k aligned and memory pieces are always
         * at least 1MB aligned and have at least a size of 1MB.
         */
-       if (kvm_is_error_gpa(vcpu->kvm, address))
+       if (!kvm_is_gpa_in_memslot(vcpu->kvm, address))
                return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
        kvm_s390_set_prefix(vcpu, address);
@@ -464,7 +464,7 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
                return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
        addr = kvm_s390_real_to_abs(vcpu, addr);
 
-       if (kvm_is_error_gpa(vcpu->kvm, addr))
+       if (!kvm_is_gpa_in_memslot(vcpu->kvm, addr))
                return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
        /*
         * We don't expect errors on modern systems, and do not care
index d9696b5300647c87332bb212cab2840071913e37..55c34cb354281e720b03436f2a273f92beaacc51 100644 (file)
@@ -172,7 +172,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
         * first page, since address is 8k aligned and memory pieces are always
         * at least 1MB aligned and have at least a size of 1MB.
         */
-       if (kvm_is_error_gpa(vcpu->kvm, irq.u.prefix.address)) {
+       if (!kvm_is_gpa_in_memslot(vcpu->kvm, irq.u.prefix.address)) {
                *reg &= 0xffffffff00000000UL;
                *reg |= SIGP_STATUS_INVALID_PARAMETER;
                return SIGP_CC_STATUS_STORED;
index d37a8f607b71882557ca26e719b6cffa60700b97..ffd07ed7b4af88968caac3861076217bd79d6933 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/mm.h>
 #include <linux/kfence.h>
 #include <linux/kasan.h>
-#include <asm/ptdump.h>
 #include <asm/kasan.h>
 #include <asm/abs_lowcore.h>
 #include <asm/nospec-branch.h>
@@ -122,7 +121,6 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
 
 static void note_prot_wx(struct pg_state *st, unsigned long addr)
 {
-#ifdef CONFIG_DEBUG_WX
        if (!st->check_wx)
                return;
        if (st->current_prot & _PAGE_INVALID)
@@ -139,10 +137,10 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
         */
        if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)))
                return;
-       WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n",
+       WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX),
+                 "s390/mm: Found insecure W+X mapping at address %pS\n",
                  (void *)st->start_address);
        st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
-#endif /* CONFIG_DEBUG_WX */
 }
 
 static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
@@ -194,8 +192,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
        }
 }
 
-#ifdef CONFIG_DEBUG_WX
-void ptdump_check_wx(void)
+bool ptdump_check_wx(void)
 {
        struct pg_state st = {
                .ptdump = {
@@ -218,16 +215,20 @@ void ptdump_check_wx(void)
        };
 
        if (!MACHINE_HAS_NX)
-               return;
+               return true;
        ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
-       if (st.wx_pages)
+       if (st.wx_pages) {
                pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
-       else
+
+               return false;
+       } else {
                pr_info("Checked W+X mappings: passed, no %sW+X pages found\n",
                        (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ?
                        "unexpected " : "");
+
+               return true;
+       }
 }
-#endif /* CONFIG_DEBUG_WX */
 
 #ifdef CONFIG_PTDUMP_DEBUGFS
 static int ptdump_show(struct seq_file *m, void *v)
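
[ With ptdump_check_wx() now built unconditionally and returning a result, the debug_checkwx() call removed from mark_rodata_ro() above is replaced by a generic-code call site. A minimal sketch of such a caller, illustrative only since the generic wiring lives outside this hunk:

/* Run the W+X scan and surface a single warning on failure. */
static void check_wx(void)
{
	if (IS_ENABLED(CONFIG_DEBUG_WX) && !ptdump_check_wx())
		pr_warn("W+X mappings present after mark_rodata_ro()\n");
}
]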
index 8da39deb56ca4952a6f8e436d153ec6f54292932..094b43b121cd5d8d8895af0f1c830ab5cfb1d355 100644 (file)
@@ -206,9 +206,11 @@ static void gmap_free(struct gmap *gmap)
 
        /* Free additional data for a shadow gmap */
        if (gmap_is_shadow(gmap)) {
+               struct ptdesc *ptdesc, *n;
+
                /* Free all page tables. */
-               list_for_each_entry_safe(page, next, &gmap->pt_list, lru)
-                       page_table_free_pgste(page);
+               list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
+                       page_table_free_pgste(ptdesc);
                gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
                /* Release reference to the parent */
                gmap_put(gmap->parent);
@@ -596,12 +598,12 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
        pud = pud_offset(p4d, vmaddr);
        VM_BUG_ON(pud_none(*pud));
        /* large puds cannot yet be handled */
-       if (pud_large(*pud))
+       if (pud_leaf(*pud))
                return -EFAULT;
        pmd = pmd_offset(pud, vmaddr);
        VM_BUG_ON(pmd_none(*pmd));
        /* Are we allowed to use huge pages? */
-       if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
+       if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
                return -EFAULT;
        /* Link gmap segment table entry location to page table. */
        rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
@@ -613,7 +615,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
                rc = radix_tree_insert(&gmap->host_to_guest,
                                       vmaddr >> PMD_SHIFT, table);
                if (!rc) {
-                       if (pmd_large(*pmd)) {
+                       if (pmd_leaf(*pmd)) {
                                *table = (pmd_val(*pmd) &
                                          _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
                                        | _SEGMENT_ENTRY_GMAP_UC;
@@ -943,7 +945,7 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
        }
 
        /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
-       if (!pmd_large(*pmdp))
+       if (!pmd_leaf(*pmdp))
                spin_unlock(&gmap->guest_table_lock);
        return pmdp;
 }
@@ -955,7 +957,7 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
  */
 static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
 {
-       if (pmd_large(*pmdp))
+       if (pmd_leaf(*pmdp))
                spin_unlock(&gmap->guest_table_lock);
 }
 
@@ -1066,7 +1068,7 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
                rc = -EAGAIN;
                pmdp = gmap_pmd_op_walk(gmap, gaddr);
                if (pmdp) {
-                       if (!pmd_large(*pmdp)) {
+                       if (!pmd_leaf(*pmdp)) {
                                rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
                                                      bits);
                                if (!rc) {
@@ -1348,7 +1350,7 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
 {
        unsigned long *ste;
        phys_addr_t sto, pgt;
-       struct page *page;
+       struct ptdesc *ptdesc;
 
        BUG_ON(!gmap_is_shadow(sg));
        ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
@@ -1361,9 +1363,9 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
        *ste = _SEGMENT_ENTRY_EMPTY;
        __gmap_unshadow_pgt(sg, raddr, __va(pgt));
        /* Free page table */
-       page = phys_to_page(pgt);
-       list_del(&page->lru);
-       page_table_free_pgste(page);
+       ptdesc = page_ptdesc(phys_to_page(pgt));
+       list_del(&ptdesc->pt_list);
+       page_table_free_pgste(ptdesc);
 }
 
 /**
@@ -1377,7 +1379,7 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
                                unsigned long *sgt)
 {
-       struct page *page;
+       struct ptdesc *ptdesc;
        phys_addr_t pgt;
        int i;
 
@@ -1389,9 +1391,9 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
                sgt[i] = _SEGMENT_ENTRY_EMPTY;
                __gmap_unshadow_pgt(sg, raddr, __va(pgt));
                /* Free page table */
-               page = phys_to_page(pgt);
-               list_del(&page->lru);
-               page_table_free_pgste(page);
+               ptdesc = page_ptdesc(phys_to_page(pgt));
+               list_del(&ptdesc->pt_list);
+               page_table_free_pgste(ptdesc);
        }
 }
 
@@ -2058,19 +2060,19 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 {
        unsigned long raddr, origin;
        unsigned long *table;
-       struct page *page;
+       struct ptdesc *ptdesc;
        phys_addr_t s_pgt;
        int rc;
 
        BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
        /* Allocate a shadow page table */
-       page = page_table_alloc_pgste(sg->mm);
-       if (!page)
+       ptdesc = page_table_alloc_pgste(sg->mm);
+       if (!ptdesc)
                return -ENOMEM;
-       page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
+       ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN;
        if (fake)
-               page->index |= GMAP_SHADOW_FAKE_TABLE;
-       s_pgt = page_to_phys(page);
+               ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE;
+       s_pgt = page_to_phys(ptdesc_page(ptdesc));
        /* Install shadow page table */
        spin_lock(&sg->guest_table_lock);
        table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
@@ -2088,7 +2090,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
        /* mark as invalid as long as the parent table is not protected */
        *table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
                 (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
-       list_add(&page->lru, &sg->pt_list);
+       list_add(&ptdesc->pt_list, &sg->pt_list);
        if (fake) {
                /* nothing to protect for fake tables */
                *table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2114,7 +2116,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
        return rc;
 out_free:
        spin_unlock(&sg->guest_table_lock);
-       page_table_free_pgste(page);
+       page_table_free_pgste(ptdesc);
        return rc;
 
 }
@@ -2498,7 +2500,7 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
        if (!pmdp)
                return;
 
-       if (pmd_large(*pmdp)) {
+       if (pmd_leaf(*pmdp)) {
                if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
                        bitmap_fill(bitmap, _PAGE_ENTRIES);
        } else {
index 297a6d897d5a0c0e2e00f271ae23d918c4c6862a..c2e8242bd15dd0afb6454e9e71e9ca5ef969ba13 100644 (file)
@@ -224,7 +224,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
                if (p4d_present(*p4dp)) {
                        pudp = pud_offset(p4dp, addr);
                        if (pud_present(*pudp)) {
-                               if (pud_large(*pudp))
+                               if (pud_leaf(*pudp))
                                        return (pte_t *) pudp;
                                pmdp = pmd_offset(pudp, addr);
                        }
@@ -235,12 +235,12 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 
 int pmd_huge(pmd_t pmd)
 {
-       return pmd_large(pmd);
+       return pmd_leaf(pmd);
 }
 
 int pud_huge(pud_t pud)
 {
-       return pud_large(pud);
+       return pud_leaf(pud);
 }
 
 bool __init arch_hugetlb_valid_size(unsigned long size)
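
[ The pmd_large()/pud_large() to pmd_leaf()/pud_leaf() conversions throughout this series are a pure rename: a "leaf" entry maps memory directly at that level, so a table walk must stop there. A generic sketch of the rule, assuming only common pgtable helpers and omitting the pgd/p4d none checks for brevity:

#include <linux/mm.h>
#include <linux/pgtable.h>

/* Descend to the pte level, bailing out on any leaf (large-page)
 * entry that terminates the walk early. Illustrative only. */
static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d = p4d_offset(pgd, addr);
	pud_t *pud = pud_offset(p4d, addr);
	pmd_t *pmd;

	if (pud_none(*pud) || pud_leaf(*pud))
		return NULL;	/* hole, or a huge pud maps it directly */
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd) || pmd_leaf(*pmd))
		return NULL;	/* hole, or a huge pmd maps it directly */
	return pte_offset_kernel(pmd, addr);
}
]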
index 43e612bc2bcd34524a08903c8e07710492da3fb8..f6391442c0c2adba90d8e2ff0cdde30728dc5b19 100644 (file)
@@ -37,7 +37,6 @@
 #include <asm/pgalloc.h>
 #include <asm/ctlreg.h>
 #include <asm/kfence.h>
-#include <asm/ptdump.h>
 #include <asm/dma.h>
 #include <asm/abs_lowcore.h>
 #include <asm/tlb.h>
@@ -109,7 +108,6 @@ void mark_rodata_ro(void)
 
        __set_memory_ro(__start_ro_after_init, __end_ro_after_init);
        pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
-       debug_checkwx();
 }
 
 int set_memory_encrypted(unsigned long vaddr, int numpages)
@@ -281,9 +279,6 @@ int arch_add_memory(int nid, u64 start, u64 size,
        unsigned long size_pages = PFN_DOWN(size);
        int rc;
 
-       if (WARN_ON_ONCE(params->altmap))
-               return -EINVAL;
-
        if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
                return -EINVAL;
 
index 631e3a4ee2de82f66d4d97232e84734522cfe666..01bc8fad64d6c1d18983618df973cde999f7fb1b 100644 (file)
@@ -185,7 +185,7 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
                if (pmd_none(*pmdp))
                        return -EINVAL;
                next = pmd_addr_end(addr, end);
-               if (pmd_large(*pmdp)) {
+               if (pmd_leaf(*pmdp)) {
                        need_split  = !!(flags & SET_MEMORY_4K);
                        need_split |= !!(addr & ~PMD_MASK);
                        need_split |= !!(addr + PMD_SIZE > next);
@@ -274,7 +274,7 @@ static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
                if (pud_none(*pudp))
                        return -EINVAL;
                next = pud_addr_end(addr, end);
-               if (pud_large(*pudp)) {
+               if (pud_leaf(*pudp)) {
                        need_split  = !!(flags & SET_MEMORY_4K);
                        need_split |= !!(addr & ~PUD_MASK);
                        need_split |= !!(addr + PUD_SIZE > next);
index 008e487c94a631aa72615bfde3081717e4ffa876..abb629d7e1319169e47327565b77eb9738cbd27a 100644 (file)
@@ -135,7 +135,7 @@ err_p4d:
 
 #ifdef CONFIG_PGSTE
 
-struct page *page_table_alloc_pgste(struct mm_struct *mm)
+struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm)
 {
        struct ptdesc *ptdesc;
        u64 *table;
@@ -147,12 +147,12 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
                memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
                memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
        }
-       return ptdesc_page(ptdesc);
+       return ptdesc;
 }
 
-void page_table_free_pgste(struct page *page)
+void page_table_free_pgste(struct ptdesc *ptdesc)
 {
-       pagetable_free(page_ptdesc(page));
+       pagetable_free(ptdesc);
 }
 
 #endif /* CONFIG_PGSTE */
index 99422926efe1b50d5c63ac28aac2cc9de8e9e6f4..2c944bafb0309c653b9c41f6bd4f2ba554beec6b 100644 (file)
@@ -470,7 +470,7 @@ static int pmd_lookup(struct mm_struct *mm, unsigned long addr, pmd_t **pmdp)
                return -ENOENT;
 
        /* Large PUDs are not supported yet. */
-       if (pud_large(*pud))
+       if (pud_leaf(*pud))
                return -EFAULT;
 
        *pmdp = pmd_offset(pud, addr);
@@ -721,9 +721,9 @@ static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
        if (!non_swap_entry(entry))
                dec_mm_counter(mm, MM_SWAPENTS);
        else if (is_migration_entry(entry)) {
-               struct page *page = pfn_swap_entry_to_page(entry);
+               struct folio *folio = pfn_swap_entry_folio(entry);
 
-               dec_mm_counter(mm, mm_counter(page));
+               dec_mm_counter(mm, mm_counter(folio));
        }
        free_swap_and_cache(entry);
 }
@@ -827,7 +827,7 @@ again:
                return key ? -EFAULT : 0;
        }
 
-       if (pmd_large(*pmdp)) {
+       if (pmd_leaf(*pmdp)) {
                paddr = pmd_val(*pmdp) & HPAGE_MASK;
                paddr |= addr & ~HPAGE_MASK;
                /*
@@ -938,7 +938,7 @@ again:
                return 0;
        }
 
-       if (pmd_large(*pmdp)) {
+       if (pmd_leaf(*pmdp)) {
                paddr = pmd_val(*pmdp) & HPAGE_MASK;
                paddr |= addr & ~HPAGE_MASK;
                cc = page_reset_referenced(paddr);
@@ -1002,7 +1002,7 @@ again:
                return 0;
        }
 
-       if (pmd_large(*pmdp)) {
+       if (pmd_leaf(*pmdp)) {
                paddr = pmd_val(*pmdp) & HPAGE_MASK;
                paddr |= addr & ~HPAGE_MASK;
                *key = page_get_storage_key(paddr);
index 186a020857cf6a6ebe3b9a513d01bfd8eba233cb..85cddf904cb209ff265b6a6dcf115bd13d6f16c4 100644 (file)
@@ -33,8 +33,12 @@ static void __ref *vmem_alloc_pages(unsigned int order)
        return memblock_alloc(size, size);
 }
 
-static void vmem_free_pages(unsigned long addr, int order)
+static void vmem_free_pages(unsigned long addr, int order, struct vmem_altmap *altmap)
 {
+       if (altmap) {
+               vmem_altmap_free(altmap, 1 << order);
+               return;
+       }
        /* We don't expect boot memory to be removed ever. */
        if (!slab_is_available() ||
            WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr))))
@@ -156,7 +160,8 @@ static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
 
 /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
 static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
-                                 unsigned long end, bool add, bool direct)
+                                 unsigned long end, bool add, bool direct,
+                                 struct vmem_altmap *altmap)
 {
        unsigned long prot, pages = 0;
        int ret = -ENOMEM;
@@ -172,11 +177,11 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
                        if (pte_none(*pte))
                                continue;
                        if (!direct)
-                               vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
+                               vmem_free_pages((unsigned long)pfn_to_virt(pte_pfn(*pte)), get_order(PAGE_SIZE), altmap);
                        pte_clear(&init_mm, addr, pte);
                } else if (pte_none(*pte)) {
                        if (!direct) {
-                               void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+                               void *new_page = vmemmap_alloc_block_buf(PAGE_SIZE, NUMA_NO_NODE, altmap);
 
                                if (!new_page)
                                        goto out;
@@ -213,7 +218,8 @@ static void try_free_pte_table(pmd_t *pmd, unsigned long start)
 
 /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
 static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
-                                 unsigned long end, bool add, bool direct)
+                                 unsigned long end, bool add, bool direct,
+                                 struct vmem_altmap *altmap)
 {
        unsigned long next, prot, pages = 0;
        int ret = -ENOMEM;
@@ -230,15 +236,15 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
                if (!add) {
                        if (pmd_none(*pmd))
                                continue;
-                       if (pmd_large(*pmd)) {
+                       if (pmd_leaf(*pmd)) {
                                if (IS_ALIGNED(addr, PMD_SIZE) &&
                                    IS_ALIGNED(next, PMD_SIZE)) {
                                        if (!direct)
-                                               vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+                                               vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE), altmap);
                                        pmd_clear(pmd);
                                        pages++;
                                } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
-                                       vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+                                       vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE), altmap);
                                        pmd_clear(pmd);
                                }
                                continue;
@@ -261,7 +267,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
                                 * page tables since vmemmap_populate gets
                                 * called for each section separately.
                                 */
-                               new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
+                               new_page = vmemmap_alloc_block_buf(PMD_SIZE, NUMA_NO_NODE, altmap);
                                if (new_page) {
                                        set_pmd(pmd, __pmd(__pa(new_page) | prot));
                                        if (!IS_ALIGNED(addr, PMD_SIZE) ||
@@ -275,12 +281,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
                        if (!pte)
                                goto out;
                        pmd_populate(&init_mm, pmd, pte);
-               } else if (pmd_large(*pmd)) {
+               } else if (pmd_leaf(*pmd)) {
                        if (!direct)
                                vmemmap_use_sub_pmd(addr, next);
                        continue;
                }
-               ret = modify_pte_table(pmd, addr, next, add, direct);
+               ret = modify_pte_table(pmd, addr, next, add, direct, altmap);
                if (ret)
                        goto out;
                if (!add)
@@ -302,12 +308,12 @@ static void try_free_pmd_table(pud_t *pud, unsigned long start)
        for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
                if (!pmd_none(*pmd))
                        return;
-       vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+       vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER, NULL);
        pud_clear(pud);
 }
 
 static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
-                           bool add, bool direct)
+                           bool add, bool direct, struct vmem_altmap *altmap)
 {
        unsigned long next, prot, pages = 0;
        int ret = -ENOMEM;
@@ -323,7 +329,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
                if (!add) {
                        if (pud_none(*pud))
                                continue;
-                       if (pud_large(*pud)) {
+                       if (pud_leaf(*pud)) {
                                if (IS_ALIGNED(addr, PUD_SIZE) &&
                                    IS_ALIGNED(next, PUD_SIZE)) {
                                        pud_clear(pud);
@@ -344,10 +350,10 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
                        if (!pmd)
                                goto out;
                        pud_populate(&init_mm, pud, pmd);
-               } else if (pud_large(*pud)) {
+               } else if (pud_leaf(*pud)) {
                        continue;
                }
-               ret = modify_pmd_table(pud, addr, next, add, direct);
+               ret = modify_pmd_table(pud, addr, next, add, direct, altmap);
                if (ret)
                        goto out;
                if (!add)
@@ -370,12 +376,12 @@ static void try_free_pud_table(p4d_t *p4d, unsigned long start)
                if (!pud_none(*pud))
                        return;
        }
-       vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+       vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER, NULL);
        p4d_clear(p4d);
 }
 
 static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
-                           bool add, bool direct)
+                           bool add, bool direct, struct vmem_altmap *altmap)
 {
        unsigned long next;
        int ret = -ENOMEM;
@@ -394,7 +400,7 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
                                goto out;
                        p4d_populate(&init_mm, p4d, pud);
                }
-               ret = modify_pud_table(p4d, addr, next, add, direct);
+               ret = modify_pud_table(p4d, addr, next, add, direct, altmap);
                if (ret)
                        goto out;
                if (!add)
@@ -415,12 +421,12 @@ static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
                if (!p4d_none(*p4d))
                        return;
        }
-       vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+       vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER, NULL);
        pgd_clear(pgd);
 }
 
 static int modify_pagetable(unsigned long start, unsigned long end, bool add,
-                           bool direct)
+                           bool direct, struct vmem_altmap *altmap)
 {
        unsigned long addr, next;
        int ret = -ENOMEM;
@@ -445,7 +451,7 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add,
                                goto out;
                        pgd_populate(&init_mm, pgd, p4d);
                }
-               ret = modify_p4d_table(pgd, addr, next, add, direct);
+               ret = modify_p4d_table(pgd, addr, next, add, direct, altmap);
                if (ret)
                        goto out;
                if (!add)
@@ -458,14 +464,16 @@ out:
        return ret;
 }
 
-static int add_pagetable(unsigned long start, unsigned long end, bool direct)
+static int add_pagetable(unsigned long start, unsigned long end, bool direct,
+                        struct vmem_altmap *altmap)
 {
-       return modify_pagetable(start, end, true, direct);
+       return modify_pagetable(start, end, true, direct, altmap);
 }
 
-static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct,
+                           struct vmem_altmap *altmap)
 {
-       return modify_pagetable(start, end, false, direct);
+       return modify_pagetable(start, end, false, direct, altmap);
 }
 
 /*
@@ -474,7 +482,7 @@ static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
 static int vmem_add_range(unsigned long start, unsigned long size)
 {
        start = (unsigned long)__va(start);
-       return add_pagetable(start, start + size, true);
+       return add_pagetable(start, start + size, true, NULL);
 }
 
 /*
@@ -483,7 +491,7 @@ static int vmem_add_range(unsigned long start, unsigned long size)
 static void vmem_remove_range(unsigned long start, unsigned long size)
 {
        start = (unsigned long)__va(start);
-       remove_pagetable(start, start + size, true);
+       remove_pagetable(start, start + size, true, NULL);
 }
 
 /*
@@ -496,9 +504,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 
        mutex_lock(&vmem_mutex);
        /* We don't care about the node, just use NUMA_NO_NODE on allocations */
-       ret = add_pagetable(start, end, false);
+       ret = add_pagetable(start, end, false, altmap);
        if (ret)
-               remove_pagetable(start, end, false);
+               remove_pagetable(start, end, false, altmap);
        mutex_unlock(&vmem_mutex);
        return ret;
 }
@@ -509,7 +517,7 @@ void vmemmap_free(unsigned long start, unsigned long end,
                  struct vmem_altmap *altmap)
 {
        mutex_lock(&vmem_mutex);
-       remove_pagetable(start, end, false);
+       remove_pagetable(start, end, false, altmap);
        mutex_unlock(&vmem_mutex);
 }
 
@@ -591,7 +599,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
                if (!pmd)
                        goto out;
                pud_populate(&init_mm, pud, pmd);
-       } else if (WARN_ON_ONCE(pud_large(*pud))) {
+       } else if (WARN_ON_ONCE(pud_leaf(*pud))) {
                goto out;
        }
        pmd = pmd_offset(pud, addr);
@@ -602,7 +610,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
                if (!pte)
                        goto out;
                pmd_populate(&init_mm, pmd, pte);
-       } else if (WARN_ON_ONCE(pmd_large(*pmd))) {
+       } else if (WARN_ON_ONCE(pmd_leaf(*pmd))) {
                goto out;
        }
        ptep = pte_offset_kernel(pmd, addr);
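
[ The altmap parameter threaded through modify_pagetable() and its helpers lets the vmemmap for device memory be carved out of the device's own reserved range. On the allocation side this reduces to a single call, sketched here as an illustrative wrapper:

/* vmemmap_alloc_block_buf() consumes pages from the altmap when one is
 * supplied and falls back to the page allocator otherwise;
 * vmem_free_pages() above mirrors this on teardown. */
static void *vmemmap_backing_alloc(unsigned long size,
				   struct vmem_altmap *altmap)
{
	return vmemmap_alloc_block_buf(size, NUMA_NO_NODE, altmap);
}
]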
index 7500521b2b984a6082a1be3b98475a5809171106..2ad3e29f0ebec416f42f28ac09c651ef9da74614 100644 (file)
@@ -2,6 +2,7 @@
 config SUPERH
        def_bool y
        select ARCH_32BIT_OFF_T
+       select ARCH_HAS_CPU_CACHE_ALIASING
        select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM && MMU
        select ARCH_ENABLE_MEMORY_HOTREMOVE if SPARSEMEM && MMU
        select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
diff --git a/arch/sh/include/asm/cachetype.h b/arch/sh/include/asm/cachetype.h
new file mode 100644 (file)
index 0000000..a5fffe5
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SH_CACHETYPE_H
+#define __ASM_SH_CACHETYPE_H
+
+#include <linux/types.h>
+
+#define cpu_dcache_is_aliasing()       true
+
+#endif
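+
+/*
+ * ARCH_HAS_CPU_CACHE_ALIASING plus this header give generic code a cheap
+ * way to ask whether the D-cache is virtually aliasing. A hedged example
+ * of the kind of check this enables; the DAX motivation is assumed from
+ * the wider series and the helper is hypothetical:
+ *
+ *     #include <linux/errno.h>
+ *     #include <asm/cachetype.h>
+ *
+ *     // Refuse a feature that requires a non-aliasing D-cache,
+ *     // e.g. direct shared mappings of device memory.
+ *     static int check_dcache_for_direct_map(void)
+ *     {
+ *             if (cpu_dcache_is_aliasing())
+ *                     return -EOPNOTSUPP;
+ *             return 0;
+ *     }
+ */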
index 2d7e70537de04c9b731de0ffcadc807c62238754..ba917008d63ed9808badc2d24db886e10dd32d4d 100644 (file)
@@ -34,6 +34,7 @@ obj-$(CONFIG_SH_STANDARD_BIOS)        += sh_bios.o
 obj-$(CONFIG_KGDB)             += kgdb.o
 obj-$(CONFIG_MODULES)          += sh_ksyms_32.o module.o
 obj-$(CONFIG_KEXEC_CORE)       += machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_VMCORE_INFO)      += vmcore_info.o
 obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
 obj-$(CONFIG_IO_TRAPPED)       += io_trapped.o
index fa3a7b36190a2acfb4e64d724afad8e33a88f3e2..8321b31d2e19dc0c5af57020d4225398bd9790fc 100644 (file)
@@ -137,22 +137,14 @@ void machine_kexec(struct kimage *image)
        __ftrace_enabled_restore(save_ftrace_enabled);
 }
 
-void arch_crash_save_vmcoreinfo(void)
-{
-#ifdef CONFIG_NUMA
-       VMCOREINFO_SYMBOL(node_data);
-       VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
-#ifdef CONFIG_X2TLB
-       VMCOREINFO_CONFIG(X2TLB);
-#endif
-}
-
 void __init reserve_crashkernel(void)
 {
        unsigned long long crash_size, crash_base;
        int ret;
 
+       if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
+               return;
+
        ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
                        &crash_size, &crash_base, NULL, NULL);
        if (ret == 0 && crash_size > 0) {
index d3175f09b3aad9579fc16ceceb800a3d4af7320d..620e5cf8ae1e748db41e28a103db678b1bb60c69 100644 (file)
@@ -220,7 +220,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
        request_resource(res, &code_resource);
        request_resource(res, &data_resource);
        request_resource(res, &bss_resource);
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_RESERVE
        request_resource(res, &crashk_res);
 #endif
 
diff --git a/arch/sh/kernel/vmcore_info.c b/arch/sh/kernel/vmcore_info.c
new file mode 100644 (file)
index 0000000..a244a20
--- /dev/null
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/vmcore_info.h>
+#include <linux/mm.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+#ifdef CONFIG_NUMA
+       VMCOREINFO_SYMBOL(node_data);
+       VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifdef CONFIG_X2TLB
+       VMCOREINFO_CONFIG(X2TLB);
+#endif
+}
index 7e6bc6fff76b101d1b5d59eec7c91998e872280f..11bf9d312318c678c8e99e29a9c20212eea5858a 100644 (file)
@@ -13,6 +13,7 @@ config 64BIT
 config SPARC
        bool
        default y
+       select ARCH_HAS_CPU_CACHE_ALIASING
        select ARCH_MIGHT_HAVE_PC_PARPORT if SPARC64 && PCI
        select ARCH_MIGHT_HAVE_PC_SERIO
        select DMA_OPS
@@ -57,6 +58,8 @@ config SPARC32
        select CLZ_TAB
        select DMA_DIRECT_REMAP
        select GENERIC_ATOMIC64
+       select GENERIC_LIB_CMPDI2
+       select GENERIC_LIB_UCMPDI2
        select HAVE_UID16
        select HAVE_PAGE_SIZE_4KB
        select LOCK_MM_AND_FIND_VMA
@@ -114,10 +117,6 @@ config ARCH_PROC_KCORE_TEXT
 config CPU_BIG_ENDIAN
        def_bool y
 
-config ARCH_ATU
-       bool
-       default y if SPARC64
-
 config STACKTRACE_SUPPORT
        bool
        default y if SPARC64
@@ -142,10 +141,6 @@ config HIGHMEM
        default y if SPARC32
        select KMAP_LOCAL
 
-config GENERIC_ISA_DMA
-       bool
-       default y if SPARC32
-
 config PGTABLE_LEVELS
        default 4 if 64BIT
        default 3
@@ -221,6 +216,8 @@ config EARLYFB
        bool "Support for early boot text console"
        default y
        depends on SPARC64
+       select FONT_SUN8x16
+       select FONT_SUPPORT
        help
          Say Y here to enable a faster early framebuffer boot console.
 
index 37e003665de652ba7583157dd5092cb567663d41..d475a056a2e066b97451ecd7dbbbe8eece6ed10e 100644 (file)
@@ -8,11 +8,8 @@ config MCOUNT
        bool
        depends on SPARC64
        depends on FUNCTION_TRACER
-       default y
-
-config FRAME_POINTER
-       bool
-       depends on MCOUNT
+       select ARCH_WANT_FRAME_POINTERS
+       select FRAME_POINTER
        default y
 
 config HAVE_HARDLOCKUP_DETECTOR_SPARC64
diff --git a/arch/sparc/include/asm/cachetype.h b/arch/sparc/include/asm/cachetype.h
new file mode 100644 (file)
index 0000000..caf1c00
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SPARC_CACHETYPE_H
+#define __ASM_SPARC_CACHETYPE_H
+
+#include <asm/page.h>
+
+#ifdef CONFIG_SPARC32
+extern int vac_cache_size;
+#define cpu_dcache_is_aliasing()       (vac_cache_size > PAGE_SIZE)
+#else
+#define cpu_dcache_is_aliasing()       (L1DCACHE_SIZE > PAGE_SIZE)
+#endif
+
+#endif
index 08650d503cc2b6c8964ef5bc5cdc0eb4a67fe9ae..f220edcf17c7c2b12f0b1a0a639195e659cc33c4 100644 (file)
@@ -430,7 +430,7 @@ unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count,
  * ERRORS:     No errors defined.
  *
  * Return the hypervisor ID handle for the current CPU.  Use by a
- * virtual CPU to discover it's own identity.
+ * virtual CPU to discover its own identity.
  */
 #define HV_FAST_CPU_MYID               0x16
 
@@ -1221,7 +1221,7 @@ unsigned long sun4v_con_write(unsigned long buffer,
  *             EBADALIGNED     software state description is not correctly
  *                             aligned
  *
- * This allows the guest to report it's soft state to the hypervisor.  There
+ * This allows the guest to report its soft state to the hypervisor.  There
  * are two primary components to this state.  The first part states whether
  * the guest software is running or not.  The second contains optional
  * details specific to the software.
@@ -1502,7 +1502,7 @@ struct hv_trap_trace_entry {
  * configuration error of some sort.
  *
  * The dump services provide an opaque buffer into which the
- * hypervisor can place it's internal state in order to assist in
+ * hypervisor can place its internal state in order to assist in
  * debugging such situations.  The contents are opaque and extremely
  * platform and hypervisor implementation specific.  The guest, during
  * a core dump, requests that the hypervisor update any information in
index ca973955ca8611d7c581e355d934d756bc66c8d4..4294738d40be75d51097f9c2d3ac1b54a826a7ac 100644 (file)
@@ -13,7 +13,7 @@ void ldom_power_off(void);
  * or data becomes available on the receive side.
  *
  * For non-RAW links, if the LDC_EVENT_RESET event arrives the
- * driver should reset all of it's internal state and reinvoke
+ * driver should reset all of its internal state and reinvoke
  * ldc_connect() to try and bring the link up again.
  *
  * For RAW links, ldc_connect() is not used.  Instead the driver
index 799e797c5cddc10e5793480a1349ba8786e65da2..08160bf9a0f4181c7e9af61e34de13b368564ca9 100644 (file)
@@ -93,7 +93,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 
        /* We have to be extremely careful here or else we will miss
         * a TSB grow if we switch back and forth between a kernel
-        * thread and an address space which has it's TSB size increased
+        * thread and an address space which has its TSB size increased
         * on another processor.
         *
         * It is possible to play some games in order to optimize the
@@ -118,7 +118,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
         *
         * At that point cpu0 continues to use a stale TSB, the one from
         * before the TSB grow performed on cpu1.  cpu1 did not cross-call
-        * cpu0 to update it's TSB because at that point the cpu_vm_mask
+        * cpu0 to update its TSB because at that point the cpu_vm_mask
         * only had cpu1 set in it.
         */
        tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));
index 0a7ffcfd59cda0ade67bd7c640b1f69edc758e30..e2eed8f97665fb5029df9cc6969a3f44502de77f 100644 (file)
 /* SPDX-License-Identifier: GPL-2.0 */
-/* parport.h: sparc64 specific parport initialization and dma.
- *
- * Copyright (C) 1999  Eddie C. Dost  (ecd@skynet.be)
- */
+#ifndef ___ASM_SPARC_PARPORT_H
+#define ___ASM_SPARC_PARPORT_H
 
-#ifndef _ASM_SPARC64_PARPORT_H
-#define _ASM_SPARC64_PARPORT_H 1
-
-#include <linux/of.h>
-#include <linux/platform_device.h>
-
-#include <asm/ebus_dma.h>
-#include <asm/ns87303.h>
-#include <asm/prom.h>
-
-#define PARPORT_PC_MAX_PORTS   PARPORT_MAX
-
-/*
- * While sparc64 doesn't have an ISA DMA API, we provide something that looks
- * close enough to make parport_pc happy
- */
-#define HAS_DMA
-
-#ifdef CONFIG_PARPORT_PC_FIFO
-static DEFINE_SPINLOCK(dma_spin_lock);
-
-#define claim_dma_lock() \
-({     unsigned long flags; \
-       spin_lock_irqsave(&dma_spin_lock, flags); \
-       flags; \
-})
-
-#define release_dma_lock(__flags) \
-       spin_unlock_irqrestore(&dma_spin_lock, __flags);
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/parport_64.h>
+#else
+#include <asm-generic/parport.h>
+#endif
 #endif
 
-static struct sparc_ebus_info {
-       struct ebus_dma_info info;
-       unsigned int addr;
-       unsigned int count;
-       int lock;
-
-       struct parport *port;
-} sparc_ebus_dmas[PARPORT_PC_MAX_PORTS];
-
-static DECLARE_BITMAP(dma_slot_map, PARPORT_PC_MAX_PORTS);
-
-static inline int request_dma(unsigned int dmanr, const char *device_id)
-{
-       if (dmanr >= PARPORT_PC_MAX_PORTS)
-               return -EINVAL;
-       if (xchg(&sparc_ebus_dmas[dmanr].lock, 1) != 0)
-               return -EBUSY;
-       return 0;
-}
-
-static inline void free_dma(unsigned int dmanr)
-{
-       if (dmanr >= PARPORT_PC_MAX_PORTS) {
-               printk(KERN_WARNING "Trying to free DMA%d\n", dmanr);
-               return;
-       }
-       if (xchg(&sparc_ebus_dmas[dmanr].lock, 0) == 0) {
-               printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr);
-               return;
-       }
-}
-
-static inline void enable_dma(unsigned int dmanr)
-{
-       ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 1);
-
-       if (ebus_dma_request(&sparc_ebus_dmas[dmanr].info,
-                            sparc_ebus_dmas[dmanr].addr,
-                            sparc_ebus_dmas[dmanr].count))
-               BUG();
-}
-
-static inline void disable_dma(unsigned int dmanr)
-{
-       ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 0);
-}
-
-static inline void clear_dma_ff(unsigned int dmanr)
-{
-       /* nothing */
-}
-
-static inline void set_dma_mode(unsigned int dmanr, char mode)
-{
-       ebus_dma_prepare(&sparc_ebus_dmas[dmanr].info, (mode != DMA_MODE_WRITE));
-}
-
-static inline void set_dma_addr(unsigned int dmanr, unsigned int addr)
-{
-       sparc_ebus_dmas[dmanr].addr = addr;
-}
-
-static inline void set_dma_count(unsigned int dmanr, unsigned int count)
-{
-       sparc_ebus_dmas[dmanr].count = count;
-}
-
-static inline unsigned int get_dma_residue(unsigned int dmanr)
-{
-       return ebus_dma_residue(&sparc_ebus_dmas[dmanr].info);
-}
-
-static int ecpp_probe(struct platform_device *op)
-{
-       unsigned long base = op->resource[0].start;
-       unsigned long config = op->resource[1].start;
-       unsigned long d_base = op->resource[2].start;
-       unsigned long d_len;
-       struct device_node *parent;
-       struct parport *p;
-       int slot, err;
-
-       parent = op->dev.of_node->parent;
-       if (of_node_name_eq(parent, "dma")) {
-               p = parport_pc_probe_port(base, base + 0x400,
-                                         op->archdata.irqs[0], PARPORT_DMA_NOFIFO,
-                                         op->dev.parent->parent, 0);
-               if (!p)
-                       return -ENOMEM;
-               dev_set_drvdata(&op->dev, p);
-               return 0;
-       }
-
-       for (slot = 0; slot < PARPORT_PC_MAX_PORTS; slot++) {
-               if (!test_and_set_bit(slot, dma_slot_map))
-                       break;
-       }
-       err = -ENODEV;
-       if (slot >= PARPORT_PC_MAX_PORTS)
-               goto out_err;
-
-       spin_lock_init(&sparc_ebus_dmas[slot].info.lock);
-
-       d_len = (op->resource[2].end - d_base) + 1UL;
-       sparc_ebus_dmas[slot].info.regs =
-               of_ioremap(&op->resource[2], 0, d_len, "ECPP DMA");
-
-       if (!sparc_ebus_dmas[slot].info.regs)
-               goto out_clear_map;
-
-       sparc_ebus_dmas[slot].info.flags = 0;
-       sparc_ebus_dmas[slot].info.callback = NULL;
-       sparc_ebus_dmas[slot].info.client_cookie = NULL;
-       sparc_ebus_dmas[slot].info.irq = 0xdeadbeef;
-       strcpy(sparc_ebus_dmas[slot].info.name, "parport");
-       if (ebus_dma_register(&sparc_ebus_dmas[slot].info))
-               goto out_unmap_regs;
-
-       ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 1);
-
-       /* Configure IRQ to Push Pull, Level Low */
-       /* Enable ECP, set bit 2 of the CTR first */
-       outb(0x04, base + 0x02);
-       ns87303_modify(config, PCR,
-                      PCR_EPP_ENABLE |
-                      PCR_IRQ_ODRAIN,
-                      PCR_ECP_ENABLE |
-                      PCR_ECP_CLK_ENA |
-                      PCR_IRQ_POLAR);
-
-       /* CTR bit 5 controls direction of port */
-       ns87303_modify(config, PTR,
-                      0, PTR_LPT_REG_DIR);
-
-       p = parport_pc_probe_port(base, base + 0x400,
-                                 op->archdata.irqs[0],
-                                 slot,
-                                 op->dev.parent,
-                                 0);
-       err = -ENOMEM;
-       if (!p)
-               goto out_disable_irq;
-
-       dev_set_drvdata(&op->dev, p);
-
-       return 0;
-
-out_disable_irq:
-       ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0);
-       ebus_dma_unregister(&sparc_ebus_dmas[slot].info);
-
-out_unmap_regs:
-       of_iounmap(&op->resource[2], sparc_ebus_dmas[slot].info.regs, d_len);
-
-out_clear_map:
-       clear_bit(slot, dma_slot_map);
-
-out_err:
-       return err;
-}
-
-static int ecpp_remove(struct platform_device *op)
-{
-       struct parport *p = dev_get_drvdata(&op->dev);
-       int slot = p->dma;
-
-       parport_pc_unregister_port(p);
-
-       if (slot != PARPORT_DMA_NOFIFO) {
-               unsigned long d_base = op->resource[2].start;
-               unsigned long d_len;
-
-               d_len = (op->resource[2].end - d_base) + 1UL;
-
-               ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0);
-               ebus_dma_unregister(&sparc_ebus_dmas[slot].info);
-               of_iounmap(&op->resource[2],
-                          sparc_ebus_dmas[slot].info.regs,
-                          d_len);
-               clear_bit(slot, dma_slot_map);
-       }
-
-       return 0;
-}
-
-static const struct of_device_id ecpp_match[] = {
-       {
-               .name = "ecpp",
-       },
-       {
-               .name = "parallel",
-               .compatible = "ecpp",
-       },
-       {
-               .name = "parallel",
-               .compatible = "ns87317-ecpp",
-       },
-       {
-               .name = "parallel",
-               .compatible = "pnpALI,1533,3",
-       },
-       {},
-};
-
-static struct platform_driver ecpp_driver = {
-       .driver = {
-               .name = "ecpp",
-               .of_match_table = ecpp_match,
-       },
-       .probe                  = ecpp_probe,
-       .remove                 = ecpp_remove,
-};
-
-static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
-{
-       return platform_driver_register(&ecpp_driver);
-}
-
-#endif /* !(_ASM_SPARC64_PARPORT_H) */
diff --git a/arch/sparc/include/asm/parport_64.h b/arch/sparc/include/asm/parport_64.h
new file mode 100644
index 0000000..0a7ffcf
--- /dev/null
+++ b/arch/sparc/include/asm/parport_64.h
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* parport.h: sparc64 specific parport initialization and dma.
+ *
+ * Copyright (C) 1999  Eddie C. Dost  (ecd@skynet.be)
+ */
+
+#ifndef _ASM_SPARC64_PARPORT_H
+#define _ASM_SPARC64_PARPORT_H 1
+
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <asm/ebus_dma.h>
+#include <asm/ns87303.h>
+#include <asm/prom.h>
+
+#define PARPORT_PC_MAX_PORTS   PARPORT_MAX
+
+/*
+ * While sparc64 doesn't have an ISA DMA API, we provide something that looks
+ * close enough to make parport_pc happy
+ */
+#define HAS_DMA
+
+#ifdef CONFIG_PARPORT_PC_FIFO
+static DEFINE_SPINLOCK(dma_spin_lock);
+
+#define claim_dma_lock() \
+({     unsigned long flags; \
+       spin_lock_irqsave(&dma_spin_lock, flags); \
+       flags; \
+})
+
+#define release_dma_lock(__flags) \
+       spin_unlock_irqrestore(&dma_spin_lock, __flags);
+#endif
+
+static struct sparc_ebus_info {
+       struct ebus_dma_info info;
+       unsigned int addr;
+       unsigned int count;
+       int lock;
+
+       struct parport *port;
+} sparc_ebus_dmas[PARPORT_PC_MAX_PORTS];
+
+static DECLARE_BITMAP(dma_slot_map, PARPORT_PC_MAX_PORTS);
+
+static inline int request_dma(unsigned int dmanr, const char *device_id)
+{
+       if (dmanr >= PARPORT_PC_MAX_PORTS)
+               return -EINVAL;
+       if (xchg(&sparc_ebus_dmas[dmanr].lock, 1) != 0)
+               return -EBUSY;
+       return 0;
+}
+
+static inline void free_dma(unsigned int dmanr)
+{
+       if (dmanr >= PARPORT_PC_MAX_PORTS) {
+               printk(KERN_WARNING "Trying to free DMA%d\n", dmanr);
+               return;
+       }
+       if (xchg(&sparc_ebus_dmas[dmanr].lock, 0) == 0) {
+               printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr);
+               return;
+       }
+}
+
+static inline void enable_dma(unsigned int dmanr)
+{
+       ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 1);
+
+       if (ebus_dma_request(&sparc_ebus_dmas[dmanr].info,
+                            sparc_ebus_dmas[dmanr].addr,
+                            sparc_ebus_dmas[dmanr].count))
+               BUG();
+}
+
+static inline void disable_dma(unsigned int dmanr)
+{
+       ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 0);
+}
+
+static inline void clear_dma_ff(unsigned int dmanr)
+{
+       /* nothing */
+}
+
+static inline void set_dma_mode(unsigned int dmanr, char mode)
+{
+       ebus_dma_prepare(&sparc_ebus_dmas[dmanr].info, (mode != DMA_MODE_WRITE));
+}
+
+static inline void set_dma_addr(unsigned int dmanr, unsigned int addr)
+{
+       sparc_ebus_dmas[dmanr].addr = addr;
+}
+
+static inline void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+       sparc_ebus_dmas[dmanr].count = count;
+}
+
+static inline unsigned int get_dma_residue(unsigned int dmanr)
+{
+       return ebus_dma_residue(&sparc_ebus_dmas[dmanr].info);
+}
+
+static int ecpp_probe(struct platform_device *op)
+{
+       unsigned long base = op->resource[0].start;
+       unsigned long config = op->resource[1].start;
+       unsigned long d_base = op->resource[2].start;
+       unsigned long d_len;
+       struct device_node *parent;
+       struct parport *p;
+       int slot, err;
+
+       parent = op->dev.of_node->parent;
+       if (of_node_name_eq(parent, "dma")) {
+               p = parport_pc_probe_port(base, base + 0x400,
+                                         op->archdata.irqs[0], PARPORT_DMA_NOFIFO,
+                                         op->dev.parent->parent, 0);
+               if (!p)
+                       return -ENOMEM;
+               dev_set_drvdata(&op->dev, p);
+               return 0;
+       }
+
+       for (slot = 0; slot < PARPORT_PC_MAX_PORTS; slot++) {
+               if (!test_and_set_bit(slot, dma_slot_map))
+                       break;
+       }
+       err = -ENODEV;
+       if (slot >= PARPORT_PC_MAX_PORTS)
+               goto out_err;
+
+       spin_lock_init(&sparc_ebus_dmas[slot].info.lock);
+
+       d_len = (op->resource[2].end - d_base) + 1UL;
+       sparc_ebus_dmas[slot].info.regs =
+               of_ioremap(&op->resource[2], 0, d_len, "ECPP DMA");
+
+       if (!sparc_ebus_dmas[slot].info.regs)
+               goto out_clear_map;
+
+       sparc_ebus_dmas[slot].info.flags = 0;
+       sparc_ebus_dmas[slot].info.callback = NULL;
+       sparc_ebus_dmas[slot].info.client_cookie = NULL;
+       sparc_ebus_dmas[slot].info.irq = 0xdeadbeef;
+       strcpy(sparc_ebus_dmas[slot].info.name, "parport");
+       if (ebus_dma_register(&sparc_ebus_dmas[slot].info))
+               goto out_unmap_regs;
+
+       ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 1);
+
+       /* Configure IRQ to Push Pull, Level Low */
+       /* Enable ECP, set bit 2 of the CTR first */
+       outb(0x04, base + 0x02);
+       ns87303_modify(config, PCR,
+                      PCR_EPP_ENABLE |
+                      PCR_IRQ_ODRAIN,
+                      PCR_ECP_ENABLE |
+                      PCR_ECP_CLK_ENA |
+                      PCR_IRQ_POLAR);
+
+       /* CTR bit 5 controls direction of port */
+       ns87303_modify(config, PTR,
+                      0, PTR_LPT_REG_DIR);
+
+       p = parport_pc_probe_port(base, base + 0x400,
+                                 op->archdata.irqs[0],
+                                 slot,
+                                 op->dev.parent,
+                                 0);
+       err = -ENOMEM;
+       if (!p)
+               goto out_disable_irq;
+
+       dev_set_drvdata(&op->dev, p);
+
+       return 0;
+
+out_disable_irq:
+       ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0);
+       ebus_dma_unregister(&sparc_ebus_dmas[slot].info);
+
+out_unmap_regs:
+       of_iounmap(&op->resource[2], sparc_ebus_dmas[slot].info.regs, d_len);
+
+out_clear_map:
+       clear_bit(slot, dma_slot_map);
+
+out_err:
+       return err;
+}
+
+static int ecpp_remove(struct platform_device *op)
+{
+       struct parport *p = dev_get_drvdata(&op->dev);
+       int slot = p->dma;
+
+       parport_pc_unregister_port(p);
+
+       if (slot != PARPORT_DMA_NOFIFO) {
+               unsigned long d_base = op->resource[2].start;
+               unsigned long d_len;
+
+               d_len = (op->resource[2].end - d_base) + 1UL;
+
+               ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0);
+               ebus_dma_unregister(&sparc_ebus_dmas[slot].info);
+               of_iounmap(&op->resource[2],
+                          sparc_ebus_dmas[slot].info.regs,
+                          d_len);
+               clear_bit(slot, dma_slot_map);
+       }
+
+       return 0;
+}
+
+static const struct of_device_id ecpp_match[] = {
+       {
+               .name = "ecpp",
+       },
+       {
+               .name = "parallel",
+               .compatible = "ecpp",
+       },
+       {
+               .name = "parallel",
+               .compatible = "ns87317-ecpp",
+       },
+       {
+               .name = "parallel",
+               .compatible = "pnpALI,1533,3",
+       },
+       {},
+};
+
+static struct platform_driver ecpp_driver = {
+       .driver = {
+               .name = "ecpp",
+               .of_match_table = ecpp_match,
+       },
+       .probe                  = ecpp_probe,
+       .remove                 = ecpp_remove,
+};
+
+static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
+{
+       return platform_driver_register(&ecpp_driver);
+}
+
+#endif /* !(_ASM_SPARC64_PARPORT_H) */
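
For orientation: this shim exists so that parport_pc's FIFO code, which is
written against the ISA DMA API, runs unchanged on sparc64. A minimal sketch
of the call sequence the shim has to satisfy (the function name and buffer
handling below are hypothetical, not part of the patch):

	/* Hypothetical caller: drives the shim the way parport_pc's FIFO
	 * path does.  'handle' is assumed to be an already DMA-mapped
	 * bus address. */
	static int example_dma_write(struct parport *port,
				     unsigned int handle, unsigned int len)
	{
		unsigned long flags;

		if (request_dma(port->dma, "parport_pc"))
			return -EBUSY;		/* bad slot or already claimed */

		flags = claim_dma_lock();
		disable_dma(port->dma);
		clear_dma_ff(port->dma);	/* no-op on sparc64 */
		set_dma_mode(port->dma, DMA_MODE_WRITE);
		set_dma_addr(port->dma, handle);/* only recorded for now */
		set_dma_count(port->dma, len);
		enable_dma(port->dma);		/* issues ebus_dma_request() */
		release_dma_lock(flags);

		/* ... wait for the transfer to complete, then ... */
		flags = claim_dma_lock();
		disable_dma(port->dma);
		release_dma_lock(flags);
		free_dma(port->dma);
		return 0;
	}

Note the design choice visible above: set_dma_addr()/set_dma_count() merely
record the transfer, and enable_dma() defers to ebus_dma_request(), because
the EBUS engine takes address and length in a single call rather than via
separate ISA-style register writes.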
index a8c871b7d78608dd5897a5fe7a1ebf040dc09141..4d1bafaba942d2e5115868772086524644f8d3ab 100644
@@ -680,8 +680,8 @@ static inline unsigned long pte_special(pte_t pte)
        return pte_val(pte) & _PAGE_SPECIAL;
 }
 
-#define pmd_leaf       pmd_large
-static inline unsigned long pmd_large(pmd_t pmd)
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
 {
        pte_t pte = __pte(pmd_val(pmd));
 
@@ -867,8 +867,8 @@ static inline pmd_t *pud_pgtable(pud_t pud)
 /* only used by the stubbed out hugetlb gup code, should never be called */
 #define p4d_page(p4d)                  NULL
 
-#define pud_leaf       pud_large
-static inline unsigned long pud_large(pud_t pud)
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
 {
        pte_t pte = __pte(pud_val(pud));
 
@@ -929,6 +929,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
        maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm, PAGE_SHIFT);
 }
 
+#define PFN_PTE_SHIFT          PAGE_SHIFT
+
 static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
                pte_t *ptep, pte_t pte, unsigned int nr)
 {
index 14f3c49bfdbc863bc56d4207fd604403c4fa09d8..d93963ff7caa0e21a7624c34ea0a3695a516e6ba 100644
@@ -15,7 +15,7 @@ do {                                          \
         * for l0/l1.  It will use one for 'next' and the other to hold
         * the output value of 'last'.  'next' is not referenced again
         * past the invocation of switch_to in the scheduler, so we need
-        * not preserve it's value.  Hairy, but it lets us remove 2 loads
+        * not preserve its value.  Hairy, but it lets us remove 2 loads
         * and 2 stores in this critical code path.  -DaveM
         */
 #define switch_to(prev, next, last)                                    \
index e2d3f0d2971f715794a08510aa5f420c1c03c603..2bf558a0c568063cc2ca182e7c2379310fedf0f3 100644
@@ -8,6 +8,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/console.h>
+#include <linux/font.h>
 
 #include <asm/btext.h>
 #include <asm/oplib.h>
@@ -20,9 +21,9 @@ static void scrollscreen(void);
 #endif
 
 static void draw_byte(unsigned char c, long locX, long locY);
-static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb);
-static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb);
-static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb);
+static void draw_byte_32(const unsigned char *bits, unsigned int *base, int rb);
+static void draw_byte_16(const unsigned char *bits, unsigned int *base, int rb);
+static void draw_byte_8(const unsigned char *bits, unsigned int *base, int rb);
 
 #define __force_data __section(".data")
 
@@ -36,10 +37,6 @@ static int dispDeviceDepth  __force_data;
 static int dispDeviceRect[4] __force_data;
 static unsigned char *dispDeviceBase __force_data;
 
-#define cmapsz (16*256)
-
-static unsigned char vga_font[cmapsz];
-
 static int __init btext_initialize(phandle node)
 {
        unsigned int width, height, depth, pitch;
@@ -194,7 +191,8 @@ static void btext_drawtext(const char *c, unsigned int len)
 static void draw_byte(unsigned char c, long locX, long locY)
 {
        unsigned char *base     = calc_base(locX << 3, locY << 4);
-       unsigned char *font     = &vga_font[((unsigned int)c) * 16];
+       unsigned int font_index = c * 16;
+       const unsigned char *font       = font_sun_8x16.data + font_index;
        int rb                  = dispDeviceRowBytes;
 
        switch(dispDeviceDepth) {
@@ -239,7 +237,7 @@ static unsigned int expand_bits_16[4] = {
 };
 
 
-static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
+static void draw_byte_32(const unsigned char *font, unsigned int *base, int rb)
 {
        int l, bits;
        int fg = 0xFFFFFFFFUL;
@@ -260,7 +258,7 @@ static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
        }
 }
 
-static void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
+static void draw_byte_16(const unsigned char *font, unsigned int *base, int rb)
 {
        int l, bits;
        int fg = 0xFFFFFFFFUL;
@@ -278,7 +276,7 @@ static void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
        }
 }
 
-static void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
+static void draw_byte_8(const unsigned char *font, unsigned int *base, int rb)
 {
        int l, bits;
        int fg = 0x0F0F0F0FUL;
@@ -326,348 +324,3 @@ int __init btext_find_display(void)
        }
        return ret;
 }
-
-static unsigned char vga_font[cmapsz] = {
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd,
-0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff,
-0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe,
-0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18,
-0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
-0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
-0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
-0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd,
-0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e,
-0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30,
-0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63,
-0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8,
-0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e,
-0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
-0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb,
-0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6,
-0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
-0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0,
-0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c,
-0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c,
-0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18,
-0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c,
-0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30,
-0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18,
-0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe,
-0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
-0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18,
-0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00,
-0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00,
-0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60,
-0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde,
-0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38,
-0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0,
-0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c,
-0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68,
-0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
-0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c,
-0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60,
-0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7,
-0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66,
-0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c,
-0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c,
-0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
-0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18,
-0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
-0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30,
-0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
-0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06,
-0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb,
-0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66,
-0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60,
-0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30,
-0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3,
-0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18,
-0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6,
-0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
-0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00,
-0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe,
-0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c,
-0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38,
-0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06,
-0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe,
-0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
-0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66,
-0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6,
-0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00,
-0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b,
-0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c,
-0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18,
-0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
-0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
-0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
-0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18,
-0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66,
-0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18,
-0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30,
-0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc,
-0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
-0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
-0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06,
-0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30,
-0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18,
-0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36,
-0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44,
-0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
-0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
-0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
-0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
-0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0,
-0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0,
-0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8,
-0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66,
-0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38,
-0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66,
-0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60,
-0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c,
-0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18,
-0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30,
-0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00,
-0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
-0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c,
-0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00,
-};
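
The array deleted above was a private copy of an 8x16 console font; the hunk
earlier in this file switches draw_byte() to the shared font_sun_8x16 from
<linux/font.h> instead. With one byte per row and 16 rows per glyph, the
lookup is a fixed-stride index (a sketch mirroring the change; the helper
name is invented):

	#include <linux/font.h>

	/* Glyph 'c' occupies 16 consecutive bytes at offset c * 16. */
	static const unsigned char *example_glyph(unsigned char c)
	{
		return (const unsigned char *)font_sun_8x16.data + c * 16;
	}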
index d5fad5fb04c1d9687fd840e85da860373a3d3b95..00e571c30bb5a9dff26e8e81d96b9ed28eefc2d6 100644
@@ -30,7 +30,7 @@
 #define PFX DRV_MODULE_NAME    ": "
 #define DRV_MODULE_VERSION     "0.2"
 
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("UltraSPARC-III memory controller driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
index 4a5bdb0df7797be4d89849b8c801fad89213b75a..ffdc15588ac2e229be2ae5a3a5da7fcbebdd3a9c 100644
@@ -33,7 +33,7 @@
 
 static char version[] =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("Sun LDOM domain services driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
index e8452be5123be3c9c2c10d516161dcf5fb623c4e..8605dd710f3c79dac9499acdb6884590a252190d 100644
@@ -268,11 +268,11 @@ int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler)
        if (sparc_cpu_model != sparc_leon) {
                struct tt_entry *trap_table;
 
-               trap_table = &trapbase_cpu1;
+               trap_table = &trapbase_cpu1[0];
                INSTANTIATE(trap_table)
-               trap_table = &trapbase_cpu2;
+               trap_table = &trapbase_cpu2[0];
                INSTANTIATE(trap_table)
-               trap_table = &trapbase_cpu3;
+               trap_table = &trapbase_cpu3[0];
                INSTANTIATE(trap_table)
        }
 #endif
index 72da2e10e2559ab8ea1dc19c63af917b6ca2a492..5280e325d4d6f1081dc6f5f7748fcaeaf71cb11c 100644
@@ -980,7 +980,7 @@ void notrace init_irqwork_curcpu(void)
  *
  * On SMP this gets invoked from the CPU trampoline before
  * the cpu has fully taken over the trap table from OBP,
- * and it's kernel stack + %g6 thread register state is
+ * and its kernel stack + %g6 thread register state is
  * not fully cooked yet.
  *
  * Therefore you cannot make any OBP calls, not even prom_printf,
index 15da3c0597a564af026ccc93db42f8a80339c74d..a8fb7c0bf053a0af9affd33ce4ee24b2a17d4acc 100644
@@ -138,10 +138,10 @@ extern unsigned int t_nmi[];
 extern unsigned int linux_trap_ipi15_sun4d[];
 extern unsigned int linux_trap_ipi15_sun4m[];
 
-extern struct tt_entry trapbase;
-extern struct tt_entry trapbase_cpu1;
-extern struct tt_entry trapbase_cpu2;
-extern struct tt_entry trapbase_cpu3;
+extern struct tt_entry trapbase[];
+extern struct tt_entry trapbase_cpu1[];
+extern struct tt_entry trapbase_cpu2[];
+extern struct tt_entry trapbase_cpu3[];
 
 extern char cputypval[];
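
Declaring the trap tables as incomplete arrays matches how the symbols are
defined on the assembly side (tables of many entries, not single objects),
so expressions like &trapbase_cpu1[0] or page arithmetic on the tables no
longer look like out-of-bounds accesses to the compiler. The pattern in
miniature (names invented):

	/* Assembly defines a whole table at this symbol; the incomplete
	 * array type tells C "some number of entries live here" without
	 * claiming a size the compiler would bounds-check. */
	extern struct tt_entry example_table[];

	static struct tt_entry *example_base(void)
	{
		/* Same address the old '&example_table' produced. */
		return &example_table[0];
	}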
 
index 58ad3f7de1fb531061e93bdb7ab346181b2fbffb..3b2c673ec627da72cd7ae64ab1f72d44c636c3b9 100644
@@ -37,7 +37,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
        gdb_regs[GDB_Y] = regs->y;
        gdb_regs[GDB_PSR] = regs->psr;
        gdb_regs[GDB_WIM] = 0;
-       gdb_regs[GDB_TBR] = (unsigned long) &trapbase;
+       gdb_regs[GDB_TBR] = (unsigned long) &trapbase[0];
        gdb_regs[GDB_PC] = regs->pc;
        gdb_regs[GDB_NPC] = regs->npc;
        gdb_regs[GDB_FSR] = 0;
@@ -72,7 +72,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 
        gdb_regs[GDB_PSR] = t->kpsr;
        gdb_regs[GDB_WIM] = t->kwim;
-       gdb_regs[GDB_TBR] = (unsigned long) &trapbase;
+       gdb_regs[GDB_TBR] = (unsigned long) &trapbase[0];
        gdb_regs[GDB_PC] = t->kpc;
        gdb_regs[GDB_NPC] = t->kpc + 4;
        gdb_regs[GDB_FSR] = 0;
index 535c7b35cb59902e3341989afac750eb497cdd2d..191bbaca9921249d70479d32ee25ef168b11a25a 100644
@@ -230,7 +230,7 @@ static unsigned long __kprobes relbranch_fixup(u32 insn, struct kprobe *p,
        return regs->tnpc;
 }
 
-/* If INSN is an instruction which writes it's PC location
+/* If INSN is an instruction which writes its PC location
  * into a destination register, fix that up.
  */
 static void __kprobes retpc_fixup(struct pt_regs *regs, u32 insn,
index c0fa3ef6cf016bf155539b71d6e69d0726fb4d1c..7f3cdb6f644d25f479c007a8449e482883c25e7d 100644
@@ -1854,7 +1854,7 @@ static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
                         * This seems the best behavior because this allows
                         * a user of the LDC layer to start with a small
                         * RX buffer for ldc_read() calls and use -EMSGSIZE
-                        * as a cue to enlarge it's read buffer.
+                        * as a cue to enlarge its read buffer.
                         */
                        err = -EMSGSIZE;
                        break;
index 8700a0e3b0df7d9c8ba29f763fe76bd3fbbc3f62..b2b639bee068485dba8ef2ac5daee9ad086f3385 100644
@@ -697,7 +697,7 @@ err1:
        return err;
 }
 
-static const struct of_device_id grpci1_of_match[] __initconst = {
+static const struct of_device_id grpci1_of_match[] = {
        {
         .name = "GAISLER_PCIFBRG",
         },
index 60b6bdf7761fb3e9707d0c21fdd5e4347fbed577..9f662340b5b2115ad047321a0c115b5e0c4852a7 100644
@@ -586,7 +586,7 @@ static void grpci2_hw_init(struct grpci2_priv *priv)
        REGSTORE(regs->io_map, REGLOAD(regs->io_map) & 0x0000ffff);
 
        /* set 1:1 mapping between AHB -> PCI memory space, for all Masters
-        * Each AHB master has it's own mapping registers. Max 16 AHB masters.
+        * Each AHB master has its own mapping registers. Max 16 AHB masters.
         */
        for (i = 0; i < 16; i++)
                REGSTORE(regs->ahbmst_map[i], priv->pci_area);
@@ -889,7 +889,7 @@ err1:
        return err;
 }
 
-static const struct of_device_id grpci2_of_match[] __initconst = {
+static const struct of_device_id grpci2_of_match[] = {
        {
         .name = "GAISLER_GRPCI2",
         },
index 991e9ad3d3e8f953894ca1015306b7d8e7e6688e..1ee393abc463e7970a2ce9d447e40080cade7dac 100644
@@ -245,13 +245,13 @@ void __init leon_smp_done(void)
 
        /* Free unneeded trap tables */
        if (!cpu_present(1)) {
-               free_reserved_page(virt_to_page(&trapbase_cpu1));
+               free_reserved_page(virt_to_page(&trapbase_cpu1[0]));
        }
        if (!cpu_present(2)) {
-               free_reserved_page(virt_to_page(&trapbase_cpu2));
+               free_reserved_page(virt_to_page(&trapbase_cpu2[0]));
        }
        if (!cpu_present(3)) {
-               free_reserved_page(virt_to_page(&trapbase_cpu3));
+               free_reserved_page(virt_to_page(&trapbase_cpu3[0]));
        }
        /* Ok, they are spinning and ready to go. */
        smp_processors_ready = 1;
index 17cdfdbf1f3b78928e4bc8cea63a1d0ed2898df9..149adc09475306d5eb2f891f66860c18ed3f7608 100644
@@ -279,7 +279,7 @@ static int __init setup_nmi_watchdog(char *str)
        if (!strncmp(str, "panic", 5))
                panic_on_timeout = 1;
 
-       return 0;
+       return 1;
 }
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
index d3842821a5a050290fe02314e04158616af0d3ee..c350c58c7f699972d1d33ee4c14e06e99f64ad05 100644
@@ -560,7 +560,7 @@ static unsigned int __init build_one_device_irq(struct platform_device *op,
         *
         * If we hit a bus type or situation we cannot handle, we
         * stop and assume that the original IRQ number was in a
-        * format which has special meaning to it's immediate parent.
+        * format which has special meaning to its immediate parent.
         */
        pp = dp->parent;
        ip = NULL;
index f66005ce4cb56a60408d9229d1967cbe39dda8aa..50a0927a84a6fdbdf91b6c997341c7574a6bd175 100644
@@ -311,7 +311,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
        /* We can't actually use the firmware value, we have
         * to read what is in the register right now.  One
         * reason is that in the case of IDE interfaces the
-        * firmware can sample the value before the the IDE
+        * firmware can sample the value before the IDE
         * interface is programmed into native mode.
         */
        pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
index f31761f517575d7efa69d344cef61b0a816a4e02..83718876f1d49bc9dd4123789a9671f0d254d00e 100644
@@ -19,9 +19,9 @@
  * each with one (Sabre) or two (PSYCHO/SCHIZO) PCI bus modules
  * underneath.  Each PCI bus module uses an IOMMU (shared by both
  * PBMs of a controller, or per-PBM), and if a streaming buffer
- * is present, each PCI bus module has it's own. (ie. the IOMMU
+ * is present, each PCI bus module has its own. (ie. the IOMMU
  * might be shared between PBMs, the STC is never shared)
- * Furthermore, each PCI bus module controls it's own autonomous
+ * Furthermore, each PCI bus module controls its own autonomous
  * PCI bus.
  */
 
index 5d8dd49495863dc64d03809373e32887102f7baa..93cd9e5a80997619ed23df7a2d8f2f1844b6f968 100644
@@ -145,7 +145,7 @@ static void __schizo_check_stc_error_pbm(struct pci_pbm_info *pbm,
 
        /* This is __REALLY__ dangerous.  When we put the
         * streaming buffer into diagnostic mode to probe
-        * it's tags and error status, we _must_ clear all
+        * its tags and error status, we _must_ clear all
         * of the line tag valid bits before re-enabling
         * the streaming buffer.  If any dirty data lives
         * in the STC when we do this, we will end up
@@ -275,7 +275,7 @@ static void schizo_check_iommu_error_pbm(struct pci_pbm_info *pbm,
                       pbm->name, type_string);
 
                /* Put the IOMMU into diagnostic mode and probe
-                * it's TLB for entries with error status.
+                * its TLB for entries with error status.
                 *
                 * It is very possible for another DVMA to occur
                 * while we do this probe, and corrupt the system
index a58ae9c428032be203f5f74de8c8729447f52e2a..f02a283a8e8f052ed6400291db0d085a2c0071f3 100644
@@ -979,7 +979,7 @@ out:
 
 static void sparc_pmu_start(struct perf_event *event, int flags);
 
-/* On this PMU each PIC has it's own PCR control register.  */
+/* On this PMU each PIC has its own PCR control register.  */
 static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
 {
        int i;
index 426bd08cb2ab1053111a4ef33ca24b092311285a..5752bfd73ac0c4a825bbc4fe016ce4276c143717 100644
@@ -394,7 +394,7 @@ static unsigned int schizo_irq_build(struct device_node *dp,
        iclr = schizo_ino_to_iclr(pbm_regs, ino);
 
        /* On Schizo, no inofixup occurs.  This is because each
-        * INO has it's own IMAP register.  On Psycho and Sabre
+        * INO has its own IMAP register.  On Psycho and Sabre
         * there is only one IMAP register for each PCI slot even
         * though four different INOs can be generated by each
         * PCI slot.
index 5ee74b4c0cf40e3e920ddcb0834b2ca100592bbe..4557ef18f3712261b74204f74a20a7738670a706 100644
@@ -50,7 +50,7 @@ static void psycho_check_stc_error(struct pci_pbm_info *pbm)
        spin_lock(&stc_buf_lock);
 
        /* This is __REALLY__ dangerous.  When we put the streaming
-        * buffer into diagnostic mode to probe it's tags and error
+        * buffer into diagnostic mode to probe its tags and error
         * status, we _must_ clear all of the line tag valid bits
         * before re-enabling the streaming buffer.  If any dirty data
         * lives in the STC when we do this, we will end up
index e3b72a7b46d37975ec65b6c200d3a68b7ca19def..704375c061e730d436baa57bcac3980b089b7129 100644
@@ -67,7 +67,7 @@ static void prom_sync_me(void)
        __asm__ __volatile__("wr %0, 0x0, %%tbr\n\t"
                             "nop\n\t"
                             "nop\n\t"
-                            "nop\n\t" : : "r" (&trapbase));
+                            "nop\n\t" : : "r" (&trapbase[0]));
 
        prom_printf("PROM SYNC COMMAND...\n");
        show_mem();
@@ -285,7 +285,7 @@ void __init setup_arch(char **cmdline_p)
        int i;
        unsigned long highest_paddr;
 
-       sparc_ttable = &trapbase;
+       sparc_ttable = &trapbase[0];
 
        /* Initialize PROM console and command line. */
        *cmdline_p = prom_getbootargs();
index 89b93c7136e708581a916cf96063faafb9349e3e..478014d2e59ba95d2a392a105bcfc91a46b5f4ab 100644
@@ -473,7 +473,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
         *
         * %g7 is used as the "thread register".   %g6 is not used in
         * any fixed manner.  %g6 is used as a scratch register and
-        * a compiler temporary, but it's value is never used across
+        * a compiler temporary, but its value is never used across
         * a system call.  Therefore %g6 is usable for orig_i0 storage.
         */
        if (pt_regs_is_syscall(regs) && (regs->psr & PSR_C))
index b4e410976e0d5c1c8414ff3334a7ec84cc745f22..2d64566a1f88a8ec652ba6e4c3fe12986324cf6a 100644
@@ -494,7 +494,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
         *
         * %g7 is used as the "thread register".   %g6 is not used in
         * any fixed manner.  %g6 is used as a scratch register and
-        * a compiler temporary, but it's value is never used across
+        * a compiler temporary, but its value is never used across
         * a system call.  Therefore %g6 is usable for orig_i0 storage.
         */
        if (pt_regs_is_syscall(regs) &&
index b78df3a15a72ed48e89bf39eff6aa77c19b60ef2..846a55f942d4e755b0df2ba6898e1e05024ff2bd 100644
@@ -149,7 +149,7 @@ static struct attribute *vio_dev_attrs[] = {
  };
 ATTRIBUTE_GROUPS(vio_dev);
 
-static struct bus_type vio_bus_type = {
+static const struct bus_type vio_bus_type = {
        .name           = "vio",
        .dev_groups     = vio_dev_groups,
        .uevent         = vio_hotplug,
index 59669ebddd4e1c98364a5b3efe9dec341752e641..ee5091dd67ed7724936f78c64a16c2943c8a16f8 100644
@@ -14,7 +14,7 @@ lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o
 lib-$(CONFIG_SPARC32) += copy_user.o locks.o
 lib-$(CONFIG_SPARC64) += atomic_64.o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
-lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
+lib-$(CONFIG_SPARC32) += muldi3.o bitext.o
 lib-$(CONFIG_SPARC64) += multi3.o
 lib-$(CONFIG_SPARC64) += fls.o
 lib-$(CONFIG_SPARC64) += fls64.o
@@ -51,5 +51,5 @@ lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
 lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
 
 obj-$(CONFIG_SPARC64) += iomap.o
-obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
+obj-$(CONFIG_SPARC32) += atomic32.o
 obj-$(CONFIG_SPARC64) += PeeCeeI.o
diff --git a/arch/sparc/lib/cmpdi2.c b/arch/sparc/lib/cmpdi2.c
deleted file mode 100644
index 333367f..0000000
--- a/arch/sparc/lib/cmpdi2.c
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-
-#include "libgcc.h"
-
-word_type __cmpdi2(long long a, long long b)
-{
-       const DWunion au = {
-               .ll = a
-       };
-       const DWunion bu = {
-               .ll = b
-       };
-
-       if (au.s.high < bu.s.high)
-               return 0;
-       else if (au.s.high > bu.s.high)
-               return 2;
-
-       if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
-               return 0;
-       else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
-               return 2;
-
-       return 1;
-}
-
-EXPORT_SYMBOL(__cmpdi2);
diff --git a/arch/sparc/lib/ucmpdi2.c b/arch/sparc/lib/ucmpdi2.c
deleted file mode 100644
index 82c1ccc..0000000
--- a/arch/sparc/lib/ucmpdi2.c
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include "libgcc.h"
-
-word_type __ucmpdi2(unsigned long long a, unsigned long long b)
-{
-       const DWunion au = {.ll = a};
-       const DWunion bu = {.ll = b};
-
-       if ((unsigned int) au.s.high < (unsigned int) bu.s.high)
-               return 0;
-       else if ((unsigned int) au.s.high > (unsigned int) bu.s.high)
-               return 2;
-       if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
-               return 0;
-       else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
-               return 2;
-       return 1;
-}
-EXPORT_SYMBOL(__ucmpdi2);
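
Both deleted files implemented libgcc's three-way 64-bit compare, returning
0 for less-than, 1 for equal, and 2 for greater-than; a 32-bit compiler may
emit such helpers as out-of-line calls when comparing long long values. The
kind of source that can lower to one of these calls on sparc32 (illustrative
only; whether a libcall is actually emitted depends on compiler and flags):

	/* May compile to a __ucmpdi2 libcall on some 32-bit targets. */
	int example_u64_less(unsigned long long a, unsigned long long b)
	{
		return a < b;
	}

Removing the C fallbacks (and their Makefile entries above) implies the
remaining sparc32 code no longer generates these libcalls; if it did, the
link would now fail with undefined __cmpdi2/__ucmpdi2 references.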
index f83017992eaaeb79e757adebf7cb2923e4859b0b..1ca9054d9b97157f20df0828097ad37c5789c4bb 100644
@@ -1665,14 +1665,14 @@ bool kern_addr_valid(unsigned long addr)
        if (pud_none(*pud))
                return false;
 
-       if (pud_large(*pud))
+       if (pud_leaf(*pud))
                return pfn_valid(pud_pfn(*pud));
 
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return false;
 
-       if (pmd_large(*pmd))
+       if (pmd_leaf(*pmd))
                return pfn_valid(pmd_pfn(*pmd));
 
        pte = pte_offset_kernel(pmd, addr);
@@ -2968,7 +2968,7 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
        struct mm_struct *mm;
        pmd_t entry = *pmd;
 
-       if (!pmd_large(entry) || !pmd_young(entry))
+       if (!pmd_leaf(entry) || !pmd_young(entry))
                return;
 
        pte = pmd_val(entry);
index 8393faa3e596e4d29a3a0aaee4d435fd5c1ab755..852085ada3689d9e09f948c01f6d4e3d76d6340c 100644
@@ -1513,7 +1513,7 @@ static void __init init_viking(void)
 
                /*
                 * We need this to make sure old viking takes no hits
-                * on it's cache for dma snoops to workaround the
+                * on its cache for dma snoops to workaround the
                 * "load from non-cacheable memory" interrupt bug.
                 * This is only necessary because of the new way in
                 * which we use the IOMMU.
index 6acd8a4c1e2aff00417a5ad97b847b89be2cfd2b..5fe52a64c7e70dff770a5b7046d4b2f9e61d101e 100644
@@ -385,7 +385,7 @@ static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
  * will not trigger any longer.
  *
  * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
- * of two.  The TSB must be aligned to it's size, so f.e. a 512K TSB
+ * of two.  The TSB must be aligned to its size, so f.e. a 512K TSB
  * must be 512K aligned.  It also must be physically contiguous, so we
  * cannot use vmalloc().
  *
index a74e5004c6c89c8267b0014cf78a72ae66ee4f89..da2df1e84ed413e7cf92172aafca92d2bfed6229 100644
@@ -300,7 +300,7 @@ do {        *prog++ = BR_OPC | WDISP22(OFF);                \
  *
  * The most common case is to emit a branch at the end of such
  * a code sequence.  So this would be two instructions, the
- * branch and it's delay slot.
+ * branch and its delay slot.
  *
  * Therefore by default the branch emitters calculate the branch
  * offset field as:
@@ -309,13 +309,13 @@ do {      *prog++ = BR_OPC | WDISP22(OFF);                \
  *
  * This "addrs[i] - 8" is the address of the branch itself or
  * what "." would be in assembler notation.  The "8" part is
- * how we take into consideration the branch and it's delay
+ * how we take into consideration the branch and its delay
  * slot mentioned above.
  *
  * Sometimes we need to emit a branch earlier in the code
  * sequence.  And in these situations we adjust "destination"
  * to accommodate this difference.  For example, if we needed
- * to emit a branch (and it's delay slot) right before the
+ * to emit a branch (and its delay slot) right before the
  * final instruction emitted for a BPF opcode, we'd use
  * "destination + 4" instead of just plain "destination" above.
  *
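
As a worked example of the scheme described above, take hypothetical values
addrs[i] = 0x40 and destination = 0x60. The branch and its delay slot start
at "." = addrs[i] - 8 = 0x38, so the encoded displacement is:

        offset = destination - (addrs[i] - 8);  /* 0x60 - 0x38 = 0x28 */

and a branch emitted one instruction earlier would use destination + 4
instead, exactly as the comment notes.
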
index 136c78f28f8ba2c99a9cf066c9cfebf3a85aa77b..1bbf4335de4540598528240e6106896b8d6d5c4a 100644 (file)
@@ -449,9 +449,8 @@ static __init int vdso_setup(char *s)
        unsigned long val;
 
        err = kstrtoul(s, 10, &val);
-       if (err)
-               return err;
-       vdso_enabled = val;
-       return 0;
+       if (!err)
+               vdso_enabled = val;
+       return 1;
 }
 __setup("vdso=", vdso_setup);
index 484141b06938ffcd222a7c40055acd07155b04a1..4954188a6a0908363654d541f6a2faeb36bbe651 100644 (file)
@@ -16,16 +16,16 @@ void uml_dtb_init(void)
        void *area;
 
        area = uml_load_file(dtb, &size);
-       if (!area)
-               return;
-
-       if (!early_init_dt_scan(area)) {
-               pr_err("invalid DTB %s\n", dtb);
-               memblock_free(area, size);
-               return;
+       if (area) {
+               if (!early_init_dt_scan(area)) {
+                       pr_err("invalid DTB %s\n", dtb);
+                       memblock_free(area, size);
+                       return;
+               }
+
+               early_init_fdt_scan_reserved_mem();
        }
 
-       early_init_fdt_scan_reserved_mem();
        unflatten_device_tree();
 }
 
index 78050d5d7fac91901c2e67ff9130485ffced461e..7aed87cbf386257237ca55d98ff61eb22b0b7ba2 100644 (file)
@@ -245,7 +245,6 @@ config X86
        select HAVE_FUNCTION_ERROR_INJECTION
        select HAVE_KRETPROBES
        select HAVE_RETHOOK
-       select HAVE_KVM
        select HAVE_LIVEPATCH                   if X86_64
        select HAVE_MIXED_BREAKPOINTS_REGS
        select HAVE_MOD_ARCH_SPECIFIC
@@ -2105,7 +2104,7 @@ config ARCH_SUPPORTS_CRASH_HOTPLUG
        def_bool y
 
 config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
-       def_bool CRASH_CORE
+       def_bool CRASH_RESERVE
 
 config PHYSICAL_START
        hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP)
index 7b5a5615200ab805e45cd1ad0ab2ab082c725c03..662d9d4033e6b855c327615fa20765f3b374f644 100644 (file)
@@ -221,12 +221,6 @@ endif
 
 KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
 
-ifdef CONFIG_LTO_CLANG
-ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 130000),y)
-KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
-endif
-endif
-
 ifdef CONFIG_X86_NEED_RELOCS
 LDFLAGS_vmlinux := --emit-relocs --discard-none
 else
index 909f2a35b60c5da423868732ee9b2c3aa2e45ccd..dfb9c2deb77cfc4e9986976bf2fd1652666f8f15 100644 (file)
@@ -284,7 +284,7 @@ static int set_clr_page_flags(struct x86_mapping_info *info,
        pudp = pud_offset(p4dp, address);
        pmdp = pmd_offset(pudp, address);
 
-       if (pmd_large(*pmdp))
+       if (pmd_leaf(*pmdp))
                ptep = split_large_pmd(info, pmdp, address);
        else
                ptep = pte_offset_kernel(pmdp, address);
index 69a3b02e50bb0cfbe2688769ad3a5ea72fd8e11f..aec16e581f5b2aad520bcdfbab7bff61ca99c072 100644 (file)
@@ -604,7 +604,6 @@ static void amd_pmu_cpu_dead(int cpu)
 
        kfree(cpuhw->lbr_sel);
        cpuhw->lbr_sel = NULL;
-       amd_pmu_cpu_reset(cpu);
 
        if (!x86_pmu.amd_nb_constraints)
                return;
index eb31f850841a89ae68cebe41707eccec0a965a8a..4a1e600314d5df124403636915a0f5fcf64ec475 100644 (file)
@@ -173,9 +173,11 @@ void amd_pmu_lbr_read(void)
 
                /*
                 * Check if a branch has been logged; if valid = 0, spec = 0
-                * then no branch was recorded
+                * then no branch was recorded; if reserved = 1 then an
+                * erroneous branch was recorded (see Erratum 1452)
                 */
-               if (!entry.to.split.valid && !entry.to.split.spec)
+               if ((!entry.to.split.valid && !entry.to.split.spec) ||
+                   entry.to.split.reserved)
                        continue;
 
                perf_clear_branch_entry_bitfields(br + out);
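
With the new check, an LBR record is skipped in two cases: nothing was
recorded (valid and spec both clear), or the reserved bit flags a bogus
record per Erratum 1452. A hedged sketch of the filter with the bitfields
lifted out as plain parameters:

        /* sketch: returns true when the entry should be consumed */
        static bool lbr_entry_usable(bool valid, bool spec, bool reserved)
        {
                if (!valid && !spec)    /* no branch recorded */
                        return false;
                return !reserved;       /* reserved => erroneous record */
        }
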
similarity index 92%
rename from arch/x86/include/asm/crash_core.h
rename to arch/x86/include/asm/crash_reserve.h
index 76af98f4e80126b77fdc25740ca71b723aadc419..152239f95541953ee9c7afb65d10f6e16e090e72 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _X86_CRASH_CORE_H
-#define _X86_CRASH_CORE_H
+#ifndef _X86_CRASH_RESERVE_H
+#define _X86_CRASH_RESERVE_H
 
 /* 16M alignment for crash kernel regions */
 #define CRASH_ALIGN             SZ_16M
@@ -39,4 +39,4 @@ static inline unsigned long crash_low_size_default(void)
 #endif
 }
 
-#endif /* _X86_CRASH_CORE_H */
+#endif /* _X86_CRASH_RESERVE_H */
index 66837b8c67f1a9f794f9b65008bace6278f1e3d3..fbc7722b87d1fd40f244d697fde2692a464df69f 100644 (file)
@@ -15,7 +15,7 @@ typedef struct {
        unsigned int irq_spurious_count;
        unsigned int icr_read_retry_count;
 #endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
        unsigned int kvm_posted_intr_ipis;
        unsigned int kvm_posted_intr_wakeup_ipis;
        unsigned int kvm_posted_intr_nested_ipis;
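
IS_ENABLED() is the right test here because KVM on x86 can be built as a
module: it evaluates to 1 for both CONFIG_KVM=y and CONFIG_KVM=m, whereas a
plain #ifdef CONFIG_KVM would miss the =m case (which defines
CONFIG_KVM_MODULE instead). In sketch form:

        #if IS_ENABLED(CONFIG_KVM)      /* true for both y and m */
                /* posted-interrupt stats and handlers compiled in */
        #endif
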
index 47d4c04d103df4eb824fef8297f0b77c39dee1c1..749c7411d2f1de33ba44564b37c900a4e6dd936b 100644 (file)
@@ -741,7 +741,7 @@ DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR,            sysvec_irq_work);
 # endif
 #endif
 
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
 DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR,            sysvec_kvm_posted_intr_ipi);
 DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR,     sysvec_kvm_posted_intr_wakeup_ipi);
 DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR,     sysvec_kvm_posted_intr_nested_ipi);
index 836c170d308755fe205b802e34b51c7ffd592017..194dfff84cb11e53b4bf65fb1ea26f9dbc216549 100644 (file)
@@ -29,7 +29,7 @@ struct irq_desc;
 
 extern void fixup_irqs(void);
 
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
 extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
 #endif
 
index 3a19904c2db6935fda03c0a7c9eeaa47e62f823c..d18bfb238f660fcccdfdf444a60720f03163e4d5 100644 (file)
 #define HYPERVISOR_CALLBACK_VECTOR     0xf3
 
 /* Vector for KVM to deliver posted interrupt IPI */
-#ifdef CONFIG_HAVE_KVM
 #define POSTED_INTR_VECTOR             0xf2
 #define POSTED_INTR_WAKEUP_VECTOR      0xf1
 #define POSTED_INTR_NESTED_VECTOR      0xf0
-#endif
 
 #define MANAGED_IRQ_SHUTDOWN_VECTOR    0xef
 
index ab24ce2079889b8687651a22b48bcd64addde031..110d7f29ca9a20329ef3346588f06efbf3fbd113 100644 (file)
@@ -103,7 +103,6 @@ KVM_X86_OP(write_tsc_multiplier)
 KVM_X86_OP(get_exit_info)
 KVM_X86_OP(check_intercept)
 KVM_X86_OP(handle_exit_irqoff)
-KVM_X86_OP(request_immediate_exit)
 KVM_X86_OP(sched_in)
 KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
 KVM_X86_OP_OPTIONAL(vcpu_blocking)
index 058bc636356a1133ad151457d8bf0b56528e7f39..f852b13aeefea7a15f811c62a7035d32527b8740 100644 (file)
@@ -12,11 +12,9 @@ BUILD_BUG_ON(1)
  * a NULL definition, for example if "static_call_cond()" will be used
  * at the call sites.
  */
-KVM_X86_PMU_OP(hw_event_available)
-KVM_X86_PMU_OP(pmc_idx_to_pmc)
 KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
 KVM_X86_PMU_OP(msr_idx_to_pmc)
-KVM_X86_PMU_OP(is_valid_rdpmc_ecx)
+KVM_X86_PMU_OP_OPTIONAL(check_rdpmc_early)
 KVM_X86_PMU_OP(is_valid_msr)
 KVM_X86_PMU_OP(get_msr)
 KVM_X86_PMU_OP(set_msr)
index 18cbde14cf81091b880f018548800515c9a20a7c..16e07a2eee195d48e29536dc69c5d6e975c99d7f 100644 (file)
@@ -536,6 +536,7 @@ struct kvm_pmc {
 #define KVM_PMC_MAX_FIXED      3
 #define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
 #define KVM_AMD_PMC_MAX_GENERIC        6
+
 struct kvm_pmu {
        u8 version;
        unsigned nr_arch_gp_counters;
@@ -1468,6 +1469,15 @@ struct kvm_arch {
         */
        bool shadow_root_allocated;
 
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+       /*
+        * If set, the VM has (or had) an external write tracking user, and
+        * thus all write tracking metadata has been allocated, even if KVM
+        * itself isn't using write tracking.
+        */
+       bool external_write_tracking_enabled;
+#endif
+
 #if IS_ENABLED(CONFIG_HYPERV)
        hpa_t   hv_root_tdp;
        spinlock_t hv_root_tdp_lock;
@@ -1665,7 +1675,8 @@ struct kvm_x86_ops {
        void (*flush_tlb_guest)(struct kvm_vcpu *vcpu);
 
        int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
-       enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu);
+       enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu,
+                                                 bool force_immediate_exit);
        int (*handle_exit)(struct kvm_vcpu *vcpu,
                enum exit_fastpath_completion exit_fastpath);
        int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@ -1733,8 +1744,6 @@ struct kvm_x86_ops {
                               struct x86_exception *exception);
        void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
 
-       void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
-
        void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);
 
        /*
@@ -1882,8 +1891,16 @@ static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn,
 }
 #endif /* CONFIG_HYPERV */
 
+enum kvm_intr_type {
+       /* Values are arbitrary, but must be non-zero. */
+       KVM_HANDLING_IRQ = 1,
+       KVM_HANDLING_NMI,
+};
+
+/* Enable perf NMI and timer modes to work, and minimise false positives. */
 #define kvm_arch_pmi_in_guest(vcpu) \
-       ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
+       ((vcpu) && (vcpu)->arch.handling_intr_from_guest && \
+        (!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI)))
 
 void __init kvm_mmu_x86_module_init(void);
 int kvm_mmu_vendor_module_init(void);
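
perf can invoke this hook from NMI context (a hardware PMI) or from IRQ
context (hrtimer-driven sampling), and KVM records which of the two it is
mediating in handling_intr_from_guest. The extra clause attributes a sample
to the guest only when the callback's context matches the recorded one,
which is what filters out the false positives the comment mentions. A
minimal sketch of the matching logic:

        /* sketch: handling is 0, KVM_HANDLING_IRQ or KVM_HANDLING_NMI */
        static bool pmi_from_guest(int handling, bool in_nmi_ctx)
        {
                return handling &&
                       in_nmi_ctx == (handling == KVM_HANDLING_NMI);
        }
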
@@ -2048,7 +2065,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
 int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
-void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
+unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr);
 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
 int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
@@ -2241,7 +2258,6 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
 
 void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
                                     u32 size);
index 0da5c227f490c07bcff20c877ba4ea6b77129c91..ce4677b8b7356c276c2b5e3b4b08474924fb90db 100644 (file)
@@ -75,7 +75,7 @@ typedef struct {
                .lock = __MUTEX_INITIALIZER(mm.context.lock),           \
        }
 
-void leave_mm(int cpu);
+void leave_mm(void);
 #define leave_mm leave_mm
 
 #endif /* _ASM_X86_MMU_H */
index df0f7d4a96f3284eb4ac9c0495b5a4ec8df27aa3..315535ffb2582c17edd8b1c41fdff17291e14704 100644 (file)
@@ -31,7 +31,8 @@ struct seq_file;
 void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm);
 void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
                                   bool user);
-void ptdump_walk_pgd_level_checkwx(void);
+bool ptdump_walk_pgd_level_checkwx(void);
+#define ptdump_check_wx ptdump_walk_pgd_level_checkwx
 void ptdump_walk_user_pgd_level_checkwx(void);
 
 /*
@@ -41,10 +42,8 @@ void ptdump_walk_user_pgd_level_checkwx(void);
 #define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot)))
 
 #ifdef CONFIG_DEBUG_WX
-#define debug_checkwx()                ptdump_walk_pgd_level_checkwx()
 #define debug_checkwx_user()   ptdump_walk_user_pgd_level_checkwx()
 #else
-#define debug_checkwx()                do { } while (0)
 #define debug_checkwx_user()   do { } while (0)
 #endif
 
@@ -252,8 +251,8 @@ static inline unsigned long pgd_pfn(pgd_t pgd)
        return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
-#define p4d_leaf       p4d_large
-static inline int p4d_large(p4d_t p4d)
+#define p4d_leaf p4d_leaf
+static inline bool p4d_leaf(p4d_t p4d)
 {
        /* No 512 GiB pages yet */
        return 0;
@@ -261,14 +260,14 @@ static inline int p4d_large(p4d_t p4d)
 
 #define pte_page(pte)  pfn_to_page(pte_pfn(pte))
 
-#define pmd_leaf       pmd_large
-static inline int pmd_large(pmd_t pte)
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pte)
 {
        return pmd_flags(pte) & _PAGE_PSE;
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_large */
+/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_leaf */
 static inline int pmd_trans_huge(pmd_t pmd)
 {
        return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
@@ -956,13 +955,13 @@ static inline int pte_same(pte_t a, pte_t b)
        return a.pte == b.pte;
 }
 
-static inline pte_t pte_next_pfn(pte_t pte)
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
 {
        if (__pte_needs_invert(pte_val(pte)))
-               return __pte(pte_val(pte) - (1UL << PFN_PTE_SHIFT));
-       return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
+               return __pte(pte_val(pte) - (nr << PFN_PTE_SHIFT));
+       return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
 }
-#define pte_next_pfn   pte_next_pfn
+#define pte_advance_pfn        pte_advance_pfn
 
 static inline int pte_present(pte_t a)
 {
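
pte_advance_pfn() generalises pte_next_pfn() from a fixed stride of one page
to an arbitrary nr, for callers that batch-map folios. The subtraction
branch exists because L1TF-mitigated (inverted) PTEs store the PFN bits
inverted, so moving the mapping forward means decreasing the raw value. For
a normal, non-inverted entry the arithmetic is simply:

        /* sketch: advancing a non-inverted PTE by nr pages */
        new = __pte(pte_val(old) + (nr << PFN_PTE_SHIFT));
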
@@ -1086,8 +1085,8 @@ static inline pmd_t *pud_pgtable(pud_t pud)
  */
 #define pud_page(pud)  pfn_to_page(pud_pfn(pud))
 
-#define pud_leaf       pud_large
-static inline int pud_large(pud_t pud)
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
 {
        return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
                (_PAGE_PSE | _PAGE_PRESENT);
@@ -1097,12 +1096,6 @@ static inline int pud_bad(pud_t pud)
 {
        return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
 }
-#else
-#define pud_leaf       pud_large
-static inline int pud_large(pud_t pud)
-{
-       return 0;
-}
 #endif /* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -1419,8 +1412,8 @@ static inline bool pgdp_maps_userspace(void *__ptr)
        return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START);
 }
 
-#define pgd_leaf       pgd_large
-static inline int pgd_large(pgd_t pgd) { return 0; }
+#define pgd_leaf       pgd_leaf
+static inline bool pgd_leaf(pgd_t pgd) { return false; }
 
 #ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
 /*
index 87a7b917d30ea9fe0998df0d11f84e9bdcc00702..728c98175b9cb8fd5a229dfdf0ac32f294a06b52 100644 (file)
@@ -358,10 +358,10 @@ struct sev_es_save_area {
        struct vmcb_seg ldtr;
        struct vmcb_seg idtr;
        struct vmcb_seg tr;
-       u64 vmpl0_ssp;
-       u64 vmpl1_ssp;
-       u64 vmpl2_ssp;
-       u64 vmpl3_ssp;
+       u64 pl0_ssp;
+       u64 pl1_ssp;
+       u64 pl2_ssp;
+       u64 pl3_ssp;
        u64 u_cet;
        u8 reserved_0xc8[2];
        u8 vmpl;
index c6a7eed039145be3964db90a6cac559e45d87040..266daf5b5b842d0b9921d5950285c648077712fc 100644 (file)
@@ -25,6 +25,7 @@
 #define VMX_FEATURE_EPT_EXECUTE_ONLY   ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */
 #define VMX_FEATURE_EPT_AD             ( 0*32+ 18) /* EPT Accessed/Dirty bits */
 #define VMX_FEATURE_EPT_1GB            ( 0*32+ 19) /* 1GB EPT pages */
+#define VMX_FEATURE_EPT_5LEVEL         ( 0*32+ 20) /* 5-level EPT paging */
 
 /* Aggregated APIC features 24-27 */
 #define VMX_FEATURE_FLEXPRIORITY       ( 0*32+ 24) /* TPR shadow + virt APIC */
index a448d0964fc06ebd0c15cd0b550e3c2cefbf57bf..ad29984d5e398da425c0516f14b5cf538a023696 100644 (file)
@@ -7,6 +7,8 @@
  *
  */
 
+#include <linux/const.h>
+#include <linux/bits.h>
 #include <linux/types.h>
 #include <linux/ioctl.h>
 #include <linux/stddef.h>
@@ -40,7 +42,6 @@
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
-#define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_MSIX
 #define __KVM_HAVE_MCE
 #define __KVM_HAVE_PIT_STATE2
@@ -49,7 +50,6 @@
 #define __KVM_HAVE_DEBUGREGS
 #define __KVM_HAVE_XSAVE
 #define __KVM_HAVE_XCRS
-#define __KVM_HAVE_READONLY_MEM
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
@@ -526,9 +526,278 @@ struct kvm_pmu_event_filter {
 #define KVM_PMU_EVENT_ALLOW 0
 #define KVM_PMU_EVENT_DENY 1
 
-#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0)
+#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0)
 #define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS)
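
The switch away from BIT() is needed because this is a uapi header:
userspace never sees the kernel-internal BIT()/GENMASK_ULL() helpers, only
the underscore-prefixed variants exported through <uapi/linux/const.h> and
<uapi/linux/bits.h>. For reference, the exported bit helpers are:

        #define _BITUL(x)       (_UL(1) << (x))         /* unsigned long */
        #define _BITULL(x)      (_ULL(1) << (x))        /* unsigned long long */
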
 
+/* for KVM_CAP_MCE */
+struct kvm_x86_mce {
+       __u64 status;
+       __u64 addr;
+       __u64 misc;
+       __u64 mcg_status;
+       __u8 bank;
+       __u8 pad1[7];
+       __u64 pad2[3];
+};
+
+/* for KVM_CAP_XEN_HVM */
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR       (1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL     (1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO         (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE            (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL       (1 << 4)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND         (1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG        (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE        (1 << 7)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA     (1 << 8)
+
+struct kvm_xen_hvm_config {
+       __u32 flags;
+       __u32 msr;
+       __u64 blob_addr_32;
+       __u64 blob_addr_64;
+       __u8 blob_size_32;
+       __u8 blob_size_64;
+       __u8 pad2[30];
+};
+
+struct kvm_xen_hvm_attr {
+       __u16 type;
+       __u16 pad[3];
+       union {
+               __u8 long_mode;
+               __u8 vector;
+               __u8 runstate_update_flag;
+               union {
+                       __u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
+                       __u64 hva;
+               } shared_info;
+               struct {
+                       __u32 send_port;
+                       __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+                       __u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN                (1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE          (1 << 1)
+#define KVM_XEN_EVTCHN_RESET           (1 << 2)
+                       /*
+                        * Events sent by the guest are either looped back to
+                        * the guest itself (potentially on a different port#)
+                        * or signalled via an eventfd.
+                        */
+                       union {
+                               struct {
+                                       __u32 port;
+                                       __u32 vcpu;
+                                       __u32 priority;
+                               } port;
+                               struct {
+                                       __u32 port; /* Zero for eventfd */
+                                       __s32 fd;
+                               } eventfd;
+                               __u32 padding[4];
+                       } deliver;
+               } evtchn;
+               __u32 xen_version;
+               __u64 pad[8];
+       } u;
+};
+
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE            0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO          0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR                0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN               0x3
+#define KVM_XEN_ATTR_TYPE_XEN_VERSION          0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA      0x6
+
+struct kvm_xen_vcpu_attr {
+       __u16 type;
+       __u16 pad[3];
+       union {
+               __u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
+               __u64 hva;
+               __u64 pad[8];
+               struct {
+                       __u64 state;
+                       __u64 state_entry_time;
+                       __u64 time_running;
+                       __u64 time_runnable;
+                       __u64 time_blocked;
+                       __u64 time_offline;
+               } runstate;
+               __u32 vcpu_id;
+               struct {
+                       __u32 port;
+                       __u32 priority;
+                       __u64 expires_ns;
+               } timer;
+               __u8 vector;
+       } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO       0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO  0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR   0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT        0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA   0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID         0x6
+#define KVM_XEN_VCPU_ATTR_TYPE_TIMER           0x7
+#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR   0x8
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA   0x9
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+       /* Guest initialization commands */
+       KVM_SEV_INIT = 0,
+       KVM_SEV_ES_INIT,
+       /* Guest launch commands */
+       KVM_SEV_LAUNCH_START,
+       KVM_SEV_LAUNCH_UPDATE_DATA,
+       KVM_SEV_LAUNCH_UPDATE_VMSA,
+       KVM_SEV_LAUNCH_SECRET,
+       KVM_SEV_LAUNCH_MEASURE,
+       KVM_SEV_LAUNCH_FINISH,
+       /* Guest migration commands (outgoing) */
+       KVM_SEV_SEND_START,
+       KVM_SEV_SEND_UPDATE_DATA,
+       KVM_SEV_SEND_UPDATE_VMSA,
+       KVM_SEV_SEND_FINISH,
+       /* Guest migration commands (incoming) */
+       KVM_SEV_RECEIVE_START,
+       KVM_SEV_RECEIVE_UPDATE_DATA,
+       KVM_SEV_RECEIVE_UPDATE_VMSA,
+       KVM_SEV_RECEIVE_FINISH,
+       /* Guest status and debug commands */
+       KVM_SEV_GUEST_STATUS,
+       KVM_SEV_DBG_DECRYPT,
+       KVM_SEV_DBG_ENCRYPT,
+       /* Guest certificates commands */
+       KVM_SEV_CERT_EXPORT,
+       /* Attestation report */
+       KVM_SEV_GET_ATTESTATION_REPORT,
+       /* Guest Migration Extension */
+       KVM_SEV_SEND_CANCEL,
+
+       KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+       __u32 id;
+       __u64 data;
+       __u32 error;
+       __u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+       __u32 handle;
+       __u32 policy;
+       __u64 dh_uaddr;
+       __u32 dh_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+       __u64 uaddr;
+       __u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+       __u64 uaddr;
+       __u32 len;
+};
+
+struct kvm_sev_guest_status {
+       __u32 handle;
+       __u32 policy;
+       __u32 state;
+};
+
+struct kvm_sev_dbg {
+       __u64 src_uaddr;
+       __u64 dst_uaddr;
+       __u32 len;
+};
+
+struct kvm_sev_attestation_report {
+       __u8 mnonce[16];
+       __u64 uaddr;
+       __u32 len;
+};
+
+struct kvm_sev_send_start {
+       __u32 policy;
+       __u64 pdh_cert_uaddr;
+       __u32 pdh_cert_len;
+       __u64 plat_certs_uaddr;
+       __u32 plat_certs_len;
+       __u64 amd_certs_uaddr;
+       __u32 amd_certs_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_send_update_data {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
+struct kvm_sev_receive_start {
+       __u32 handle;
+       __u32 policy;
+       __u64 pdh_uaddr;
+       __u32 pdh_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_receive_update_data {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
+#define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK  (1ULL << 1)
+
+struct kvm_hyperv_eventfd {
+       __u32 conn_id;
+       __s32 fd;
+       __u32 flags;
+       __u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK                0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN    (1 << 0)
+
 /*
  * Masked event layout.
  * Bits   Description
@@ -549,10 +818,10 @@ struct kvm_pmu_event_filter {
        ((__u64)(!!(exclude)) << 55))
 
 #define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \
-       (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MASK                (GENMASK_ULL(63, 56))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH       (GENMASK_ULL(15, 8))
-#define KVM_PMU_MASKED_ENTRY_EXCLUDE           (BIT_ULL(55))
+       (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MASK                (__GENMASK_ULL(63, 56))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH       (__GENMASK_ULL(15, 8))
+#define KVM_PMU_MASKED_ENTRY_EXCLUDE           (_BITULL(55))
 #define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT  (56)
 
 /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
@@ -560,7 +829,7 @@ struct kvm_pmu_event_filter {
 #define   KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
 
 /* x86-specific KVM_EXIT_HYPERCALL flags. */
-#define KVM_EXIT_HYPERCALL_LONG_MODE   BIT(0)
+#define KVM_EXIT_HYPERCALL_LONG_MODE   _BITULL(0)
 
 #define KVM_X86_DEFAULT_VM     0
 #define KVM_X86_SW_PROTECTED_VM        1
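
For userspace, a masked filter entry is built with the encode helper whose
tail is visible above (KVM_PMU_ENCODE_MASKED_ENTRY() upstream). A hedged
sketch that allows one hypothetical event, select 0xC0 with any unit mask:

        struct kvm_pmu_event_filter *f;  /* allocated with nevents >= 1 */

        f->flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS;
        f->action = KVM_PMU_EVENT_ALLOW;
        /* mask = 0 with match = 0 matches every unit mask */
        f->events[0] = KVM_PMU_ENCODE_MASKED_ENTRY(0xC0, 0, 0, 0);
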
index 6e64b27b2c1ee0b7ac49bcb7c20c60caf7f3314c..6bc3456a8ebf1d1a7c83498cbbef2b5bae106b41 100644 (file)
@@ -92,7 +92,7 @@ struct kvm_clock_pairing {
 #define KVM_ASYNC_PF_DELIVERY_AS_INT           (1 << 3)
 
 /* MSR_KVM_ASYNC_PF_INT */
-#define KVM_ASYNC_PF_VEC_MASK                  GENMASK(7, 0)
+#define KVM_ASYNC_PF_VEC_MASK                  __GENMASK(7, 0)
 
 /* MSR_KVM_MIGRATION_CONTROL */
 #define KVM_MIGRATION_READY            (1 << 0)
index d0c744cb2a0e2daf1d7f0f97dd6a6b06f7fe2a0c..74077694da7d9f3aa2fe68cc6bf4f348687ca45f 100644 (file)
@@ -100,11 +100,11 @@ obj-$(CONFIG_FTRACE_SYSCALLS)     += ftrace.o
 obj-$(CONFIG_X86_TSC)          += trace_clock.o
 obj-$(CONFIG_TRACING)          += trace.o
 obj-$(CONFIG_RETHOOK)          += rethook.o
-obj-$(CONFIG_CRASH_CORE)       += crash_core_$(BITS).o
+obj-$(CONFIG_VMCORE_INFO)      += vmcore_info_$(BITS).o
 obj-$(CONFIG_KEXEC_CORE)       += machine_kexec_$(BITS).o
-obj-$(CONFIG_KEXEC_CORE)       += relocate_kernel_$(BITS).o crash.o
+obj-$(CONFIG_KEXEC_CORE)       += relocate_kernel_$(BITS).o
 obj-$(CONFIG_KEXEC_FILE)       += kexec-bzimage64.o
-obj-$(CONFIG_CRASH_DUMP)       += crash_dump_$(BITS).o
+obj-$(CONFIG_CRASH_DUMP)       += crash_dump_$(BITS).o crash.o
 obj-y                          += kprobes/
 obj-$(CONFIG_MODULES)          += module.o
 obj-$(CONFIG_X86_32)           += doublefault_32.o
index ff6e32ec8259c278a743da1482c2329cbf15a846..45a280f2161cd3369c1f390a8b498de16a0ea1d3 100644 (file)
@@ -1804,7 +1804,7 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
         * restoring the previous mm.
         */
        if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
-               leave_mm(smp_processor_id());
+               leave_mm();
 
        temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
        switch_mm_irqs_off(NULL, mm, current);
index 3282a747b6458c104e7073292d565bbab9a4c077..6d8677e80ddbb17c94ec7fcda9e5bae502c0dcb2 100644 (file)
@@ -864,11 +864,11 @@ static bool cpu_has_zenbleed_microcode(void)
        u32 good_rev = 0;
 
        switch (boot_cpu_data.x86_model) {
-       case 0x30 ... 0x3f: good_rev = 0x0830107a; break;
-       case 0x60 ... 0x67: good_rev = 0x0860010b; break;
-       case 0x68 ... 0x6f: good_rev = 0x08608105; break;
-       case 0x70 ... 0x7f: good_rev = 0x08701032; break;
-       case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
+       case 0x30 ... 0x3f: good_rev = 0x0830107b; break;
+       case 0x60 ... 0x67: good_rev = 0x0860010c; break;
+       case 0x68 ... 0x6f: good_rev = 0x08608107; break;
+       case 0x70 ... 0x7f: good_rev = 0x08701033; break;
+       case 0xa0 ... 0xaf: good_rev = 0x08a00009; break;
 
        default:
                return false;
index 03851240c3e36d4ed5e9ad250eee76410830d6e9..1640ae76548fc71247970398da2bbd35b4c0a5f6 100644 (file)
@@ -72,6 +72,8 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c)
                c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_AD);
        if (ept & VMX_EPT_1GB_PAGE_BIT)
                c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_1GB);
+       if (ept & VMX_EPT_PAGE_WALK_5_BIT)
+               c->vmx_capability[MISC_FEATURES] |= VMX_F(EPT_5LEVEL);
 
        /* Synthetic APIC features that are aggregates of multiple features. */
        if ((c->vmx_capability[PRIMARY_CTLS] & VMX_F(VIRTUAL_TPR)) &&
index 45e0e70e238cf31a0e51024d0f88505b6a6ce9c4..303fef824167d9c57e8b6068eecd62f31f071638 100644 (file)
@@ -209,7 +209,9 @@ static void hv_machine_shutdown(void)
        if (kexec_in_progress)
                hyperv_cleanup();
 }
+#endif /* CONFIG_KEXEC_CORE */
 
+#ifdef CONFIG_CRASH_DUMP
 static void hv_machine_crash_shutdown(struct pt_regs *regs)
 {
        if (hv_crash_handler)
@@ -221,7 +223,7 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs)
        /* Disable the hypercall page when there is only 1 active CPU. */
        hyperv_cleanup();
 }
-#endif /* CONFIG_KEXEC_CORE */
+#endif /* CONFIG_CRASH_DUMP */
 #endif /* CONFIG_HYPERV */
 
 static uint32_t  __init ms_hyperv_platform(void)
@@ -495,9 +497,13 @@ static void __init ms_hyperv_init_platform(void)
        no_timer_check = 1;
 #endif
 
-#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
+#if IS_ENABLED(CONFIG_HYPERV)
+#if defined(CONFIG_KEXEC_CORE)
        machine_ops.shutdown = hv_machine_shutdown;
+#endif
+#if defined(CONFIG_CRASH_DUMP)
        machine_ops.crash_shutdown = hv_machine_crash_shutdown;
+#endif
 #endif
        if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
                /*
index 4aeafe63521b8367a644820e2858ef94730d8135..003e0298f46a47b21adad15eb927ff755ad88416 100644 (file)
@@ -283,22 +283,24 @@ void __init x86_flattree_get_config(void)
        u32 size, map_len;
        void *dt;
 
-       if (!initial_dtb)
-               return;
-
-       map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
+       if (initial_dtb) {
+               map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
+
+               dt = early_memremap(initial_dtb, map_len);
+               size = fdt_totalsize(dt);
+               if (map_len < size) {
+                       early_memunmap(dt, map_len);
+                       dt = early_memremap(initial_dtb, size);
+                       map_len = size;
+               }
 
-       dt = early_memremap(initial_dtb, map_len);
-       size = fdt_totalsize(dt);
-       if (map_len < size) {
-               early_memunmap(dt, map_len);
-               dt = early_memremap(initial_dtb, size);
-               map_len = size;
+               early_init_dt_verify(dt);
        }
 
-       early_init_dt_verify(dt);
        unflatten_and_copy_device_tree();
-       early_memunmap(dt, map_len);
+
+       if (initial_dtb)
+               early_memunmap(dt, map_len);
 }
 #endif
 
index 0cd53fa8c65d1d63ed4a11a7a2f7c88ba4122d51..fc37c8d83daf234f15094e96c87654f6fd989acd 100644 (file)
@@ -153,7 +153,7 @@ static const __initconst struct idt_data apic_idts[] = {
 #ifdef CONFIG_X86_LOCAL_APIC
        INTG(LOCAL_TIMER_VECTOR,                asm_sysvec_apic_timer_interrupt),
        INTG(X86_PLATFORM_IPI_VECTOR,           asm_sysvec_x86_platform_ipi),
-# ifdef CONFIG_HAVE_KVM
+# if IS_ENABLED(CONFIG_KVM)
        INTG(POSTED_INTR_VECTOR,                asm_sysvec_kvm_posted_intr_ipi),
        INTG(POSTED_INTR_WAKEUP_VECTOR,         asm_sysvec_kvm_posted_intr_wakeup_ipi),
        INTG(POSTED_INTR_NESTED_VECTOR,         asm_sysvec_kvm_posted_intr_nested_ipi),
index 11761c12454533c10159a6456c5a57aac3004777..35fde0107901d61f58dd08af4988f7ce7b2530b3 100644 (file)
@@ -164,7 +164,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 #if defined(CONFIG_X86_IO_APIC)
        seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
 #endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
        seq_printf(p, "%*s: ", prec, "PIN");
        for_each_online_cpu(j)
                seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
@@ -290,7 +290,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi)
 }
 #endif
 
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
 static void dummy_handler(void) {}
 static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler;
 
index cde167b0ea92adb08115bb359b2284ea3e6acbe0..68530fad05f7475fa03462175fd8c16dc09fc698 100644 (file)
@@ -263,11 +263,13 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
        memset(&params->hd0_info, 0, sizeof(params->hd0_info));
        memset(&params->hd1_info, 0, sizeof(params->hd1_info));
 
+#ifdef CONFIG_CRASH_DUMP
        if (image->type == KEXEC_TYPE_CRASH) {
                ret = crash_setup_memmap_entries(image, params);
                if (ret)
                        return ret;
        } else
+#endif
                setup_e820_entries(params);
 
        nr_e820_entries = params->e820_entries;
@@ -433,12 +435,14 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
                return ERR_PTR(-EINVAL);
        }
 
+#ifdef CONFIG_CRASH_DUMP
        /* Allocate and load backup region */
        if (image->type == KEXEC_TYPE_CRASH) {
                ret = crash_load_segments(image);
                if (ret)
                        return ERR_PTR(ret);
        }
+#endif
 
        /*
         * Load purgatory. For 64bit entry point, purgatory  code can be
index 101a7c1bf200859072037fd2da470441d27c053c..4cadfd606e8e6a9f16a10eec4fdf85ac9b67f575 100644 (file)
@@ -770,7 +770,7 @@ static struct notifier_block kvm_pv_reboot_nb = {
  * won't be valid. In cases like kexec, in which you install a new kernel, this
  * means a random memory location will be kept being written.
  */
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
 static void kvm_crash_shutdown(struct pt_regs *regs)
 {
        kvm_guest_cpu_offline(true);
@@ -853,7 +853,7 @@ static void __init kvm_guest_init(void)
        kvm_guest_cpu_init();
 #endif
 
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
        machine_ops.crash_shutdown = kvm_crash_shutdown;
 #endif
 
index bc0a5348b4a6275c4fe1d35464330eb6c69993ba..b180d8e497c317f88a9880241721818bec818174 100644 (file)
@@ -508,6 +508,8 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
 }
 #endif /* CONFIG_KEXEC_FILE */
 
+#ifdef CONFIG_CRASH_DUMP
+
 static int
 kexec_mark_range(unsigned long start, unsigned long end, bool protect)
 {
@@ -552,6 +554,7 @@ void arch_kexec_unprotect_crashkres(void)
 {
        kexec_mark_crashkres(false);
 }
+#endif
 
 /*
  * During a traditional boot under SME, SME will encrypt the kernel,
index 830425e6d38e2f75fb9ea41f7dc042e175d98846..f3130f762784a19fb3e74e3efe2baf0b93cd2376 100644 (file)
@@ -796,7 +796,7 @@ struct machine_ops machine_ops __ro_after_init = {
        .emergency_restart = native_machine_emergency_restart,
        .restart = native_machine_restart,
        .halt = native_machine_halt,
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
        .crash_shutdown = native_machine_crash_shutdown,
 #endif
 };
@@ -826,7 +826,7 @@ void machine_halt(void)
        machine_ops.halt();
 }
 
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
 void machine_crash_shutdown(struct pt_regs *regs)
 {
        machine_ops.crash_shutdown(regs);
index 46d5a8c520ad4aa165b72036d43c62135e83f037..3e1e96efadfe7ec8fe5a5499529a9d2913e10722 100644 (file)
@@ -471,7 +471,7 @@ static void __init arch_reserve_crashkernel(void)
        bool high = false;
        int ret;
 
-       if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+       if (!IS_ENABLED(CONFIG_CRASH_RESERVE))
                return;
 
        ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
index 2908e063d7d830db32decbfefbd017fd05292700..18266cc3d98c182a3822ebf6c025924442994edb 100644 (file)
@@ -286,7 +286,7 @@ struct smp_ops smp_ops = {
        .smp_cpus_done          = native_smp_cpus_done,
 
        .stop_other_cpus        = native_stop_other_cpus,
-#if defined(CONFIG_KEXEC_CORE)
+#if defined(CONFIG_CRASH_DUMP)
        .crash_stop_other_cpus  = kdump_nmi_shootdown_cpus,
 #endif
        .smp_send_reschedule    = native_smp_send_reschedule,
similarity index 90%
rename from arch/x86/kernel/crash_core_32.c
rename to arch/x86/kernel/vmcore_info_32.c
index 8a89c109e20a6c8038fe8b26f4da254658b2f3e0..5995a749288a95de65bcc5fc128a41b7ae20450a 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
 #include <linux/pgtable.h>
 
 #include <asm/setup.h>
similarity index 94%
rename from arch/x86/kernel/crash_core_64.c
rename to arch/x86/kernel/vmcore_info_64.c
index 7d255f882afe6f48eb399ee33ac2f0f524ccedd0..0dec7d8687544787a5fba9d694774c1c74e1e3b8 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
 #include <linux/pgtable.h>
 
 #include <asm/setup.h>
index 65ed14b6540bbebfb91e1d20d0c7627277da3f26..8c3032a96caf167b30c1fc76b91039c809bc44e4 100644 (file)
@@ -7,7 +7,6 @@ source "virt/kvm/Kconfig"
 
 menuconfig VIRTUALIZATION
        bool "Virtualization"
-       depends on HAVE_KVM || X86
        default y
        help
          Say Y here to get to see options for using your Linux host to run other
@@ -20,7 +19,6 @@ if VIRTUALIZATION
 
 config KVM
        tristate "Kernel-based Virtual Machine (KVM) support"
-       depends on HAVE_KVM
        depends on HIGH_RES_TIMERS
        depends on X86_LOCAL_APIC
        select KVM_COMMON
@@ -29,9 +27,9 @@ config KVM
        select HAVE_KVM_PFNCACHE
        select HAVE_KVM_DIRTY_RING_TSO
        select HAVE_KVM_DIRTY_RING_ACQ_REL
-       select IRQ_BYPASS_MANAGER
        select HAVE_KVM_IRQ_BYPASS
        select HAVE_KVM_IRQ_ROUTING
+       select HAVE_KVM_READONLY_MEM
        select KVM_ASYNC_PF
        select USER_RETURN_NOTIFIER
        select KVM_MMIO
index 95ea1a1f7403ea8cd3da1051ac1632d046d8081b..999227fc7c6659158051525bcf819ae2f9edc5f0 100644 (file)
@@ -189,9 +189,8 @@ static const struct file_operations mmu_rmaps_stat_fops = {
        .release        = kvm_mmu_rmaps_stat_release,
 };
 
-int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
 {
        debugfs_create_file("mmu_rmaps_stat", 0644, kvm->debugfs_dentry, kvm,
                            &mmu_rmaps_stat_fops);
-       return 0;
 }
index e223043ef5b26f23be5b2f0606641f66c5cd18aa..5d4c86133453d88dbb4f1b5f34d04bde7ec45a55 100644 (file)
@@ -1820,22 +1820,22 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
        return X86EMUL_CONTINUE;
 }
 
-static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
+static int emulate_push(struct x86_emulate_ctxt *ctxt, const void *data, int len)
 {
        struct segmented_address addr;
 
-       rsp_increment(ctxt, -bytes);
+       rsp_increment(ctxt, -len);
        addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
        addr.seg = VCPU_SREG_SS;
 
-       return segmented_write(ctxt, addr, data, bytes);
+       return segmented_write(ctxt, addr, data, len);
 }
 
 static int em_push(struct x86_emulate_ctxt *ctxt)
 {
        /* Disable writeback. */
        ctxt->dst.type = OP_NONE;
-       return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
+       return emulate_push(ctxt, &ctxt->src.val, ctxt->op_bytes);
 }
 
 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
@@ -1863,7 +1863,8 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
                        void *dest, int len)
 {
        int rc;
-       unsigned long val, change_mask;
+       unsigned long val = 0;
+       unsigned long change_mask;
        int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
        int cpl = ctxt->ops->cpl(ctxt);
 
@@ -1920,7 +1921,7 @@ static int em_enter(struct x86_emulate_ctxt *ctxt)
                return X86EMUL_UNHANDLEABLE;
 
        rbp = reg_read(ctxt, VCPU_REGS_RBP);
-       rc = push(ctxt, &rbp, stack_size(ctxt));
+       rc = emulate_push(ctxt, &rbp, stack_size(ctxt));
        if (rc != X86EMUL_CONTINUE)
                return rc;
        assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
@@ -1954,7 +1955,7 @@ static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
 {
        int seg = ctxt->src2.val;
-       unsigned long selector;
+       unsigned long selector = 0;
        int rc;
 
        rc = emulate_pop(ctxt, &selector, 2);
@@ -2000,7 +2001,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 {
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RDI;
-       u32 val;
+       u32 val = 0;
 
        while (reg >= VCPU_REGS_RAX) {
                if (reg == VCPU_REGS_RSP) {
@@ -2229,7 +2230,7 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
-       unsigned long eip;
+       unsigned long eip = 0;
 
        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
@@ -2241,7 +2242,8 @@ static int em_ret(struct x86_emulate_ctxt *ctxt)
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
-       unsigned long eip, cs;
+       unsigned long eip = 0;
+       unsigned long cs = 0;
        int cpl = ctxt->ops->cpl(ctxt);
        struct desc_struct new_desc;
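
The pattern repeated throughout this file is defensive zero-initialisation:
emulate_pop() can fail on the stack read before it ever writes through its
output pointer, so callers now start from a defined value rather than stack
garbage. In sketch form:

        unsigned long eip = 0;  /* defined even if the pop faults */

        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
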
 
@@ -3011,7 +3013,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
                ret = em_push(ctxt);
        }
 
-       ops->get_dr(ctxt, 7, &dr7);
+       dr7 = ops->get_dr(ctxt, 7);
        ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
 
        return ret;
@@ -3184,7 +3186,7 @@ fail:
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
-       unsigned long eip;
+       unsigned long eip = 0;
 
        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
@@ -3866,15 +3868,6 @@ static int check_cr_access(struct x86_emulate_ctxt *ctxt)
        return X86EMUL_CONTINUE;
 }
 
-static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
-{
-       unsigned long dr7;
-
-       ctxt->ops->get_dr(ctxt, 7, &dr7);
-
-       return dr7 & DR7_GD;
-}
-
 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
 {
        int dr = ctxt->modrm_reg;
@@ -3887,10 +3880,10 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
        if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
                return emulate_ud(ctxt);
 
-       if (check_dr7_gd(ctxt)) {
+       if (ctxt->ops->get_dr(ctxt, 7) & DR7_GD) {
                ulong dr6;
 
-               ctxt->ops->get_dr(ctxt, 6, &dr6);
+               dr6 = ctxt->ops->get_dr(ctxt, 6);
                dr6 &= ~DR_TRAP_BITS;
                dr6 |= DR6_BD | DR6_ACTIVE_LOW;
                ctxt->ops->set_dr(ctxt, 6, dr6);
@@ -3962,7 +3955,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
         * protected mode.
         */
        if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
-           ctxt->ops->check_pmc(ctxt, rcx))
+           ctxt->ops->check_rdpmc_early(ctxt, rcx))
                return emulate_gp(ctxt, 0);
 
        return X86EMUL_CONTINUE;
@@ -4505,11 +4498,11 @@ static const struct instr_dual instr_dual_0f_38_f1 = {
 };
 
 static const struct gprefix three_byte_0f_38_f0 = {
-       ID(0, &instr_dual_0f_38_f0), N, N, N
+       ID(0, &instr_dual_0f_38_f0), ID(0, &instr_dual_0f_38_f0), N, N
 };
 
 static const struct gprefix three_byte_0f_38_f1 = {
-       ID(0, &instr_dual_0f_38_f1), N, N, N
+       ID(0, &instr_dual_0f_38_f1), ID(0, &instr_dual_0f_38_f1), N, N
 };
 
 /*
@@ -5449,7 +5442,7 @@ twobyte_insn:
                ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
                break;
        case 0x21: /* mov from dr to reg */
-               ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
+               ctxt->dst.val = ops->get_dr(ctxt, ctxt->modrm_reg);
                break;
        case 0x40 ... 0x4f:     /* cmov */
                if (test_cc(ctxt->b, ctxt->eflags))
index e6d149825169dda3ace396ca979923c4a2d108e8..5382646162a38710f4c85bc7b0f28f1dc944ee06 100644 (file)
@@ -203,12 +203,12 @@ struct x86_emulate_ops {
        ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
        int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
        int (*cpl)(struct x86_emulate_ctxt *ctxt);
-       void (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
+       ulong (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr);
        int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
        int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
        int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
        int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
-       int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
+       int (*check_rdpmc_early)(struct x86_emulate_ctxt *ctxt, u32 pmc);
        int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
        void (*halt)(struct x86_emulate_ctxt *ctxt);
        void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
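
Reading a debug register through the emulator cannot fail (the register
number is validated by the caller), so returning the value directly is
strictly simpler than the old out-parameter form and removes single-use
wrappers such as check_dr7_gd() above. The call sites shrink accordingly:

        /* before */
        unsigned long dr7;
        ops->get_dr(ctxt, 7, &dr7);
        if (dr7 & DR7_GD) ...

        /* after */
        if (ops->get_dr(ctxt, 7) & DR7_GD) ...
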
index 1edf93ee33957826681f9dc2ec3d3caddaba0a29..cf37586f04668df976c983e89b7c37ae3c58e23a 100644 (file)
@@ -41,6 +41,7 @@
 #include "ioapic.h"
 #include "trace.h"
 #include "x86.h"
+#include "xen.h"
 #include "cpuid.h"
 #include "hyperv.h"
 #include "smm.h"
@@ -124,6 +125,9 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
        return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
+EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
+
 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
 
@@ -499,8 +503,10 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
        }
 
        /* Check if there are APF page ready requests pending */
-       if (enabled)
+       if (enabled) {
                kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
+               kvm_xen_sw_enable_lapic(apic->vcpu);
+       }
 }
 
 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
@@ -2466,8 +2472,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
 
-       if (!vcpu->arch.apic)
+       if (!vcpu->arch.apic) {
+               static_branch_dec(&kvm_has_noapic_vcpu);
                return;
+       }
 
        hrtimer_cancel(&apic->lapic_timer.timer);
 
@@ -2809,6 +2817,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 
        ASSERT(vcpu != NULL);
 
+       if (!irqchip_in_kernel(vcpu->kvm)) {
+               static_branch_inc(&kvm_has_noapic_vcpu);
+               return 0;
+       }
+
        apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
        if (!apic)
                goto nomem;
@@ -2847,6 +2860,21 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
        static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
 
+       /*
+        * Defer evaluating inhibits until the vCPU is first run, as this vCPU
+        * will not get notified of any changes until this vCPU is visible to
+        * other vCPUs (marked online and added to the set of vCPUs).
+        *
+        * Opportunistically mark APICv active as VMX in particular is highly
+        * unlikely to have inhibits.  Ignore the current per-VM APICv state so
+        * that vCPU creation is guaranteed to run with a deterministic value,
+        * the request will ensure the vCPU gets the correct state before VM-Entry.
+        */
+       if (enable_apicv) {
+               apic->apicv_active = true;
+               kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+       }
+
        return 0;
 nomem_free_apic:
        kfree(apic);
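
kvm_has_noapic_vcpu is a refcounted static branch: it is incremented for
every vCPU created without an in-kernel local APIC and decremented when such
a vCPU is freed, so the key is patched to "true" only while at least one
such vCPU exists. Consumers then pay a patched no-op instead of a memory
load on the common path; a hedged sketch:

        /* fast path: the branch is compiled out unless some vCPU
         * lacks an in-kernel APIC */
        if (static_branch_unlikely(&kvm_has_noapic_vcpu) &&
            !lapic_in_kernel(vcpu))
                /* fall back to userspace-APIC handling */;
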
index 2b515acd8e7207b688b0b8f59d4f2bb377764320..992e651540e8523aadbd15f2cb8dc748850c6a49 100644 (file)
@@ -3110,7 +3110,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
        /*
         * Read each entry once.  As above, a non-leaf entry can be promoted to
         * a huge page _during_ this walk.  Re-reading the entry could send the
-        * walk into the weeds, e.g. p*d_large() returns false (sees the old
+        * walk into the weeds, e.g. p*d_leaf() returns false (sees the old
         * value) and then p*d_offset() walks into the target huge page instead
         * of the old page table (sees the new value).
         */
@@ -3126,7 +3126,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
        if (pud_none(pud) || !pud_present(pud))
                goto out;
 
-       if (pud_large(pud)) {
+       if (pud_leaf(pud)) {
                level = PG_LEVEL_1G;
                goto out;
        }
@@ -3135,7 +3135,7 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
        if (pmd_none(pmd) || !pmd_present(pmd))
                goto out;
 
-       if (pmd_large(pmd))
+       if (pmd_leaf(pmd))
                level = PG_LEVEL_2M;
 
 out:
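
The comment above is the key to this lockless walk: each level is
snapshotted exactly once and every subsequent test runs against the local
copy, never the live page-table entry. In sketch form:

        pud_t pud = READ_ONCE(*pudp);   /* single snapshot */

        if (pud_none(pud) || !pud_present(pud))
                goto out;
        if (pud_leaf(pud)) {            /* decided on the snapshot */
                level = PG_LEVEL_1G;
                goto out;
        }
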
@@ -3575,10 +3575,14 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
        if (WARN_ON_ONCE(!sp))
                return;
 
-       if (is_tdp_mmu_page(sp))
+       if (is_tdp_mmu_page(sp)) {
+               lockdep_assert_held_read(&kvm->mmu_lock);
                kvm_tdp_mmu_put_root(kvm, sp);
-       else if (!--sp->root_count && sp->role.invalid)
-               kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+       } else {
+               lockdep_assert_held_write(&kvm->mmu_lock);
+               if (!--sp->root_count && sp->role.invalid)
+                       kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
+       }
 
        *root_hpa = INVALID_PAGE;
 }
@@ -3587,6 +3591,7 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
                        ulong roots_to_free)
 {
+       bool is_tdp_mmu = tdp_mmu_enabled && mmu->root_role.direct;
        int i;
        LIST_HEAD(invalid_list);
        bool free_active_root;
@@ -3609,7 +3614,10 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
                        return;
        }
 
-       write_lock(&kvm->mmu_lock);
+       if (is_tdp_mmu)
+               read_lock(&kvm->mmu_lock);
+       else
+               write_lock(&kvm->mmu_lock);
 
        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
                if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i))
@@ -3635,8 +3643,13 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
                mmu->root.pgd = 0;
        }
 
-       kvm_mmu_commit_zap_page(kvm, &invalid_list);
-       write_unlock(&kvm->mmu_lock);
+       if (is_tdp_mmu) {
+               read_unlock(&kvm->mmu_lock);
+               WARN_ON_ONCE(!list_empty(&invalid_list));
+       } else {
+               kvm_mmu_commit_zap_page(kvm, &invalid_list);
+               write_unlock(&kvm->mmu_lock);
+       }
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
 
@@ -3693,15 +3706,15 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
        unsigned i;
        int r;
 
+       if (tdp_mmu_enabled)
+               return kvm_tdp_mmu_alloc_root(vcpu);
+
        write_lock(&vcpu->kvm->mmu_lock);
        r = make_mmu_pages_available(vcpu);
        if (r < 0)
                goto out_unlock;
 
-       if (tdp_mmu_enabled) {
-               root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
-               mmu->root.hpa = root;
-       } else if (shadow_root_level >= PT64_ROOT_4LEVEL) {
+       if (shadow_root_level >= PT64_ROOT_4LEVEL) {
                root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level);
                mmu->root.hpa = root;
        } else if (shadow_root_level == PT32E_ROOT_LEVEL) {
@@ -7039,9 +7052,7 @@ int kvm_mmu_vendor_module_init(void)
 
        kvm_mmu_reset_all_pte_masks();
 
-       pte_list_desc_cache = kmem_cache_create("pte_list_desc",
-                                           sizeof(struct pte_list_desc),
-                                           0, SLAB_ACCOUNT, NULL);
+       pte_list_desc_cache = KMEM_CACHE(pte_list_desc, SLAB_ACCOUNT);
        if (!pte_list_desc_cache)
                goto out;
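
KMEM_CACHE() is shorthand for essentially the open-coded call it replaces;
it derives the cache name, size, and alignment from the struct itself:

        /* KMEM_CACHE(pte_list_desc, SLAB_ACCOUNT) expands to roughly: */
        kmem_cache_create("pte_list_desc",
                          sizeof(struct pte_list_desc),
                          __alignof__(struct pte_list_desc),
                          SLAB_ACCOUNT, NULL);

The one behavioural nuance is the alignment argument: the old call passed 0,
while the macro passes __alignof__ of the struct.
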
 
index c87da11f3a049b75e2dc6cae714b37e868666252..f6448284c18e3e1a2820530a4919f151a10c997b 100644 (file)
 #include "mmu_internal.h"
 #include "page_track.h"
 
+static bool kvm_external_write_tracking_enabled(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+       /*
+        * Read external_write_tracking_enabled before related pointers.  Pairs
+        * with the smp_store_release in kvm_page_track_write_tracking_enable().
+        */
+       return smp_load_acquire(&kvm->arch.external_write_tracking_enabled);
+#else
+       return false;
+#endif
+}
+
 bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
 {
-       return IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING) ||
-              !tdp_enabled || kvm_shadow_root_allocated(kvm);
+       return kvm_external_write_tracking_enabled(kvm) ||
+              kvm_shadow_root_allocated(kvm) || !tdp_enabled;
 }
 
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
@@ -153,6 +166,50 @@ int kvm_page_track_init(struct kvm *kvm)
        return init_srcu_struct(&head->track_srcu);
 }
 
+static int kvm_enable_external_write_tracking(struct kvm *kvm)
+{
+       struct kvm_memslots *slots;
+       struct kvm_memory_slot *slot;
+       int r = 0, i, bkt;
+
+       mutex_lock(&kvm->slots_arch_lock);
+
+       /*
+        * Check for *any* write tracking user (not just external users) under
+        * lock.  This avoids unnecessary work, e.g. if KVM itself is using
+        * write tracking, or if two external users raced when registering.
+        */
+       if (kvm_page_track_write_tracking_enabled(kvm))
+               goto out_success;
+
+       for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
+               slots = __kvm_memslots(kvm, i);
+               kvm_for_each_memslot(slot, bkt, slots) {
+                       /*
+                        * Intentionally do NOT free allocations on failure to
+                        * avoid having to track which allocations were made
+                        * now versus when the memslot was created.  The
+                        * metadata is guaranteed to be freed when the slot is
+                        * freed, and will be kept/used if userspace retries
+                        * the failed ioctl() instead of killing the VM.
+                        */
+                       r = kvm_page_track_write_tracking_alloc(slot);
+                       if (r)
+                               goto out_unlock;
+               }
+       }
+
+out_success:
+       /*
+        * Ensure that external_write_tracking_enabled becomes true strictly
+        * after all the related pointers are set.
+        */
+       smp_store_release(&kvm->arch.external_write_tracking_enabled, true);
+out_unlock:
+       mutex_unlock(&kvm->slots_arch_lock);
+       return r;
+}
+
 /*
  * register the notifier so that event interception for the tracked guest
  * pages can be received.
@@ -161,10 +218,17 @@ int kvm_page_track_register_notifier(struct kvm *kvm,
                                     struct kvm_page_track_notifier_node *n)
 {
        struct kvm_page_track_notifier_head *head;
+       int r;
 
        if (!kvm || kvm->mm != current->mm)
                return -ESRCH;
 
+       if (!kvm_external_write_tracking_enabled(kvm)) {
+               r = kvm_enable_external_write_tracking(kvm);
+               if (r)
+                       return r;
+       }
+
        kvm_get_kvm(kvm);
 
        head = &kvm->arch.track_notifier_head;
index 6ae19b4ee5b1cb17d4ddda85197379cde425b03e..d078157e62aa4025e6a3a6411e0d6b118245f3b8 100644 (file)
@@ -149,11 +149,11 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
  * If shared is set, this function is operating under the MMU lock in read
  * mode.
  */
-#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid)\
-       for (_root = tdp_mmu_next_root(_kvm, NULL, _only_valid);        \
-            ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root;      \
-            _root = tdp_mmu_next_root(_kvm, _root, _only_valid))       \
-               if (kvm_mmu_page_as_id(_root) != _as_id) {              \
+#define __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _only_valid)   \
+       for (_root = tdp_mmu_next_root(_kvm, NULL, _only_valid);                \
+            ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root;              \
+            _root = tdp_mmu_next_root(_kvm, _root, _only_valid))               \
+               if (_as_id >= 0 && kvm_mmu_page_as_id(_root) != _as_id) {       \
                } else
 
 #define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id)    \
@@ -171,12 +171,19 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
  * Holding mmu_lock for write obviates the need for RCU protection as the list
  * is guaranteed to be stable.
  */
-#define for_each_tdp_mmu_root(_kvm, _root, _as_id)                     \
-       list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)     \
-               if (kvm_lockdep_assert_mmu_lock_held(_kvm, false) &&    \
-                   kvm_mmu_page_as_id(_root) != _as_id) {              \
+#define __for_each_tdp_mmu_root(_kvm, _root, _as_id, _only_valid)              \
+       list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)             \
+               if (kvm_lockdep_assert_mmu_lock_held(_kvm, false) &&            \
+                   ((_as_id >= 0 && kvm_mmu_page_as_id(_root) != _as_id) ||    \
+                    ((_only_valid) && (_root)->role.invalid))) {               \
                } else
 
+#define for_each_tdp_mmu_root(_kvm, _root, _as_id)                     \
+       __for_each_tdp_mmu_root(_kvm, _root, _as_id, false)
+
+#define for_each_valid_tdp_mmu_root(_kvm, _root, _as_id)               \
+       __for_each_tdp_mmu_root(_kvm, _root, _as_id, true)
+
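
With this change, callers can pass a negative address-space ID to iterate over
roots in every address space (see the -1 passed by kvm_tdp_mmu_zap_leafs()
further down). A hedged one-function model of the filter logic, for
illustration only:

    /* A negative as_id acts as a wildcard matching roots in every address
     * space; a non-negative as_id must match exactly. */
    static int root_matches(int root_as_id, int as_id)
    {
            return as_id < 0 || root_as_id == as_id;
    }
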
 static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu_page *sp;
@@ -216,22 +223,41 @@ static void tdp_mmu_init_child_sp(struct kvm_mmu_page *child_sp,
        tdp_mmu_init_sp(child_sp, iter->sptep, iter->gfn, role);
 }
 
-hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
+int kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu)
 {
-       union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
+       struct kvm_mmu *mmu = vcpu->arch.mmu;
+       union kvm_mmu_page_role role = mmu->root_role;
+       int as_id = kvm_mmu_role_as_id(role);
        struct kvm *kvm = vcpu->kvm;
        struct kvm_mmu_page *root;
 
-       lockdep_assert_held_write(&kvm->mmu_lock);
+       /*
+        * Check for an existing root before acquiring the pages lock to avoid
+        * unnecessary serialization if multiple vCPUs are loading a new root.
+        * E.g. when bringing up secondary vCPUs, KVM will already have created
+        * a valid root on behalf of the primary vCPU.
+        */
+       read_lock(&kvm->mmu_lock);
+
+       for_each_valid_tdp_mmu_root_yield_safe(kvm, root, as_id) {
+               if (root->role.word == role.word)
+                       goto out_read_unlock;
+       }
+
+       spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 
        /*
-        * Check for an existing root before allocating a new one.  Note, the
-        * role check prevents consuming an invalid root.
+        * Recheck for an existing root after acquiring the pages lock; another
+        * vCPU may have raced ahead and created a new usable root.  Manually
+        * walk the list of roots as the standard macros assume that the pages
+        * lock is *not* held.  WARN if grabbing a reference to a usable root
+        * fails, as the last reference to a root can only be put *after* the
+        * root has been invalidated, which requires holding mmu_lock for write.
         */
-       for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
+       list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
                if (root->role.word == role.word &&
-                   kvm_tdp_mmu_get_root(root))
-                       goto out;
+                   !WARN_ON_ONCE(!kvm_tdp_mmu_get_root(root)))
+                       goto out_spin_unlock;
        }
 
        root = tdp_mmu_alloc_sp(vcpu);
@@ -245,13 +271,20 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
         * is ultimately put by kvm_tdp_mmu_zap_invalidated_roots().
         */
        refcount_set(&root->tdp_mmu_root_count, 2);
-
-       spin_lock(&kvm->arch.tdp_mmu_pages_lock);
        list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots);
-       spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 
-out:
-       return __pa(root->spt);
+out_spin_unlock:
+       spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+out_read_unlock:
+       read_unlock(&kvm->mmu_lock);
+       /*
+        * Note, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS will prevent entering the guest
+        * and actually consuming the root if it's invalidated after dropping
+        * mmu_lock, and the root can't be freed as this vCPU holds a reference.
+        */
+       mmu->root.hpa = __pa(root->spt);
+       mmu->root.pgd = 0;
+       return 0;
 }
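
The function above is a double-checked lookup: an optimistic scan with mmu_lock
held for read, then a recheck under tdp_mmu_pages_lock before inserting a new
root, so racing vCPUs converge on a single root. A simplified user-space sketch
of that shape using pthreads (a sketch under stated assumptions, not the
kernel's implementation; a production version would also use an atomic load for
the optimistic peek):

    #include <pthread.h>

    struct cache {
            pthread_rwlock_t rwlock;  /* plays the role of mmu_lock (read mode) */
            pthread_mutex_t  lock;    /* plays the role of tdp_mmu_pages_lock */
            void *object;             /* plays the role of the shared root */
    };

    static void *get_or_create(struct cache *c, void *(*make)(void))
    {
            void *obj;

            pthread_rwlock_rdlock(&c->rwlock);

            obj = c->object;          /* cheap check under the shared lock */
            if (!obj) {
                    pthread_mutex_lock(&c->lock);
                    if (!c->object)   /* recheck: another thread may have won */
                            c->object = make();
                    obj = c->object;
                    pthread_mutex_unlock(&c->lock);
            }

            pthread_rwlock_unlock(&c->rwlock);
            return obj;
    }

In the real code the "make" step additionally publishes the root with a
refcount of 2, one for the vCPU and one for the TDP MMU itself.
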
 
 static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
@@ -734,15 +767,26 @@ static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
        rcu_read_lock();
 
        /*
-        * To avoid RCU stalls due to recursively removing huge swaths of SPs,
-        * split the zap into two passes.  On the first pass, zap at the 1gb
-        * level, and then zap top-level SPs on the second pass.  "1gb" is not
-        * arbitrary, as KVM must be able to zap a 1gb shadow page without
-        * inducing a stall to allow in-place replacement with a 1gb hugepage.
+        * Zap roots in multiple passes of decreasing granularity, i.e. zap at
+        * 4KiB=>2MiB=>1GiB=>root, in order to better honor need_resched() (all
+        * preempt models) or mmu_lock contention (full or real-time models).
+        * Zapping at finer granularity marginally increases the total time of
+        * the zap, but in most cases the zap itself isn't latency sensitive.
         *
-        * Because zapping a SP recurses on its children, stepping down to
-        * PG_LEVEL_4K in the iterator itself is unnecessary.
+        * If KVM is configured to prove the MMU, skip the 4KiB and 2MiB zaps
+        * in order to mimic the page fault path, which can replace a 1GiB page
+        * table with an equivalent 1GiB hugepage, i.e. can get saddled with
+        * zapping a 1GiB region that's fully populated with 4KiB SPTEs.  This
+        * allows verifying that KVM can safely zap 1GiB regions, e.g. without
+        * inducing RCU stalls, without relying on a relatively rare event
+        * (zapping roots is orders of magnitude more common).  Note, because
+        * zapping a SP recurses on its children, stepping down to PG_LEVEL_4K
+        * in the iterator itself is unnecessary.
         */
+       if (!IS_ENABLED(CONFIG_KVM_PROVE_MMU)) {
+               __tdp_mmu_zap_root(kvm, root, shared, PG_LEVEL_4K);
+               __tdp_mmu_zap_root(kvm, root, shared, PG_LEVEL_2M);
+       }
        __tdp_mmu_zap_root(kvm, root, shared, PG_LEVEL_1G);
        __tdp_mmu_zap_root(kvm, root, shared, root->role.level);
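
To make the pass structure concrete, here is a hedged toy model of leaves-first
teardown: each call frees only nodes at or below a cutoff level, so calling it
with increasing cutoffs bounds the work done between opportunities to yield.
The node layout and 4-way fanout are invented for illustration:

    #include <stdlib.h>

    struct node {
            int level;                /* 1 = leaf table, higher = interior */
            struct node *child[4];
    };

    /* Free everything at or below @cutoff, leaving higher levels intact. */
    static void zap_to_level(struct node *n, int cutoff)
    {
            if (!n)
                    return;
            for (int i = 0; i < 4; i++) {
                    zap_to_level(n->child[i], cutoff);
                    if (n->child[i] && n->child[i]->level <= cutoff) {
                            free(n->child[i]);
                            n->child[i] = NULL;
                    }
            }
    }

    int main(void)
    {
            struct node *root = calloc(1, sizeof(*root));
            root->level = 3;
            for (int cutoff = 1; cutoff < root->level; cutoff++)
                    zap_to_level(root, cutoff);  /* a real version yields here */
            free(root);
            return 0;
    }
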
 
@@ -800,7 +844,13 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
                        continue;
 
                tdp_mmu_iter_set_spte(kvm, &iter, 0);
-               flush = true;
+
+               /*
+                * Zapping SPTEs in invalid roots doesn't require a TLB flush,
+                * see kvm_tdp_mmu_zap_invalidated_roots() for details.
+                */
+               if (!root->role.invalid)
+                       flush = true;
        }
 
        rcu_read_unlock();
@@ -813,16 +863,16 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
 }
 
 /*
- * Zap leaf SPTEs for the range of gfns, [start, end), for all roots. Returns
- * true if a TLB flush is needed before releasing the MMU lock, i.e. if one or
- * more SPTEs were zapped since the MMU lock was last acquired.
+ * Zap leaf SPTEs for the range of gfns, [start, end), for all *VALID* roots.
+ * Returns true if a TLB flush is needed before releasing the MMU lock, i.e. if
+ * one or more SPTEs were zapped since the MMU lock was last acquired.
  */
 bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
 {
        struct kvm_mmu_page *root;
 
        lockdep_assert_held_write(&kvm->mmu_lock);
-       for_each_tdp_mmu_root_yield_safe(kvm, root)
+       for_each_valid_tdp_mmu_root_yield_safe(kvm, root, -1)
                flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
 
        return flush;
@@ -896,7 +946,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
  * the VM is being destroyed).
  *
  * Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference.
- * See kvm_tdp_mmu_get_vcpu_root_hpa().
+ * See kvm_tdp_mmu_alloc_root().
  */
 void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
 {
@@ -1622,7 +1672,7 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 {
        struct kvm_mmu_page *root;
 
-       for_each_tdp_mmu_root(kvm, root, slot->as_id)
+       for_each_valid_tdp_mmu_root(kvm, root, slot->as_id)
                clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
 }
 
@@ -1740,7 +1790,7 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
        bool spte_set = false;
 
        lockdep_assert_held_write(&kvm->mmu_lock);
-       for_each_tdp_mmu_root(kvm, root, slot->as_id)
+       for_each_valid_tdp_mmu_root(kvm, root, slot->as_id)
                spte_set |= write_protect_gfn(kvm, root, gfn, min_level);
 
        return spte_set;
index 20d97aa46c490fff98f9d3a6cbc116935d71a726..6e1ea04ca885e5691760326f445163bbf8447cb2 100644 (file)
@@ -10,7 +10,7 @@
 void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
 
-hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
+int kvm_tdp_mmu_alloc_root(struct kvm_vcpu *vcpu);
 
 __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
 {
index 87cc6c8809ad88898894bd0ea6199ab70e2a91ac..c397b28e3d1b680788249daa32f36c12c80bd1a1 100644 (file)
@@ -29,6 +29,9 @@
 struct x86_pmu_capability __read_mostly kvm_pmu_cap;
 EXPORT_SYMBOL_GPL(kvm_pmu_cap);
 
+struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
+EXPORT_SYMBOL_GPL(kvm_pmu_eventsel);
+
 /* Precise Distribution of Instructions Retired (PDIR) */
 static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
@@ -67,7 +70,7 @@ static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = {
  *        all perf counters (both gp and fixed). The mapping relationship
  *        between pmc and perf counters is as the following:
  *        * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
- *                 [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
+ *                 [KVM_FIXED_PMC_BASE_IDX .. KVM_FIXED_PMC_BASE_IDX + 2] <=> fixed
  *        * AMD:   [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
  *          and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
  */
@@ -411,7 +414,7 @@ static bool is_gp_event_allowed(struct kvm_x86_pmu_event_filter *f,
 static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
                                   int idx)
 {
-       int fixed_idx = idx - INTEL_PMC_IDX_FIXED;
+       int fixed_idx = idx - KVM_FIXED_PMC_BASE_IDX;
 
        if (filter->action == KVM_PMU_EVENT_DENY &&
            test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap))
@@ -441,11 +444,10 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
 {
        return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
-              static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
               check_pmu_event_filter(pmc);
 }
 
-static void reprogram_counter(struct kvm_pmc *pmc)
+static int reprogram_counter(struct kvm_pmc *pmc)
 {
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
        u64 eventsel = pmc->eventsel;
@@ -456,7 +458,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
        emulate_overflow = pmc_pause_counter(pmc);
 
        if (!pmc_event_is_allowed(pmc))
-               goto reprogram_complete;
+               return 0;
 
        if (emulate_overflow)
                __kvm_perf_overflow(pmc, false);
@@ -466,7 +468,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
        if (pmc_is_fixed(pmc)) {
                fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
-                                                 pmc->idx - INTEL_PMC_IDX_FIXED);
+                                                 pmc->idx - KVM_FIXED_PMC_BASE_IDX);
                if (fixed_ctr_ctrl & 0x1)
                        eventsel |= ARCH_PERFMON_EVENTSEL_OS;
                if (fixed_ctr_ctrl & 0x2)
@@ -477,43 +479,45 @@ static void reprogram_counter(struct kvm_pmc *pmc)
        }
 
        if (pmc->current_config == new_config && pmc_resume_counter(pmc))
-               goto reprogram_complete;
+               return 0;
 
        pmc_release_perf_event(pmc);
 
        pmc->current_config = new_config;
 
-       /*
-        * If reprogramming fails, e.g. due to contention, leave the counter's
-        * regprogram bit set, i.e. opportunistically try again on the next PMU
-        * refresh.  Don't make a new request as doing so can stall the guest
-        * if reprogramming repeatedly fails.
-        */
-       if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
-                                 (eventsel & pmu->raw_event_mask),
-                                 !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
-                                 !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
-                                 eventsel & ARCH_PERFMON_EVENTSEL_INT))
-               return;
-
-reprogram_complete:
-       clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
+       return pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+                                    (eventsel & pmu->raw_event_mask),
+                                    !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+                                    !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+                                    eventsel & ARCH_PERFMON_EVENTSEL_INT);
 }
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 {
+       DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+       struct kvm_pmc *pmc;
        int bit;
 
-       for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
-               struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
+       bitmap_copy(bitmap, pmu->reprogram_pmi, X86_PMC_IDX_MAX);
 
-               if (unlikely(!pmc)) {
-                       clear_bit(bit, pmu->reprogram_pmi);
-                       continue;
-               }
+       /*
+        * The reprogramming bitmap can be written asynchronously by something
+        * other than the task that holds vcpu->mutex; take care to clear only
+        * the bits that will actually be processed.
+        */
+       BUILD_BUG_ON(sizeof(bitmap) != sizeof(atomic64_t));
+       atomic64_andnot(*(s64 *)bitmap, &pmu->__reprogram_pmi);
 
-               reprogram_counter(pmc);
+       kvm_for_each_pmc(pmu, pmc, bit, bitmap) {
+               /*
+                * If reprogramming fails, e.g. due to contention, re-set the
+                * reprogram bit, i.e. opportunistically try again on the
+                * next PMU refresh.  Don't make a new request as doing so can
+                * stall the guest if reprogramming repeatedly fails.
+                */
+               if (reprogram_counter(pmc))
+                       set_bit(pmc->idx, pmu->reprogram_pmi);
        }
 
        /*
@@ -525,10 +529,20 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
                kvm_pmu_cleanup(vcpu);
 }
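
The snapshot-then-andnot sequence above is what lets KVM clear only the bits it
is about to service. A small runnable C11 model of the same claim pattern
(names are illustrative; the kernel uses atomic64_andnot() on the live bitmap):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static _Atomic uint64_t pending;  /* stands in for __reprogram_pmi */

    static uint64_t claim_pending(void)
    {
            uint64_t snapshot = atomic_load(&pending);

            /* Clear exactly the observed bits; bits set concurrently after
             * the load survive for the next pass. */
            atomic_fetch_and(&pending, ~snapshot);
            return snapshot;
    }

    int main(void)
    {
            atomic_store(&pending, 0x5ull);
            printf("claimed %#llx\n", (unsigned long long)claim_pending());
            return 0;
    }
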
 
-/* check if idx is a valid index to access PMU */
-bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
 {
-       return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
+       /*
+        * On Intel, VMX interception has priority over RDPMC exceptions that
+        * aren't already handled by the emulator, i.e. there are no additional
+        * checks needed for Intel PMUs.
+        *
+        * On AMD, _all_ exceptions on RDPMC have priority over SVM intercepts,
+        * i.e. an invalid PMC results in a #GP, not #VMEXIT.
+        */
+       if (!kvm_pmu_ops.check_rdpmc_early)
+               return 0;
+
+       return static_call(kvm_x86_pmu_check_rdpmc_early)(vcpu, idx);
 }
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx)
@@ -567,10 +581,9 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 {
-       bool fast_mode = idx & (1u << 31);
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
-       u64 mask = fast_mode ? ~0u : ~0ull;
+       u64 mask = ~0ull;
 
        if (!pmu->version)
                return 1;
@@ -716,11 +729,7 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
 
        bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);
 
-       for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
-               pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
-               if (!pmc)
-                       continue;
-
+       kvm_for_each_pmc(pmu, pmc, i, pmu->all_valid_pmc_idx) {
                pmc_stop_counter(pmc);
                pmc->counter = 0;
                pmc->emulated_counter = 0;
@@ -741,6 +750,8 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
  */
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 {
+       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
        if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
                return;
 
@@ -750,8 +761,22 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
         */
        kvm_pmu_reset(vcpu);
 
-       bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
-       static_call(kvm_x86_pmu_refresh)(vcpu);
+       pmu->version = 0;
+       pmu->nr_arch_gp_counters = 0;
+       pmu->nr_arch_fixed_counters = 0;
+       pmu->counter_bitmask[KVM_PMC_GP] = 0;
+       pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+       pmu->reserved_bits = 0xffffffff00200000ull;
+       pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+       pmu->global_ctrl_mask = ~0ull;
+       pmu->global_status_mask = ~0ull;
+       pmu->fixed_ctr_ctrl_mask = ~0ull;
+       pmu->pebs_enable_mask = ~0ull;
+       pmu->pebs_data_cfg_mask = ~0ull;
+       bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+
+       if (vcpu->kvm->arch.enable_pmu)
+               static_call(kvm_x86_pmu_refresh)(vcpu);
 }
 
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
@@ -776,10 +801,8 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
        bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
                      pmu->pmc_in_use, X86_PMC_IDX_MAX);
 
-       for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
-               pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
-
-               if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
+       kvm_for_each_pmc(pmu, pmc, i, bitmask) {
+               if (pmc->perf_event && !pmc_speculative_in_use(pmc))
                        pmc_stop_counter(pmc);
        }
 
@@ -799,13 +822,6 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
        kvm_pmu_request_counter_reprogram(pmc);
 }
 
-static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
-       unsigned int perf_hw_id)
-{
-       return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
-               AMD64_RAW_EVENT_MASK_NB);
-}
-
 static inline bool cpl_is_matched(struct kvm_pmc *pmc)
 {
        bool select_os, select_user;
@@ -817,29 +833,56 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
                select_user = config & ARCH_PERFMON_EVENTSEL_USR;
        } else {
                config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl,
-                                         pmc->idx - INTEL_PMC_IDX_FIXED);
+                                         pmc->idx - KVM_FIXED_PMC_BASE_IDX);
                select_os = config & 0x1;
                select_user = config & 0x2;
        }
 
+       /*
+        * Skip the CPL lookup, which isn't free on Intel, if the result will
+        * be the same regardless of the CPL.
+        */
+       if (select_os == select_user)
+               return select_os;
+
        return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
 }
 
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
 {
+       DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        struct kvm_pmc *pmc;
        int i;
 
-       for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
-               pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+       BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
 
-               if (!pmc || !pmc_event_is_allowed(pmc))
+       if (!kvm_pmu_has_perf_global_ctrl(pmu))
+               bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+       else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
+                            (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX))
+               return;
+
+       kvm_for_each_pmc(pmu, pmc, i, bitmap) {
+               /*
+                * Ignore checks for edge detect (all events currently emulated
+                * by KVM are always rising edges), pin control (unsupported
+                * by modern CPUs), and counter mask and its invert flag (KVM
+                * doesn't emulate multiple events in a single clock cycle).
+                *
+                * Note, the uppermost nibble of AMD's mask overlaps Intel's
+                * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved
+                * bits (bits 35:34).  Checking the "in HLE/RTM transaction"
+                * flags is correct as the vCPU can't be in a transaction if
+                * KVM is emulating an instruction.  Checking the reserved bits
+                * might be wrong if they are defined in the future, but so
+                * could ignoring them, so do the simple thing for now.
+                */
+               if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) ||
+                   !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
                        continue;
 
-               /* Ignore checks for edge detect, pin control, invert and CMASK bits */
-               if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
-                       kvm_pmu_incr_counter(pmc);
+               kvm_pmu_incr_counter(pmc);
        }
 }
 EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
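
The early bitmap_and() against global_ctrl means a PMU with everything globally
disabled bails before touching any per-counter state. A runnable toy version of
that pre-filter (the counter layout values are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t all_valid   = 0x700000007ull;  /* GP bits 2:0, fixed 34:32 */
            uint64_t global_ctrl = 0x000000003ull;  /* only GP 0 and 1 enabled */

            uint64_t candidates = all_valid & global_ctrl;
            if (!candidates) {
                    puts("no enabled counters, nothing to do");
                    return 0;
            }
            for (int i = 0; i < 64; i++)
                    if (candidates & (1ull << i))
                            printf("check counter %d\n", i);
            return 0;
    }
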
index 7caeb3d8d4fd1739bba12b0d133185fda8a041df..4d52b0b539bacf70821febdcb7754996eb7e389b 100644 (file)
@@ -4,6 +4,8 @@
 
 #include <linux/nospec.h>
 
+#include <asm/kvm_host.h>
+
 #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu)
 #define pmu_to_vcpu(pmu)  (container_of((pmu), struct kvm_vcpu, arch.pmu))
 #define pmc_to_pmu(pmc)   (&(pmc)->vcpu->arch.pmu)
 #define VMWARE_BACKDOOR_PMC_REAL_TIME          0x10001
 #define VMWARE_BACKDOOR_PMC_APPARENT_TIME      0x10002
 
+#define KVM_FIXED_PMC_BASE_IDX INTEL_PMC_IDX_FIXED
+
+struct kvm_pmu_emulated_event_selectors {
+       u64 INSTRUCTIONS_RETIRED;
+       u64 BRANCH_INSTRUCTIONS_RETIRED;
+};
+
 struct kvm_pmu_ops {
-       bool (*hw_event_available)(struct kvm_pmc *pmc);
-       struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx);
        struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
                unsigned int idx, u64 *mask);
        struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
-       bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
+       int (*check_rdpmc_early)(struct kvm_vcpu *vcpu, unsigned int idx);
        bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
        int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
        int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -55,6 +62,38 @@ static inline bool kvm_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
        return pmu->version > 1;
 }
 
+/*
+ * KVM tracks all counters in 64-bit bitmaps, with general purpose counters
+ * mapped to bits 31:0 and fixed counters mapped to 63:32, e.g. fixed counter 0
+ * is tracked internally via index 32.  On Intel, (AMD doesn't support fixed
+ * counters), this mirrors how fixed counters are mapped to PERF_GLOBAL_CTRL
+ * and similar MSRs, i.e. tracking fixed counters at base index 32 reduces the
+ * amounter of boilerplate needed to iterate over PMCs *and* simplifies common
+ * enabling/disable/reset operations.
+ *
+ * WARNING!  This helper is only for lookups that are initiated by KVM, it is
+ * NOT safe for guest lookups, e.g. will do the wrong thing if passed a raw
+ * ECX value from RDPMC (fixed counters are accessed by setting bit 30 in ECX
+ * for RDPMC, not by adding 32 to the fixed counter index).
+ */
+static inline struct kvm_pmc *kvm_pmc_idx_to_pmc(struct kvm_pmu *pmu, int idx)
+{
+       if (idx < pmu->nr_arch_gp_counters)
+               return &pmu->gp_counters[idx];
+
+       idx -= KVM_FIXED_PMC_BASE_IDX;
+       if (idx >= 0 && idx < pmu->nr_arch_fixed_counters)
+               return &pmu->fixed_counters[idx];
+
+       return NULL;
+}
+
+#define kvm_for_each_pmc(pmu, pmc, i, bitmap)                  \
+       for_each_set_bit(i, bitmap, X86_PMC_IDX_MAX)            \
+               if (!(pmc = kvm_pmc_idx_to_pmc(pmu, i)))        \
+                       continue;                               \
+               else                                            \
+
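
A runnable model of the split index space documented above, with GP counters
occupying [0, nr_gp) and fixed counters based at index 32 (the nr_gp/nr_fixed
values are hypothetical):

    #include <stdio.h>

    #define FIXED_BASE_IDX 32   /* mirrors KVM_FIXED_PMC_BASE_IDX */

    static const char *classify(int idx, int nr_gp, int nr_fixed)
    {
            if (idx < nr_gp)
                    return "gp";
            idx -= FIXED_BASE_IDX;
            if (idx >= 0 && idx < nr_fixed)
                    return "fixed";
            return "none";      /* the macro above skips these via continue */
    }

    int main(void)
    {
            printf("%s %s %s\n", classify(0, 8, 3),   /* gp    */
                   classify(33, 8, 3),                /* fixed */
                   classify(12, 8, 3));               /* none  */
            return 0;
    }
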
 static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
 {
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -131,12 +170,13 @@ static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
 
        if (pmc_is_fixed(pmc))
                return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
-                                       pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
+                                       pmc->idx - KVM_FIXED_PMC_BASE_IDX) & 0x3;
 
        return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
 }
 
 extern struct x86_pmu_capability kvm_pmu_cap;
+extern struct kvm_pmu_emulated_event_selectors kvm_pmu_eventsel;
 
 static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
 {
@@ -178,6 +218,11 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops)
                                          pmu_ops->MAX_NR_GP_COUNTERS);
        kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
                                             KVM_PMC_MAX_FIXED);
+
+       kvm_pmu_eventsel.INSTRUCTIONS_RETIRED =
+               perf_get_hw_event_config(PERF_COUNT_HW_INSTRUCTIONS);
+       kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED =
+               perf_get_hw_event_config(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
 }
 
 static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc)
@@ -216,7 +261,7 @@ static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc)
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
+int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx);
 bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -225,7 +270,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
 void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id);
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel);
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx);
 
index dc3d95fdca7d337ef4305123b8f439d2103c8b30..d06d43d8d2aa462e4f789b5ae31c4d6d36031cb3 100644 (file)
@@ -184,7 +184,6 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_32 *smram)
 {
        struct desc_ptr dt;
-       unsigned long val;
        int i;
 
        smram->cr0     = kvm_read_cr0(vcpu);
@@ -195,10 +194,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
        for (i = 0; i < 8; i++)
                smram->gprs[i] = kvm_register_read_raw(vcpu, i);
 
-       kvm_get_dr(vcpu, 6, &val);
-       smram->dr6     = (u32)val;
-       kvm_get_dr(vcpu, 7, &val);
-       smram->dr7     = (u32)val;
+       smram->dr6     = (u32)vcpu->arch.dr6;
+       smram->dr7     = (u32)vcpu->arch.dr7;
 
        enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
        enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);
@@ -231,7 +228,6 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_64 *smram)
 {
        struct desc_ptr dt;
-       unsigned long val;
        int i;
 
        for (i = 0; i < 16; i++)
@@ -240,11 +236,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
        smram->rip    = kvm_rip_read(vcpu);
        smram->rflags = kvm_get_rflags(vcpu);
 
-
-       kvm_get_dr(vcpu, 6, &val);
-       smram->dr6 = val;
-       kvm_get_dr(vcpu, 7, &val);
-       smram->dr7 = val;
+       smram->dr6 = vcpu->arch.dr6;
+       smram->dr7 = vcpu->arch.dr7;
 
        smram->cr0 = kvm_read_cr0(vcpu);
        smram->cr3 = kvm_read_cr3(vcpu);
index b6a7ad4d69145096d55e610ef8d789b87c2a5fb0..dfcc38bd97d34f4c618bc88f202cc4dd627f00ea 100644 (file)
@@ -25,7 +25,7 @@ enum pmu_type {
        PMU_TYPE_EVNTSEL,
 };
 
-static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
+static struct kvm_pmc *amd_pmu_get_pmc(struct kvm_pmu *pmu, int pmc_idx)
 {
        unsigned int num_counters = pmu->nr_arch_gp_counters;
 
@@ -70,28 +70,24 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
                return NULL;
        }
 
-       return amd_pmc_idx_to_pmc(pmu, idx);
+       return amd_pmu_get_pmc(pmu, idx);
 }
 
-static bool amd_hw_event_available(struct kvm_pmc *pmc)
-{
-       return true;
-}
-
-static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+static int amd_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
 {
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
-       idx &= ~(3u << 30);
+       if (idx >= pmu->nr_arch_gp_counters)
+               return -EINVAL;
 
-       return idx < pmu->nr_arch_gp_counters;
+       return 0;
 }
 
 /* idx is the ECX register of RDPMC instruction */
 static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
        unsigned int idx, u64 *mask)
 {
-       return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
+       return amd_pmu_get_pmc(vcpu_to_pmu(vcpu), idx);
 }
 
 static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
@@ -233,11 +229,9 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
 }
 
 struct kvm_pmu_ops amd_pmu_ops __initdata = {
-       .hw_event_available = amd_hw_event_available,
-       .pmc_idx_to_pmc = amd_pmc_idx_to_pmc,
        .rdpmc_ecx_to_pmc = amd_rdpmc_ecx_to_pmc,
        .msr_idx_to_pmc = amd_msr_idx_to_pmc,
-       .is_valid_rdpmc_ecx = amd_is_valid_rdpmc_ecx,
+       .check_rdpmc_early = amd_check_rdpmc_early,
        .is_valid_msr = amd_is_valid_msr,
        .get_msr = amd_pmu_get_msr,
        .set_msr = amd_pmu_set_msr,
index 272d5ed37ce77c25a2011a7c876640783049d510..d1a9f9951635819c7a585882f06bcf6415ac2cdc 100644 (file)
@@ -2735,7 +2735,6 @@ static int dr_interception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        int reg, dr;
-       unsigned long val;
        int err = 0;
 
        /*
@@ -2763,11 +2762,9 @@ static int dr_interception(struct kvm_vcpu *vcpu)
        dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
        if (dr >= 16) { /* mov to DRn  */
                dr -= 16;
-               val = kvm_register_read(vcpu, reg);
-               err = kvm_set_dr(vcpu, dr, val);
+               err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
        } else {
-               kvm_get_dr(vcpu, dr, &val);
-               kvm_register_write(vcpu, reg, val);
+               kvm_register_write(vcpu, reg, kvm_get_dr(vcpu, dr));
        }
 
        return kvm_complete_insn_gp(vcpu, err);
@@ -4092,6 +4089,9 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
 
 static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 {
+       if (is_guest_mode(vcpu))
+               return EXIT_FASTPATH_NONE;
+
        if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
            to_svm(vcpu)->vmcb->control.exit_info_1)
                return handle_fastpath_set_msr_irqoff(vcpu);
@@ -4115,12 +4115,13 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
        guest_state_exit_irqoff();
 }
 
-static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
+static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
+                                         bool force_immediate_exit)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
 
-       trace_kvm_entry(vcpu);
+       trace_kvm_entry(vcpu, force_immediate_exit);
 
        svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
        svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -4139,9 +4140,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
                 * is enough to force an immediate vmexit.
                 */
                disable_nmi_singlestep(svm);
-               smp_send_reschedule(vcpu->cpu);
+               force_immediate_exit = true;
        }
 
+       if (force_immediate_exit)
+               smp_send_reschedule(vcpu->cpu);
+
        pre_svm_run(vcpu);
 
        sync_lapic_to_cr8(vcpu);
@@ -4237,9 +4241,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 
        svm_complete_interrupts(vcpu);
 
-       if (is_guest_mode(vcpu))
-               return EXIT_FASTPATH_NONE;
-
        return svm_exit_handlers_fastpath(vcpu);
 }
 
@@ -5007,8 +5008,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .check_intercept = svm_check_intercept,
        .handle_exit_irqoff = svm_handle_exit_irqoff,
 
-       .request_immediate_exit = __kvm_request_immediate_exit,
-
        .sched_in = svm_sched_in,
 
        .nested_ops = &svm_nested_ops,
index 83843379813ee3ef8cca33d1986ef61ea6e1ff9b..88659de4d2a7141a6eff8adb28054c8cb0a6c3c2 100644 (file)
  * Tracepoint for guest mode entry.
  */
 TRACE_EVENT(kvm_entry,
-       TP_PROTO(struct kvm_vcpu *vcpu),
-       TP_ARGS(vcpu),
+       TP_PROTO(struct kvm_vcpu *vcpu, bool force_immediate_exit),
+       TP_ARGS(vcpu, force_immediate_exit),
 
        TP_STRUCT__entry(
                __field(        unsigned int,   vcpu_id         )
                __field(        unsigned long,  rip             )
+               __field(        bool,           immediate_exit  )
        ),
 
        TP_fast_assign(
                __entry->vcpu_id        = vcpu->vcpu_id;
                __entry->rip            = kvm_rip_read(vcpu);
+               __entry->immediate_exit = force_immediate_exit;
        ),
 
-       TP_printk("vcpu %u, rip 0x%lx", __entry->vcpu_id, __entry->rip)
+       TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip,
+                 __entry->immediate_exit ? "[immediate exit]" : "")
 );
 
 /*
index 6329a306856b28972ca32af5f708bb9408c60896..d05ddf7514915c479bffbd6e40514b2b91db93ed 100644 (file)
@@ -3606,7 +3606,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                return 1;
        }
 
-       kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+       kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
 
        if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
                return nested_vmx_failInvalid(vcpu);
@@ -4433,7 +4433,7 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
 
        if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
-               kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
+               vmcs12->guest_dr7 = vcpu->arch.dr7;
 
        if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
                vmcs12->guest_ia32_efer = vcpu->arch.efer;
index 315c7c2ba89b13437fe4c3cbb93d92f75bd8f3f1..12ade343a17ed5c7aaa2efc1ebf3b3b40046d907 100644 (file)
 #include "nested.h"
 #include "pmu.h"
 
-#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
-
-enum intel_pmu_architectural_events {
-       /*
-        * The order of the architectural events matters as support for each
-        * event is enumerated via CPUID using the index of the event.
-        */
-       INTEL_ARCH_CPU_CYCLES,
-       INTEL_ARCH_INSTRUCTIONS_RETIRED,
-       INTEL_ARCH_REFERENCE_CYCLES,
-       INTEL_ARCH_LLC_REFERENCES,
-       INTEL_ARCH_LLC_MISSES,
-       INTEL_ARCH_BRANCHES_RETIRED,
-       INTEL_ARCH_BRANCHES_MISPREDICTED,
-
-       NR_REAL_INTEL_ARCH_EVENTS,
-
-       /*
-        * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
-        * TSC reference cycles.  The architectural reference cycles event may
-        * or may not actually use the TSC as the reference, e.g. might use the
-        * core crystal clock or the bus clock (yeah, "architectural").
-        */
-       PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
-       NR_INTEL_ARCH_EVENTS,
-};
+/*
+ * Perf's "BASE" is wildly misleading, architectural PMUs use bits 31:16 of ECX
+ * to encode the "type" of counter to read, i.e. this is not a "base".  And to
+ * further confuse things, non-architectural PMUs use bit 31 as a flag for
+ * "fast" reads, whereas the "type" is an explicit value.
+ */
+#define INTEL_RDPMC_GP         0
+#define INTEL_RDPMC_FIXED      INTEL_PMC_FIXED_RDPMC_BASE
 
-static struct {
-       u8 eventsel;
-       u8 unit_mask;
-} const intel_arch_events[] = {
-       [INTEL_ARCH_CPU_CYCLES]                 = { 0x3c, 0x00 },
-       [INTEL_ARCH_INSTRUCTIONS_RETIRED]       = { 0xc0, 0x00 },
-       [INTEL_ARCH_REFERENCE_CYCLES]           = { 0x3c, 0x01 },
-       [INTEL_ARCH_LLC_REFERENCES]             = { 0x2e, 0x4f },
-       [INTEL_ARCH_LLC_MISSES]                 = { 0x2e, 0x41 },
-       [INTEL_ARCH_BRANCHES_RETIRED]           = { 0xc4, 0x00 },
-       [INTEL_ARCH_BRANCHES_MISPREDICTED]      = { 0xc5, 0x00 },
-       [PSEUDO_ARCH_REFERENCE_CYCLES]          = { 0x00, 0x03 },
-};
+#define INTEL_RDPMC_TYPE_MASK  GENMASK(31, 16)
+#define INTEL_RDPMC_INDEX_MASK GENMASK(15, 0)
 
-/* mapping between fixed pmc index and intel_arch_events array */
-static int fixed_pmc_events[] = {
-       [0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
-       [1] = INTEL_ARCH_CPU_CYCLES,
-       [2] = PSEUDO_ARCH_REFERENCE_CYCLES,
-};
+#define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
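
For illustration, a user-space decode of the architectural ECX layout these
masks describe; the fixed-counter type value 0x4000 falls out of bit 30 landing
in the 31:16 type field (the macro names here are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define RDPMC_TYPE(ecx)  (((ecx) >> 16) & 0xffffu)  /* bits 31:16 */
    #define RDPMC_INDEX(ecx) ((ecx) & 0xffffu)          /* bits 15:0  */
    #define RDPMC_TYPE_FIXED 0x4000u                    /* bit 30 set */

    int main(void)
    {
            uint32_t ecx = (1u << 30) | 1;   /* fixed counter 1 */

            printf("type=%#x index=%u fixed=%d\n", RDPMC_TYPE(ecx),
                   RDPMC_INDEX(ecx), RDPMC_TYPE(ecx) == RDPMC_TYPE_FIXED);
            return 0;
    }
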
 
 static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 {
@@ -84,77 +50,61 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 
                pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
 
-               __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
+               __set_bit(KVM_FIXED_PMC_BASE_IDX + i, pmu->pmc_in_use);
                kvm_pmu_request_counter_reprogram(pmc);
        }
 }
 
-static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
-{
-       if (pmc_idx < INTEL_PMC_IDX_FIXED) {
-               return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
-                                 MSR_P6_EVNTSEL0);
-       } else {
-               u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;
-
-               return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
-       }
-}
-
-static bool intel_hw_event_available(struct kvm_pmc *pmc)
-{
-       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-       u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
-       u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-       int i;
-
-       BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS);
-
-       /*
-        * Disallow events reported as unavailable in guest CPUID.  Note, this
-        * doesn't apply to pseudo-architectural events.
-        */
-       for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) {
-               if (intel_arch_events[i].eventsel != event_select ||
-                   intel_arch_events[i].unit_mask != unit_mask)
-                       continue;
-
-               return pmu->available_event_types & BIT(i);
-       }
-
-       return true;
-}
-
-static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
-{
-       struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-       bool fixed = idx & (1u << 30);
-
-       idx &= ~(3u << 30);
-
-       return fixed ? idx < pmu->nr_arch_fixed_counters
-                    : idx < pmu->nr_arch_gp_counters;
-}
-
 static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
                                            unsigned int idx, u64 *mask)
 {
+       unsigned int type = idx & INTEL_RDPMC_TYPE_MASK;
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-       bool fixed = idx & (1u << 30);
        struct kvm_pmc *counters;
        unsigned int num_counters;
+       u64 bitmask;
 
-       idx &= ~(3u << 30);
-       if (fixed) {
+       /*
+        * The encoding of ECX for RDPMC is different for architectural versus
+        * non-architectural PMUs (PMUs with version '0').  For architectural
+        * PMUs, bits 31:16 specify the PMC type and bits 15:0 specify the PMC
+        * index.  For non-architectural PMUs, bit 31 is a "fast" flag, and
+        * bits 30:0 specify the PMC index.
+        *
+        * Yell and reject attempts to read PMCs for a non-architectural PMU,
+        * as KVM doesn't support such PMUs.
+        */
+       if (WARN_ON_ONCE(!pmu->version))
+               return NULL;
+
+       /*
+        * General Purpose (GP) PMCs are supported on all PMUs, and fixed PMCs
+        * are supported on all architectural PMUs, i.e. on all virtual PMUs
+        * supported by KVM.  Note, KVM only emulates fixed PMCs for PMU v2+,
+        * but the type itself is still valid, i.e. let RDPMC fail due to
+        * accessing a non-existent counter.  Reject attempts to read all other
+        * types, which are unknown/unsupported.
+        */
+       switch (type) {
+       case INTEL_RDPMC_FIXED:
                counters = pmu->fixed_counters;
                num_counters = pmu->nr_arch_fixed_counters;
-       } else {
+               bitmask = pmu->counter_bitmask[KVM_PMC_FIXED];
+               break;
+       case INTEL_RDPMC_GP:
                counters = pmu->gp_counters;
                num_counters = pmu->nr_arch_gp_counters;
+               bitmask = pmu->counter_bitmask[KVM_PMC_GP];
+               break;
+       default:
+               return NULL;
        }
+
+       idx &= INTEL_RDPMC_INDEX_MASK;
        if (idx >= num_counters)
                return NULL;
-       *mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
+
+       *mask &= bitmask;
        return &counters[array_index_nospec(idx, num_counters)];
 }
 
@@ -464,20 +414,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        return 0;
 }
 
-static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
+/*
+ * Map fixed counter events to architectural general purpose event encodings.
+ * Perf doesn't provide APIs to allow KVM to directly program a fixed counter,
+ * and so KVM instead programs the architectural event to effectively request
+ * the fixed counter.  Perf isn't guaranteed to use a fixed counter and may
+ * instead program the encoding into a general purpose counter, e.g. if a
+ * different perf_event is already utilizing the requested counter, but the end
+ * result is the same (ignoring the fact that using a general purpose counter
+ * will likely exacerbate counter contention).
+ *
+ * Forcibly inlined to allow asserting on @index at build time, and there should
+ * never be more than one user.
+ */
+static __always_inline u64 intel_get_fixed_pmc_eventsel(unsigned int index)
 {
-       int i;
-
-       BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
+       const enum perf_hw_id fixed_pmc_perf_ids[] = {
+               [0] = PERF_COUNT_HW_INSTRUCTIONS,
+               [1] = PERF_COUNT_HW_CPU_CYCLES,
+               [2] = PERF_COUNT_HW_REF_CPU_CYCLES,
+       };
+       u64 eventsel;
 
-       for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
-               int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
-               struct kvm_pmc *pmc = &pmu->fixed_counters[index];
-               u32 event = fixed_pmc_events[index];
+       BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_perf_ids) != KVM_PMC_MAX_FIXED);
+       BUILD_BUG_ON(index >= KVM_PMC_MAX_FIXED);
 
-               pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
-                                intel_arch_events[event].eventsel;
-       }
+       /*
+        * Yell if perf reports support for a fixed counter but perf doesn't
+        * have a known encoding for the associated general purpose event.
+        */
+       eventsel = perf_get_hw_event_config(fixed_pmc_perf_ids[index]);
+       WARN_ON_ONCE(!eventsel && index < kvm_pmu_cap.num_counters_fixed);
+       return eventsel;
 }
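
A hedged sketch of the resulting mapping, reusing the event-select/unit-mask
pairs from the intel_arch_events[] table this patch removes; in the real code
the values come from perf_get_hw_event_config(), not a local table:

    #include <assert.h>
    #include <stdio.h>

    /* Encodings from the removed table: (unit_mask << 8) | event_select. */
    #define EVT_INSTRUCTIONS_RETIRED  0x00c0u
    #define EVT_CPU_CYCLES            0x003cu
    #define EVT_PSEUDO_REF_CYCLES     0x0300u  /* IA32_FIXED_CTR2 pseudo event */

    static unsigned int fixed_pmc_eventsel(unsigned int index)
    {
            static const unsigned int eventsel[] = {
                    EVT_INSTRUCTIONS_RETIRED,  /* fixed counter 0 */
                    EVT_CPU_CYCLES,            /* fixed counter 1 */
                    EVT_PSEUDO_REF_CYCLES,     /* fixed counter 2 */
            };

            assert(index < sizeof(eventsel) / sizeof(eventsel[0]));
            return eventsel[index];
    }

    int main(void)
    {
            printf("%#x\n", fixed_pmc_eventsel(0));  /* prints 0xc0 */
            return 0;
    }
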
 
 static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
@@ -491,19 +459,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
        u64 counter_mask;
        int i;
 
-       pmu->nr_arch_gp_counters = 0;
-       pmu->nr_arch_fixed_counters = 0;
-       pmu->counter_bitmask[KVM_PMC_GP] = 0;
-       pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
-       pmu->version = 0;
-       pmu->reserved_bits = 0xffffffff00200000ull;
-       pmu->raw_event_mask = X86_RAW_EVENT_MASK;
-       pmu->global_ctrl_mask = ~0ull;
-       pmu->global_status_mask = ~0ull;
-       pmu->fixed_ctr_ctrl_mask = ~0ull;
-       pmu->pebs_enable_mask = ~0ull;
-       pmu->pebs_data_cfg_mask = ~0ull;
-
        memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));
 
        /*
@@ -515,8 +470,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
                return;
 
        entry = kvm_find_cpuid_entry(vcpu, 0xa);
-       if (!entry || !vcpu->kvm->arch.enable_pmu)
+       if (!entry)
                return;
+
        eax.full = entry->eax;
        edx.full = entry->edx;
 
@@ -543,13 +499,12 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
                                                  kvm_pmu_cap.bit_width_fixed);
                pmu->counter_bitmask[KVM_PMC_FIXED] =
                        ((u64)1 << edx.split.bit_width_fixed) - 1;
-               setup_fixed_pmc_eventsel(pmu);
        }
 
        for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
                pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
        counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
-               (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED));
+               (((1ull << pmu->nr_arch_fixed_counters) - 1) << KVM_FIXED_PMC_BASE_IDX));
        pmu->global_ctrl_mask = counter_mask;
 
        /*
@@ -593,7 +548,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
                        pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
                        for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
                                pmu->fixed_ctr_ctrl_mask &=
-                                       ~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
+                                       ~(1ULL << (KVM_FIXED_PMC_BASE_IDX + i * 4));
                        }
                        pmu->pebs_data_cfg_mask = ~0xff00000full;
                } else {
@@ -619,8 +574,9 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
        for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
                pmu->fixed_counters[i].type = KVM_PMC_FIXED;
                pmu->fixed_counters[i].vcpu = vcpu;
-               pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
+               pmu->fixed_counters[i].idx = i + KVM_FIXED_PMC_BASE_IDX;
                pmu->fixed_counters[i].current_config = 0;
+               pmu->fixed_counters[i].eventsel = intel_get_fixed_pmc_eventsel(i);
        }
 
        lbr_desc->records.nr = 0;
@@ -748,11 +704,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
        struct kvm_pmc *pmc = NULL;
        int bit, hw_idx;
 
-       for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
-                        X86_PMC_IDX_MAX) {
-               pmc = intel_pmc_idx_to_pmc(pmu, bit);
-
-               if (!pmc || !pmc_speculative_in_use(pmc) ||
+       kvm_for_each_pmc(pmu, pmc, bit, (unsigned long *)&pmu->global_ctrl) {
+               if (!pmc_speculative_in_use(pmc) ||
                    !pmc_is_globally_enabled(pmc) || !pmc->perf_event)
                        continue;
 
@@ -767,11 +720,8 @@ void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
 }
 
 struct kvm_pmu_ops intel_pmu_ops __initdata = {
-       .hw_event_available = intel_hw_event_available,
-       .pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
        .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
        .msr_idx_to_pmc = intel_msr_idx_to_pmc,
-       .is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
        .is_valid_msr = intel_is_valid_msr,
        .get_msr = intel_pmu_get_msr,
        .set_msr = intel_pmu_set_msr,
index 305237dcba88f975809da40e7173594491128ccd..c37a89eda90f8219de02c96452a5cdbe5d13da83 100644 (file)
@@ -50,6 +50,8 @@
 #include <asm/spec-ctrl.h>
 #include <asm/vmx.h>
 
+#include <trace/events/ipi.h>
+
 #include "capabilities.h"
 #include "cpuid.h"
 #include "hyperv.h"
@@ -160,7 +162,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
 
 /*
  * List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic and PT MSRs are handled specially.
+ * In addition to these, x2apic, PT and LBR MSRs are handled specially.
  */
 static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
        MSR_IA32_SPEC_CTRL,
@@ -668,25 +670,14 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
        return flexpriority_enabled && lapic_in_kernel(vcpu);
 }
 
-static int possible_passthrough_msr_slot(u32 msr)
-{
-       u32 i;
-
-       for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
-               if (vmx_possible_passthrough_msrs[i] == msr)
-                       return i;
-
-       return -ENOENT;
-}
-
-static bool is_valid_passthrough_msr(u32 msr)
+static int vmx_get_passthrough_msr_slot(u32 msr)
 {
-       bool r;
+       int i;
 
        switch (msr) {
        case 0x800 ... 0x8ff:
                /* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
-               return true;
+               return -ENOENT;
        case MSR_IA32_RTIT_STATUS:
        case MSR_IA32_RTIT_OUTPUT_BASE:
        case MSR_IA32_RTIT_OUTPUT_MASK:
@@ -701,14 +692,16 @@ static bool is_valid_passthrough_msr(u32 msr)
        case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
        case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
                /* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
-               return true;
+               return -ENOENT;
        }
 
-       r = possible_passthrough_msr_slot(msr) != -ENOENT;
-
-       WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+       for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
+               if (vmx_possible_passthrough_msrs[i] == msr)
+                       return i;
+       }
 
-       return r;
+       WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+       return -ENOENT;
 }
 
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
@@ -1291,8 +1284,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
        u16 fs_sel, gs_sel;
        int i;
 
-       vmx->req_immediate_exit = false;
-
        /*
         * Note that guest MSRs to be saved/restored can also be changed
         * when guest state is loaded. This happens when guest transitions
@@ -3964,6 +3955,7 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+       int idx;
 
        if (!cpu_has_vmx_msr_bitmap())
                return;
@@ -3973,16 +3965,13 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
        /*
         * Mark the desired intercept state in shadow bitmap, this is needed
         * for resync when the MSR filters change.
-       */
-       if (is_valid_passthrough_msr(msr)) {
-               int idx = possible_passthrough_msr_slot(msr);
-
-               if (idx != -ENOENT) {
-                       if (type & MSR_TYPE_R)
-                               clear_bit(idx, vmx->shadow_msr_intercept.read);
-                       if (type & MSR_TYPE_W)
-                               clear_bit(idx, vmx->shadow_msr_intercept.write);
-               }
+        */
+       idx = vmx_get_passthrough_msr_slot(msr);
+       if (idx >= 0) {
+               if (type & MSR_TYPE_R)
+                       clear_bit(idx, vmx->shadow_msr_intercept.read);
+               if (type & MSR_TYPE_W)
+                       clear_bit(idx, vmx->shadow_msr_intercept.write);
        }
 
        if ((type & MSR_TYPE_R) &&
@@ -4008,6 +3997,7 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+       int idx;
 
        if (!cpu_has_vmx_msr_bitmap())
                return;
@@ -4017,16 +4007,13 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
        /*
         * Mark the desired intercept state in shadow bitmap, this is needed
         * for resync when the MSR filter changes.
-       */
-       if (is_valid_passthrough_msr(msr)) {
-               int idx = possible_passthrough_msr_slot(msr);
-
-               if (idx != -ENOENT) {
-                       if (type & MSR_TYPE_R)
-                               set_bit(idx, vmx->shadow_msr_intercept.read);
-                       if (type & MSR_TYPE_W)
-                               set_bit(idx, vmx->shadow_msr_intercept.write);
-               }
+        */
+       idx = vmx_get_passthrough_msr_slot(msr);
+       if (idx >= 0) {
+               if (type & MSR_TYPE_R)
+                       set_bit(idx, vmx->shadow_msr_intercept.read);
+               if (type & MSR_TYPE_W)
+                       set_bit(idx, vmx->shadow_msr_intercept.write);
        }
 
        if (type & MSR_TYPE_R)
@@ -4137,6 +4124,9 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 i;
 
+       if (!cpu_has_vmx_msr_bitmap())
+               return;
+
        /*
         * Redo intercept permissions for MSRs that KVM is passing through to
         * the guest.  Disabling interception will check the new MSR filter and
@@ -5576,10 +5566,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 
        reg = DEBUG_REG_ACCESS_REG(exit_qualification);
        if (exit_qualification & TYPE_MOV_FROM_DR) {
-               unsigned long val;
-
-               kvm_get_dr(vcpu, dr, &val);
-               kvm_register_write(vcpu, reg, val);
+               kvm_register_write(vcpu, reg, kvm_get_dr(vcpu, dr));
                err = 0;
        } else {
                err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
@@ -6001,22 +5988,46 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
+static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu,
+                                                  bool force_immediate_exit)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-       if (!vmx->req_immediate_exit &&
-           !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
-               kvm_lapic_expired_hv_timer(vcpu);
+       /*
+        * In the *extremely* unlikely scenario that this is a spurious VM-Exit
+        * due to the timer expiring while it was "soft" disabled, just eat the
+        * exit and re-enter the guest.
+        */
+       if (unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
                return EXIT_FASTPATH_REENTER_GUEST;
-       }
 
-       return EXIT_FASTPATH_NONE;
+       /*
+        * If the timer expired because KVM used it to force an immediate exit,
+        * then mission accomplished.
+        */
+       if (force_immediate_exit)
+               return EXIT_FASTPATH_EXIT_HANDLED;
+
+       /*
+        * If L2 is active, go down the slow path as emulating the guest timer
+        * expiration likely requires synthesizing a nested VM-Exit.
+        */
+       if (is_guest_mode(vcpu))
+               return EXIT_FASTPATH_NONE;
+
+       kvm_lapic_expired_hv_timer(vcpu);
+       return EXIT_FASTPATH_REENTER_GUEST;
 }
 
 static int handle_preemption_timer(struct kvm_vcpu *vcpu)
 {
-       handle_fastpath_preemption_timer(vcpu);
+       /*
+        * This non-fastpath handler is reached if and only if the preemption
+        * timer was being used to emulate a guest timer while L2 is active.
+        * All other scenarios are supposed to be handled in the fastpath.
+        */
+       WARN_ON_ONCE(!is_guest_mode(vcpu));
+       kvm_lapic_expired_hv_timer(vcpu);
        return 1;
 }
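
The rewritten fastpath handler is an ordered decision ladder, and the order
of the checks matters: a spurious exit from a soft-disabled timer must be
swallowed before anything else is considered. A hedged sketch of the same
ladder as a pure function, with invented field names standing in for the
real VMX state:

    enum fastpath { FASTPATH_NONE, FASTPATH_REENTER_GUEST, FASTPATH_EXIT_HANDLED };

    struct timer_exit {
            int soft_disabled;        /* timer "soft" disabled: spurious exit */
            int force_immediate_exit; /* timer armed only to force an exit    */
            int l2_active;            /* nested guest: may need a nested exit */
    };

    static enum fastpath preemption_timer_fastpath(const struct timer_exit *e)
    {
            if (e->soft_disabled)         /* eat the spurious exit */
                    return FASTPATH_REENTER_GUEST;
            if (e->force_immediate_exit)  /* mission accomplished */
                    return FASTPATH_EXIT_HANDLED;
            if (e->l2_active)             /* slow path synthesizes the VM-Exit */
                    return FASTPATH_NONE;
            /* Normal case: emulate guest timer expiry, resume the guest. */
            return FASTPATH_REENTER_GUEST;
    }
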
 
@@ -6519,7 +6530,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
                vcpu->run->internal.data[0] = vectoring_info;
                vcpu->run->internal.data[1] = exit_reason.full;
-               vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
+               vcpu->run->internal.data[2] = vmx_get_exit_qual(vcpu);
                if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) {
                        vcpu->run->internal.data[ndata++] =
                                vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -7158,13 +7169,13 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
                                        msrs[i].host, false);
 }
 
-static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u64 tscl;
        u32 delta_tsc;
 
-       if (vmx->req_immediate_exit) {
+       if (force_immediate_exit) {
                vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
                vmx->loaded_vmcs->hv_timer_soft_disabled = false;
        } else if (vmx->hv_deadline_tsc != -1) {
@@ -7217,13 +7228,22 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
        barrier_nospec();
 }
 
-static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
+                                            bool force_immediate_exit)
 {
+       /*
+        * If L2 is active, some VMX preemption timer exits can still be
+        * handled in the fastpath; all other exits must use the slow path.
+        */
+       if (is_guest_mode(vcpu) &&
+           to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_PREEMPTION_TIMER)
+               return EXIT_FASTPATH_NONE;
+
        switch (to_vmx(vcpu)->exit_reason.basic) {
        case EXIT_REASON_MSR_WRITE:
                return handle_fastpath_set_msr_irqoff(vcpu);
        case EXIT_REASON_PREEMPTION_TIMER:
-               return handle_fastpath_preemption_timer(vcpu);
+               return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
        default:
                return EXIT_FASTPATH_NONE;
        }
@@ -7286,7 +7306,7 @@ out:
        guest_state_exit_irqoff();
 }
 
-static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long cr3, cr4;
@@ -7313,7 +7333,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                return EXIT_FASTPATH_NONE;
        }
 
-       trace_kvm_entry(vcpu);
+       trace_kvm_entry(vcpu, force_immediate_exit);
 
        if (vmx->ple_window_dirty) {
                vmx->ple_window_dirty = false;
@@ -7372,7 +7392,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                vmx_passthrough_lbr_msrs(vcpu);
 
        if (enable_preemption_timer)
-               vmx_update_hv_timer(vcpu);
+               vmx_update_hv_timer(vcpu, force_immediate_exit);
+       else if (force_immediate_exit)
+               smp_send_reschedule(vcpu->cpu);
 
        kvm_wait_lapic_expire(vcpu);
 
@@ -7436,10 +7458,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
 
-       if (is_guest_mode(vcpu))
-               return EXIT_FASTPATH_NONE;
-
-       return vmx_exit_handlers_fastpath(vcpu);
+       return vmx_exit_handlers_fastpath(vcpu, force_immediate_exit);
 }
 
 static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
@@ -7919,11 +7938,6 @@ static __init void vmx_set_cpu_caps(void)
                kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
 }
 
-static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
-       to_vmx(vcpu)->req_immediate_exit = true;
-}
-
 static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
                                  struct x86_instruction_info *info)
 {
@@ -8376,8 +8390,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .check_intercept = vmx_check_intercept,
        .handle_exit_irqoff = vmx_handle_exit_irqoff,
 
-       .request_immediate_exit = vmx_request_immediate_exit,
-
        .sched_in = vmx_sched_in,
 
        .cpu_dirty_log_size = PML_ENTITY_NUM,
@@ -8637,7 +8649,6 @@ static __init int hardware_setup(void)
        if (!enable_preemption_timer) {
                vmx_x86_ops.set_hv_timer = NULL;
                vmx_x86_ops.cancel_hv_timer = NULL;
-               vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
        }
 
        kvm_caps.supported_mce_cap |= MCG_LMCE_P;
index e3b0985bb74a1f4d57be41cbb0d283abbc476625..65786dbe7d60bdf753db779312bb70754ccc6f1e 100644
@@ -332,8 +332,6 @@ struct vcpu_vmx {
        unsigned int ple_window;
        bool ple_window_dirty;
 
-       bool req_immediate_exit;
-
        /* Support for PML */
 #define PML_ENTITY_NUM         512
        struct page *pml_pg;
index ffe580169c93f078532e3ddf953a2f9cf3bde627..47d9f03b7778373393b9853fe32b153dadd9de29 100644
@@ -1399,22 +1399,19 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 }
 EXPORT_SYMBOL_GPL(kvm_set_dr);
 
-void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr)
 {
        size_t size = ARRAY_SIZE(vcpu->arch.db);
 
        switch (dr) {
        case 0 ... 3:
-               *val = vcpu->arch.db[array_index_nospec(dr, size)];
-               break;
+               return vcpu->arch.db[array_index_nospec(dr, size)];
        case 4:
        case 6:
-               *val = vcpu->arch.dr6;
-               break;
+               return vcpu->arch.dr6;
        case 5:
        default: /* 7 */
-               *val = vcpu->arch.dr7;
-               break;
+               return vcpu->arch.dr7;
        }
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
@@ -2860,7 +2857,11 @@ static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
        return v * clock->mult;
 }
 
-static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
+/*
+ * As with get_kvmclock_base_ns(), this counts from boot time, at the
+ * frequency of CLOCK_MONOTONIC_RAW (hence adding gtod->offs_boot).
+ */
+static int do_kvmclock_base(s64 *t, u64 *tsc_timestamp)
 {
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
        unsigned long seq;
@@ -2879,6 +2880,29 @@ static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
        return mode;
 }
 
+/*
+ * This calculates CLOCK_MONOTONIC at the time of the TSC snapshot, with
+ * no boot time offset.
+ */
+static int do_monotonic(s64 *t, u64 *tsc_timestamp)
+{
+       struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+       unsigned long seq;
+       int mode;
+       u64 ns;
+
+       do {
+               seq = read_seqcount_begin(&gtod->seq);
+               ns = gtod->clock.base_cycles;
+               ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
+               ns >>= gtod->clock.shift;
+               ns += ktime_to_ns(gtod->clock.offset);
+       } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+       *t = ns;
+
+       return mode;
+}
+
 static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
 {
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
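
Both do_kvmclock_base() and the new do_monotonic() read the pvclock_gtod_data
copy under a seqcount retry loop: snapshot the sequence count, read, and retry
if a writer bumped the count in the meantime. A simplified user-space analogue
of that read side (the kernel's seqcount primitives additionally handle memory
ordering and lockdep; this sketch only shows the retry shape, and the clock
math is stubbed out):

    #include <stdatomic.h>
    #include <stdint.h>

    struct clock_copy {
            atomic_uint seq;        /* odd while a writer is mid-update */
            uint64_t base_cycles;
            uint64_t offset_ns;
    };

    static uint64_t read_clock_ns(struct clock_copy *c, uint64_t tsc)
    {
            unsigned int seq;
            uint64_t ns;

            do {
                    /* Wait out an in-progress writer, then snapshot seq. */
                    while ((seq = atomic_load_explicit(&c->seq,
                                                       memory_order_acquire)) & 1)
                            ;
                    ns = c->base_cycles + tsc + c->offset_ns; /* stand-in math */
            } while (atomic_load_explicit(&c->seq,
                                          memory_order_acquire) != seq);

            return ns;
    }
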
@@ -2900,18 +2924,42 @@ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
        return mode;
 }
 
-/* returns true if host is using TSC based clocksource */
+/*
+ * Calculates the kvmclock_base_ns (CLOCK_MONOTONIC_RAW + boot time) and
+ * reports the TSC value from which it did so. Returns true if the host is
+ * using a TSC-based clocksource.
+ */
 static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
 {
        /* checked again under seqlock below */
        if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
                return false;
 
-       return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
-                                                     tsc_timestamp));
+       return gtod_is_based_on_tsc(do_kvmclock_base(kernel_ns,
+                                                    tsc_timestamp));
 }
 
-/* returns true if host is using TSC based clocksource */
+/*
+ * Calculates CLOCK_MONOTONIC and reports the TSC value from which it did
+ * so. Returns true if the host is using a TSC-based clocksource.
+ */
+bool kvm_get_monotonic_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
+{
+       /* checked again under seqlock below */
+       if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
+               return false;
+
+       return gtod_is_based_on_tsc(do_monotonic(kernel_ns,
+                                                tsc_timestamp));
+}
+
+/*
+ * Calculates CLOCK_REALTIME and reports the TSC value from which it did
+ * so. Returns true if the host is using a TSC-based clocksource.
+ *
+ * DO NOT USE this for anything related to migration. You want CLOCK_TAI
+ * for that.
+ */
 static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
                                           u64 *tsc_timestamp)
 {
@@ -3158,7 +3206,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 
        guest_hv_clock->version = ++vcpu->hv_clock.version;
 
-       mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
+       kvm_gpc_mark_dirty_in_slot(gpc);
        read_unlock_irqrestore(&gpc->lock, flags);
 
        trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
@@ -4680,7 +4728,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                    KVM_XEN_HVM_CONFIG_SHARED_INFO |
                    KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
                    KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
-                   KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
+                   KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE |
+                   KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA;
                if (sched_info_on())
                        r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
                             KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
@@ -5064,8 +5113,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        int idx;
 
        if (vcpu->preempted) {
-               if (!vcpu->arch.guest_state_protected)
-                       vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+               vcpu->arch.preempted_in_kernel = kvm_arch_vcpu_in_kernel(vcpu);
 
                /*
                 * Take the srcu lock as memslots will be accessed to check the gfn
@@ -5512,18 +5560,23 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
                                             struct kvm_debugregs *dbgregs)
 {
-       unsigned long val;
+       unsigned int i;
 
        memset(dbgregs, 0, sizeof(*dbgregs));
-       memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
-       kvm_get_dr(vcpu, 6, &val);
-       dbgregs->dr6 = val;
+
+       BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.db) != ARRAY_SIZE(dbgregs->db));
+       for (i = 0; i < ARRAY_SIZE(vcpu->arch.db); i++)
+               dbgregs->db[i] = vcpu->arch.db[i];
+
+       dbgregs->dr6 = vcpu->arch.dr6;
        dbgregs->dr7 = vcpu->arch.dr7;
 }
 
 static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
                                            struct kvm_debugregs *dbgregs)
 {
+       unsigned int i;
+
        if (dbgregs->flags)
                return -EINVAL;
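
The memcpy() is replaced by an element-wise copy plus a BUILD_BUG_ON() that
the two arrays stay the same length; per-element assignment keeps working
even if the kernel-internal and ABI arrays use different element widths.
A small sketch of the same compile-time check in standard C11, with invented
stand-in types:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    struct guest_state { unsigned long db[4]; };  /* width varies by arch */
    struct user_abi    { uint64_t db[4]; };       /* fixed-width ABI      */

    static void copy_db(struct user_abi *dst, const struct guest_state *src)
    {
            size_t i;

            /* Fails to compile if the arrays ever fall out of lock-step. */
            static_assert(sizeof(dst->db) / sizeof(dst->db[0]) ==
                          sizeof(src->db) / sizeof(src->db[0]),
                          "db arrays must have the same number of elements");

            for (i = 0; i < sizeof(src->db) / sizeof(src->db[0]); i++)
                    dst->db[i] = src->db[i];
    }
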
 
@@ -5532,7 +5585,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
        if (!kvm_dr7_valid(dbgregs->dr7))
                return -EINVAL;
 
-       memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+       for (i = 0; i < ARRAY_SIZE(vcpu->arch.db); i++)
+               vcpu->arch.db[i] = dbgregs->db[i];
+
        kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = dbgregs->dr6;
        vcpu->arch.dr7 = dbgregs->dr7;
@@ -8180,10 +8235,9 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
        kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
 }
 
-static void emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
-                           unsigned long *dest)
+static unsigned long emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr)
 {
-       kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+       return kvm_get_dr(emul_to_vcpu(ctxt), dr);
 }
 
 static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
@@ -8405,12 +8459,9 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
        return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
 }
 
-static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
-                             u32 pmc)
+static int emulator_check_rdpmc_early(struct x86_emulate_ctxt *ctxt, u32 pmc)
 {
-       if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
-               return 0;
-       return -EINVAL;
+       return kvm_pmu_check_rdpmc_early(emul_to_vcpu(ctxt), pmc);
 }
 
 static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -8542,7 +8593,7 @@ static const struct x86_emulate_ops emulate_ops = {
        .set_msr_with_filter = emulator_set_msr_with_filter,
        .get_msr_with_filter = emulator_get_msr_with_filter,
        .get_msr             = emulator_get_msr,
-       .check_pmc           = emulator_check_pmc,
+       .check_rdpmc_early   = emulator_check_rdpmc_early,
        .read_pmc            = emulator_read_pmc,
        .halt                = emulator_halt,
        .wbinvd              = emulator_wbinvd,
@@ -8803,31 +8854,24 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 
        kvm_release_pfn_clean(pfn);
 
-       /* The instructions are well-emulated on direct mmu. */
-       if (vcpu->arch.mmu->root_role.direct) {
-               unsigned int indirect_shadow_pages;
-
-               write_lock(&vcpu->kvm->mmu_lock);
-               indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
-               write_unlock(&vcpu->kvm->mmu_lock);
-
-               if (indirect_shadow_pages)
-                       kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
-
-               return true;
-       }
-
        /*
-        * if emulation was due to access to shadowed page table
-        * and it failed try to unshadow page and re-enter the
-        * guest to let CPU execute the instruction.
+        * If emulation may have been triggered by a write to a shadowed page
+        * table, unprotect the gfn (zap any relevant SPTEs) and re-enter the
+        * guest to let the CPU re-execute the instruction in the hope that the
+        * CPU can cleanly execute the instruction that KVM failed to emulate.
         */
-       kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+       if (vcpu->kvm->arch.indirect_shadow_pages)
+               kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
 
        /*
-        * If the access faults on its page table, it can not
-        * be fixed by unprotecting shadow page and it should
-        * be reported to userspace.
+        * If the failed instruction faulted on an access to page tables that
+        * are used to translate any part of the instruction, KVM can't resolve
+        * the issue by unprotecting the gfn, as zapping the shadow page will
+        * result in the instruction taking a !PRESENT page fault and thus put
+        * the vCPU into an infinite loop of page faults.  E.g. KVM will create
+        * a SPTE and write-protect the gfn to resolve the !PRESENT fault, and
+        * then zap the SPTE to unprotect the gfn, and then do it all over
+        * again.  Report the error to userspace.
         */
        return !(emulation_type & EMULTYPE_WRITE_PF_TO_SP);
 }
@@ -8922,7 +8966,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
        if (unlikely(!r))
                return 0;
 
-       kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
+       kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
 
        /*
         * rflags is the old, "raw" value of the flags.  The new value has
@@ -9235,9 +9279,9 @@ writeback:
                 */
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
-                       kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_INSTRUCTIONS);
+                       kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.INSTRUCTIONS_RETIRED);
                        if (ctxt->is_branch)
-                               kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+                               kvm_pmu_trigger_event(vcpu, kvm_pmu_eventsel.BRANCH_INSTRUCTIONS_RETIRED);
                        kvm_rip_write(vcpu, ctxt->eip);
                        if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
                                r = kvm_vcpu_do_singlestep(vcpu);
@@ -9648,11 +9692,13 @@ static void kvm_x86_check_cpu_compat(void *ret)
        *(int *)ret = kvm_x86_check_processor_compatibility();
 }
 
-static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
+int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 {
        u64 host_pat;
        int r, cpu;
 
+       guard(mutex)(&vendor_module_lock);
+
        if (kvm_x86_ops.hardware_enable) {
                pr_err("already loaded vendor module '%s'\n", kvm_x86_ops.name);
                return -EEXIST;
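
guard(mutex)() gives scope-based unlocking: the mutex is released
automatically on every return path, which is what allows the locking
wrapper below to be deleted. A rough user-space analogue built on the same
compiler feature, __attribute__((cleanup)), that the kernel's guard
infrastructure uses (all names here are invented):

    #include <pthread.h>

    static pthread_mutex_t vendor_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Runs automatically when the guard variable goes out of scope. */
    static void mutex_unlocker(pthread_mutex_t **m)
    {
            pthread_mutex_unlock(*m);
    }

    /* Lock now; unlock when the enclosing scope exits, on any path. */
    #define GUARD_MUTEX(m) \
            pthread_mutex_t *scope_guard \
                    __attribute__((cleanup(mutex_unlocker))) = \
                    (pthread_mutex_lock(m), (m))

    static int vendor_init(int already_loaded)
    {
            GUARD_MUTEX(&vendor_lock);

            if (already_loaded)
                    return -1;      /* unlocked here */

            /* ... initialization work ... */
            return 0;               /* and here */
    }
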
@@ -9782,17 +9828,6 @@ out_free_x86_emulator_cache:
        kmem_cache_destroy(x86_emulator_cache);
        return r;
 }
-
-int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
-{
-       int r;
-
-       mutex_lock(&vendor_module_lock);
-       r = __kvm_x86_vendor_init(ops);
-       mutex_unlock(&vendor_module_lock);
-
-       return r;
-}
 EXPORT_SYMBOL_GPL(kvm_x86_vendor_init);
 
 void kvm_x86_vendor_exit(void)
@@ -10689,12 +10724,6 @@ static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
        static_call_cond(kvm_x86_set_apic_access_page_addr)(vcpu);
 }
 
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
-       smp_send_reschedule(vcpu->cpu);
-}
-EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
-
 /*
  * Called within kvm->srcu read side.
  * Returns 1 to let vcpu_run() continue the guest execution loop without
@@ -10944,10 +10973,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                goto cancel_injection;
        }
 
-       if (req_immediate_exit) {
+       if (req_immediate_exit)
                kvm_make_request(KVM_REQ_EVENT, vcpu);
-               static_call(kvm_x86_request_immediate_exit)(vcpu);
-       }
 
        fpregs_assert_state_consistent();
        if (test_thread_flag(TIF_NEED_FPU_LOAD))
@@ -10978,7 +11005,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
                             (kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
 
-               exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
+               exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu, req_immediate_exit);
                if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
                        break;
 
@@ -12065,7 +12092,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        vcpu->arch.regs_avail = ~0;
        vcpu->arch.regs_dirty = ~0;
 
-       kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm, vcpu, KVM_HOST_USES_PFN);
+       kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm);
 
        if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -12076,27 +12103,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        if (r < 0)
                return r;
 
-       if (irqchip_in_kernel(vcpu->kvm)) {
-               r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
-               if (r < 0)
-                       goto fail_mmu_destroy;
-
-               /*
-                * Defer evaluating inhibits until the vCPU is first run, as
-                * this vCPU will not get notified of any changes until this
-                * vCPU is visible to other vCPUs (marked online and added to
-                * the set of vCPUs).  Opportunistically mark APICv active as
-                * VMX in particularly is highly unlikely to have inhibits.
-                * Ignore the current per-VM APICv state so that vCPU creation
-                * is guaranteed to run with a deterministic value, the request
-                * will ensure the vCPU gets the correct state before VM-Entry.
-                */
-               if (enable_apicv) {
-                       vcpu->arch.apic->apicv_active = true;
-                       kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
-               }
-       } else
-               static_branch_inc(&kvm_has_noapic_vcpu);
+       r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+       if (r < 0)
+               goto fail_mmu_destroy;
 
        r = -ENOMEM;
 
@@ -12217,8 +12226,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        free_page((unsigned long)vcpu->arch.pio_data);
        kvfree(vcpu->arch.cpuid_entries);
-       if (!lapic_in_kernel(vcpu))
-               static_branch_dec(&kvm_has_noapic_vcpu);
 }
 
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -12495,9 +12502,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
        return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
 }
 
-__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
-EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
-
 void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -13100,11 +13104,13 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 
 bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
 {
-       if (kvm_vcpu_apicv_active(vcpu) &&
-           static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
-               return true;
+       return kvm_vcpu_apicv_active(vcpu) &&
+              static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu);
+}
 
-       return false;
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.preempted_in_kernel;
 }
 
 bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
@@ -13127,9 +13133,6 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
        if (vcpu->arch.guest_state_protected)
                return true;
 
-       if (vcpu != kvm_get_running_vcpu())
-               return vcpu->arch.preempted_in_kernel;
-
        return static_call(kvm_x86_get_cpl)(vcpu) == 0;
 }
 
@@ -13924,9 +13927,6 @@ module_init(kvm_x86_init);
 
 static void __exit kvm_x86_exit(void)
 {
-       /*
-        * If module_init() is implemented, module_exit() must also be
-        * implemented to allow module unload.
-        */
+       WARN_ON_ONCE(static_branch_unlikely(&kvm_has_noapic_vcpu));
 }
 module_exit(kvm_x86_exit);
index 2f7e191666580085c85785ada86789fb9d1842b1..a8b71803777baa13060b8051cc2b9aa3ce7287fd 100644
@@ -294,6 +294,7 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
 u64 get_kvmclock_ns(struct kvm *kvm);
 uint64_t kvm_get_wall_clock_epoch(struct kvm *kvm);
+bool kvm_get_monotonic_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp);
 
 int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
        gva_t addr, void *val, unsigned int bytes,
@@ -431,12 +432,6 @@ static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)
        return kvm->arch.notify_vmexit_flags & KVM_X86_NOTIFY_VMEXIT_ENABLED;
 }
 
-enum kvm_intr_type {
-       /* Values are arbitrary, but must be non-zero. */
-       KVM_HANDLING_IRQ = 1,
-       KVM_HANDLING_NMI,
-};
-
 static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
                                                 enum kvm_intr_type intr)
 {
index 4b4e738c6f1b79e474d18519a51e72f3d53286cc..f65b35a05d91687b3a159af967a7408c8d73c27c 100644
@@ -10,7 +10,7 @@
 #include "x86.h"
 #include "xen.h"
 #include "hyperv.h"
-#include "lapic.h"
+#include "irq.h"
 
 #include <linux/eventfd.h>
 #include <linux/kvm_host.h>
@@ -24,6 +24,7 @@
 #include <xen/interface/sched.h>
 
 #include <asm/xen/cpuid.h>
+#include <asm/pvclock.h>
 
 #include "cpuid.h"
 #include "trace.h"
@@ -34,41 +35,32 @@ static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r);
 
 DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);
 
-static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
+static int kvm_xen_shared_info_init(struct kvm *kvm)
 {
        struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
        struct pvclock_wall_clock *wc;
-       gpa_t gpa = gfn_to_gpa(gfn);
        u32 *wc_sec_hi;
        u32 wc_version;
        u64 wall_nsec;
        int ret = 0;
        int idx = srcu_read_lock(&kvm->srcu);
 
-       if (gfn == KVM_XEN_INVALID_GFN) {
-               kvm_gpc_deactivate(gpc);
-               goto out;
-       }
+       read_lock_irq(&gpc->lock);
+       while (!kvm_gpc_check(gpc, PAGE_SIZE)) {
+               read_unlock_irq(&gpc->lock);
 
-       do {
-               ret = kvm_gpc_activate(gpc, gpa, PAGE_SIZE);
+               ret = kvm_gpc_refresh(gpc, PAGE_SIZE);
                if (ret)
                        goto out;
 
-               /*
-                * This code mirrors kvm_write_wall_clock() except that it writes
-                * directly through the pfn cache and doesn't mark the page dirty.
-                */
-               wall_nsec = kvm_get_wall_clock_epoch(kvm);
-
-               /* It could be invalid again already, so we need to check */
                read_lock_irq(&gpc->lock);
+       }
 
-               if (gpc->valid)
-                       break;
-
-               read_unlock_irq(&gpc->lock);
-       } while (1);
+       /*
+        * This code mirrors kvm_write_wall_clock() except that it writes
+        * directly through the pfn cache and doesn't mark the page dirty.
+        */
+       wall_nsec = kvm_get_wall_clock_epoch(kvm);
 
        /* Paranoia checks on the 32-bit struct layout */
        BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
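
The activate-and-poll loop is replaced by the standard check/refresh idiom
for the pfn cache: take the read lock, check validity, and if the cache is
stale drop the lock, refresh, and retry. A runnable user-space analogue of
just that locking shape (types and helpers here are invented):

    #include <pthread.h>
    #include <stdbool.h>

    struct cache {
            pthread_rwlock_t lock;
            bool valid;
    };

    /* Re-establish the mapping; may sleep, so runs with no lock held. */
    static int cache_refresh(struct cache *c)
    {
            pthread_rwlock_wrlock(&c->lock);
            c->valid = true;
            pthread_rwlock_unlock(&c->lock);
            return 0;
    }

    static int use_cache(struct cache *c)
    {
            int ret;

            pthread_rwlock_rdlock(&c->lock);
            while (!c->valid) {
                    pthread_rwlock_unlock(&c->lock);

                    ret = cache_refresh(c);
                    if (ret)
                            return ret;        /* propagate the failure */

                    pthread_rwlock_rdlock(&c->lock); /* re-check under lock */
            }
            /* ... the cached mapping is safe to use here ... */
            pthread_rwlock_unlock(&c->lock);
            return 0;
    }
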
@@ -158,8 +150,93 @@ static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer)
        return HRTIMER_NORESTART;
 }
 
-static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_ns)
+static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs,
+                               bool linux_wa)
 {
+       int64_t kernel_now, delta;
+       uint64_t guest_now;
+
+       /*
+        * The guest provides the requested timeout in absolute nanoseconds
+        * of the KVM clock — as *it* sees it, based on the scaled TSC and
+        * the pvclock information provided by KVM.
+        *
+        * The kernel doesn't support hrtimers based on CLOCK_MONOTONIC_RAW,
+        * so use CLOCK_MONOTONIC. On the timescales covered by timers, the
+        * difference won't matter much as there is no cumulative effect.
+        *
+        * Calculate the time for some arbitrary point in time around "now"
+        * in terms of both kvmclock and CLOCK_MONOTONIC. Calculate the
+        * delta between the kvmclock "now" value and the guest's requested
+        * timeout, apply the "Linux workaround" described below, and add
+        * the resulting delta to the CLOCK_MONOTONIC "now" value, to get
+        * the absolute CLOCK_MONOTONIC time at which the timer should
+        * fire.
+        */
+       if (vcpu->arch.hv_clock.version && vcpu->kvm->arch.use_master_clock &&
+           static_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+               uint64_t host_tsc, guest_tsc;
+
+               if (!IS_ENABLED(CONFIG_64BIT) ||
+                   !kvm_get_monotonic_and_clockread(&kernel_now, &host_tsc)) {
+                       /*
+                        * Don't fall back to get_kvmclock_ns() because it's
+                        * broken; it has a systemic error in its results
+                        * because it scales directly from host TSC to
+                        * nanoseconds, and doesn't scale first to guest TSC
+                        * and *then* to nanoseconds as the guest does.
+                        *
+                        * A small error is still introduced here, because
+                        * time continues to elapse between the ktime_get()
+                        * and the subsequent rdtsc(), but it avoids the
+                        * systemic drift of get_kvmclock_ns().
+                        */
+                       kernel_now = ktime_get(); /* This is CLOCK_MONOTONIC */
+                       host_tsc = rdtsc();
+               }
+
+               /* Calculate the guest kvmclock as the guest would do it. */
+               guest_tsc = kvm_read_l1_tsc(vcpu, host_tsc);
+               guest_now = __pvclock_read_cycles(&vcpu->arch.hv_clock,
+                                                 guest_tsc);
+       } else {
+               /*
+                * Without CONSTANT_TSC, get_kvmclock_ns() is the only option.
+                *
+                * Also if the guest PV clock hasn't been set up yet, as is
+                * likely to be the case during migration when the vCPU has
+                * not been run yet. It would be possible to calculate the
+                * scaling factors properly in that case but there's not much
+                * point in doing so. The get_kvmclock_ns() drift accumulates
+                * over time, so it's OK to use it at startup. Besides, on
+                * migration there's going to be a little bit of skew in the
+                * precise moment at which timers fire anyway. Often they'll
+                * be in the "past" by the time the VM is running again after
+                * migration.
+                */
+               guest_now = get_kvmclock_ns(vcpu->kvm);
+               kernel_now = ktime_get();
+       }
+
+       delta = guest_abs - guest_now;
+
+       /*
+        * Xen has a 'Linux workaround' in do_set_timer_op() which checks for
+        * negative absolute timeout values (caused by integer overflow), and
+        * for values about 13 days in the future (2^50 ns), which would be
+        * caused by jiffies overflow. For those cases, Xen sets the timeout
+        * 100ms in the future (not *too* soon, since if a guest really did
+        * set a long timeout on purpose we don't want to keep churning CPU
+        * time by waking it up).  Emulate Xen's workaround when starting the
+        * timer in response to __HYPERVISOR_set_timer_op.
+        */
+       if (linux_wa &&
+           unlikely((int64_t)guest_abs < 0 ||
+                    (delta > 0 && (uint32_t) (delta >> 50) != 0))) {
+               delta = 100 * NSEC_PER_MSEC;
+               guest_abs = guest_now + delta;
+       }
+
        /*
         * Avoid races with the old timer firing. Checking timer_expires
         * to avoid calling hrtimer_cancel() will only have false positives
@@ -171,14 +248,12 @@ static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, s64 delta_
        atomic_set(&vcpu->arch.xen.timer_pending, 0);
        vcpu->arch.xen.timer_expires = guest_abs;
 
-       if (delta_ns <= 0) {
+       if (delta <= 0)
                xen_timer_callback(&vcpu->arch.xen.timer);
-       } else {
-               ktime_t ktime_now = ktime_get();
+       else
                hrtimer_start(&vcpu->arch.xen.timer,
-                             ktime_add_ns(ktime_now, delta_ns),
+                             ktime_add_ns(kernel_now, delta),
                              HRTIMER_MODE_ABS_HARD);
-       }
 }
 
 static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu)
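
The timeout arithmetic above reduces to one expression: take the delta
between the guest's requested kvmclock time and the kvmclock "now", clamp it
with the Linux workaround, and add it to the CLOCK_MONOTONIC "now" to get
the absolute hrtimer expiry. A hedged stand-alone sketch of just that
arithmetic, with the clock sampling assumed done by the caller:

    #include <stdint.h>

    #define NSEC_PER_MSEC 1000000LL

    static int64_t timer_expiry(uint64_t guest_abs, /* requested kvmclock ns */
                                uint64_t guest_now, /* kvmclock "now"        */
                                int64_t kernel_now, /* CLOCK_MONOTONIC "now" */
                                int linux_wa)
    {
            int64_t delta = (int64_t)(guest_abs - guest_now);

            /*
             * Negative absolute timeouts (integer overflow) and timeouts more
             * than 2^50 ns (~13 days) out (jiffies overflow) get clamped to
             * 100ms, mirroring Xen's do_set_timer_op() workaround.
             */
            if (linux_wa &&
                ((int64_t)guest_abs < 0 ||
                 (delta > 0 && (uint32_t)(delta >> 50) != 0)))
                    delta = 100 * NSEC_PER_MSEC;

            return kernel_now + delta;  /* arm the hrtimer at this time */
    }
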
@@ -452,14 +527,13 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
                smp_wmb();
        }
 
-       if (user_len2)
+       if (user_len2) {
+               kvm_gpc_mark_dirty_in_slot(gpc2);
                read_unlock(&gpc2->lock);
+       }
 
+       kvm_gpc_mark_dirty_in_slot(gpc1);
        read_unlock_irqrestore(&gpc1->lock, flags);
-
-       mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT);
-       if (user_len2)
-               mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT);
 }
 
 void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
@@ -493,10 +567,9 @@ void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
                kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable);
 }
 
-static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
+void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
 {
        struct kvm_lapic_irq irq = { };
-       int r;
 
        irq.dest_id = v->vcpu_id;
        irq.vector = v->arch.xen.upcall_vector;
@@ -505,8 +578,7 @@ static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
        irq.delivery_mode = APIC_DM_FIXED;
        irq.level = 1;
 
-       /* The fast version will always work for physical unicast */
-       WARN_ON_ONCE(!kvm_irq_delivery_to_apic_fast(v->kvm, NULL, &irq, &r, NULL));
+       kvm_irq_delivery_to_apic(v->kvm, NULL, &irq, NULL);
 }
 
 /*
@@ -565,13 +637,13 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
                             : "0" (evtchn_pending_sel32));
                WRITE_ONCE(vi->evtchn_upcall_pending, 1);
        }
+
+       kvm_gpc_mark_dirty_in_slot(gpc);
        read_unlock_irqrestore(&gpc->lock, flags);
 
        /* For the per-vCPU lapic vector, deliver it as MSI. */
        if (v->arch.xen.upcall_vector)
                kvm_xen_inject_vcpu_vector(v);
-
-       mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
 }
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
@@ -635,17 +707,59 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                } else {
                        mutex_lock(&kvm->arch.xen.xen_lock);
                        kvm->arch.xen.long_mode = !!data->u.long_mode;
+
+                       /*
+                        * Re-initialize shared_info to put the wallclock in the
+                        * correct place. Whilst it's not necessary to do this
+                        * unless the mode is actually changed, it does no harm
+                        * to make the call anyway.
+                        */
+                       r = kvm->arch.xen.shinfo_cache.active ?
+                               kvm_xen_shared_info_init(kvm) : 0;
                        mutex_unlock(&kvm->arch.xen.xen_lock);
-                       r = 0;
                }
                break;
 
        case KVM_XEN_ATTR_TYPE_SHARED_INFO:
+       case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA: {
+               int idx;
+
                mutex_lock(&kvm->arch.xen.xen_lock);
-               r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
+
+               idx = srcu_read_lock(&kvm->srcu);
+
+               if (data->type == KVM_XEN_ATTR_TYPE_SHARED_INFO) {
+                       gfn_t gfn = data->u.shared_info.gfn;
+
+                       if (gfn == KVM_XEN_INVALID_GFN) {
+                               kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache);
+                               r = 0;
+                       } else {
+                               r = kvm_gpc_activate(&kvm->arch.xen.shinfo_cache,
+                                                    gfn_to_gpa(gfn), PAGE_SIZE);
+                       }
+               } else {
+                       void __user *hva = u64_to_user_ptr(data->u.shared_info.hva);
+
+                       if (!PAGE_ALIGNED(hva) || !access_ok(hva, PAGE_SIZE)) {
+                               r = -EINVAL;
+                       } else if (!hva) {
+                               kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache);
+                               r = 0;
+                       } else {
+                               r = kvm_gpc_activate_hva(&kvm->arch.xen.shinfo_cache,
+                                                        (unsigned long)hva, PAGE_SIZE);
+                       }
+               }
+
+               srcu_read_unlock(&kvm->srcu, idx);
+
+               if (!r && kvm->arch.xen.shinfo_cache.active)
+                       r = kvm_xen_shared_info_init(kvm);
+
                mutex_unlock(&kvm->arch.xen.xen_lock);
                break;
-
+       }
        case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
                if (data->u.vector && data->u.vector < 0x10)
                        r = -EINVAL;
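
From userspace, the new attribute is set through the existing
KVM_XEN_HVM_SET_ATTR ioctl, passing a page-aligned host virtual address
instead of a GFN. A hedged usage sketch, assuming uapi headers from a kernel
that carries this series:

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* vm_fd: an open KVM VM fd; shinfo: page-aligned memory in our space. */
    static int set_shinfo_hva(int vm_fd, void *shinfo)
    {
            struct kvm_xen_hvm_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
            attr.u.shared_info.hva = (uintptr_t)shinfo;

            /* An hva of 0 deactivates the cache, per the handler above. */
            return ioctl(vm_fd, KVM_XEN_HVM_SET_ATTR, &attr);
    }
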
@@ -699,13 +813,21 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                break;
 
        case KVM_XEN_ATTR_TYPE_SHARED_INFO:
-               if (kvm->arch.xen.shinfo_cache.active)
+               if (kvm_gpc_is_gpa_active(&kvm->arch.xen.shinfo_cache))
                        data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
                else
                        data->u.shared_info.gfn = KVM_XEN_INVALID_GFN;
                r = 0;
                break;
 
+       case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA:
+               if (kvm_gpc_is_hva_active(&kvm->arch.xen.shinfo_cache))
+                       data->u.shared_info.hva = kvm->arch.xen.shinfo_cache.uhva;
+               else
+                       data->u.shared_info.hva = 0;
+               r = 0;
+               break;
+
        case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
                data->u.vector = kvm->arch.xen.upcall_vector;
                r = 0;
@@ -742,20 +864,33 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 
        switch (data->type) {
        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
+       case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA:
                /* No compat necessary here. */
                BUILD_BUG_ON(sizeof(struct vcpu_info) !=
                             sizeof(struct compat_vcpu_info));
                BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
                             offsetof(struct compat_vcpu_info, time));
 
-               if (data->u.gpa == KVM_XEN_INVALID_GPA) {
-                       kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
-                       r = 0;
-                       break;
+               if (data->type == KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO) {
+                       if (data->u.gpa == KVM_XEN_INVALID_GPA) {
+                               kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
+                               r = 0;
+                               break;
+                       }
+
+                       r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache,
+                                            data->u.gpa, sizeof(struct vcpu_info));
+               } else {
+                       if (data->u.hva == 0) {
+                               kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
+                               r = 0;
+                               break;
+                       }
+
+                       r = kvm_gpc_activate_hva(&vcpu->arch.xen.vcpu_info_cache,
+                                                data->u.hva, sizeof(struct vcpu_info));
                }
 
-               r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache,
-                                    data->u.gpa, sizeof(struct vcpu_info));
                if (!r)
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
@@ -944,9 +1079,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 
                /* Start the timer if the new value has a valid vector+expiry. */
                if (data->u.timer.port && data->u.timer.expires_ns)
-                       kvm_xen_start_timer(vcpu, data->u.timer.expires_ns,
-                                           data->u.timer.expires_ns -
-                                           get_kvmclock_ns(vcpu->kvm));
+                       kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, false);
 
                r = 0;
                break;
@@ -977,13 +1110,21 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 
        switch (data->type) {
        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
-               if (vcpu->arch.xen.vcpu_info_cache.active)
+               if (kvm_gpc_is_gpa_active(&vcpu->arch.xen.vcpu_info_cache))
                        data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
                else
                        data->u.gpa = KVM_XEN_INVALID_GPA;
                r = 0;
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA:
+               if (kvm_gpc_is_hva_active(&vcpu->arch.xen.vcpu_info_cache))
+                       data->u.hva = vcpu->arch.xen.vcpu_info_cache.uhva;
+               else
+                       data->u.hva = 0;
+               r = 0;
+               break;
+
        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
                if (vcpu->arch.xen.vcpu_time_info_cache.active)
                        data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
@@ -1093,9 +1234,24 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
        u32 page_num = data & ~PAGE_MASK;
        u64 page_addr = data & PAGE_MASK;
        bool lm = is_long_mode(vcpu);
+       int r = 0;
+
+       mutex_lock(&kvm->arch.xen.xen_lock);
+       if (kvm->arch.xen.long_mode != lm) {
+               kvm->arch.xen.long_mode = lm;
+
+               /*
+                * Re-initialize shared_info to put the wallclock in the
+                * correct place.
+                */
+               if (kvm->arch.xen.shinfo_cache.active &&
+                   kvm_xen_shared_info_init(kvm))
+                       r = 1;
+       }
+       mutex_unlock(&kvm->arch.xen.xen_lock);
 
-       /* Latch long_mode for shared_info pages etc. */
-       vcpu->kvm->arch.xen.long_mode = lm;
+       if (r)
+               return r;
 
        /*
         * If Xen hypercall intercept is enabled, fill the hypercall
@@ -1396,7 +1552,6 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
 {
        struct vcpu_set_singleshot_timer oneshot;
        struct x86_exception e;
-       s64 delta;
 
        if (!kvm_xen_timer_enabled(vcpu))
                return false;
@@ -1430,9 +1585,7 @@ static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd,
                        return true;
                }
 
-               /* A delta <= 0 results in an immediate callback, which is what we want */
-               delta = oneshot.timeout_abs_ns - get_kvmclock_ns(vcpu->kvm);
-               kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, delta);
+               kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, false);
                *r = 0;
                return true;
 
@@ -1455,29 +1608,10 @@ static bool kvm_xen_hcall_set_timer_op(struct kvm_vcpu *vcpu, uint64_t timeout,
        if (!kvm_xen_timer_enabled(vcpu))
                return false;
 
-       if (timeout) {
-               uint64_t guest_now = get_kvmclock_ns(vcpu->kvm);
-               int64_t delta = timeout - guest_now;
-
-               /* Xen has a 'Linux workaround' in do_set_timer_op() which
-                * checks for negative absolute timeout values (caused by
-                * integer overflow), and for values about 13 days in the
-                * future (2^50ns) which would be caused by jiffies
-                * overflow. For those cases, it sets the timeout 100ms in
-                * the future (not *too* soon, since if a guest really did
-                * set a long timeout on purpose we don't want to keep
-                * churning CPU time by waking it up).
-                */
-               if (unlikely((int64_t)timeout < 0 ||
-                            (delta > 0 && (uint32_t) (delta >> 50) != 0))) {
-                       delta = 100 * NSEC_PER_MSEC;
-                       timeout = guest_now + delta;
-               }
-
-               kvm_xen_start_timer(vcpu, timeout, delta);
-       } else {
+       if (timeout)
+               kvm_xen_start_timer(vcpu, timeout, true);
+       else
                kvm_xen_stop_timer(vcpu);
-       }
 
        *r = 0;
        return true;
@@ -1621,9 +1755,6 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
                WRITE_ONCE(xe->vcpu_idx, vcpu->vcpu_idx);
        }
 
-       if (!vcpu->arch.xen.vcpu_info_cache.active)
-               return -EINVAL;
-
        if (xe->port >= max_evtchn_port(kvm))
                return -EINVAL;
 
@@ -1731,8 +1862,6 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
                mm_borrowed = true;
        }
 
-       mutex_lock(&kvm->arch.xen.xen_lock);
-
        /*
         * It is theoretically possible for the page to be unmapped
         * and the MMU notifier to invalidate the shared_info before
@@ -1760,8 +1889,6 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
                srcu_read_unlock(&kvm->srcu, idx);
        } while(!rc);
 
-       mutex_unlock(&kvm->arch.xen.xen_lock);
-
        if (mm_borrowed)
                kthread_unuse_mm(kvm->mm);
 
@@ -2109,14 +2236,10 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
 
        timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
 
-       kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm, NULL,
-                    KVM_HOST_USES_PFN);
-       kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm, NULL,
-                    KVM_HOST_USES_PFN);
-       kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm, NULL,
-                    KVM_HOST_USES_PFN);
-       kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm, NULL,
-                    KVM_HOST_USES_PFN);
+       kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm);
+       kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm);
+       kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm);
+       kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm);
 }
 
 void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -2159,7 +2282,7 @@ void kvm_xen_init_vm(struct kvm *kvm)
 {
        mutex_init(&kvm->arch.xen.xen_lock);
        idr_init(&kvm->arch.xen.evtchn_ports);
-       kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN);
+       kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm);
 }
 
 void kvm_xen_destroy_vm(struct kvm *kvm)
index f8f1fe22d090696cb32b44f719d75e124fb4c8bf..f5841d9000aebd5b9584db188d89135fb6f3e11e 100644
@@ -18,6 +18,7 @@ extern struct static_key_false_deferred kvm_xen_enabled;
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu);
 void kvm_xen_inject_pending_events(struct kvm_vcpu *vcpu);
+void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *vcpu);
 int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
@@ -36,6 +37,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm,
                         const struct kvm_irq_routing_entry *ue);
 void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu);
 
+static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
+{
+       /*
+        * The local APIC is being enabled. If the per-vCPU upcall vector is
+        * set and the vCPU's evtchn_upcall_pending flag is set, inject the
+        * interrupt.
+        */
+       if (static_branch_unlikely(&kvm_xen_enabled.key) &&
+           vcpu->arch.xen.vcpu_info_cache.active &&
+           vcpu->arch.xen.upcall_vector && __kvm_xen_has_interrupt(vcpu))
+               kvm_xen_inject_vcpu_vector(vcpu);
+}
+
 static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
 {
        return static_branch_unlikely(&kvm_xen_enabled.key) &&
@@ -101,6 +115,10 @@ static inline void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 {
 }
 
+static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
+{
+}
+
 static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
 {
        return false;
index b7b88c1d91ec4698685c9e2e8f49a2f06e343862..89079ea73e65b6b134ad0270681aee90745fa70f 100644
@@ -362,9 +362,9 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
        }
 }
 
-static void ptdump_walk_pgd_level_core(struct seq_file *m,
-                                      struct mm_struct *mm, pgd_t *pgd,
-                                      bool checkwx, bool dmesg)
+bool ptdump_walk_pgd_level_core(struct seq_file *m,
+                               struct mm_struct *mm, pgd_t *pgd,
+                               bool checkwx, bool dmesg)
 {
        const struct ptdump_range ptdump_ranges[] = {
 #ifdef CONFIG_X86_64
@@ -391,12 +391,17 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m,
        ptdump_walk_pgd(&st.ptdump, mm, pgd);
 
        if (!checkwx)
-               return;
-       if (st.wx_pages)
+               return true;
+       if (st.wx_pages) {
                pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
                        st.wx_pages);
-       else
+
+               return false;
+       } else {
                pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
+
+               return true;
+       }
 }
 
 void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm)
@@ -431,9 +436,12 @@ void ptdump_walk_user_pgd_level_checkwx(void)
 #endif
 }
 
-void ptdump_walk_pgd_level_checkwx(void)
+bool ptdump_walk_pgd_level_checkwx(void)
 {
-       ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false);
+       if (!(__supported_pte_mask & _PAGE_NX))
+               return true;
+
+       return ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false);
 }
 
 static int __init pt_dump_init(void)
index 402e08f6b7ec9c78ec95826c8c7b3d5c06291632..622d12ec7f08518ba6701c33efd74d5f77545806 100644
@@ -252,7 +252,7 @@ static noinline int vmalloc_fault(unsigned long address)
        if (!pmd_k)
                return -1;
 
-       if (pmd_large(*pmd_k))
+       if (pmd_leaf(*pmd_k))
                return 0;
 
        pte_k = pte_offset_kernel(pmd_k, address);
@@ -321,7 +321,7 @@ static void dump_pagetable(unsigned long address)
         * And let's rather not kmap-atomic the pte, just in case
         * it's allocated already:
         */
-       if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+       if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_leaf(*pmd))
                goto out;
 
        pte = pte_offset_kernel(pmd, address);
@@ -370,7 +370,7 @@ static void dump_pagetable(unsigned long address)
                goto bad;
 
        pr_cont("P4D %lx ", p4d_val(*p4d));
-       if (!p4d_present(*p4d) || p4d_large(*p4d))
+       if (!p4d_present(*p4d) || p4d_leaf(*p4d))
                goto out;
 
        pud = pud_offset(p4d, address);
@@ -378,7 +378,7 @@ static void dump_pagetable(unsigned long address)
                goto bad;
 
        pr_cont("PUD %lx ", pud_val(*pud));
-       if (!pud_present(*pud) || pud_large(*pud))
+       if (!pud_present(*pud) || pud_leaf(*pud))
                goto out;
 
        pmd = pmd_offset(pud, address);
@@ -386,7 +386,7 @@ static void dump_pagetable(unsigned long address)
                goto bad;
 
        pr_cont("PMD %lx ", pmd_val(*pmd));
-       if (!pmd_present(*pmd) || pmd_large(*pmd))
+       if (!pmd_present(*pmd) || pmd_leaf(*pmd))
                goto out;
 
        pte = pte_offset_kernel(pmd, address);
@@ -1036,21 +1036,21 @@ spurious_kernel_fault(unsigned long error_code, unsigned long address)
        if (!p4d_present(*p4d))
                return 0;
 
-       if (p4d_large(*p4d))
+       if (p4d_leaf(*p4d))
                return spurious_kernel_fault_check(error_code, (pte_t *) p4d);
 
        pud = pud_offset(p4d, address);
        if (!pud_present(*pud))
                return 0;
 
-       if (pud_large(*pud))
+       if (pud_leaf(*pud))
                return spurious_kernel_fault_check(error_code, (pte_t *) pud);
 
        pmd = pmd_offset(pud, address);
        if (!pmd_present(*pmd))
                return 0;
 
-       if (pmd_large(*pmd))
+       if (pmd_leaf(*pmd))
                return spurious_kernel_fault_check(error_code, (pte_t *) pmd);
 
        pte = pte_offset_kernel(pmd, address);
index f50cc210a981886e7d3a265b4d43ca16f47f6825..a204a332c71fc50948c884251cc15cc89afeaf1f 100644
@@ -33,7 +33,7 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
                        next = end;
 
                /* if this is already a gbpage, this portion is already mapped */
-               if (pud_large(*pud))
+               if (pud_leaf(*pud))
                        continue;
 
                /* Is using a gbpage allowed? */
index b63403d7179df4b466d3c7b1b5ff36b654cc5fd1..ac41b1e0940d47ca647dc2006c40b6005c2dae4f 100644
@@ -463,7 +463,7 @@ void __init native_pagetable_init(void)
                        break;
 
                /* should not be large page here */
-               if (pmd_large(*pmd)) {
+               if (pmd_leaf(*pmd)) {
                        pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n",
                                pfn, pmd, __pa(pmd));
                        BUG_ON(1);
@@ -800,6 +800,4 @@ void mark_rodata_ro(void)
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 #endif
        mark_nxdata_nx();
-       if (__supported_pte_mask & _PAGE_NX)
-               debug_checkwx();
 }
index a0dffaca6d2bfc2f1fdc47a8017f72fa74f98690..7e177856ee4fe62238c2bb57323a93607233bb0c 100644
@@ -530,7 +530,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
                }
 
                if (!pmd_none(*pmd)) {
-                       if (!pmd_large(*pmd)) {
+                       if (!pmd_leaf(*pmd)) {
                                spin_lock(&init_mm.page_table_lock);
                                pte = (pte_t *)pmd_page_vaddr(*pmd);
                                paddr_last = phys_pte_init(pte, paddr,
@@ -617,7 +617,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
                }
 
                if (!pud_none(*pud)) {
-                       if (!pud_large(*pud)) {
+                       if (!pud_leaf(*pud)) {
                                pmd = pmd_offset(pud, 0);
                                paddr_last = phys_pmd_init(pmd, paddr,
                                                           paddr_end,
@@ -1114,7 +1114,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
                if (!pmd_present(*pmd))
                        continue;
 
-               if (pmd_large(*pmd)) {
+               if (pmd_leaf(*pmd)) {
                        if (IS_ALIGNED(addr, PMD_SIZE) &&
                            IS_ALIGNED(next, PMD_SIZE)) {
                                if (!direct)
@@ -1163,7 +1163,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
                if (!pud_present(*pud))
                        continue;
 
-               if (pud_large(*pud) &&
+               if (pud_leaf(*pud) &&
                    IS_ALIGNED(addr, PUD_SIZE) &&
                    IS_ALIGNED(next, PUD_SIZE)) {
                        spin_lock(&init_mm.page_table_lock);
@@ -1197,7 +1197,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
                if (!p4d_present(*p4d))
                        continue;
 
-               BUILD_BUG_ON(p4d_large(*p4d));
+               BUILD_BUG_ON(p4d_leaf(*p4d));
 
                pud_base = pud_offset(p4d, 0);
                remove_pud_table(pud_base, addr, next, altmap, direct);
@@ -1412,8 +1412,6 @@ void mark_rodata_ro(void)
                                (void *)text_end, (void *)rodata_start);
        free_kernel_image_pages("unused kernel image (rodata/data gap)",
                                (void *)rodata_end, (void *)_sdata);
-
-       debug_checkwx();
 }
 
 /*
@@ -1522,9 +1520,9 @@ void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
 int __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
                                unsigned long addr, unsigned long next)
 {
-       int large = pmd_large(*pmd);
+       int large = pmd_leaf(*pmd);
 
-       if (pmd_large(*pmd)) {
+       if (pmd_leaf(*pmd)) {
                vmemmap_verify((pte_t *)pmd, node, addr, next);
                vmemmap_use_sub_pmd(addr, next);
        }
index 0302491d799d1b2227826eab5a01f76403e75edc..9dddf19a55716030916eb4d380f72d7360dbd181 100644 (file)
@@ -95,7 +95,7 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
-               if (!pmd_large(*pmd))
+               if (!pmd_leaf(*pmd))
                        kasan_populate_pmd(pmd, addr, next, nid);
        } while (pmd++, addr = next, addr != end);
 }
@@ -115,7 +115,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
        pud = pud_offset(p4d, addr);
        do {
                next = pud_addr_end(addr, end);
-               if (!pud_large(*pud))
+               if (!pud_leaf(*pud))
                        kasan_populate_pud(pud, addr, next, nid);
        } while (pud++, addr = next, addr != end);
 }
index 64b5005d49e5770deb25961a66084dec7185537d..ac33b2263a434db626bb8d791fedeb4301c3b9e1 100644 (file)
@@ -141,7 +141,7 @@ static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
                set_pud(pud, __pud(PUD_FLAGS | __pa(pmd)));
        }
 
-       if (pud_large(*pud))
+       if (pud_leaf(*pud))
                return NULL;
 
        return pud;
@@ -157,7 +157,7 @@ static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
                return;
 
        pmd = pmd_offset(pud, ppd->vaddr);
-       if (pmd_large(*pmd))
+       if (pmd_leaf(*pmd))
                return;
 
        set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
@@ -181,7 +181,7 @@ static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
                set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte)));
        }
 
-       if (pmd_large(*pmd))
+       if (pmd_leaf(*pmd))
                return;
 
        pte = pte_offset_kernel(pmd, ppd->vaddr);
index e5b2985a7c5166faaf8c527c4dfbf0fdff6f80d2..80c9037ffadffd38dfdc3361f1039d8c5e1b9fac 100644 (file)
@@ -676,7 +676,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
                return NULL;
 
        *level = PG_LEVEL_512G;
-       if (p4d_large(*p4d) || !p4d_present(*p4d))
+       if (p4d_leaf(*p4d) || !p4d_present(*p4d))
                return (pte_t *)p4d;
 
        pud = pud_offset(p4d, address);
@@ -684,7 +684,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
                return NULL;
 
        *level = PG_LEVEL_1G;
-       if (pud_large(*pud) || !pud_present(*pud))
+       if (pud_leaf(*pud) || !pud_present(*pud))
                return (pte_t *)pud;
 
        pmd = pmd_offset(pud, address);
@@ -692,7 +692,7 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
                return NULL;
 
        *level = PG_LEVEL_2M;
-       if (pmd_large(*pmd) || !pmd_present(*pmd))
+       if (pmd_leaf(*pmd) || !pmd_present(*pmd))
                return (pte_t *)pmd;
 
        *level = PG_LEVEL_4K;
@@ -739,11 +739,11 @@ pmd_t *lookup_pmd_address(unsigned long address)
                return NULL;
 
        p4d = p4d_offset(pgd, address);
-       if (p4d_none(*p4d) || p4d_large(*p4d) || !p4d_present(*p4d))
+       if (p4d_none(*p4d) || p4d_leaf(*p4d) || !p4d_present(*p4d))
                return NULL;
 
        pud = pud_offset(p4d, address);
-       if (pud_none(*pud) || pud_large(*pud) || !pud_present(*pud))
+       if (pud_none(*pud) || pud_leaf(*pud) || !pud_present(*pud))
                return NULL;
 
        return pmd_offset(pud, address);
@@ -1233,7 +1233,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
         * Try to unmap in 2M chunks.
         */
        while (end - start >= PMD_SIZE) {
-               if (pmd_large(*pmd))
+               if (pmd_leaf(*pmd))
                        pmd_clear(pmd);
                else
                        __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
@@ -1278,7 +1278,7 @@ static void unmap_pud_range(p4d_t *p4d, unsigned long start, unsigned long end)
         */
        while (end - start >= PUD_SIZE) {
 
-               if (pud_large(*pud))
+               if (pud_leaf(*pud))
                        pud_clear(pud);
                else
                        unmap_pmd_range(pud, start, start + PUD_SIZE);
index cceb779d882d882b4b0d7b80a2f3c6bc33a8e40b..d007591b80597a2682e0cfc21248bc0fe2fc0319 100644 (file)
@@ -777,7 +777,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
  */
 int pud_clear_huge(pud_t *pud)
 {
-       if (pud_large(*pud)) {
+       if (pud_leaf(*pud)) {
                pud_clear(pud);
                return 1;
        }
@@ -792,7 +792,7 @@ int pud_clear_huge(pud_t *pud)
  */
 int pmd_clear_huge(pmd_t *pmd)
 {
-       if (pmd_large(*pmd)) {
+       if (pmd_leaf(*pmd)) {
                pmd_clear(pmd);
                return 1;
        }
index 669ba1c345b3898326657f35b7033bb412fe8898..2e69abf4f852abb7d8e29c7673dc82f685cf7b89 100644 (file)
@@ -185,7 +185,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 
                set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
        }
-       BUILD_BUG_ON(pgd_large(*pgd) != 0);
+       BUILD_BUG_ON(pgd_leaf(*pgd) != 0);
 
        return p4d_offset(pgd, address);
 }
@@ -206,7 +206,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
        if (!p4d)
                return NULL;
 
-       BUILD_BUG_ON(p4d_large(*p4d) != 0);
+       BUILD_BUG_ON(p4d_leaf(*p4d) != 0);
        if (p4d_none(*p4d)) {
                unsigned long new_pud_page = __get_free_page(gfp);
                if (WARN_ON_ONCE(!new_pud_page))
@@ -217,7 +217,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 
        pud = pud_offset(p4d, address);
        /* The user page tables do not use large mappings: */
-       if (pud_large(*pud)) {
+       if (pud_leaf(*pud)) {
                WARN_ON(1);
                return NULL;
        }
@@ -252,7 +252,7 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
                return NULL;
 
        /* We can't do anything sensible if we hit a large mapping. */
-       if (pmd_large(*pmd)) {
+       if (pmd_leaf(*pmd)) {
                WARN_ON(1);
                return NULL;
        }
@@ -341,7 +341,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end,
                        continue;
                }
 
-               if (pmd_large(*pmd) || level == PTI_CLONE_PMD) {
+               if (pmd_leaf(*pmd) || level == PTI_CLONE_PMD) {
                        target_pmd = pti_user_pagetable_walk_pmd(addr);
                        if (WARN_ON(!target_pmd))
                                return;
index 4af930947380c5378190ccd631290cdcf475a1a1..44ac64f3a047ca19cb0708cf3c233270cc958853 100644 (file)
@@ -299,7 +299,7 @@ static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
        write_cr3(new_mm_cr3);
 }
 
-void leave_mm(int cpu)
+void leave_mm(void)
 {
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
 
@@ -327,7 +327,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
        unsigned long flags;
 
        local_irq_save(flags);
-       switch_mm_irqs_off(prev, next, tsk);
+       switch_mm_irqs_off(NULL, next, tsk);
        local_irq_restore(flags);
 }
 
@@ -492,10 +492,16 @@ void cr4_update_pce(void *ignored)
 static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
 #endif
 
-void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+/*
+ * This optimizes when not actually switching mm's.  Some architectures use the
+ * 'unused' argument for this optimization, but x86 must use
+ * 'cpu_tlbstate.loaded_mm' instead because it does not always keep
+ * 'current->active_mm' up to date.
+ */
+void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
                        struct task_struct *tsk)
 {
-       struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
+       struct mm_struct *prev = this_cpu_read(cpu_tlbstate.loaded_mm);
        u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
        unsigned long new_lam = mm_lam_cr3_mask(next);
        bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
@@ -504,15 +510,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
        bool need_flush;
        u16 new_asid;
 
-       /*
-        * NB: The scheduler will call us with prev == next when switching
-        * from lazy TLB mode to normal mode if active_mm isn't changing.
-        * When this happens, we don't assume that CR3 (and hence
-        * cpu_tlbstate.loaded_mm) matches next.
-        *
-        * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
-        */
-
        /* We don't want flush_tlb_func() to run concurrently with us. */
        if (IS_ENABLED(CONFIG_PROVE_LOCKING))
                WARN_ON_ONCE(!irqs_disabled());
@@ -527,7 +524,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         * isn't free.
         */
 #ifdef CONFIG_DEBUG_VM
-       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid,
+       if (WARN_ON_ONCE(__read_cr3() != build_cr3(prev->pgd, prev_asid,
                                                   tlbstate_lam_cr3_mask()))) {
                /*
                 * If we were to BUG here, we'd be very likely to kill
@@ -559,7 +556,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         * provides that full memory barrier and core serializing
         * instruction.
         */
-       if (real_prev == next) {
+       if (prev == next) {
                /* Not actually switching mm's */
                VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
                           next->context.ctx_id);
@@ -574,7 +571,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                 * mm_cpumask. The TLB shootdown code can figure out from
                 * cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
                 */
-               if (WARN_ON_ONCE(real_prev != &init_mm &&
+               if (WARN_ON_ONCE(prev != &init_mm &&
                                 !cpumask_test_cpu(cpu, mm_cpumask(next))))
                        cpumask_set_cpu(cpu, mm_cpumask(next));
 
@@ -616,10 +613,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                 * Skip kernel threads; we never send init_mm TLB flushing IPIs,
                 * but the bitmap manipulation can cause cache line contention.
                 */
-               if (real_prev != &init_mm) {
+               if (prev != &init_mm) {
                        VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
-                                               mm_cpumask(real_prev)));
-                       cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
+                                               mm_cpumask(prev)));
+                       cpumask_clear_cpu(cpu, mm_cpumask(prev));
                }
 
                /*
@@ -656,9 +653,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
        this_cpu_write(cpu_tlbstate.loaded_mm, next);
        this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 
-       if (next != real_prev) {
+       if (next != prev) {
                cr4_update_pce_mm(next);
-               switch_ldt(real_prev, next);
+               switch_ldt(prev, next);
        }
 }
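With leave_mm() and switch_mm() no longer passing a meaningful previous mm, switch_mm_irqs_off() can reclaim the plain 'prev' name for what it always trusted anyway: the per-CPU cpu_tlbstate.loaded_mm. Reduced to a sketch (assumes x86 kernel context; ASID and CR3 details elided):

/* The generic code's first argument is deliberately ignored. */
static void sketch_switch(struct mm_struct *unused, struct mm_struct *next)
{
	struct mm_struct *prev = this_cpu_read(cpu_tlbstate.loaded_mm);

	if (prev == next)
		return;		/* lazy-TLB wakeup, not a real switch */

	/* ... pick an ASID, write CR3, update mm_cpumask(next) ... */
	this_cpu_write(cpu_tlbstate.loaded_mm, next);
}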
 
index 379777572bc9fe665b845e829fbe86f5cd9c853b..e0cd7afd53022ad7fde7a177a6dda9d99087472a 100644 (file)
@@ -5,7 +5,7 @@
 CFLAGS_cpu.o   := -fno-stack-protector
 
 # Clang may incorrectly inline functions with stack protector enabled into
-# __restore_processor_state(): https://bugs.llvm.org/show_bug.cgi?id=47479
+# __restore_processor_state(): https://llvm.org/pr47479
 CFLAGS_REMOVE_cpu.o := $(CC_FLAGS_LTO)
 
 obj-$(CONFIG_PM_SLEEP)         += cpu.o
index 6f955eb1e1631a04df52708ea9792bb0328d1ca8..5b81d19cd114c7d5d44a51545d3caf8fff241d1f 100644 (file)
@@ -165,17 +165,17 @@ int relocate_restore_code(void)
        pgd = (pgd_t *)__va(read_cr3_pa()) +
                pgd_index(relocated_restore_code);
        p4d = p4d_offset(pgd, relocated_restore_code);
-       if (p4d_large(*p4d)) {
+       if (p4d_leaf(*p4d)) {
                set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
                goto out;
        }
        pud = pud_offset(p4d, relocated_restore_code);
-       if (pud_large(*pud)) {
+       if (pud_leaf(*pud)) {
                set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
                goto out;
        }
        pmd = pmd_offset(pud, relocated_restore_code);
-       if (pmd_large(*pmd)) {
+       if (pmd_leaf(*pmd)) {
                set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
                goto out;
        }
index 99a68fa71dbe403af34705d5425d886f1d6e49be..c001a2296582f48ae7a71702cea76d26f29dfc6a 100644 (file)
@@ -148,7 +148,9 @@ static void xen_hvm_shutdown(void)
        if (kexec_in_progress)
                xen_reboot(SHUTDOWN_soft_reset);
 }
+#endif
 
+#ifdef CONFIG_CRASH_DUMP
 static void xen_hvm_crash_shutdown(struct pt_regs *regs)
 {
        native_machine_crash_shutdown(regs);
@@ -236,6 +238,8 @@ static void __init xen_hvm_guest_init(void)
 
 #ifdef CONFIG_KEXEC_CORE
        machine_ops.shutdown = xen_hvm_shutdown;
+#endif
+#ifdef CONFIG_CRASH_DUMP
        machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
 #endif
 }
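This hunk is part of the kexec/crash Kconfig split in this cycle: loading a new kernel (CONFIG_KEXEC_CORE), taking a crash dump (CONFIG_CRASH_DUMP), and exporting vmcoreinfo (CONFIG_VMCORE_INFO, used further below for paddr_vmcoreinfo_note()) are now independently selectable, so each hook is guarded by the narrowest option that covers it. Sketched (option names as used in this series):

#ifdef CONFIG_KEXEC_CORE
	/* shutdown path for kexec'ing into a new kernel */
#endif
#ifdef CONFIG_CRASH_DUMP
	/* crash_shutdown path for capturing a dump */
#endif
#ifdef CONFIG_VMCORE_INFO
	/* vmcoreinfo note export, e.g. paddr_vmcoreinfo_note() */
#endif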
index 72af496a160c8bea33252edd349018dedf6fe19f..54e0d311dcc94a7b265333899fcd9376a712c7e7 100644 (file)
@@ -913,7 +913,7 @@ static void drop_mm_ref_this_cpu(void *info)
        struct mm_struct *mm = info;
 
        if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm)
-               leave_mm(smp_processor_id());
+               leave_mm();
 
        /*
         * If this cpu still has a stale cr3 reference, then make sure
@@ -1059,7 +1059,7 @@ static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
        pte_t *pte_tbl;
        int i;
 
-       if (pmd_large(*pmd)) {
+       if (pmd_leaf(*pmd)) {
                pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
                xen_free_ro_pages(pa, PMD_SIZE);
                return;
@@ -1082,7 +1082,7 @@ static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
        pmd_t *pmd_tbl;
        int i;
 
-       if (pud_large(*pud)) {
+       if (pud_leaf(*pud)) {
                pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
                xen_free_ro_pages(pa, PUD_SIZE);
                return;
@@ -1104,7 +1104,7 @@ static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
        pud_t *pud_tbl;
        int i;
 
-       if (p4d_large(*p4d)) {
+       if (p4d_leaf(*p4d)) {
                pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
                xen_free_ro_pages(pa, P4D_SIZE);
                return;
@@ -1863,7 +1863,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
        if (!pud_present(pud))
                return 0;
        pa = pud_val(pud) & PTE_PFN_MASK;
-       if (pud_large(pud))
+       if (pud_leaf(pud))
                return pa + (vaddr & ~PUD_MASK);
 
        pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) *
@@ -1871,7 +1871,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
        if (!pmd_present(pmd))
                return 0;
        pa = pmd_val(pmd) & PTE_PFN_MASK;
-       if (pmd_large(pmd))
+       if (pmd_leaf(pmd))
                return pa + (vaddr & ~PMD_MASK);
 
        pte = native_make_pte(xen_read_phys_ulong(pa + pte_index(vaddr) *
@@ -2520,7 +2520,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(xen_remap_pfn);
 
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_VMCORE_INFO
 phys_addr_t paddr_vmcoreinfo_note(void)
 {
        if (xen_pv_domain())
index 87ec35b3363be1c03a4128209db4bd9b3d535fea..f200a4ec044e64a9cfc58127761f0f8214553a71 100644 (file)
@@ -2,6 +2,7 @@
 config XTENSA
        def_bool y
        select ARCH_32BIT_OFF_T
+       select ARCH_HAS_CPU_CACHE_ALIASING
        select ARCH_HAS_BINFMT_FLAT if !MMU
        select ARCH_HAS_CURRENT_STACK_POINTER
        select ARCH_HAS_DEBUG_VM_PGTABLE
diff --git a/arch/xtensa/include/asm/cachetype.h b/arch/xtensa/include/asm/cachetype.h
new file mode 100644 (file)
index 0000000..51bd49e
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_XTENSA_CACHETYPE_H
+#define __ASM_XTENSA_CACHETYPE_H
+
+#include <asm/cache.h>
+#include <asm/page.h>
+
+#define cpu_dcache_is_aliasing()       (DCACHE_WAY_SIZE > PAGE_SIZE)
+
+#endif
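cpu_dcache_is_aliasing() answers whether two virtual mappings of the same physical page can land in different D-cache lines. For a virtually-indexed cache that happens exactly when the per-way size exceeds the page size, because some index bits then come from the virtual page number. A worked example with made-up cache geometry:

#include <stdio.h>

int main(void)
{
	unsigned int cache_size = 16384;	/* 16 KiB D-cache (assumed) */
	unsigned int ways = 2;			/* 2-way set associative */
	unsigned int page_size = 4096;
	unsigned int way_size = cache_size / ways;	/* 8 KiB */

	/* Index bits above the page offset come from the virtual
	 * address, so aliases are possible iff way_size > page_size. */
	printf("aliasing possible: %s\n",
	       way_size > page_size ? "yes" : "no");	/* prints "yes" */
	return 0;
}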
index e7adaaf1c21927a71d93e64817e22732fc72f2a3..7a5f611c3d2e3e83eb00be12b9131f49d8348f5e 100644 (file)
@@ -583,6 +583,9 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder,
        mutex_unlock(&bdev->bd_holder_lock);
        bd_clear_claiming(whole, holder);
        mutex_unlock(&bdev_lock);
+
+       if (hops && hops->get_holder)
+               hops->get_holder(holder);
 }
 
 /**
@@ -605,6 +608,7 @@ EXPORT_SYMBOL(bd_abort_claiming);
 static void bd_end_claim(struct block_device *bdev, void *holder)
 {
        struct block_device *whole = bdev_whole(bdev);
+       const struct blk_holder_ops *hops = bdev->bd_holder_ops;
        bool unblock = false;
 
        /*
@@ -627,6 +631,9 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
                whole->bd_holder = NULL;
        mutex_unlock(&bdev_lock);
 
+       if (hops && hops->put_holder)
+               hops->put_holder(holder);
+
        /*
         * If this was the last claim, remove holder link and unblock evpoll if
         * it was a write holder.
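The new get_holder()/put_holder() hooks let bd_finish_claiming() pin the holder object for the whole claim, so callbacks such as ->mark_dead() can no longer race with the holder being freed; bd_end_claim() drops the reference once the claim is released. A hedged sketch of a holder wiring up the pair (struct my_holder and its refcounting policy are assumptions, not the in-tree user):

#include <linux/blkdev.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct my_holder {			/* hypothetical holder object */
	refcount_t ref;
};

static void my_get_holder(void *holder)
{
	refcount_inc(&((struct my_holder *)holder)->ref);
}

static void my_put_holder(void *holder)
{
	struct my_holder *h = holder;

	if (refcount_dec_and_test(&h->ref))
		kfree(h);
}

static const struct blk_holder_ops my_holder_ops = {
	.get_holder	= my_get_holder,
	.put_holder	= my_put_holder,
};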
index dc8e35d0a51d6de0d4c7bfb2d4ce2f8cbb91a5d1..a6954eafb8c8af324971bd2d293fdceb2d481303 100644 (file)
@@ -35,26 +35,6 @@ static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
        return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
 }
 
-static void await_bio_endio(struct bio *bio)
-{
-       complete(bio->bi_private);
-       bio_put(bio);
-}
-
-/*
- * await_bio_chain - ends @bio and waits for every chained bio to complete
- */
-static void await_bio_chain(struct bio *bio)
-{
-       DECLARE_COMPLETION_ONSTACK_MAP(done,
-                       bio->bi_bdev->bd_disk->lockdep_map);
-
-       bio->bi_private = &done;
-       bio->bi_end_io = await_bio_endio;
-       bio_endio(bio);
-       blk_wait_io(&done);
-}
-
 int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
 {
@@ -97,10 +77,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                 * is disabled.
                 */
                cond_resched();
-               if (fatal_signal_pending(current)) {
-                       await_bio_chain(bio);
-                       return -EINTR;
-               }
        }
 
        *biop = bio;
@@ -167,10 +143,6 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
                nr_sects -= len;
                sector += len;
                cond_resched();
-               if (fatal_signal_pending(current)) {
-                       await_bio_chain(bio);
-                       return -EINTR;
-               }
        }
 
        *biop = bio;
@@ -215,10 +187,6 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
                                break;
                }
                cond_resched();
-               if (fatal_signal_pending(current)) {
-                       await_bio_chain(bio);
-                       return -EINTR;
-               }
        }
 
        *biop = bio;
@@ -309,7 +277,7 @@ retry:
                bio_put(bio);
        }
        blk_finish_plug(&plug);
-       if (ret && ret != -EINTR && try_write_zeroes) {
+       if (ret && try_write_zeroes) {
                if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
                        try_write_zeroes = false;
                        goto retry;
@@ -361,12 +329,6 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
                sector += len;
                nr_sects -= len;
                cond_resched();
-               if (fatal_signal_pending(current)) {
-                       await_bio_chain(bio);
-                       ret = -EINTR;
-                       bio = NULL;
-                       break;
-               }
        }
        if (bio) {
                ret = submit_bio_wait(bio);
index e160d56e8edaa19f2e9e72dacc2552fcc652244a..3c7d8d638ab59dc9704aa01217c9b940b5941e4b 100644 (file)
@@ -267,7 +267,7 @@ int queue_limits_commit_update(struct request_queue *q,
 EXPORT_SYMBOL_GPL(queue_limits_commit_update);
 
 /**
- * queue_limits_commit_set - apply queue limits to queue
+ * queue_limits_set - apply queue limits to queue
  * @q:         queue to update
  * @lim:       limits to apply
  *
index a19b7b42e6503cd5ca5e03aba41b894a891929b8..5cac4e29ae1744fe4dd370e05290df0f31f3f4ba 100644 (file)
@@ -534,7 +534,7 @@ static inline u64 blk_time_get_ns(void)
 {
        struct blk_plug *plug = current->plug;
 
-       if (!plug)
+       if (!plug || !in_task())
                return ktime_get_ns();
 
        /*
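blk_time_get_ns() amortizes clock reads by caching one ktime_get_ns() value in the current plug; the new !in_task() test forces hard/soft IRQ context to read the clock directly, since current->plug belongs to the interrupted task and its cached timestamp may be arbitrarily stale there. The full shape of the helper, sketched (the cur_ktime field name follows this series; zero meaning "not cached yet" is an assumption of the sketch):

static u64 sketch_blk_time_get_ns(void)
{
	struct blk_plug *plug = current->plug;

	if (!plug || !in_task())
		return ktime_get_ns();	/* no usable cache here */

	if (!plug->cur_ktime)		/* 0 == nothing cached yet */
		plug->cur_ktime = ktime_get_ns();

	return plug->cur_ktime;
}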
index f958e79277b8bc24e8f26b5375c29f0558ba09ad..02a916ba62ee750d4ad29127604b7d4a0cb474d7 100644 (file)
@@ -646,9 +646,8 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
        struct request_queue *q = hctx->queue;
        struct deadline_data *dd = q->elevator->elevator_data;
        struct blk_mq_tags *tags = hctx->sched_tags;
-       unsigned int shift = tags->bitmap_tags.sb.shift;
 
-       dd->async_depth = max(1U, 3 * (1U << shift)  / 4);
+       dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
 
        sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
 }
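The old computation derived async_depth from sbitmap's internal word shift, which tracks the bitmap layout rather than the queue depth; basing it on q->nr_requests restores the intent of reserving a quarter of the tags for synchronous requests. Worked example (the queue depth is assumed):

#include <stdio.h>

int main(void)
{
	unsigned long nr_requests = 64;	/* assumed queue depth */
	unsigned long async_depth = 3 * nr_requests / 4;

	if (async_depth < 1)		/* the max(1UL, ...) clamp */
		async_depth = 1;

	printf("async_depth = %lu\n", async_depth);	/* prints 48 */
	return 0;
}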
index 7d156c75f15f2d3465d9041500bdbdb3dcb3d1ee..44661c2e30ca5de9387d4c09202a588b6f57e64f 100644 (file)
@@ -1269,10 +1269,11 @@ config CRYPTO_JITTERENTROPY
 
          A non-physical non-deterministic ("true") RNG (e.g., an entropy source
          compliant with NIST SP800-90B) intended to provide a seed to a
-         deterministic RNG (e.g.  per NIST SP800-90C).
+         deterministic RNG (e.g., per NIST SP800-90C).
          This RNG does not perform any cryptographic whitening of the generated
+         random numbers.
 
-         See https://www.chronox.de/jent.html
+         See https://www.chronox.de/jent/
 
 if CRYPTO_JITTERENTROPY
 if CRYPTO_FIPS && EXPERT
index 80c3e5354711e1fcaf2e7d487ac742bdc0e2cf77..0ac83f7f701df283260383672e730f879375ed69 100644 (file)
@@ -618,6 +618,16 @@ int crypto_has_ahash(const char *alg_name, u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_has_ahash);
 
+static bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg)
+{
+       struct crypto_alg *alg = &halg->base;
+
+       if (alg->cra_type == &crypto_shash_type)
+               return crypto_shash_alg_has_setkey(__crypto_shash_alg(alg));
+
+       return __crypto_ahash_alg(alg)->setkey != ahash_nosetkey;
+}
+
 struct crypto_ahash *crypto_clone_ahash(struct crypto_ahash *hash)
 {
        struct hash_alg_common *halg = crypto_hash_alg_common(hash);
@@ -760,16 +770,5 @@ int ahash_register_instance(struct crypto_template *tmpl,
 }
 EXPORT_SYMBOL_GPL(ahash_register_instance);
 
-bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg)
-{
-       struct crypto_alg *alg = &halg->base;
-
-       if (alg->cra_type == &crypto_shash_type)
-               return crypto_shash_alg_has_setkey(__crypto_shash_alg(alg));
-
-       return __crypto_ahash_alg(alg)->setkey != ahash_nosetkey;
-}
-EXPORT_SYMBOL_GPL(crypto_hash_alg_has_setkey);
-
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Asynchronous cryptographic hash type");
index f440767bd7276345980b20ae8cab037a61a297f8..2863984b67005080f5913410da7fc7ee1d287736 100644 (file)
@@ -28,7 +28,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen,
        const struct pe32plus_opt_hdr *pe64;
        const struct data_directory *ddir;
        const struct data_dirent *dde;
-       const struct section_header *secs, *sec;
+       const struct section_header *sec;
        size_t cursor, datalen = pelen;
 
        kenter("");
@@ -110,7 +110,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen,
        ctx->n_sections = pe->sections;
        if (ctx->n_sections > (ctx->header_size - cursor) / sizeof(*sec))
                return -ELIBBAD;
-       ctx->secs = secs = pebuf + cursor;
+       ctx->secs = pebuf + cursor;
 
        return 0;
 }
index 6704c03558896729cd40cf0c91b9c73d6182fe84..32e380b714b6ccc1a2b0aa1074f0311e9245fe37 100644 (file)
@@ -102,7 +102,7 @@ static void blake2b_compress_one_generic(struct blake2b_state *S,
        ROUND(10);
        ROUND(11);
 #ifdef CONFIG_CC_IS_CLANG
-#pragma nounroll /* https://bugs.llvm.org/show_bug.cgi?id=45803 */
+#pragma nounroll /* https://llvm.org/pr45803 */
 #endif
        for (i = 0; i < 8; ++i)
                S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
index 0fcad279e6fe9295fa2037569ea1bbba2648a6af..68d11d66c0b54d45018867da41f7f26309dd77a7 100644 (file)
@@ -106,6 +106,12 @@ err_clear_ctx:
  */
 static int dh_is_pubkey_valid(struct dh_ctx *ctx, MPI y)
 {
+       MPI val, q;
+       int ret;
+
+       if (!fips_enabled)
+               return 0;
+
        if (unlikely(!ctx->p))
                return -EINVAL;
 
@@ -125,40 +131,35 @@ static int dh_is_pubkey_valid(struct dh_ctx *ctx, MPI y)
         *
         * For the safe-prime groups q = (p - 1)/2.
         */
-       if (fips_enabled) {
-               MPI val, q;
-               int ret;
+       val = mpi_alloc(0);
+       if (!val)
+               return -ENOMEM;
 
-               val = mpi_alloc(0);
-               if (!val)
-                       return -ENOMEM;
+       q = mpi_alloc(mpi_get_nlimbs(ctx->p));
+       if (!q) {
+               mpi_free(val);
+               return -ENOMEM;
+       }
 
-               q = mpi_alloc(mpi_get_nlimbs(ctx->p));
-               if (!q) {
-                       mpi_free(val);
-                       return -ENOMEM;
-               }
+       /*
+        * ->p is odd, so no need to explicitly subtract one
+        * from it before shifting to the right.
+        */
+       mpi_rshift(q, ctx->p, 1);
 
-               /*
-                * ->p is odd, so no need to explicitly subtract one
-                * from it before shifting to the right.
-                */
-               mpi_rshift(q, ctx->p, 1);
-
-               ret = mpi_powm(val, y, q, ctx->p);
-               mpi_free(q);
-               if (ret) {
-                       mpi_free(val);
-                       return ret;
-               }
+       ret = mpi_powm(val, y, q, ctx->p);
+       mpi_free(q);
+       if (ret) {
+               mpi_free(val);
+               return ret;
+       }
 
-               ret = mpi_cmp_ui(val, 1);
+       ret = mpi_cmp_ui(val, 1);
 
-               mpi_free(val);
+       mpi_free(val);
 
-               if (ret != 0)
-                       return -EINVAL;
-       }
+       if (ret != 0)
+               return -EINVAL;
 
        return 0;
 }
index 7030f59e46b6bc2760491176268b6df0cd8b7ea0..ab469ba50c13d2af653f8f14d9137c3a6ef2e224 100644 (file)
@@ -71,7 +71,7 @@ static int crypto_pcbc_encrypt(struct skcipher_request *req)
 
        err = skcipher_walk_virt(&walk, req, false);
 
-       while ((nbytes = walk.nbytes)) {
+       while (walk.nbytes) {
                if (walk.src.virt.addr == walk.dst.virt.addr)
                        nbytes = crypto_pcbc_encrypt_inplace(req, &walk,
                                                             cipher);
@@ -138,7 +138,7 @@ static int crypto_pcbc_decrypt(struct skcipher_request *req)
 
        err = skcipher_walk_virt(&walk, req, false);
 
-       while ((nbytes = walk.nbytes)) {
+       while (walk.nbytes) {
                if (walk.src.virt.addr == walk.dst.virt.addr)
                        nbytes = crypto_pcbc_decrypt_inplace(req, &walk,
                                                             cipher);
index b9cd11fb7d3672245f49960901f349e3efe16c2f..d9be9e86097e1cf9f38befb9bfb03a39654fc1e6 100644 (file)
@@ -24,14 +24,38 @@ struct rsa_mpi_key {
        MPI qinv;
 };
 
+static int rsa_check_payload(MPI x, MPI n)
+{
+       MPI n1;
+
+       if (mpi_cmp_ui(x, 1) <= 0)
+               return -EINVAL;
+
+       n1 = mpi_alloc(0);
+       if (!n1)
+               return -ENOMEM;
+
+       if (mpi_sub_ui(n1, n, 1) || mpi_cmp(x, n1) >= 0) {
+               mpi_free(n1);
+               return -EINVAL;
+       }
+
+       mpi_free(n1);
+       return 0;
+}
+
 /*
  * RSAEP function [RFC3447 sec 5.1.1]
  * c = m^e mod n;
  */
 static int _rsa_enc(const struct rsa_mpi_key *key, MPI c, MPI m)
 {
-       /* (1) Validate 0 <= m < n */
-       if (mpi_cmp_ui(m, 0) < 0 || mpi_cmp(m, key->n) >= 0)
+       /*
+        * Even though (1) in RFC3447 only requires 0 <= m <= n - 1, we are
+        * slightly more conservative and require 1 < m < n - 1. This is in line
+        * with SP 800-56Br2, Section 7.1.1.
+        */
+       if (rsa_check_payload(m, key->n))
                return -EINVAL;
 
        /* (2) c = m^e mod n */
@@ -50,8 +74,12 @@ static int _rsa_dec_crt(const struct rsa_mpi_key *key, MPI m_or_m1_or_h, MPI c)
        MPI m2, m12_or_qh;
        int ret = -ENOMEM;
 
-       /* (1) Validate 0 <= c < n */
-       if (mpi_cmp_ui(c, 0) < 0 || mpi_cmp(c, key->n) >= 0)
+       /*
+        * Even though (1) in RFC3447 only requires 0 <= c <= n - 1, we are
+        * slightly more conservative and require 1 < c < n - 1. This is in line
+        * with SP 800-56Br2, Section 7.1.2.
+        */
+       if (rsa_check_payload(c, key->n))
                return -EINVAL;
 
        m2 = mpi_alloc(0);
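rsa_check_payload() tightens the RFC 3447 range check: m = 0 and m = 1 are fixed points of m^e mod n for any exponent, and m = n - 1 is a fixed point whenever e is odd (as RSA exponents are), so those values pass through unchanged. A toy-sized demonstration of the fixed points (tiny numbers, purely for the arithmetic; the real code works on MPI bignums):

#include <stdio.h>

/* Square-and-multiply modular exponentiation on machine words. */
static unsigned long powmod(unsigned long b, unsigned long e, unsigned long n)
{
	unsigned long r = 1;

	b %= n;
	while (e) {
		if (e & 1)
			r = r * b % n;
		b = b * b % n;
		e >>= 1;
	}
	return r;
}

int main(void)
{
	unsigned long n = 3233, e = 17;	/* toy modulus: 61 * 53 */

	printf("%lu\n", powmod(0, e, n));	/* 0    -> now rejected */
	printf("%lu\n", powmod(1, e, n));	/* 1    -> now rejected */
	printf("%lu\n", powmod(n - 1, e, n));	/* 3232 -> now rejected */
	return 0;
}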
index b108a30a7600140e5ed616b84431d4031fc905e4..60bbb7ea406028835bf59680fff68aaf75db330a 100644 (file)
@@ -117,6 +117,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
        struct crypto_scomp *scomp = *tfm_ctx;
        void **ctx = acomp_request_ctx(req);
        struct scomp_scratch *scratch;
+       void *src, *dst;
        unsigned int dlen;
        int ret;
 
@@ -134,13 +135,25 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
        scratch = raw_cpu_ptr(&scomp_scratch);
        spin_lock(&scratch->lock);
 
-       scatterwalk_map_and_copy(scratch->src, req->src, 0, req->slen, 0);
+       if (sg_nents(req->src) == 1 && !PageHighMem(sg_page(req->src))) {
+               src = page_to_virt(sg_page(req->src)) + req->src->offset;
+       } else {
+               scatterwalk_map_and_copy(scratch->src, req->src, 0,
+                                        req->slen, 0);
+               src = scratch->src;
+       }
+
+       if (req->dst && sg_nents(req->dst) == 1 && !PageHighMem(sg_page(req->dst)))
+               dst = page_to_virt(sg_page(req->dst)) + req->dst->offset;
+       else
+               dst = scratch->dst;
+
        if (dir)
-               ret = crypto_scomp_compress(scomp, scratch->src, req->slen,
-                                           scratch->dst, &req->dlen, *ctx);
+               ret = crypto_scomp_compress(scomp, src, req->slen,
+                                           dst, &req->dlen, *ctx);
        else
-               ret = crypto_scomp_decompress(scomp, scratch->src, req->slen,
-                                             scratch->dst, &req->dlen, *ctx);
+               ret = crypto_scomp_decompress(scomp, src, req->slen,
+                                             dst, &req->dlen, *ctx);
        if (!ret) {
                if (!req->dst) {
                        req->dst = sgl_alloc(req->dlen, GFP_ATOMIC, NULL);
@@ -152,8 +165,17 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
                        ret = -ENOSPC;
                        goto out;
                }
-               scatterwalk_map_and_copy(scratch->dst, req->dst, 0, req->dlen,
-                                        1);
+               if (dst == scratch->dst) {
+                       scatterwalk_map_and_copy(scratch->dst, req->dst, 0,
+                                                req->dlen, 1);
+               } else {
+                       int nr_pages = DIV_ROUND_UP(req->dst->offset + req->dlen, PAGE_SIZE);
+                       int i;
+                       struct page *dst_page = sg_page(req->dst);
+
+                       for (i = 0; i < nr_pages; i++)
+                               flush_dcache_page(dst_page + i);
+               }
        }
 out:
        spin_unlock(&scratch->lock);
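The rewrite skips the bounce through the preallocated scratch buffers whenever a scatterlist is a single lowmem entry, since such a buffer is already virtually contiguous via the linear mapping; the flush_dcache_page() loop then keeps user-visible mappings coherent after writing through page_to_virt(). The predicate, as a hedged sketch (the helper name is hypothetical):

#include <linux/highmem.h>
#include <linux/scatterlist.h>

/* True when 'sg' may be used directly through the linear mapping
 * instead of being copied into a scratch buffer first. */
static bool sg_is_linear(struct scatterlist *sg)
{
	return sg_nents(sg) == 1 && !PageHighMem(sg_page(sg));
}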
index ea4d1cea9c06978c7cd1d611367999b5ca075af1..8aea416f64808981dcf72f9427c23848b4836852 100644 (file)
@@ -1851,6 +1851,9 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
                ret = min(ret, tcrypt_test("cbc(aria)"));
                ret = min(ret, tcrypt_test("ctr(aria)"));
                break;
+       case 193:
+               ret = min(ret, tcrypt_test("ffdhe2048(dh)"));
+               break;
        case 200:
                test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
                                speed_template_16_24_32);
index c26aeda8578781921aa49d1163661eab31e335ab..3dddd288ca02c93e0d9b3eabd5308e95590a7d4c 100644 (file)
@@ -5720,14 +5720,6 @@ static const struct alg_test_desc alg_test_descs[] = {
                }
        }, {
 #endif
-               .alg = "xts4096(paes)",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
-               .alg = "xts512(paes)",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
                .alg = "xxhash64",
                .test = alg_test_hash,
                .fips_allowed = 1,
index 6b18f8bc7be353403873df2c11ca0b72ef256eb6..3cfe7e7475f2fdb96f78eafc44ccb479b2ebb396 100644 (file)
@@ -60,6 +60,19 @@ config ACPI_APEI_EINJ
          mainly used for debugging and testing the other parts of
          APEI and some other RAS features.
 
+config ACPI_APEI_EINJ_CXL
+       bool "CXL Error INJection Support"
+       default ACPI_APEI_EINJ
+       depends on ACPI_APEI_EINJ
+       depends on CXL_BUS && CXL_BUS <= ACPI_APEI_EINJ
+       help
+         Support for CXL protocol Error INJection through debugfs/cxl.
+         Availability and which errors are supported is dependent on
+         the host platform. Look to ACPI v6.5 section 18.6.4 and kernel
+         EINJ documentation for more information.
+
+         If unsure say 'n'
+
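The 'CXL_BUS <= ACPI_APEI_EINJ' dependency above is the usual Kconfig idiom for "the provider must be at least as built-in as its consumer": tristates order as n < m < y, so a built-in CXL core may not depend on a modular EINJ. Modeled in C purely as an illustration:

enum tristate { TS_N = 0, TS_M = 1, TS_Y = 2 };	/* Kconfig ordering */

/* ACPI_APEI_EINJ_CXL is only selectable when CXL is enabled and
 * EINJ is at least as built-in as the CXL core. */
static int einj_cxl_dep_ok(enum tristate cxl_bus, enum tristate einj)
{
	return cxl_bus != TS_N && cxl_bus <= einj;
}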
 config ACPI_APEI_ERST_DEBUG
        tristate "APEI Error Record Serialization Table (ERST) Debug Support"
        depends on ACPI_APEI
index 4dfac2128737c6ea99ec79c2e4fc6d911c94ae09..2c474e6477e12a9023227ca22d9eabdf121b676e 100644 (file)
@@ -2,6 +2,8 @@
 obj-$(CONFIG_ACPI_APEI)                += apei.o
 obj-$(CONFIG_ACPI_APEI_GHES)   += ghes.o
 obj-$(CONFIG_ACPI_APEI_EINJ)   += einj.o
+einj-y                         := einj-core.o
+einj-$(CONFIG_ACPI_APEI_EINJ_CXL) += einj-cxl.o
 obj-$(CONFIG_ACPI_APEI_ERST_DEBUG) += erst-dbg.o
 
 apei-y := apei-base.o hest.o erst.o bert.o
index 67c2c3b959e1538a645d43bda3449378c0051ed8..cd2766c69d78d52df5edd067dc6edec0bac85f41 100644 (file)
@@ -130,4 +130,22 @@ static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus)
 }
 
 int apei_osc_setup(void);
+
+int einj_get_available_error_type(u32 *type);
+int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
+                     u64 param4);
+int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+                             u64 param3, u64 param4);
+bool einj_is_cxl_error_type(u64 type);
+int einj_validate_error_type(u64 type);
+
+#ifndef ACPI_EINJ_CXL_CACHE_CORRECTABLE
+#define ACPI_EINJ_CXL_CACHE_CORRECTABLE     BIT(12)
+#define ACPI_EINJ_CXL_CACHE_UNCORRECTABLE   BIT(13)
+#define ACPI_EINJ_CXL_CACHE_FATAL           BIT(14)
+#define ACPI_EINJ_CXL_MEM_CORRECTABLE       BIT(15)
+#define ACPI_EINJ_CXL_MEM_UNCORRECTABLE     BIT(16)
+#define ACPI_EINJ_CXL_MEM_FATAL             BIT(17)
+#endif
+
 #endif
similarity index 90%
rename from drivers/acpi/apei/einj.c
rename to drivers/acpi/apei/einj-core.c
index 89fb9331c611e44500b80701b48ef5b57d8a8df1..66e7f529e92fc2975d744e531f1b84b78de31b8d 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/nmi.h>
 #include <linux/delay.h>
 #include <linux/mm.h>
+#include <linux/platform_device.h>
 #include <asm/unaligned.h>
 
 #include "apei-internal.h"
 #define MEM_ERROR_MASK         (ACPI_EINJ_MEMORY_CORRECTABLE | \
                                ACPI_EINJ_MEMORY_UNCORRECTABLE | \
                                ACPI_EINJ_MEMORY_FATAL)
+#define CXL_ERROR_MASK         (ACPI_EINJ_CXL_CACHE_CORRECTABLE | \
+                               ACPI_EINJ_CXL_CACHE_UNCORRECTABLE | \
+                               ACPI_EINJ_CXL_CACHE_FATAL | \
+                               ACPI_EINJ_CXL_MEM_CORRECTABLE | \
+                               ACPI_EINJ_CXL_MEM_UNCORRECTABLE | \
+                               ACPI_EINJ_CXL_MEM_FATAL)
 
 /*
  * ACPI version 5 provides a SET_ERROR_TYPE_WITH_ADDRESS action.
@@ -137,6 +144,11 @@ static struct apei_exec_ins_type einj_ins_type[] = {
  */
 static DEFINE_MUTEX(einj_mutex);
 
+/*
+ * Exported APIs use this flag to exit early if einj_probe() failed.
+ */
+bool einj_initialized __ro_after_init;
+
 static void *einj_param;
 
 static void einj_exec_ctx_init(struct apei_exec_context *ctx)
@@ -160,7 +172,7 @@ static int __einj_get_available_error_type(u32 *type)
 }
 
 /* Get error injection capabilities of the platform */
-static int einj_get_available_error_type(u32 *type)
+int einj_get_available_error_type(u32 *type)
 {
        int rc;
 
@@ -530,8 +542,8 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
 }
 
 /* Inject the specified hardware error */
-static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
-                            u64 param3, u64 param4)
+int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3,
+                     u64 param4)
 {
        int rc;
        u64 base_addr, size;
@@ -554,8 +566,17 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
        if (type & ACPI5_VENDOR_BIT) {
                if (vendor_flags != SETWA_FLAGS_MEM)
                        goto inject;
-       } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM))
+       } else if (!(type & MEM_ERROR_MASK) && !(flags & SETWA_FLAGS_MEM)) {
                goto inject;
+       }
+
+       /*
+        * Injections targeting a CXL 1.0/1.1 port have to be injected
+        * via the einj_cxl_rch_error_inject() path as that does the proper
+        * validation of the given RCRB base (MMIO) address.
+        */
+       if (einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM))
+               return -EINVAL;
 
        /*
         * Disallow crazy address masks that give BIOS leeway to pick
@@ -587,6 +608,21 @@ inject:
        return rc;
 }
 
+int einj_cxl_rch_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
+                             u64 param3, u64 param4)
+{
+       int rc;
+
+       if (!(einj_is_cxl_error_type(type) && (flags & SETWA_FLAGS_MEM)))
+               return -EINVAL;
+
+       mutex_lock(&einj_mutex);
+       rc = __einj_error_inject(type, flags, param1, param2, param3, param4);
+       mutex_unlock(&einj_mutex);
+
+       return rc;
+}
+
 static u32 error_type;
 static u32 error_flags;
 static u64 error_param1;
@@ -607,12 +643,6 @@ static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
        { BIT(9), "Platform Correctable" },
        { BIT(10), "Platform Uncorrectable non-fatal" },
        { BIT(11), "Platform Uncorrectable fatal"},
-       { BIT(12), "CXL.cache Protocol Correctable" },
-       { BIT(13), "CXL.cache Protocol Uncorrectable non-fatal" },
-       { BIT(14), "CXL.cache Protocol Uncorrectable fatal" },
-       { BIT(15), "CXL.mem Protocol Correctable" },
-       { BIT(16), "CXL.mem Protocol Uncorrectable non-fatal" },
-       { BIT(17), "CXL.mem Protocol Uncorrectable fatal" },
        { BIT(31), "Vendor Defined Error Types" },
 };
 
@@ -641,22 +671,26 @@ static int error_type_get(void *data, u64 *val)
        return 0;
 }
 
-static int error_type_set(void *data, u64 val)
+bool einj_is_cxl_error_type(u64 type)
 {
+       return (type & CXL_ERROR_MASK) && (!(type & ACPI5_VENDOR_BIT));
+}
+
+int einj_validate_error_type(u64 type)
+{
+       u32 tval, vendor, available_error_type = 0;
        int rc;
-       u32 available_error_type = 0;
-       u32 tval, vendor;
 
        /* Only low 32 bits for error type are valid */
-       if (val & GENMASK_ULL(63, 32))
+       if (type & GENMASK_ULL(63, 32))
                return -EINVAL;
 
        /*
         * Vendor defined types have 0x80000000 bit set, and
         * are not enumerated by ACPI_EINJ_GET_ERROR_TYPE
         */
-       vendor = val & ACPI5_VENDOR_BIT;
-       tval = val & 0x7fffffff;
+       vendor = type & ACPI5_VENDOR_BIT;
+       tval = type & GENMASK(30, 0);
 
        /* Only one error type can be specified */
        if (tval & (tval - 1))
@@ -665,9 +699,21 @@ static int error_type_set(void *data, u64 val)
                rc = einj_get_available_error_type(&available_error_type);
                if (rc)
                        return rc;
-               if (!(val & available_error_type))
+               if (!(type & available_error_type))
                        return -EINVAL;
        }
+
+       return 0;
+}
+
+static int error_type_set(void *data, u64 val)
+{
+       int rc;
+
+       rc = einj_validate_error_type(val);
+       if (rc)
+               return rc;
+
        error_type = val;
 
        return 0;
@@ -703,21 +749,21 @@ static int einj_check_table(struct acpi_table_einj *einj_tab)
        return 0;
 }
 
-static int __init einj_init(void)
+static int __init einj_probe(struct platform_device *pdev)
 {
        int rc;
        acpi_status status;
        struct apei_exec_context ctx;
 
        if (acpi_disabled) {
-               pr_info("ACPI disabled.\n");
+               pr_debug("ACPI disabled.\n");
                return -ENODEV;
        }
 
        status = acpi_get_table(ACPI_SIG_EINJ, 0,
                                (struct acpi_table_header **)&einj_tab);
        if (status == AE_NOT_FOUND) {
-               pr_warn("EINJ table not found.\n");
+               pr_debug("EINJ table not found.\n");
                return -ENODEV;
        } else if (ACPI_FAILURE(status)) {
                pr_err("Failed to get EINJ table: %s\n",
@@ -805,7 +851,7 @@ err_put_table:
        return rc;
 }
 
-static void __exit einj_exit(void)
+static void __exit einj_remove(struct platform_device *pdev)
 {
        struct apei_exec_context ctx;
 
@@ -826,6 +872,40 @@ static void __exit einj_exit(void)
        acpi_put_table((struct acpi_table_header *)einj_tab);
 }
 
+static struct platform_device *einj_dev;
+static struct platform_driver einj_driver = {
+       .remove_new = einj_remove,
+       .driver = {
+               .name = "acpi-einj",
+       },
+};
+
+static int __init einj_init(void)
+{
+       struct platform_device_info einj_dev_info = {
+               .name = "acpi-einj",
+               .id = -1,
+       };
+       int rc;
+
+       einj_dev = platform_device_register_full(&einj_dev_info);
+       if (IS_ERR(einj_dev))
+               return PTR_ERR(einj_dev);
+
+       rc = platform_driver_probe(&einj_driver, einj_probe);
+       einj_initialized = rc == 0;
+
+       return 0;
+}
+
+static void __exit einj_exit(void)
+{
+       if (einj_initialized)
+               platform_driver_unregister(&einj_driver);
+
+       platform_device_del(einj_dev);
+}
+
 module_init(einj_init);
 module_exit(einj_exit);
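einj's module init becomes a platform device/driver pair: einj_init() registers an "acpi-einj" device and binds it with platform_driver_probe(), which suits one-shot, non-deferrable probing, and the outcome is recorded in einj_initialized so einj-cxl can check it. A minimal self-contained sketch of that registration shape (names assumed; unlike the real init above, the sketch simply propagates the probe result):

#include <linux/module.h>
#include <linux/platform_device.h>

static int __init sketch_probe(struct platform_device *pdev)
{
	return 0;	/* table lookup, debugfs setup, ... */
}

static struct platform_driver sketch_driver = {
	.driver = { .name = "acpi-einj" },
};

static int __init sketch_init(void)
{
	struct platform_device *dev;

	dev = platform_device_register_simple("acpi-einj", -1, NULL, 0);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	/* Probes exactly once; no deferred probing is possible. */
	return platform_driver_probe(&sketch_driver, sketch_probe);
}
module_init(sketch_init);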
 
diff --git a/drivers/acpi/apei/einj-cxl.c b/drivers/acpi/apei/einj-cxl.c
new file mode 100644 (file)
index 0000000..8b8be0c
--- /dev/null
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CXL Error INJection support. Used by CXL core to inject
+ * protocol errors into CXL ports.
+ *
+ * Copyright (C) 2023 Advanced Micro Devices, Inc.
+ *
+ * Author: Ben Cheatham <benjamin.cheatham@amd.com>
+ */
+#include <linux/einj-cxl.h>
+#include <linux/seq_file.h>
+#include <linux/pci.h>
+
+#include "apei-internal.h"
+
+/* Defined in einj-core.c */
+extern bool einj_initialized;
+
+static struct { u32 mask; const char *str; } const einj_cxl_error_type_string[] = {
+       { ACPI_EINJ_CXL_CACHE_CORRECTABLE, "CXL.cache Protocol Correctable" },
+       { ACPI_EINJ_CXL_CACHE_UNCORRECTABLE, "CXL.cache Protocol Uncorrectable non-fatal" },
+       { ACPI_EINJ_CXL_CACHE_FATAL, "CXL.cache Protocol Uncorrectable fatal" },
+       { ACPI_EINJ_CXL_MEM_CORRECTABLE, "CXL.mem Protocol Correctable" },
+       { ACPI_EINJ_CXL_MEM_UNCORRECTABLE, "CXL.mem Protocol Uncorrectable non-fatal" },
+       { ACPI_EINJ_CXL_MEM_FATAL, "CXL.mem Protocol Uncorrectable fatal" },
+};
+
+int einj_cxl_available_error_type_show(struct seq_file *m, void *v)
+{
+       int cxl_err, rc;
+       u32 available_error_type = 0;
+
+       rc = einj_get_available_error_type(&available_error_type);
+       if (rc)
+               return rc;
+
+       for (int pos = 0; pos < ARRAY_SIZE(einj_cxl_error_type_string); pos++) {
+               cxl_err = ACPI_EINJ_CXL_CACHE_CORRECTABLE << pos;
+
+               if (available_error_type & cxl_err)
+                       seq_printf(m, "0x%08x\t%s\n",
+                                  einj_cxl_error_type_string[pos].mask,
+                                  einj_cxl_error_type_string[pos].str);
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_available_error_type_show, CXL);
+
+static int cxl_dport_get_sbdf(struct pci_dev *dport_dev, u64 *sbdf)
+{
+       struct pci_bus *pbus;
+       struct pci_host_bridge *bridge;
+       u64 seg = 0, bus;
+
+       pbus = dport_dev->bus;
+       bridge = pci_find_host_bridge(pbus);
+
+       if (!bridge)
+               return -ENODEV;
+
+       if (bridge->domain_nr != PCI_DOMAIN_NR_NOT_SET)
+               seg = bridge->domain_nr;
+
+       bus = pbus->number;
+       *sbdf = (seg << 24) | (bus << 16) | dport_dev->devfn;
+
+       return 0;
+}
+
+int einj_cxl_inject_rch_error(u64 rcrb, u64 type)
+{
+       int rc;
+
+       /* Only CXL error types can be specified */
+       if (!einj_is_cxl_error_type(type))
+               return -EINVAL;
+
+       rc = einj_validate_error_type(type);
+       if (rc)
+               return rc;
+
+       return einj_cxl_rch_error_inject(type, 0x2, rcrb, GENMASK_ULL(63, 0),
+                                        0, 0);
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_rch_error, CXL);
+
+int einj_cxl_inject_error(struct pci_dev *dport, u64 type)
+{
+       u64 param4 = 0;
+       int rc;
+
+       /* Only CXL error types can be specified */
+       if (!einj_is_cxl_error_type(type))
+               return -EINVAL;
+
+       rc = einj_validate_error_type(type);
+       if (rc)
+               return rc;
+
+       rc = cxl_dport_get_sbdf(dport, &param4);
+       if (rc)
+               return rc;
+
+       return einj_error_inject(type, 0x4, 0, 0, 0, param4);
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_inject_error, CXL);
+
+bool einj_cxl_is_initialized(void)
+{
+       return einj_initialized;
+}
+EXPORT_SYMBOL_NS_GPL(einj_cxl_is_initialized, CXL);
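cxl_dport_get_sbdf() packs the injection target as (segment << 24) | (bus << 16) | devfn for EINJ's SET_ERROR_TYPE_WITH_ADDRESS PCIe SBDF parameter, exactly as the helper above does. A worked example with invented topology numbers:

#include <stdio.h>

int main(void)
{
	unsigned long long seg = 0, bus = 0x23;
	unsigned int devfn = (0x1b << 3) | 0x2;	/* device 0x1b, function 2 */
	unsigned long long sbdf = (seg << 24) | (bus << 16) | devfn;

	printf("sbdf = %#llx\n", sbdf);	/* prints 0x2300da */
	return 0;
}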
index d6b85f0f6082f72421168b26fec4b2fff09b4e13..2c8ccc91ebe6dfdc90dbf529580383080fde6f56 100644 (file)
@@ -59,9 +59,8 @@ struct target_cache {
 };
 
 enum {
-       NODE_ACCESS_CLASS_0 = 0,
-       NODE_ACCESS_CLASS_1,
-       NODE_ACCESS_CLASS_GENPORT_SINK,
+       NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL = ACCESS_COORDINATE_MAX,
+       NODE_ACCESS_CLASS_GENPORT_SINK_CPU,
        NODE_ACCESS_CLASS_MAX,
 };
 
@@ -75,6 +74,7 @@ struct memory_target {
        struct node_cache_attrs cache_attrs;
        u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE];
        bool registered;
+       bool ext_updated;       /* externally updated */
 };
 
 struct memory_initiator {
@@ -127,7 +127,8 @@ static struct memory_target *acpi_find_genport_target(u32 uid)
 /**
  * acpi_get_genport_coordinates - Retrieve the access coordinates for a generic port
  * @uid: ACPI unique id
- * @coord: The access coordinates written back out for the generic port
+ * @coord: The access coordinates written back out for the generic port.
+ *        Expect 2 levels array.
  *
  * Return: 0 on success. Errno on failure.
  *
@@ -143,7 +144,10 @@ int acpi_get_genport_coordinates(u32 uid,
        if (!target)
                return -ENOENT;
 
-       *coord = target->coord[NODE_ACCESS_CLASS_GENPORT_SINK];
+       coord[ACCESS_COORDINATE_LOCAL] =
+               target->coord[NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL];
+       coord[ACCESS_COORDINATE_CPU] =
+               target->coord[NODE_ACCESS_CLASS_GENPORT_SINK_CPU];
 
        return 0;
 }
@@ -325,6 +329,35 @@ static void hmat_update_target_access(struct memory_target *target,
        }
 }
 
+int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
+                                  enum access_coordinate_class access)
+{
+       struct memory_target *target;
+       int pxm;
+
+       if (nid == NUMA_NO_NODE)
+               return -EINVAL;
+
+       pxm = node_to_pxm(nid);
+       guard(mutex)(&target_lock);
+       target = find_mem_target(pxm);
+       if (!target)
+               return -ENODEV;
+
+       hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY,
+                                 coord->read_latency, access);
+       hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY,
+                                 coord->write_latency, access);
+       hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH,
+                                 coord->read_bandwidth, access);
+       hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH,
+                                 coord->write_bandwidth, access);
+       target->ext_updated = true;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(hmat_update_target_coordinates);
+
 static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc)
 {
        struct memory_locality *loc;
@@ -374,11 +407,11 @@ static __init void hmat_update_target(unsigned int tgt_pxm, unsigned int init_px
 
        if (target && target->processor_pxm == init_pxm) {
                hmat_update_target_access(target, type, value,
-                                         NODE_ACCESS_CLASS_0);
+                                         ACCESS_COORDINATE_LOCAL);
                /* If the node has a CPU, update access 1 */
                if (node_state(pxm_to_node(init_pxm), N_CPU))
                        hmat_update_target_access(target, type, value,
-                                                 NODE_ACCESS_CLASS_1);
+                                                 ACCESS_COORDINATE_CPU);
        }
 }
 
@@ -696,8 +729,13 @@ static void hmat_update_target_attrs(struct memory_target *target,
        u32 best = 0;
        int i;
 
+       /* Don't update if an external agent has changed the data.  */
+       if (target->ext_updated)
+               return;
+
        /* Don't update for generic port if there's no device handle */
-       if (access == NODE_ACCESS_CLASS_GENPORT_SINK &&
+       if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL ||
+            access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) &&
            !(*(u16 *)target->gen_port_device_handle))
                return;
 
@@ -709,7 +747,8 @@ static void hmat_update_target_attrs(struct memory_target *target,
         */
        if (target->processor_pxm != PXM_INVAL) {
                cpu_nid = pxm_to_node(target->processor_pxm);
-               if (access == 0 || node_state(cpu_nid, N_CPU)) {
+               if (access == ACCESS_COORDINATE_LOCAL ||
+                   node_state(cpu_nid, N_CPU)) {
                        set_bit(target->processor_pxm, p_nodes);
                        return;
                }
@@ -737,7 +776,9 @@ static void hmat_update_target_attrs(struct memory_target *target,
                list_for_each_entry(initiator, &initiators, node) {
                        u32 value;
 
-                       if (access == 1 && !initiator->has_cpu) {
+                       if ((access == ACCESS_COORDINATE_CPU ||
+                            access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) &&
+                           !initiator->has_cpu) {
                                clear_bit(initiator->processor_pxm, p_nodes);
                                continue;
                        }
@@ -770,20 +811,24 @@ static void __hmat_register_target_initiators(struct memory_target *target,
        }
 }
 
-static void hmat_register_generic_target_initiators(struct memory_target *target)
+static void hmat_update_generic_target(struct memory_target *target)
 {
        static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
 
-       __hmat_register_target_initiators(target, p_nodes,
-                                         NODE_ACCESS_CLASS_GENPORT_SINK);
+       hmat_update_target_attrs(target, p_nodes,
+                                NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL);
+       hmat_update_target_attrs(target, p_nodes,
+                                NODE_ACCESS_CLASS_GENPORT_SINK_CPU);
 }
 
 static void hmat_register_target_initiators(struct memory_target *target)
 {
        static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
 
-       __hmat_register_target_initiators(target, p_nodes, 0);
-       __hmat_register_target_initiators(target, p_nodes, 1);
+       __hmat_register_target_initiators(target, p_nodes,
+                                         ACCESS_COORDINATE_LOCAL);
+       __hmat_register_target_initiators(target, p_nodes,
+                                         ACCESS_COORDINATE_CPU);
 }
 
 static void hmat_register_target_cache(struct memory_target *target)
@@ -835,7 +880,7 @@ static void hmat_register_target(struct memory_target *target)
         */
        mutex_lock(&target_lock);
        if (*(u16 *)target->gen_port_device_handle) {
-               hmat_register_generic_target_initiators(target);
+               hmat_update_generic_target(target);
                target->registered = true;
        }
        mutex_unlock(&target_lock);
@@ -854,8 +899,8 @@ static void hmat_register_target(struct memory_target *target)
        if (!target->registered) {
                hmat_register_target_initiators(target);
                hmat_register_target_cache(target);
-               hmat_register_target_perf(target, NODE_ACCESS_CLASS_0);
-               hmat_register_target_perf(target, NODE_ACCESS_CLASS_1);
+               hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL);
+               hmat_register_target_perf(target, ACCESS_COORDINATE_CPU);
                target->registered = true;
        }
        mutex_unlock(&target_lock);
@@ -927,7 +972,7 @@ static int hmat_calculate_adistance(struct notifier_block *self,
                return NOTIFY_OK;
 
        mutex_lock(&target_lock);
-       hmat_update_target_attrs(target, p_nodes, 1);
+       hmat_update_target_attrs(target, p_nodes, ACCESS_COORDINATE_CPU);
        mutex_unlock(&target_lock);
 
        perf = &target->coord[1];
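The hunks above replace the bare 0/1 access-class numbers with named enum access_coordinate_class values, matching the 0 -> ACCESS_COORDINATE_LOCAL and 1 -> ACCESS_COORDINATE_CPU substitutions. For orientation, the enum these hunks rely on (in include/linux/node.h) should look roughly like the sketch below; this is inferred from the usage above, not quoted from the patch:

enum access_coordinate_class {
	/* Best-performing access from any initiator, devices included. */
	ACCESS_COORDINATE_LOCAL,
	/* Best-performing access restricted to CPU-backed initiators. */
	ACCESS_COORDINATE_CPU,
	ACCESS_COORDINATE_MAX,
};
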
index 0214518fc582f47b6ed43faa7154ea60a9d8ae48..e45e64993c504c5db1624aae118d6144e4d912ee 100644 (file)
@@ -29,6 +29,8 @@ static int node_to_pxm_map[MAX_NUMNODES]
 unsigned char acpi_srat_revision __initdata;
 static int acpi_numa __initdata;
 
+static int last_real_pxm;
+
 void __init disable_srat(void)
 {
        acpi_numa = -1;
@@ -536,6 +538,7 @@ int __init acpi_numa_init(void)
                if (node_to_pxm_map[i] > fake_pxm)
                        fake_pxm = node_to_pxm_map[i];
        }
+       last_real_pxm = fake_pxm;
        fake_pxm++;
        acpi_table_parse_cedt(ACPI_CEDT_TYPE_CFMWS, acpi_parse_cfmws,
                              &fake_pxm);
@@ -547,6 +550,14 @@ int __init acpi_numa_init(void)
        return 0;
 }
 
+bool acpi_node_backed_by_real_pxm(int nid)
+{
+       int pxm = node_to_pxm(nid);
+
+       return pxm <= last_real_pxm;
+}
+EXPORT_SYMBOL_GPL(acpi_node_backed_by_real_pxm);
+
 static int acpi_get_pxm(acpi_handle h)
 {
        unsigned long long pxm;
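acpi_node_backed_by_real_pxm() gives callers a way to tell firmware-described proximity domains apart from the fake ones synthesized while parsing CEDT CFMWS entries (everything above last_real_pxm). A hypothetical consumer check, assuming a NUMA node id nid from the surrounding code:

	/* Sketch: only trust firmware-provided locality data for real PXMs. */
	if (!acpi_node_backed_by_real_pxm(nid))
		return -EOPNOTSUPP;
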
index b07f7d091d133c6ade25749ca746b2a64c6bb5e8..b976e5fc3fbcddc76a88210d86401ea53799f6ba 100644 (file)
@@ -253,7 +253,7 @@ int __init_or_acpilib acpi_table_parse_entries_array(
 
        count = acpi_parse_entries_array(id, table_size,
                                         (union fw_table_header *)table_header,
-                                        proc, proc_num, max_entries);
+                                        0, proc, proc_num, max_entries);
 
        acpi_put_table(table_header);
        return count;
index eca24f41556df04ac61747e05aace9622fbcc580..bad28cf42010415bee522cb2f778bd866a756584 100644 (file)
@@ -6086,9 +6086,7 @@ static void print_binder_node_nilocked(struct seq_file *m,
        struct binder_work *w;
        int count;
 
-       count = 0;
-       hlist_for_each_entry(ref, &node->refs, node_entry)
-               count++;
+       count = hlist_count_nodes(&node->refs);
 
        seq_printf(m, "  node %d: u%016llx c%016llx hs %d hw %d ls %d lw %d is %d iw %d tr %d",
                   node->debug_id, (u64)node->ptr, (u64)node->cookie,
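hlist_count_nodes() folds the removed open-coded loop into a generic helper. Its expected behaviour is equivalent to this sketch, using the standard hlist_for_each() iterator:

static inline size_t hlist_count_nodes(struct hlist_head *head)
{
	struct hlist_node *pos;
	size_t count = 0;

	hlist_for_each(pos, head)
		count++;

	return count;
}

Callers such as this one must still hold whatever lock protects the list, exactly as the open-coded loop did.
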
index 17f6ccee53c7c26e1a3a4f19b1aee82a7950cc7a..4ac854f6b05777c669d7de39ab006d963b74bd48 100644 (file)
@@ -1188,7 +1188,7 @@ static int pata_macio_attach(struct macio_dev *mdev,
        return rc;
 }
 
-static int pata_macio_detach(struct macio_dev *mdev)
+static void pata_macio_detach(struct macio_dev *mdev)
 {
        struct ata_host *host = macio_get_drvdata(mdev);
        struct pata_macio_priv *priv = host->private_data;
@@ -1203,8 +1203,6 @@ static int pata_macio_detach(struct macio_dev *mdev)
        ata_host_detach(host);
 
        unlock_media_bay(priv->mdev->media_bay);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM_SLEEP
index f1e79263fe61eb410dd27b5ac6b13b6c196e290a..23b8cba4a2a3b87c34ff7e7b30f25784eab7e4dd 100644 (file)
@@ -898,6 +898,37 @@ err:
        return rc;
 }
 
+static unsigned int cpu_map_shared_cache(bool online, unsigned int cpu,
+                                        cpumask_t **map)
+{
+       struct cacheinfo *llc, *sib_llc;
+       unsigned int sibling;
+
+       if (!last_level_cache_is_valid(cpu))
+               return 0;
+
+       llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+
+       if (llc->type != CACHE_TYPE_DATA && llc->type != CACHE_TYPE_UNIFIED)
+               return 0;
+
+       if (online) {
+               *map = &llc->shared_cpu_map;
+               return cpumask_weight(*map);
+       }
+
+       /* shared_cpu_map of offlined CPU will be cleared, so use sibling map */
+       for_each_cpu(sibling, &llc->shared_cpu_map) {
+               if (sibling == cpu || !last_level_cache_is_valid(sibling))
+                       continue;
+               sib_llc = per_cpu_cacheinfo_idx(sibling, cache_leaves(sibling) - 1);
+               *map = &sib_llc->shared_cpu_map;
+               return cpumask_weight(*map);
+       }
+
+       return 0;
+}
+
 /*
  * Calculate the size of the per-CPU data cache slice.  This can be
  * used to estimate the size of the data cache slice that can be used
@@ -929,28 +960,31 @@ static void update_per_cpu_data_slice_size_cpu(unsigned int cpu)
                ci->per_cpu_data_slice_size = llc->size / nr_shared;
 }
 
-static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu)
+static void update_per_cpu_data_slice_size(bool cpu_online, unsigned int cpu,
+                                          cpumask_t *cpu_map)
 {
        unsigned int icpu;
 
-       for_each_online_cpu(icpu) {
+       for_each_cpu(icpu, cpu_map) {
                if (!cpu_online && icpu == cpu)
                        continue;
                update_per_cpu_data_slice_size_cpu(icpu);
+               setup_pcp_cacheinfo(icpu);
        }
 }
 
 static int cacheinfo_cpu_online(unsigned int cpu)
 {
        int rc = detect_cache_attributes(cpu);
+       cpumask_t *cpu_map;
 
        if (rc)
                return rc;
        rc = cache_add_dev(cpu);
        if (rc)
                goto err;
-       update_per_cpu_data_slice_size(true, cpu);
-       setup_pcp_cacheinfo();
+       if (cpu_map_shared_cache(true, cpu, &cpu_map))
+               update_per_cpu_data_slice_size(true, cpu, cpu_map);
        return 0;
 err:
        free_cache_attributes(cpu);
@@ -959,12 +993,16 @@ err:
 
 static int cacheinfo_cpu_pre_down(unsigned int cpu)
 {
+       cpumask_t *cpu_map;
+       unsigned int nr_shared;
+
+       nr_shared = cpu_map_shared_cache(false, cpu, &cpu_map);
        if (cpumask_test_and_clear_cpu(cpu, &cache_dev_map))
                cpu_cache_sysfs_exit(cpu);
 
        free_cache_attributes(cpu);
-       update_per_cpu_data_slice_size(false, cpu);
-       setup_pcp_cacheinfo();
+       if (nr_shared > 1)
+               update_per_cpu_data_slice_size(false, cpu, cpu_map);
        return 0;
 }
 
index 0b33e81f9c9b62d6cdd335c5638dd89f03fbc9c3..f5a6bffce5188090a6f0be2775e74ffd85ffdf59 100644 (file)
@@ -144,7 +144,7 @@ static DEVICE_ATTR(release, S_IWUSR, NULL, cpu_release_store);
 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
 #endif /* CONFIG_HOTPLUG_CPU */
 
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
 #include <linux/kexec.h>
 
 static ssize_t crash_notes_show(struct device *dev,
@@ -189,14 +189,14 @@ static const struct attribute_group crash_note_cpu_attr_group = {
 #endif
 
 static const struct attribute_group *common_cpu_attr_groups[] = {
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
        &crash_note_cpu_attr_group,
 #endif
        NULL
 };
 
 static const struct attribute_group *hotplugable_cpu_attr_groups[] = {
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_DUMP
        &crash_note_cpu_attr_group,
 #endif
        NULL
index 14f964a7719bd046999f28dcb4c28abc35d0b725..c0436f46cfb7012ee651cec0c1f2b7af66570c0b 100644 (file)
@@ -188,6 +188,7 @@ static int memory_block_online(struct memory_block *mem)
        unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
        unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
        unsigned long nr_vmemmap_pages = 0;
+       struct memory_notify arg;
        struct zone *zone;
        int ret;
 
@@ -207,9 +208,19 @@ static int memory_block_online(struct memory_block *mem)
        if (mem->altmap)
                nr_vmemmap_pages = mem->altmap->free;
 
+       arg.altmap_start_pfn = start_pfn;
+       arg.altmap_nr_pages = nr_vmemmap_pages;
+       arg.start_pfn = start_pfn + nr_vmemmap_pages;
+       arg.nr_pages = nr_pages - nr_vmemmap_pages;
        mem_hotplug_begin();
+       ret = memory_notify(MEM_PREPARE_ONLINE, &arg);
+       ret = notifier_to_errno(ret);
+       if (ret)
+               goto out_notifier;
+
        if (nr_vmemmap_pages) {
-               ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
+               ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages,
+                                               zone, mem->altmap->inaccessible);
                if (ret)
                        goto out;
        }
@@ -231,7 +242,11 @@ static int memory_block_online(struct memory_block *mem)
                                          nr_vmemmap_pages);
 
        mem->zone = zone;
+       mem_hotplug_done();
+       return ret;
 out:
+       memory_notify(MEM_FINISH_OFFLINE, &arg);
+out_notifier:
        mem_hotplug_done();
        return ret;
 }
@@ -244,6 +259,7 @@ static int memory_block_offline(struct memory_block *mem)
        unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
        unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
        unsigned long nr_vmemmap_pages = 0;
+       struct memory_notify arg;
        int ret;
 
        if (!mem->zone)
@@ -275,6 +291,11 @@ static int memory_block_offline(struct memory_block *mem)
                mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
 
        mem->zone = NULL;
+       arg.altmap_start_pfn = start_pfn;
+       arg.altmap_nr_pages = nr_vmemmap_pages;
+       arg.start_pfn = start_pfn + nr_vmemmap_pages;
+       arg.nr_pages = nr_pages - nr_vmemmap_pages;
+       memory_notify(MEM_FINISH_OFFLINE, &arg);
 out:
        mem_hotplug_done();
        return ret;
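The new MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE events bracket the window in which a block's self-hosted vmemmap (altmap) pages exist, and struct memory_notify now carries the altmap range separately from the range being onlined. A consumer would hook them through the existing memory notifier chain; a minimal sketch, with all names illustrative:

static int example_memory_callback(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	switch (action) {
	case MEM_PREPARE_ONLINE:
		/* arg->altmap_start_pfn/altmap_nr_pages describe the vmemmap
		 * carved out of the block; arg->start_pfn/nr_pages describe
		 * the pages that will actually be onlined. */
		break;
	case MEM_FINISH_OFFLINE:
		/* Undo whatever MEM_PREPARE_ONLINE set up. */
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block example_memory_nb = {
	.notifier_call = example_memory_callback,
};

/* Registered once at init time with register_memory_notifier(&example_memory_nb). */
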
index 1c05640461dd1679755c3811b4677e152bb8d875..eb72580288e62727e5b2198a6451cf9c2533225a 100644 (file)
@@ -126,7 +126,7 @@ static void node_access_release(struct device *dev)
 }
 
 static struct node_access_nodes *node_init_node_access(struct node *node,
-                                                      unsigned int access)
+                                                      enum access_coordinate_class access)
 {
        struct node_access_nodes *access_node;
        struct device *dev;
@@ -191,7 +191,7 @@ static struct attribute *access_attrs[] = {
  * @access: The access class for the given attributes
  */
 void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
-                        unsigned int access)
+                        enum access_coordinate_class access)
 {
        struct node_access_nodes *c;
        struct node *node;
@@ -215,6 +215,7 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
                }
        }
 }
+EXPORT_SYMBOL_GPL(node_set_perf_attrs);
 
 /**
  * struct node_cache_info - Internal tracking for memory node caches
@@ -689,7 +690,7 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid)
  */
 int register_memory_node_under_compute_node(unsigned int mem_nid,
                                            unsigned int cpu_nid,
-                                           unsigned int access)
+                                           enum access_coordinate_class access)
 {
        struct node *init_node, *targ_node;
        struct node_access_nodes *initiator, *target;
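Exporting node_set_perf_attrs() (and typing its access parameter as the new enum) lets modular code publish per-node performance data that was previously reachable only from built-in callers. A hypothetical module-side call, with illustrative numbers and HMAT-style units (MB/s, ns) assumed:

	struct access_coordinate coord = {
		.read_bandwidth  = 15000,
		.write_bandwidth = 10000,
		.read_latency    = 300,
		.write_latency   = 350,
	};

	node_set_perf_attrs(nid, &coord, ACCESS_COORDINATE_CPU);
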
index c99dd6698977ea61992aa0cb087109ef1c380c3f..5286cb8e0824d11cbe1135c892be1556ce7fee77 100644 (file)
@@ -28,7 +28,7 @@
 
 static char version[] =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
index 55af4efd79835666a857884a992ba37c2d7d3a08..8237b08c49d8617d338193e5d4a1900f81182f53 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/cpu.h>
 #include <linux/crypto.h>
+#include <linux/vmalloc.h>
 
 #include "zcomp.h"
 
@@ -37,7 +38,7 @@ static void zcomp_strm_free(struct zcomp_strm *zstrm)
 {
        if (!IS_ERR_OR_NULL(zstrm->tfm))
                crypto_free_comp(zstrm->tfm);
-       free_pages((unsigned long)zstrm->buffer, 1);
+       vfree(zstrm->buffer);
        zstrm->tfm = NULL;
        zstrm->buffer = NULL;
 }
@@ -53,7 +54,7 @@ static int zcomp_strm_init(struct zcomp_strm *zstrm, struct zcomp *comp)
         * allocate 2 pages. 1 for compressed data, plus 1 extra for the
         * case when compressed size is larger than the original one
         */
-       zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+       zstrm->buffer = vzalloc(2 * PAGE_SIZE);
        if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) {
                zcomp_strm_free(zstrm);
                return -ENOMEM;
index cdefdef93da8c00d3086e10c0980f7c5462694e8..e9fe63da0e9b1c7956546b8f09f6ac9d88773798 100644 (file)
@@ -39,5 +39,4 @@ int zcomp_compress(struct zcomp_strm *zstrm,
 int zcomp_decompress(struct zcomp_strm *zstrm,
                const void *src, unsigned int src_len, void *dst);
 
-bool zcomp_set_max_streams(struct zcomp *comp, int num_strm);
 #endif /* _ZCOMP_H_ */
index da7a20fa6152a97462dbeeefc8fbb7a09409a91c..f0639df6cd184a1ca0da9c6a12b92c61a06d62e9 100644 (file)
@@ -1337,7 +1337,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
        src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
        if (size == PAGE_SIZE) {
                dst = kmap_local_page(page);
-               memcpy(dst, src, PAGE_SIZE);
+               copy_page(dst, src);
                kunmap_local(dst);
                ret = 0;
        } else {
index e97c1d1c7578bdbb9cbe85fa1986a53f59cc6637..595fb22b73e0689a8f7b26f9bcbbae052ff8b9e9 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/clk.h>
 #include <linux/reset.h>
 #include <linux/time64.h>
-#include <linux/clk.h>
 #include <linux/sysfs.h>
 
 #define APB_EHB_ISR                    0x00
index 554e1992edd44ca0a41587d20bd7e02945da0a8f..8baf14bd5effbb68bc857b6d8ba4a8cf9397f5ea 100644 (file)
@@ -118,7 +118,7 @@ static struct attribute *mips_cdmm_dev_attrs[] = {
 };
 ATTRIBUTE_GROUPS(mips_cdmm_dev);
 
-struct bus_type mips_cdmm_bustype = {
+const struct bus_type mips_cdmm_bustype = {
        .name           = "cdmm",
        .dev_groups     = mips_cdmm_dev_groups,
        .match          = mips_cdmm_match,
index b6f27566e0ba3b51e2df436661cb318deefcf9ad..4e501d5c121ffc59065c9e62fa08b9910ec3795c 100644 (file)
@@ -89,10 +89,8 @@ static int hisi_rng_probe(struct platform_device *pdev)
        rng->rng.read = hisi_rng_read;
 
        ret = devm_hwrng_register(&pdev->dev, &rng->rng);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to register hwrng\n");
-               return ret;
-       }
+       if (ret)
+               return dev_err_probe(&pdev->dev, ret, "failed to register hwrng\n");
 
        return 0;
 }
index 2e669e7c14d31caae83227e03f366fe48934437e..1b49e3a86d57b7dc6ec1e0a8b4c21bb1902746b1 100644 (file)
@@ -29,7 +29,7 @@
 static char version[] =
        DRV_MODULE_NAME " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
 
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("Niagara2 RNG driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
index 661574bb0acf59a8f623bc20267901021e8f9b6a..45ca33b3dcb268168a004726c226d9be440a9c77 100644 (file)
@@ -167,7 +167,7 @@ static struct i2c_driver st33zp24_i2c_driver = {
 
 module_i2c_driver(st33zp24_i2c_driver);
 
-MODULE_AUTHOR("TPM support (TPMsupport@list.st.com)");
+MODULE_AUTHOR("TPM support <TPMsupport@list.st.com>");
 MODULE_DESCRIPTION("STM TPM 1.2 I2C ST33 Driver");
 MODULE_VERSION("1.3.0");
 MODULE_LICENSE("GPL");
index f5811b301d3b224ab67621f1cb204c07ecdf954f..5149231f3de28bc54c455b90479f59689294bd64 100644 (file)
@@ -284,7 +284,7 @@ static struct spi_driver st33zp24_spi_driver = {
 
 module_spi_driver(st33zp24_spi_driver);
 
-MODULE_AUTHOR("TPM support (TPMsupport@list.st.com)");
+MODULE_AUTHOR("TPM support <TPMsupport@list.st.com>");
 MODULE_DESCRIPTION("STM TPM 1.2 SPI ST33 Driver");
 MODULE_VERSION("1.3.0");
 MODULE_LICENSE("GPL");
index a5b554cd477861ffa46d09aec45469ee35438e3a..c0771980bc2ff1cceabd21a2060b6c9d0d0195e4 100644 (file)
@@ -582,7 +582,7 @@ int st33zp24_pm_resume(struct device *dev)
 EXPORT_SYMBOL(st33zp24_pm_resume);
 #endif
 
-MODULE_AUTHOR("TPM support (TPMsupport@list.st.com)");
+MODULE_AUTHOR("TPM support <TPMsupport@list.st.com>");
 MODULE_DESCRIPTION("ST33ZP24 TPM 1.2 driver");
 MODULE_VERSION("1.3.0");
 MODULE_LICENSE("GPL");
index 66b16d26eecc783486e46619ff3527b915df4511..757336324c904cf98451898d0f0ef37b33c7df80 100644 (file)
@@ -524,7 +524,7 @@ static void __exit tpm_exit(void)
 subsys_initcall(tpm_init);
 module_exit(tpm_exit);
 
-MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
+MODULE_AUTHOR("Leendert van Doorn <leendert@watson.ibm.com>");
 MODULE_DESCRIPTION("TPM Driver");
 MODULE_VERSION("2.0");
 MODULE_LICENSE("GPL");
index 54a6750a67578114b12b6be5a684ed9ac349ca33..9fb2defa9dc4216885a174537076ad3399ae0843 100644 (file)
@@ -229,7 +229,7 @@ static void __exit cleanup_atmel(void)
 module_init(init_atmel);
 module_exit(cleanup_atmel);
 
-MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
+MODULE_AUTHOR("Leendert van Doorn <leendert@watson.ibm.com>");
 MODULE_DESCRIPTION("TPM Driver");
 MODULE_VERSION("2.0");
 MODULE_LICENSE("GPL");
index 5490f7e0fa4369f75e175ea2899b11fab7ed9b83..3c3ee5f551db1bca4faf43a8294024f92a848fa3 100644 (file)
@@ -654,6 +654,6 @@ static struct i2c_driver i2c_nuvoton_driver = {
 
 module_i2c_driver(i2c_nuvoton_driver);
 
-MODULE_AUTHOR("Dan Morav (dan.morav@nuvoton.com)");
+MODULE_AUTHOR("Dan Morav <dan.morav@nuvoton.com>");
 MODULE_DESCRIPTION("Nuvoton TPM I2C Driver");
 MODULE_LICENSE("GPL");
index 038701d4835130478114a04ab29fda3d189c3c42..0f62bbc940daa814b260ba55dce04a6dd7ecd5f7 100644 (file)
@@ -410,7 +410,7 @@ static void __exit cleanup_nsc(void)
 module_init(init_nsc);
 module_exit(cleanup_nsc);
 
-MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
+MODULE_AUTHOR("Leendert van Doorn <leendert@watson.ibm.com>");
 MODULE_DESCRIPTION("TPM Driver");
 MODULE_VERSION("2.0");
 MODULE_LICENSE("GPL");
index 14652aaf8254680c073ba41ee87c8a153361c7b6..2f7326d297adb99102625306ba56c79c316190b2 100644 (file)
@@ -429,7 +429,7 @@ static void __exit cleanup_tis(void)
 
 module_init(init_tis);
 module_exit(cleanup_tis);
-MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
+MODULE_AUTHOR("Leendert van Doorn <leendert@watson.ibm.com>");
 MODULE_DESCRIPTION("TPM Driver");
 MODULE_VERSION("2.0");
 MODULE_LICENSE("GPL");
index 64c875657687d20f7da1894f76a6c4146f86ef15..714070ebb6e7a21d957fc4cff3920e12984c71af 100644 (file)
@@ -1360,7 +1360,7 @@ int tpm_tis_resume(struct device *dev)
 EXPORT_SYMBOL_GPL(tpm_tis_resume);
 #endif
 
-MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
+MODULE_AUTHOR("Leendert van Doorn <leendert@watson.ibm.com>");
 MODULE_DESCRIPTION("TPM Driver");
 MODULE_VERSION("2.0");
 MODULE_LICENSE("GPL");
index 30e953988cabe983f3cc35fe5e450830af7dc60b..11c502039faf58d94db91325e985b03271b488f8 100644 (file)
@@ -711,7 +711,7 @@ static void __exit vtpm_module_exit(void)
 module_init(vtpm_module_init);
 module_exit(vtpm_module_exit);
 
-MODULE_AUTHOR("Stefan Berger (stefanb@us.ibm.com)");
+MODULE_AUTHOR("Stefan Berger <stefanb@us.ibm.com>");
 MODULE_DESCRIPTION("vTPM Driver");
 MODULE_VERSION("0.1");
 MODULE_LICENSE("GPL");
index 909c3137c4283d35402e029645d987c7698115b3..faf88324f7b161c8e716cb33f16ebd7091dbfd2a 100644 (file)
@@ -19,7 +19,7 @@
  * This includes the gates (configured from aspeed_g6_gates), plus the
  * explicitly-configured clocks (ASPEED_CLK_HPLL and up).
  */
-#define ASPEED_G6_NUM_CLKS             72
+#define ASPEED_G6_NUM_CLKS             73
 
 #define ASPEED_G6_SILICON_REV          0x014
 #define CHIP_REVISION_ID                       GENMASK(23, 16)
@@ -157,7 +157,7 @@ static const struct aspeed_gate_data aspeed_g6_gates[] = {
        [ASPEED_CLK_GATE_UART11CLK]     = { 59,  -1, "uart11clk-gate",  "uartx", 0 },   /* UART11 */
        [ASPEED_CLK_GATE_UART12CLK]     = { 60,  -1, "uart12clk-gate",  "uartx", 0 },   /* UART12 */
        [ASPEED_CLK_GATE_UART13CLK]     = { 61,  -1, "uart13clk-gate",  "uartx", 0 },   /* UART13 */
-       [ASPEED_CLK_GATE_FSICLK]        = { 62,  59, "fsiclk-gate",     NULL,    0 },   /* FSI */
+       [ASPEED_CLK_GATE_FSICLK]        = { 62,  59, "fsiclk-gate",     "fsiclk", 0 },  /* FSI */
 };
 
 static const struct clk_div_table ast2600_eclk_div_table[] = {
@@ -821,6 +821,9 @@ static void __init aspeed_g6_cc(struct regmap *map)
 
        hw = clk_hw_register_fixed_factor(NULL, "i3cclk", "apll", 0, 1, 8);
        aspeed_g6_clk_data->hws[ASPEED_CLK_I3C] = hw;
+
+       hw = clk_hw_register_fixed_factor(NULL, "fsiclk", "apll", 0, 1, 4);
+       aspeed_g6_clk_data->hws[ASPEED_CLK_FSI] = hw;
 };
 
 static void __init aspeed_g6_cc_init(struct device_node *np)
index b0122093c6ff87cae7ff5821e85fef0baf860e6d..e48be7a6c0e2b0102f6fefdf37a39b5708609826 100644 (file)
@@ -101,7 +101,6 @@ static void cdce925_pll_find_rate(unsigned long rate,
 
        if (rate <= parent_rate) {
                /* Can always deliver parent_rate in bypass mode */
-               rate = parent_rate;
                *n = 0;
                *m = 0;
        } else {
index 737aa70e2cb3d0998f2c11f534895ad1034f749e..90e6078fb6e1b2d524b3673da1077db3a43bc2ea 100644 (file)
@@ -182,6 +182,46 @@ int __must_check devm_clk_bulk_get_all(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(devm_clk_bulk_get_all);
 
+static void devm_clk_bulk_release_all_enable(struct device *dev, void *res)
+{
+       struct clk_bulk_devres *devres = res;
+
+       clk_bulk_disable_unprepare(devres->num_clks, devres->clks);
+       clk_bulk_put_all(devres->num_clks, devres->clks);
+}
+
+int __must_check devm_clk_bulk_get_all_enable(struct device *dev,
+                                             struct clk_bulk_data **clks)
+{
+       struct clk_bulk_devres *devres;
+       int ret;
+
+       devres = devres_alloc(devm_clk_bulk_release_all_enable,
+                             sizeof(*devres), GFP_KERNEL);
+       if (!devres)
+               return -ENOMEM;
+
+       ret = clk_bulk_get_all(dev, &devres->clks);
+       if (ret > 0) {
+               *clks = devres->clks;
+               devres->num_clks = ret;
+       } else {
+               devres_free(devres);
+               return ret;
+       }
+
+       ret = clk_bulk_prepare_enable(devres->num_clks, *clks);
+       if (!ret) {
+               devres_add(dev, devres);
+       } else {
+               clk_bulk_put_all(devres->num_clks, devres->clks);
+               devres_free(devres);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(devm_clk_bulk_get_all_enable);
+
 static int devm_clk_match(struct device *dev, void *res, void *data)
 {
        struct clk **c = res;
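devm_clk_bulk_get_all_enable() collapses the usual get-all/prepare-enable/unwind boilerplate into one devres-managed call: on success, all of the device's clocks are held prepared and enabled until the driver detaches. A sketch of a consumer probe path, with error handling trimmed:

static int example_probe(struct platform_device *pdev)
{
	struct clk_bulk_data *clks;
	int ret;

	/* Gets, prepares and enables every clock listed for the device. */
	ret = devm_clk_bulk_get_all_enable(&pdev->dev, &clks);
	if (ret)
		return ret;

	/* ... rest of probe; no clk cleanup needed on any exit path. */
	return 0;
}
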
index b3e66202b942422293711ded295e2124ecf00c27..fe0500a1af3ea807839b444ecc59b8a6d324309d 100644 (file)
@@ -57,10 +57,22 @@ static int clk_factor_set_rate(struct clk_hw *hw, unsigned long rate,
        return 0;
 }
 
+static unsigned long clk_factor_recalc_accuracy(struct clk_hw *hw,
+                                               unsigned long parent_accuracy)
+{
+       struct clk_fixed_factor *fix = to_clk_fixed_factor(hw);
+
+       if (fix->flags & CLK_FIXED_FACTOR_FIXED_ACCURACY)
+               return fix->acc;
+
+       return parent_accuracy;
+}
+
 const struct clk_ops clk_fixed_factor_ops = {
        .round_rate = clk_factor_round_rate,
        .set_rate = clk_factor_set_rate,
        .recalc_rate = clk_factor_recalc_rate,
+       .recalc_accuracy = clk_factor_recalc_accuracy,
 };
 EXPORT_SYMBOL_GPL(clk_fixed_factor_ops);
 
@@ -79,13 +91,12 @@ static void devm_clk_hw_register_fixed_factor_release(struct device *dev, void *
 static struct clk_hw *
 __clk_hw_register_fixed_factor(struct device *dev, struct device_node *np,
                const char *name, const char *parent_name,
-               const struct clk_hw *parent_hw, int index,
+               const struct clk_hw *parent_hw, const struct clk_parent_data *pdata,
                unsigned long flags, unsigned int mult, unsigned int div,
-               bool devm)
+               unsigned long acc, unsigned int fixflags, bool devm)
 {
        struct clk_fixed_factor *fix;
        struct clk_init_data init = { };
-       struct clk_parent_data pdata = { .index = index };
        struct clk_hw *hw;
        int ret;
 
@@ -105,6 +116,8 @@ __clk_hw_register_fixed_factor(struct device *dev, struct device_node *np,
        fix->mult = mult;
        fix->div = div;
        fix->hw.init = &init;
+       fix->acc = acc;
+       fix->flags = fixflags;
 
        init.name = name;
        init.ops = &clk_fixed_factor_ops;
@@ -114,7 +127,7 @@ __clk_hw_register_fixed_factor(struct device *dev, struct device_node *np,
        else if (parent_hw)
                init.parent_hws = &parent_hw;
        else
-               init.parent_data = &pdata;
+               init.parent_data = pdata;
        init.num_parents = 1;
 
        hw = &fix->hw;
@@ -151,8 +164,10 @@ struct clk_hw *devm_clk_hw_register_fixed_factor_index(struct device *dev,
                const char *name, unsigned int index, unsigned long flags,
                unsigned int mult, unsigned int div)
 {
-       return __clk_hw_register_fixed_factor(dev, NULL, name, NULL, NULL, index,
-                                             flags, mult, div, true);
+       const struct clk_parent_data pdata = { .index = index };
+
+       return __clk_hw_register_fixed_factor(dev, NULL, name, NULL, NULL, &pdata,
+                                             flags, mult, div, 0, 0, true);
 }
 EXPORT_SYMBOL_GPL(devm_clk_hw_register_fixed_factor_index);
 
@@ -173,8 +188,10 @@ struct clk_hw *devm_clk_hw_register_fixed_factor_parent_hw(struct device *dev,
                const char *name, const struct clk_hw *parent_hw,
                unsigned long flags, unsigned int mult, unsigned int div)
 {
+       const struct clk_parent_data pdata = { .index = -1 };
+
        return __clk_hw_register_fixed_factor(dev, NULL, name, NULL, parent_hw,
-                                             -1, flags, mult, div, true);
+                                             &pdata, flags, mult, div, 0, 0, true);
 }
 EXPORT_SYMBOL_GPL(devm_clk_hw_register_fixed_factor_parent_hw);
 
@@ -182,9 +199,10 @@ struct clk_hw *clk_hw_register_fixed_factor_parent_hw(struct device *dev,
                const char *name, const struct clk_hw *parent_hw,
                unsigned long flags, unsigned int mult, unsigned int div)
 {
-       return __clk_hw_register_fixed_factor(dev, NULL, name, NULL,
-                                             parent_hw, -1, flags, mult, div,
-                                             false);
+       const struct clk_parent_data pdata = { .index = -1 };
+
+       return __clk_hw_register_fixed_factor(dev, NULL, name, NULL, parent_hw,
+                                             &pdata, flags, mult, div, 0, 0, false);
 }
 EXPORT_SYMBOL_GPL(clk_hw_register_fixed_factor_parent_hw);
 
@@ -192,11 +210,37 @@ struct clk_hw *clk_hw_register_fixed_factor(struct device *dev,
                const char *name, const char *parent_name, unsigned long flags,
                unsigned int mult, unsigned int div)
 {
-       return __clk_hw_register_fixed_factor(dev, NULL, name, parent_name, NULL, -1,
-                                             flags, mult, div, false);
+       const struct clk_parent_data pdata = { .index = -1 };
+
+       return __clk_hw_register_fixed_factor(dev, NULL, name, parent_name, NULL,
+                                             &pdata, flags, mult, div, 0, 0, false);
 }
 EXPORT_SYMBOL_GPL(clk_hw_register_fixed_factor);
 
+struct clk_hw *clk_hw_register_fixed_factor_fwname(struct device *dev,
+               struct device_node *np, const char *name, const char *fw_name,
+               unsigned long flags, unsigned int mult, unsigned int div)
+{
+       const struct clk_parent_data pdata = { .index = -1, .fw_name = fw_name };
+
+       return __clk_hw_register_fixed_factor(dev, np, name, NULL, NULL,
+                       &pdata, flags, mult, div, 0, 0, false);
+}
+EXPORT_SYMBOL_GPL(clk_hw_register_fixed_factor_fwname);
+
+struct clk_hw *clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev,
+               struct device_node *np, const char *name, const char *fw_name,
+               unsigned long flags, unsigned int mult, unsigned int div,
+               unsigned long acc)
+{
+       const struct clk_parent_data pdata = { .index = -1, .fw_name = fw_name };
+
+       return __clk_hw_register_fixed_factor(dev, np, name, NULL, NULL,
+                       &pdata, flags, mult, div, acc,
+                       CLK_FIXED_FACTOR_FIXED_ACCURACY, false);
+}
+EXPORT_SYMBOL_GPL(clk_hw_register_fixed_factor_with_accuracy_fwname);
+
 struct clk *clk_register_fixed_factor(struct device *dev, const char *name,
                const char *parent_name, unsigned long flags,
                unsigned int mult, unsigned int div)
@@ -239,16 +283,43 @@ struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev,
                const char *name, const char *parent_name, unsigned long flags,
                unsigned int mult, unsigned int div)
 {
-       return __clk_hw_register_fixed_factor(dev, NULL, name, parent_name, NULL, -1,
-                       flags, mult, div, true);
+       const struct clk_parent_data pdata = { .index = -1 };
+
+       return __clk_hw_register_fixed_factor(dev, NULL, name, parent_name, NULL,
+                       &pdata, flags, mult, div, 0, 0, true);
 }
 EXPORT_SYMBOL_GPL(devm_clk_hw_register_fixed_factor);
 
+struct clk_hw *devm_clk_hw_register_fixed_factor_fwname(struct device *dev,
+               struct device_node *np, const char *name, const char *fw_name,
+               unsigned long flags, unsigned int mult, unsigned int div)
+{
+       const struct clk_parent_data pdata = { .index = -1, .fw_name = fw_name };
+
+       return __clk_hw_register_fixed_factor(dev, np, name, NULL, NULL,
+                       &pdata, flags, mult, div, 0, 0, true);
+}
+EXPORT_SYMBOL_GPL(devm_clk_hw_register_fixed_factor_fwname);
+
+struct clk_hw *devm_clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev,
+               struct device_node *np, const char *name, const char *fw_name,
+               unsigned long flags, unsigned int mult, unsigned int div,
+               unsigned long acc)
+{
+       const struct clk_parent_data pdata = { .index = -1, .fw_name = fw_name };
+
+       return __clk_hw_register_fixed_factor(dev, np, name, NULL, NULL,
+                       &pdata, flags, mult, div, acc,
+                       CLK_FIXED_FACTOR_FIXED_ACCURACY, true);
+}
+EXPORT_SYMBOL_GPL(devm_clk_hw_register_fixed_factor_with_accuracy_fwname);
+
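The _fwname variants resolve the parent by its "clock-names" entry rather than by global registration name, and the _with_accuracy_fwname forms additionally report a fixed accuracy (the CLK_FIXED_FACTOR_FIXED_ACCURACY path above) instead of inheriting the parent's. A hypothetical registration; the "osc" name and values are illustrative, and the accuracy is assumed to be in ppb as elsewhere in the clk API:

	/* "periph_div2" = osc / 2, reporting a fixed 100 ppb accuracy. */
	hw = devm_clk_hw_register_fixed_factor_with_accuracy_fwname(dev, np,
			"periph_div2", "osc", 0, 1, 2, 100);
	if (IS_ERR(hw))
		return PTR_ERR(hw);
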
 #ifdef CONFIG_OF
 static struct clk_hw *_of_fixed_factor_clk_setup(struct device_node *node)
 {
        struct clk_hw *hw;
        const char *clk_name = node->name;
+       const struct clk_parent_data pdata = { .index = 0 };
        u32 div, mult;
        int ret;
 
@@ -266,8 +337,8 @@ static struct clk_hw *_of_fixed_factor_clk_setup(struct device_node *node)
 
        of_property_read_string(node, "clock-output-names", &clk_name);
 
-       hw = __clk_hw_register_fixed_factor(NULL, node, clk_name, NULL, NULL, 0,
-                                           0, mult, div, false);
+       hw = __clk_hw_register_fixed_factor(NULL, node, clk_name, NULL, NULL,
+                                           &pdata, 0, mult, div, 0, 0, false);
        if (IS_ERR(hw)) {
                /*
                 * Clear OF_POPULATED flag so that clock registration can be
index 5067e067e90666d75114afc144e7ca1462ccc8e6..da057172cc90f145743a4f7dfd71870bd63b564b 100644 (file)
@@ -140,8 +140,8 @@ void clk_fractional_divider_general_approximation(struct clk_hw *hw,
        }
 
        if (fd->flags & CLK_FRAC_DIVIDER_ZERO_BASED) {
-               max_m = 1 << fd->mwidth;
-               max_n = 1 << fd->nwidth;
+               max_m = BIT(fd->mwidth);
+               max_n = BIT(fd->nwidth);
        } else {
                max_m = GENMASK(fd->mwidth - 1, 0);
                max_n = GENMASK(fd->nwidth - 1, 0);
@@ -182,8 +182,8 @@ static int clk_fd_set_rate(struct clk_hw *hw, unsigned long rate,
        u32 val;
 
        if (fd->flags & CLK_FRAC_DIVIDER_ZERO_BASED) {
-               max_m = 1 << fd->mwidth;
-               max_n = 1 << fd->nwidth;
+               max_m = BIT(fd->mwidth);
+               max_n = BIT(fd->nwidth);
        } else {
                max_m = GENMASK(fd->mwidth - 1, 0);
                max_n = GENMASK(fd->nwidth - 1, 0);
@@ -195,14 +195,14 @@ static int clk_fd_set_rate(struct clk_hw *hw, unsigned long rate,
                n--;
        }
 
+       mmask = GENMASK(fd->mwidth - 1, 0) << fd->mshift;
+       nmask = GENMASK(fd->nwidth - 1, 0) << fd->nshift;
+
        if (fd->lock)
                spin_lock_irqsave(fd->lock, flags);
        else
                __acquire(fd->lock);
 
-       mmask = GENMASK(fd->mwidth - 1, 0) << fd->mshift;
-       nmask = GENMASK(fd->nwidth - 1, 0) << fd->nshift;
-
        val = clk_fd_readl(fd);
        val &= ~(mmask | nmask);
        val |= (m << fd->mshift) | (n << fd->nshift);
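The BIT() conversions are behaviour-neutral (BIT(n) is 1 << n); the real asymmetry is between the two branches. A zero-based divider stores divisor - 1 in the register (see the n-- adjustment in the hunk above), so an n-bit field reaches one step further than the one-based encoding. With an 8-bit field, for instance:

	max_m = BIT(8);		/* zero-based: 0xff encodes divide-by-256 */
	max_m = GENMASK(7, 0);	/* one-based: 0xff is the divisor, 255 */

Hoisting the mmask/nmask computation out of the locked region is likewise behaviour-neutral; it just shortens the spinlock hold time.
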
index 2253c154a824834f8e58e9074de6e31a4f548c83..25371c91a58fe7cc45a0ae681221d25f801cafb7 100644 (file)
@@ -418,6 +418,9 @@ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index)
        if (IS_ERR(hw))
                return ERR_CAST(hw);
 
+       if (!hw)
+               return NULL;
+
        return hw->core;
 }
 
@@ -939,6 +942,25 @@ int clk_rate_exclusive_get(struct clk *clk)
 }
 EXPORT_SYMBOL_GPL(clk_rate_exclusive_get);
 
+static void devm_clk_rate_exclusive_put(void *data)
+{
+       struct clk *clk = data;
+
+       clk_rate_exclusive_put(clk);
+}
+
+int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk)
+{
+       int ret;
+
+       ret = clk_rate_exclusive_get(clk);
+       if (ret)
+               return ret;
+
+       return devm_add_action_or_reset(dev, devm_clk_rate_exclusive_put, clk);
+}
+EXPORT_SYMBOL_GPL(devm_clk_rate_exclusive_get);
+
 static void clk_core_unprepare(struct clk_core *core)
 {
        lockdep_assert_held(&prepare_lock);
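devm_clk_rate_exclusive_get() is the device-managed form of clk_rate_exclusive_get(): the exclusivity reference is dropped automatically when the device unbinds. A minimal consumer sketch, with dev and clk assumed from the surrounding driver:

	ret = devm_clk_rate_exclusive_get(dev, clk);
	if (ret)
		return ret;

	/* Until unbind, other consumers cannot change this clock's rate. */
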
index ee37d0be6877db7070f51ad264bdc1a9da02d36a..9cd80522ca2d772c9b9cd7324405c72102318af9 100644 (file)
@@ -144,7 +144,7 @@ void clkdev_add_table(struct clk_lookup *cl, size_t num)
        mutex_unlock(&clocks_mutex);
 }
 
-#define MAX_DEV_ID     20
+#define MAX_DEV_ID     24
 #define MAX_CON_ID     16
 
 struct clk_lookup_alloc {
index b871872d9960db03e1493bbc64d5b1499f0f6367..141b727ff60d64eff34f2711a2ea9aa72a5bf75d 100644 (file)
@@ -130,7 +130,7 @@ static void hi3519_clk_unregister(struct platform_device *pdev)
        of_clk_del_provider(pdev->dev.of_node);
 
        hisi_clk_unregister_gate(hi3519_gate_clks,
-                               ARRAY_SIZE(hi3519_mux_clks),
+                               ARRAY_SIZE(hi3519_gate_clks),
                                crg->clk_data);
        hisi_clk_unregister_mux(hi3519_mux_clks,
                                ARRAY_SIZE(hi3519_mux_clks),
index ff4ca0edce06a37cafb7e215b99c0a3b62e371d6..c79a94f6d9d24c28217586df4ee2c2e19201ed27 100644 (file)
@@ -461,8 +461,7 @@ static void hisi_clk_register_pll(struct hi3559av100_pll_clock *clks,
        struct clk_init_data init;
        int i;
 
-       p_clk = devm_kzalloc(dev, sizeof(*p_clk) * nums, GFP_KERNEL);
-
+       p_clk = devm_kcalloc(dev, nums, sizeof(*p_clk), GFP_KERNEL);
        if (!p_clk)
                return;
 
@@ -491,7 +490,6 @@ static void hisi_clk_register_pll(struct hi3559av100_pll_clock *clks,
 
                clk = clk_register(NULL, &p_clk->hw);
                if (IS_ERR(clk)) {
-                       devm_kfree(dev, p_clk);
                        dev_err(dev, "%s: failed to register clock %s\n",
                               __func__, clks[i].name);
                        continue;
index 27a08c50ac1d84dc31d8a1684f14a6e5401c951f..8cc07d056a83849fec36eb20504e188fa992aee3 100644 (file)
@@ -212,15 +212,15 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name,
 {
        struct clk_hw *hw = ERR_PTR(-ENOMEM), *mux_hw;
        struct clk_hw *div_hw, *gate_hw = NULL;
-       struct clk_divider *div = NULL;
+       struct clk_divider *div;
        struct clk_gate *gate = NULL;
-       struct clk_mux *mux = NULL;
+       struct clk_mux *mux;
        const struct clk_ops *divider_ops;
        const struct clk_ops *mux_ops;
 
        mux = kzalloc(sizeof(*mux), GFP_KERNEL);
        if (!mux)
-               goto fail;
+               return ERR_CAST(hw);
 
        mux_hw = &mux->hw;
        mux->reg = reg;
@@ -230,7 +230,7 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name,
 
        div = kzalloc(sizeof(*div), GFP_KERNEL);
        if (!div)
-               goto fail;
+               goto free_mux;
 
        div_hw = &div->hw;
        div->reg = reg;
@@ -260,7 +260,7 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name,
        if (!mcore_booted) {
                gate = kzalloc(sizeof(*gate), GFP_KERNEL);
                if (!gate)
-                       goto fail;
+                       goto free_div;
 
                gate_hw = &gate->hw;
                gate->reg = reg;
@@ -272,13 +272,15 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name,
                        mux_hw, mux_ops, div_hw,
                        divider_ops, gate_hw, &clk_gate_ops, flags);
        if (IS_ERR(hw))
-               goto fail;
+               goto free_gate;
 
        return hw;
 
-fail:
+free_gate:
        kfree(gate);
+free_div:
        kfree(div);
+free_mux:
        kfree(mux);
        return ERR_CAST(hw);
 }
index f68877eef87363b53048133a7908e6bdeaa2f0e4..1bdb480cc96c6bd7c4dc2a0a8c46e2024b36f0cd 100644 (file)
@@ -394,15 +394,13 @@ err_clk_register:
        return ret;
 }
 
-static int imx8_acm_clk_remove(struct platform_device *pdev)
+static void imx8_acm_clk_remove(struct platform_device *pdev)
 {
        struct imx8_acm_priv *priv = dev_get_drvdata(&pdev->dev);
 
        pm_runtime_disable(&pdev->dev);
 
        clk_imx_acm_detach_pm_domains(&pdev->dev, &priv->dev_pm);
-
-       return 0;
 }
 
 static const struct imx8_acm_soc_data imx8qm_acm_data = {
@@ -470,7 +468,7 @@ static struct platform_driver imx8_acm_clk_driver = {
                .pm = &imx8_acm_pm_ops,
        },
        .probe = imx8_acm_clk_probe,
-       .remove = imx8_acm_clk_remove,
+       .remove_new = imx8_acm_clk_remove,
 };
 module_platform_driver(imx8_acm_clk_driver);
 
index e4300df88f1acc04994a944596cd803c1d0c001b..55ed211a5e0b18f015ae66c9d75f2918a9e716aa 100644 (file)
 
 #define CLKEN0                 0x000
 #define CLKEN1                 0x004
-#define SAI_MCLK_SEL(n)                (0x300 + 4 * (n))       /* n in 0..5 */
+#define SAI1_MCLK_SEL          0x300
+#define SAI2_MCLK_SEL          0x304
+#define SAI3_MCLK_SEL          0x308
+#define SAI5_MCLK_SEL          0x30C
+#define SAI6_MCLK_SEL          0x310
+#define SAI7_MCLK_SEL          0x314
 #define PDM_SEL                        0x318
 #define SAI_PLL_GNRL_CTL       0x400
 
@@ -95,13 +100,13 @@ static const struct clk_parent_data clk_imx8mp_audiomix_pll_bypass_sels[] = {
                IMX8MP_CLK_AUDIOMIX_SAI##n##_MCLK1_SEL, {},             \
                clk_imx8mp_audiomix_sai##n##_mclk1_parents,             \
                ARRAY_SIZE(clk_imx8mp_audiomix_sai##n##_mclk1_parents), \
-               SAI_MCLK_SEL(n), 1, 0                                   \
+               SAI##n##_MCLK_SEL, 1, 0                                 \
        }, {                                                            \
                "sai"__stringify(n)"_mclk2_sel",                        \
                IMX8MP_CLK_AUDIOMIX_SAI##n##_MCLK2_SEL, {},             \
                clk_imx8mp_audiomix_sai_mclk2_parents,                  \
                ARRAY_SIZE(clk_imx8mp_audiomix_sai_mclk2_parents),      \
-               SAI_MCLK_SEL(n), 4, 1                                   \
+               SAI##n##_MCLK_SEL, 4, 1                                 \
        }, {                                                            \
                "sai"__stringify(n)"_ipg_cg",                           \
                IMX8MP_CLK_AUDIOMIX_SAI##n##_IPG,                       \
index e48a904c0013302fbe424c310266d4feea4b9940..b1dd0c08e091b6b61f6972eb630adacaa4f288c2 100644 (file)
@@ -712,17 +712,13 @@ struct clk_hw *imx_clk_scu_alloc_dev(const char *name,
        }
 
        ret = platform_device_add_data(pdev, &clk, sizeof(clk));
-       if (ret) {
-               platform_device_put(pdev);
-               return ERR_PTR(ret);
-       }
+       if (ret)
+               goto put_device;
 
        ret = driver_set_override(&pdev->dev, &pdev->driver_override,
                                  "imx-scu-clk", strlen("imx-scu-clk"));
-       if (ret) {
-               platform_device_put(pdev);
-               return ERR_PTR(ret);
-       }
+       if (ret)
+               goto put_device;
 
        ret = imx_clk_scu_attach_pd(&pdev->dev, rsrc_id);
        if (ret)
@@ -730,13 +726,15 @@ struct clk_hw *imx_clk_scu_alloc_dev(const char *name,
                        name, ret);
 
        ret = platform_device_add(pdev);
-       if (ret) {
-               platform_device_put(pdev);
-               return ERR_PTR(ret);
-       }
+       if (ret)
+               goto put_device;
 
        /* For API backwards compatibility, simply return NULL for success */
        return NULL;
+
+put_device:
+       platform_device_put(pdev);
+       return ERR_PTR(ret);
 }
 
 void imx_clk_scu_unregister(void)
index 35fe197dd303c6830f5639fdcccdbf131493480a..5cefc30a843ee5b2fa2ba6549b74b6fad85457f8 100644 (file)
@@ -272,7 +272,7 @@ static const struct clk_ops sci_clk_ops = {
 };
 
 /**
- * _sci_clk_get - Gets a handle for an SCI clock
+ * _sci_clk_build - Gets a handle for an SCI clock
  * @provider: Handle to SCI clock provider
  * @sci_clk: Handle to the SCI clock to populate
  *
@@ -516,6 +516,7 @@ static int ti_sci_scan_clocks_from_dt(struct sci_clk_provider *provider)
        struct sci_clk *sci_clk, *prev;
        int num_clks = 0;
        int num_parents;
+       bool state;
        int clk_id;
        const char * const clk_names[] = {
                "clocks", "assigned-clocks", "assigned-clock-parents", NULL
@@ -586,6 +587,15 @@ static int ti_sci_scan_clocks_from_dt(struct sci_clk_provider *provider)
                                clk_id = args.args[1] + 1;
 
                                while (num_parents--) {
+                                       /* Check if this clock id is valid */
+                                       ret = provider->ops->is_auto(provider->sci,
+                                               sci_clk->dev_id, clk_id, &state);
+
+                                       if (ret) {
+                                               clk_id++;
+                                               continue;
+                                       }
+
                                        sci_clk = devm_kzalloc(dev,
                                                               sizeof(*sci_clk),
                                                               GFP_KERNEL);
index 9cffd278e9a43e6a0d7d5761f91adbc71f3ab479..1b8f859b6b6ccd2cabea5447b1ee382f76e4de4c 100644 (file)
@@ -127,7 +127,6 @@ static void clk_mt7622_apmixed_remove(struct platform_device *pdev)
        of_clk_del_provider(node);
        mtk_clk_unregister_gates(apmixed_clks, ARRAY_SIZE(apmixed_clks), clk_data);
        mtk_clk_unregister_plls(plls, ARRAY_SIZE(plls), clk_data);
-       mtk_free_clk_data(clk_data);
 }
 
 static const struct of_device_id of_match_clk_mt7622_apmixed[] = {
index 682f4ca9e89adafea2fe57d66c90a21f5c678395..493aa11d3a175f6954411569f4b2e6a12f76ed5b 100644 (file)
@@ -357,8 +357,9 @@ static const struct mtk_mux top_muxes[] = {
        MUX_GATE_CLR_SET_UPD(CLK_TOP_SGM_325M_SEL, "sgm_325m_sel",
                             sgm_325m_parents, 0x050, 0x054, 0x058, 8, 1, 15,
                             0x1C0, 21),
-       MUX_GATE_CLR_SET_UPD(CLK_TOP_SGM_REG_SEL, "sgm_reg_sel", sgm_reg_parents,
-                            0x050, 0x054, 0x058, 16, 1, 23, 0x1C0, 22),
+       MUX_GATE_CLR_SET_UPD_FLAGS(CLK_TOP_SGM_REG_SEL, "sgm_reg_sel", sgm_reg_parents,
+                                  0x050, 0x054, 0x058, 16, 1, 23, 0x1C0, 22,
+                                  CLK_IS_CRITICAL | CLK_SET_RATE_PARENT),
        MUX_GATE_CLR_SET_UPD(CLK_TOP_EIP97B_SEL, "eip97b_sel", eip97b_parents,
                             0x050, 0x054, 0x058, 24, 3, 31, 0x1C0, 23),
        /* CLK_CFG_6 */
index 8011ef278bea3ecc7e2afdb2baf155e1033315e9..449041f8abbc9a1cd97fa7d3807634653e677849 100644 (file)
 #include "clk-gate.h"
 #include "clk-mux.h"
 #include <dt-bindings/clock/mediatek,mt7988-clk.h>
+#include <dt-bindings/reset/mediatek,mt7988-resets.h>
+
+#define        MT7988_INFRA_RST0_SET_OFFSET    0x70
+#define        MT7988_INFRA_RST1_SET_OFFSET    0x80
 
 static DEFINE_SPINLOCK(mt7988_clk_lock);
 
@@ -249,12 +253,31 @@ static const struct mtk_gate infra_clks[] = {
        GATE_INFRA3(CLK_INFRA_133M_PCIE_CK_P3, "infra_133m_pcie_ck_p3", "sysaxi_sel", 31),
 };
 
+static u16 infra_rst_ofs[] = {
+       MT7988_INFRA_RST0_SET_OFFSET,
+       MT7988_INFRA_RST1_SET_OFFSET,
+};
+
+static u16 infra_idx_map[] = {
+       [MT7988_INFRA_RST0_PEXTP_MAC_SWRST] = 0 * RST_NR_PER_BANK + 6,
+       [MT7988_INFRA_RST1_THERM_CTRL_SWRST] = 1 * RST_NR_PER_BANK + 9,
+};
+
+static struct mtk_clk_rst_desc infra_rst_desc = {
+       .version = MTK_RST_SET_CLR,
+       .rst_bank_ofs = infra_rst_ofs,
+       .rst_bank_nr = ARRAY_SIZE(infra_rst_ofs),
+       .rst_idx_map = infra_idx_map,
+       .rst_idx_map_nr = ARRAY_SIZE(infra_idx_map),
+};
+
 static const struct mtk_clk_desc infra_desc = {
        .clks = infra_clks,
        .num_clks = ARRAY_SIZE(infra_clks),
        .mux_clks = infra_muxes,
        .num_mux_clks = ARRAY_SIZE(infra_muxes),
        .clk_lock = &mt7988_clk_lock,
+       .rst_desc = &infra_rst_desc,
 };
 
 static const struct of_device_id of_match_clk_mt7988_infracfg[] = {
index d1239b4b3db74b4857cc3fe2fa981acb74b914d1..41bb2d2e2ea7408c0dfc290b25b49a2196d482be 100644 (file)
@@ -59,7 +59,7 @@ static int clk_mt8135_apmixed_probe(struct platform_device *pdev)
 
        ret = mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data);
        if (ret)
-               return ret;
+               goto free_clk_data;
 
        ret = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data);
        if (ret)
@@ -69,6 +69,8 @@ static int clk_mt8135_apmixed_probe(struct platform_device *pdev)
 
 unregister_plls:
        mtk_clk_unregister_plls(plls, ARRAY_SIZE(plls), clk_data);
+free_clk_data:
+       mtk_free_clk_data(clk_data);
 
        return ret;
 }
index 1bbb21ab178698c683c8949375881b76f271c6d6..6cab483b8e1ed36f7cb78166049d9aa59c28d490 100644 (file)
@@ -152,8 +152,8 @@ static int clk_mt8173_apmixed_probe(struct platform_device *pdev)
 
        clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK);
        if (IS_ERR_OR_NULL(clk_data)) {
-               iounmap(base);
-               return -ENOMEM;
+               r = -ENOMEM;
+               goto unmap_io;
        }
 
        fhctl_parse_dt(fhctl_node, pllfhs, ARRAY_SIZE(pllfhs));
@@ -188,6 +188,7 @@ unregister_plls:
                                  ARRAY_SIZE(pllfhs), clk_data);
 free_clk_data:
        mtk_free_clk_data(clk_data);
+unmap_io:
        iounmap(base);
        return r;
 }
index 6e23461a04559c725d41a2c10bf9dc911cb11782..934d5a15acfc5898e2ba8c79510d7350465b7de9 100644 (file)
@@ -790,7 +790,7 @@ static const struct mtk_gate infra_clks[] = {
        /* infra_sspm_26m_self is main clock in co-processor, should not be closed in Linux. */
        GATE_INFRA3_FLAGS(CLK_INFRA_SSPM_26M_SELF, "infra_sspm_26m_self", "f_f26m_ck", 3, CLK_IS_CRITICAL),
        /* infra_sspm_32k_self is main clock in co-processor, should not be closed in Linux. */
-       GATE_INFRA3_FLAGS(CLK_INFRA_SSPM_32K_SELF, "infra_sspm_32k_self", "f_f26m_ck", 4, CLK_IS_CRITICAL),
+       GATE_INFRA3_FLAGS(CLK_INFRA_SSPM_32K_SELF, "infra_sspm_32k_self", "clk32k", 4, CLK_IS_CRITICAL),
        GATE_INFRA3(CLK_INFRA_UFS_AXI, "infra_ufs_axi", "axi_sel", 5),
        GATE_INFRA3(CLK_INFRA_I2C6, "infra_i2c6", "i2c_sel", 6),
        GATE_INFRA3(CLK_INFRA_AP_MSDC0, "infra_ap_msdc0", "msdc50_hclk_sel", 7),
index c12f81dfa6745319d0155a7b386e12ea50dd8cb0..5f60f2bcca592a6ce135d37568f2feb8aafabdd5 100644 (file)
@@ -2142,7 +2142,9 @@ static struct clk_regmap *const axg_clk_regmaps[] = {
        &axg_vclk_input,
        &axg_vclk2_input,
        &axg_vclk_div,
+       &axg_vclk_div1,
        &axg_vclk2_div,
+       &axg_vclk2_div1,
        &axg_vclk_div2_en,
        &axg_vclk_div4_en,
        &axg_vclk_div6_en,
index c8ffa755b58dfad51a3d5253ffefc723dad4ee5c..22eab91a67129b5966867594a494a1037ae7daf0 100644 (file)
@@ -15,7 +15,8 @@
 
 /* address offset of control registers */
 #define REG_MSSPLL_REF_CR      0x08u
-#define REG_MSSPLL_POSTDIV_CR  0x10u
+#define REG_MSSPLL_POSTDIV01_CR        0x10u
+#define REG_MSSPLL_POSTDIV23_CR        0x14u
 #define REG_MSSPLL_SSCG_2_CR   0x2Cu
 #define REG_CLOCK_CONFIG_CR    0x08u
 #define REG_RTC_CLOCK_CR       0x0Cu
 #define MSSPLL_FBDIV_WIDTH     0x0Cu
 #define MSSPLL_REFDIV_SHIFT    0x08u
 #define MSSPLL_REFDIV_WIDTH    0x06u
-#define MSSPLL_POSTDIV_SHIFT   0x08u
+#define MSSPLL_POSTDIV02_SHIFT 0x08u
+#define MSSPLL_POSTDIV13_SHIFT 0x18u
 #define MSSPLL_POSTDIV_WIDTH   0x07u
 #define MSSPLL_FIXED_DIV       4u
 
+/*
+ * This clock ID is defined here, rather than in the binding headers, as it is
+ * an internal clock only and therefore has no consumers in other peripheral
+ * blocks.
+ */
+#define CLK_MSSPLL_INTERNAL    38u
+
 struct mpfs_clock_data {
        struct device *dev;
        void __iomem *base;
@@ -39,17 +48,27 @@ struct mpfs_clock_data {
 
 struct mpfs_msspll_hw_clock {
        void __iomem *base;
+       struct clk_hw hw;
+       struct clk_init_data init;
        unsigned int id;
        u32 reg_offset;
        u32 shift;
        u32 width;
        u32 flags;
-       struct clk_hw hw;
-       struct clk_init_data init;
 };
 
 #define to_mpfs_msspll_clk(_hw) container_of(_hw, struct mpfs_msspll_hw_clock, hw)
 
+struct mpfs_msspll_out_hw_clock {
+       void __iomem *base;
+       struct clk_divider output;
+       struct clk_init_data init;
+       unsigned int id;
+       u32 reg_offset;
+};
+
+#define to_mpfs_msspll_out_clk(_hw) container_of(_hw, struct mpfs_msspll_out_hw_clock, hw)
+
 struct mpfs_cfg_hw_clock {
        struct clk_divider cfg;
        struct clk_init_data init;
@@ -93,93 +112,40 @@ static const struct clk_div_table mpfs_div_rtcref_table[] = {
        { 0, 0 }
 };
 
-static unsigned long mpfs_clk_msspll_recalc_rate(struct clk_hw *hw, unsigned long prate)
-{
-       struct mpfs_msspll_hw_clock *msspll_hw = to_mpfs_msspll_clk(hw);
-       void __iomem *mult_addr = msspll_hw->base + msspll_hw->reg_offset;
-       void __iomem *ref_div_addr = msspll_hw->base + REG_MSSPLL_REF_CR;
-       void __iomem *postdiv_addr = msspll_hw->base + REG_MSSPLL_POSTDIV_CR;
-       u32 mult, ref_div, postdiv;
-
-       mult = readl_relaxed(mult_addr) >> MSSPLL_FBDIV_SHIFT;
-       mult &= clk_div_mask(MSSPLL_FBDIV_WIDTH);
-       ref_div = readl_relaxed(ref_div_addr) >> MSSPLL_REFDIV_SHIFT;
-       ref_div &= clk_div_mask(MSSPLL_REFDIV_WIDTH);
-       postdiv = readl_relaxed(postdiv_addr) >> MSSPLL_POSTDIV_SHIFT;
-       postdiv &= clk_div_mask(MSSPLL_POSTDIV_WIDTH);
-
-       return prate * mult / (ref_div * MSSPLL_FIXED_DIV * postdiv);
-}
+/*
+ * MSS PLL internal clock
+ */
 
-static long mpfs_clk_msspll_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *prate)
+static unsigned long mpfs_clk_msspll_recalc_rate(struct clk_hw *hw, unsigned long prate)
 {
        struct mpfs_msspll_hw_clock *msspll_hw = to_mpfs_msspll_clk(hw);
        void __iomem *mult_addr = msspll_hw->base + msspll_hw->reg_offset;
        void __iomem *ref_div_addr = msspll_hw->base + REG_MSSPLL_REF_CR;
        u32 mult, ref_div;
-       unsigned long rate_before_ctrl;
-
-       mult = readl_relaxed(mult_addr) >> MSSPLL_FBDIV_SHIFT;
-       mult &= clk_div_mask(MSSPLL_FBDIV_WIDTH);
-       ref_div = readl_relaxed(ref_div_addr) >> MSSPLL_REFDIV_SHIFT;
-       ref_div &= clk_div_mask(MSSPLL_REFDIV_WIDTH);
-
-       rate_before_ctrl = rate * (ref_div * MSSPLL_FIXED_DIV) / mult;
-
-       return divider_round_rate(hw, rate_before_ctrl, prate, NULL, MSSPLL_POSTDIV_WIDTH,
-                                 msspll_hw->flags);
-}
-
-static int mpfs_clk_msspll_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long prate)
-{
-       struct mpfs_msspll_hw_clock *msspll_hw = to_mpfs_msspll_clk(hw);
-       void __iomem *mult_addr = msspll_hw->base + msspll_hw->reg_offset;
-       void __iomem *ref_div_addr = msspll_hw->base + REG_MSSPLL_REF_CR;
-       void __iomem *postdiv_addr = msspll_hw->base + REG_MSSPLL_POSTDIV_CR;
-       u32 mult, ref_div, postdiv;
-       int divider_setting;
-       unsigned long rate_before_ctrl, flags;
 
        mult = readl_relaxed(mult_addr) >> MSSPLL_FBDIV_SHIFT;
        mult &= clk_div_mask(MSSPLL_FBDIV_WIDTH);
        ref_div = readl_relaxed(ref_div_addr) >> MSSPLL_REFDIV_SHIFT;
        ref_div &= clk_div_mask(MSSPLL_REFDIV_WIDTH);
 
-       rate_before_ctrl = rate * (ref_div * MSSPLL_FIXED_DIV) / mult;
-       divider_setting = divider_get_val(rate_before_ctrl, prate, NULL, MSSPLL_POSTDIV_WIDTH,
-                                         msspll_hw->flags);
-
-       if (divider_setting < 0)
-               return divider_setting;
-
-       spin_lock_irqsave(&mpfs_clk_lock, flags);
-
-       postdiv = readl_relaxed(postdiv_addr);
-       postdiv &= ~(clk_div_mask(MSSPLL_POSTDIV_WIDTH) << MSSPLL_POSTDIV_SHIFT);
-       writel_relaxed(postdiv, postdiv_addr);
-
-       spin_unlock_irqrestore(&mpfs_clk_lock, flags);
-
-       return 0;
+       return prate * mult / (ref_div * MSSPLL_FIXED_DIV);
 }
 
 static const struct clk_ops mpfs_clk_msspll_ops = {
        .recalc_rate = mpfs_clk_msspll_recalc_rate,
-       .round_rate = mpfs_clk_msspll_round_rate,
-       .set_rate = mpfs_clk_msspll_set_rate,
 };
 
 #define CLK_PLL(_id, _name, _parent, _shift, _width, _flags, _offset) {                        \
        .id = _id,                                                                      \
+       .flags = _flags,                                                                \
        .shift = _shift,                                                                \
        .width = _width,                                                                \
        .reg_offset = _offset,                                                          \
-       .flags = _flags,                                                                \
        .hw.init = CLK_HW_INIT_PARENTS_DATA(_name, _parent, &mpfs_clk_msspll_ops, 0),   \
 }
 
 static struct mpfs_msspll_hw_clock mpfs_msspll_clks[] = {
-       CLK_PLL(CLK_MSSPLL, "clk_msspll", mpfs_ext_ref, MSSPLL_FBDIV_SHIFT,
+       CLK_PLL(CLK_MSSPLL_INTERNAL, "clk_msspll_internal", mpfs_ext_ref, MSSPLL_FBDIV_SHIFT,
                MSSPLL_FBDIV_WIDTH, 0, REG_MSSPLL_SSCG_2_CR),
 };
 
@@ -196,7 +162,7 @@ static int mpfs_clk_register_mssplls(struct device *dev, struct mpfs_msspll_hw_c
                ret = devm_clk_hw_register(dev, &msspll_hw->hw);
                if (ret)
                        return dev_err_probe(dev, ret, "failed to register msspll id: %d\n",
-                                            CLK_MSSPLL);
+                                            CLK_MSSPLL_INTERNAL);
 
                data->hw_data.hws[msspll_hw->id] = &msspll_hw->hw;
        }
@@ -204,6 +170,54 @@ static int mpfs_clk_register_mssplls(struct device *dev, struct mpfs_msspll_hw_c
        return 0;
 }
 
+/*
+ * MSS PLL output clocks
+ */
+
+#define CLK_PLL_OUT(_id, _name, _parent, _flags, _shift, _width, _offset) {    \
+       .id = _id,                                                              \
+       .output.shift = _shift,                                                 \
+       .output.width = _width,                                                 \
+       .output.table = NULL,                                                   \
+       .reg_offset = _offset,                                                  \
+       .output.flags = _flags,                                                 \
+       .output.hw.init = CLK_HW_INIT(_name, _parent, &clk_divider_ops, 0),     \
+       .output.lock = &mpfs_clk_lock,                                          \
+}
+
+static struct mpfs_msspll_out_hw_clock mpfs_msspll_out_clks[] = {
+       CLK_PLL_OUT(CLK_MSSPLL0, "clk_msspll", "clk_msspll_internal", CLK_DIVIDER_ONE_BASED,
+                   MSSPLL_POSTDIV02_SHIFT, MSSPLL_POSTDIV_WIDTH, REG_MSSPLL_POSTDIV01_CR),
+       CLK_PLL_OUT(CLK_MSSPLL1, "clk_msspll1", "clk_msspll_internal", CLK_DIVIDER_ONE_BASED,
+                   MSSPLL_POSTDIV13_SHIFT, MSSPLL_POSTDIV_WIDTH, REG_MSSPLL_POSTDIV01_CR),
+       CLK_PLL_OUT(CLK_MSSPLL2, "clk_msspll2", "clk_msspll_internal", CLK_DIVIDER_ONE_BASED,
+                   MSSPLL_POSTDIV02_SHIFT, MSSPLL_POSTDIV_WIDTH, REG_MSSPLL_POSTDIV23_CR),
+       CLK_PLL_OUT(CLK_MSSPLL3, "clk_msspll3", "clk_msspll_internal", CLK_DIVIDER_ONE_BASED,
+                   MSSPLL_POSTDIV13_SHIFT, MSSPLL_POSTDIV_WIDTH, REG_MSSPLL_POSTDIV23_CR),
+};
+
+static int mpfs_clk_register_msspll_outs(struct device *dev,
+                                        struct mpfs_msspll_out_hw_clock *msspll_out_hws,
+                                        unsigned int num_clks, struct mpfs_clock_data *data)
+{
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < num_clks; i++) {
+               struct mpfs_msspll_out_hw_clock *msspll_out_hw = &msspll_out_hws[i];
+
+               msspll_out_hw->output.reg = data->msspll_base + msspll_out_hw->reg_offset;
+               ret = devm_clk_hw_register(dev, &msspll_out_hw->output.hw);
+               if (ret)
+                       return dev_err_probe(dev, ret, "failed to register msspll out id: %d\n",
+                                            msspll_out_hw->id);
+
+               data->hw_data.hws[msspll_out_hw->id] = &msspll_out_hw->output.hw;
+       }
+
+       return 0;
+}
+
 /*
  * "CFG" clocks
  */
@@ -442,8 +456,8 @@ static int mpfs_clk_probe(struct platform_device *pdev)
        int ret;
 
        /* CLK_RESERVED is not part of clock arrays, so add 1 */
-       num_clks = ARRAY_SIZE(mpfs_msspll_clks) + ARRAY_SIZE(mpfs_cfg_clks)
-                  + ARRAY_SIZE(mpfs_periph_clks) + 1;
+       num_clks = ARRAY_SIZE(mpfs_msspll_clks) + ARRAY_SIZE(mpfs_msspll_out_clks)
+                  + ARRAY_SIZE(mpfs_cfg_clks) + ARRAY_SIZE(mpfs_periph_clks) + 1;
 
        clk_data = devm_kzalloc(dev, struct_size(clk_data, hw_data.hws, num_clks), GFP_KERNEL);
        if (!clk_data)
@@ -466,6 +480,12 @@ static int mpfs_clk_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
+       ret = mpfs_clk_register_msspll_outs(dev, mpfs_msspll_out_clks,
+                                           ARRAY_SIZE(mpfs_msspll_out_clks),
+                                           clk_data);
+       if (ret)
+               return ret;
+
        ret = mpfs_clk_register_cfgs(dev, mpfs_cfg_clks, ARRAY_SIZE(mpfs_cfg_clks), clk_data);
        if (ret)
                return ret;
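
The net effect of this file's changes is to split the MSS PLL into a read-only internal clock and four generic divider outputs, so the PLL ops keep only .recalc_rate while rate setting moves to clk_divider_ops on each postdiv. A rough reading of the remaining math, with illustrative numbers (the prate, mult and ref_div values here are hypothetical, and MSSPLL_FIXED_DIV is taken as 4 only for the example):

    /*
     * rate = prate * mult / (ref_div * MSSPLL_FIXED_DIV)
     * e.g. 100 MHz * 48 / (2 * 4) = 600 MHz
     */

Each entry in mpfs_msspll_out_clks is a plain divider registered with CLK_DIVIDER_ONE_BASED, so a register field value of n divides by n rather than n + 1.
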
index 2a9da0939377ac1b350a5059138141a483eca7e4..8ab08e7b5b6c69086d4b161b44973bd8a430b7ee 100644 (file)
@@ -20,6 +20,24 @@ menuconfig COMMON_CLK_QCOM
 
 if COMMON_CLK_QCOM
 
+config CLK_X1E80100_CAMCC
+       tristate "X1E80100 Camera Clock Controller"
+       depends on ARM64 || COMPILE_TEST
+       select CLK_X1E80100_GCC
+       help
+         Support for the camera clock controller on X1E80100 devices.
+         Say Y if you want to support camera devices and camera functionality.
+
+config CLK_X1E80100_DISPCC
+       tristate "X1E80100 Display Clock Controller"
+       depends on ARM64 || COMPILE_TEST
+       select CLK_X1E80100_GCC
+       help
+         Support for the two display clock controllers on Qualcomm
+         Technologies, Inc. X1E80100 devices.
+         Say Y if you want to support display devices and functionality such as
+         splash screen.
+
 config CLK_X1E80100_GCC
        tristate "X1E80100 Global Clock Controller"
        depends on ARM64 || COMPILE_TEST
@@ -30,6 +48,23 @@ config CLK_X1E80100_GCC
          Say Y if you want to use peripheral devices such as UART, SPI, I2C,
          USB, UFS, SD/eMMC, PCIe, etc.
 
+config CLK_X1E80100_GPUCC
+       tristate "X1E80100 Graphics Clock Controller"
+       depends on ARM64 || COMPILE_TEST
+       select CLK_X1E80100_GCC
+       help
+         Support for the graphics clock controller on X1E80100 devices.
+         Say Y if you want to support graphics controller devices and
+         functionality such as 3D graphics.
+
+config CLK_X1E80100_TCSRCC
+       tristate "X1E80100 TCSR Clock Controller"
+       depends on ARM64 || COMPILE_TEST
+       select QCOM_GDSC
+       help
+         Support for the TCSR clock controller on X1E80100 devices.
+         Say Y if you want to use peripheral devices such as SD/UFS.
+
 config QCOM_A53PLL
        tristate "MSM8916 A53 PLL"
        help
@@ -600,16 +635,6 @@ config SC_LPASS_CORECC_7280
          Say Y if you want to use LPASS clocks and power domains of the LPASS
          core clock controller.
 
-config SC_MSS_7180
-       tristate "SC7180 Modem Clock Controller"
-       depends on ARM64 || COMPILE_TEST
-       select SC_GCC_7180
-       help
-         Support for the Modem Subsystem clock controller on Qualcomm
-         Technologies, Inc on SC7180 devices.
-         Say Y if you want to use the Modem branch clocks of the Modem
-         subsystem clock controller to reset the MSS subsystem.
-
 config SC_VIDEOCC_7180
        tristate "SC7180 Video Clock Controller"
        depends on ARM64 || COMPILE_TEST
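
The four new entries pair with the Makefile rules in the next hunk. An illustrative .config fragment enabling them as modules (CAMCC, DISPCC and GPUCC select CLK_X1E80100_GCC, so the GCC driver is pulled in automatically):

    CONFIG_CLK_X1E80100_GCC=y
    CONFIG_CLK_X1E80100_CAMCC=m
    CONFIG_CLK_X1E80100_DISPCC=m
    CONFIG_CLK_X1E80100_GPUCC=m
    CONFIG_CLK_X1E80100_TCSRCC=m
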
index 582e06dc1d939e457c84492f5177412d64416b91..dec5b6db6860c30dda7e551b66891a59eac6793f 100644 (file)
@@ -21,7 +21,11 @@ clk-qcom-$(CONFIG_QCOM_GDSC) += gdsc.o
 obj-$(CONFIG_APQ_GCC_8084) += gcc-apq8084.o
 obj-$(CONFIG_APQ_MMCC_8084) += mmcc-apq8084.o
 obj-$(CONFIG_CLK_GFM_LPASS_SM8250) += lpass-gfm-sm8250.o
+obj-$(CONFIG_CLK_X1E80100_CAMCC) += camcc-x1e80100.o
+obj-$(CONFIG_CLK_X1E80100_DISPCC) += dispcc-x1e80100.o
 obj-$(CONFIG_CLK_X1E80100_GCC) += gcc-x1e80100.o
+obj-$(CONFIG_CLK_X1E80100_GPUCC) += gpucc-x1e80100.o
+obj-$(CONFIG_CLK_X1E80100_TCSRCC) += tcsrcc-x1e80100.o
 obj-$(CONFIG_IPQ_APSS_PLL) += apss-ipq-pll.o
 obj-$(CONFIG_IPQ_APSS_6018) += apss-ipq6018.o
 obj-$(CONFIG_IPQ_GCC_4019) += gcc-ipq4019.o
@@ -87,7 +91,6 @@ obj-$(CONFIG_SC_LPASSCC_7280) += lpasscc-sc7280.o
 obj-$(CONFIG_SC_LPASSCC_8280XP) += lpasscc-sc8280xp.o
 obj-$(CONFIG_SC_LPASS_CORECC_7180) += lpasscorecc-sc7180.o
 obj-$(CONFIG_SC_LPASS_CORECC_7280) += lpasscorecc-sc7280.o lpassaudiocc-sc7280.o
-obj-$(CONFIG_SC_MSS_7180) += mss-sc7180.o
 obj-$(CONFIG_SC_VIDEOCC_7180) += videocc-sc7180.o
 obj-$(CONFIG_SC_VIDEOCC_7280) += videocc-sc7280.o
 obj-$(CONFIG_SDM_CAMCC_845) += camcc-sdm845.o
index 0a9a6df3ddaceb7eebf26fecf88eabcc6947e4f1..a78808b22b030bc68b8686cb2555d8c5b375d6e4 100644 (file)
@@ -1703,17 +1703,7 @@ static struct platform_driver cam_cc_sc7180_driver = {
        },
 };
 
-static int __init cam_cc_sc7180_init(void)
-{
-       return platform_driver_register(&cam_cc_sc7180_driver);
-}
-subsys_initcall(cam_cc_sc7180_init);
-
-static void __exit cam_cc_sc7180_exit(void)
-{
-       platform_driver_unregister(&cam_cc_sc7180_driver);
-}
-module_exit(cam_cc_sc7180_exit);
+module_platform_driver(cam_cc_sc7180_driver);
 
 MODULE_DESCRIPTION("QTI CAM_CC SC7180 Driver");
 MODULE_LICENSE("GPL v2");
index 49f046ea857cbe095b6a8a18071e9d274d3c87df..d89ddb2298e32d17ed9b0064ebb9c383b2609281 100644 (file)
@@ -2468,17 +2468,7 @@ static struct platform_driver cam_cc_sc7280_driver = {
        },
 };
 
-static int __init cam_cc_sc7280_init(void)
-{
-       return platform_driver_register(&cam_cc_sc7280_driver);
-}
-subsys_initcall(cam_cc_sc7280_init);
-
-static void __exit cam_cc_sc7280_exit(void)
-{
-       platform_driver_unregister(&cam_cc_sc7280_driver);
-}
-module_exit(cam_cc_sc7280_exit);
+module_platform_driver(cam_cc_sc7280_driver);
 
 MODULE_DESCRIPTION("QTI CAM_CC SC7280 Driver");
 MODULE_LICENSE("GPL v2");
index 3dcd79b01515170d5bb1945d02356fee6a036710..8e26ec2def73ad56f75bab241557a92ea38dd908 100644 (file)
@@ -630,6 +630,7 @@ static const struct freq_tbl ftbl_camcc_bps_clk_src[] = {
        F(480000000, P_CAMCC_PLL7_OUT_EVEN, 1, 0, 0),
        F(600000000, P_CAMCC_PLL0_OUT_MAIN, 2, 0, 0),
        F(760000000, P_CAMCC_PLL3_OUT_EVEN, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_bps_clk_src = {
@@ -654,6 +655,7 @@ static const struct freq_tbl ftbl_camcc_camnoc_axi_clk_src[] = {
        F(320000000, P_CAMCC_PLL7_OUT_ODD, 1, 0, 0),
        F(400000000, P_CAMCC_PLL0_OUT_ODD, 1, 0, 0),
        F(480000000, P_CAMCC_PLL7_OUT_EVEN, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_camnoc_axi_clk_src = {
@@ -673,6 +675,7 @@ static struct clk_rcg2 camcc_camnoc_axi_clk_src = {
 static const struct freq_tbl ftbl_camcc_cci_0_clk_src[] = {
        F(19200000, P_BI_TCXO, 1, 0, 0),
        F(37500000, P_CAMCC_PLL0_OUT_EVEN, 16, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_cci_0_clk_src = {
@@ -735,6 +738,7 @@ static const struct freq_tbl ftbl_camcc_cphy_rx_clk_src[] = {
        F(19200000, P_BI_TCXO, 1, 0, 0),
        F(240000000, P_CAMCC_PLL0_OUT_EVEN, 2.5, 0, 0),
        F(400000000, P_CAMCC_PLL0_OUT_ODD, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_cphy_rx_clk_src = {
@@ -754,6 +758,7 @@ static struct clk_rcg2 camcc_cphy_rx_clk_src = {
 static const struct freq_tbl ftbl_camcc_csi0phytimer_clk_src[] = {
        F(19200000, P_BI_TCXO, 1, 0, 0),
        F(300000000, P_CAMCC_PLL0_OUT_EVEN, 2, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_csi0phytimer_clk_src = {
@@ -818,6 +823,7 @@ static const struct freq_tbl ftbl_camcc_fast_ahb_clk_src[] = {
        F(200000000, P_CAMCC_PLL0_OUT_EVEN, 3, 0, 0),
        F(300000000, P_CAMCC_PLL0_OUT_MAIN, 4, 0, 0),
        F(400000000, P_CAMCC_PLL0_OUT_MAIN, 3, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_fast_ahb_clk_src = {
@@ -838,6 +844,7 @@ static const struct freq_tbl ftbl_camcc_icp_clk_src[] = {
        F(19200000, P_BI_TCXO, 1, 0, 0),
        F(400000000, P_CAMCC_PLL0_OUT_ODD, 1, 0, 0),
        F(600000000, P_CAMCC_PLL0_OUT_MAIN, 2, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_icp_clk_src = {
@@ -860,6 +867,7 @@ static const struct freq_tbl ftbl_camcc_ife_0_clk_src[] = {
        F(558000000, P_CAMCC_PLL3_OUT_EVEN, 1, 0, 0),
        F(637000000, P_CAMCC_PLL3_OUT_EVEN, 1, 0, 0),
        F(760000000, P_CAMCC_PLL3_OUT_EVEN, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_ife_0_clk_src = {
@@ -883,6 +891,7 @@ static const struct freq_tbl ftbl_camcc_ife_0_csid_clk_src[] = {
        F(400000000, P_CAMCC_PLL0_OUT_ODD, 1, 0, 0),
        F(480000000, P_CAMCC_PLL7_OUT_EVEN, 1, 0, 0),
        F(600000000, P_CAMCC_PLL0_OUT_MAIN, 2, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_ife_0_csid_clk_src = {
@@ -905,6 +914,7 @@ static const struct freq_tbl ftbl_camcc_ife_1_clk_src[] = {
        F(558000000, P_CAMCC_PLL4_OUT_EVEN, 1, 0, 0),
        F(637000000, P_CAMCC_PLL4_OUT_EVEN, 1, 0, 0),
        F(760000000, P_CAMCC_PLL4_OUT_EVEN, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_ife_1_clk_src = {
@@ -941,6 +951,7 @@ static const struct freq_tbl ftbl_camcc_ife_2_clk_src[] = {
        F(558000000, P_CAMCC_PLL5_OUT_EVEN, 1, 0, 0),
        F(637000000, P_CAMCC_PLL5_OUT_EVEN, 1, 0, 0),
        F(760000000, P_CAMCC_PLL5_OUT_EVEN, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_ife_2_clk_src = {
@@ -962,6 +973,7 @@ static const struct freq_tbl ftbl_camcc_ife_2_csid_clk_src[] = {
        F(400000000, P_CAMCC_PLL0_OUT_ODD, 1, 0, 0),
        F(480000000, P_CAMCC_PLL7_OUT_EVEN, 1, 0, 0),
        F(600000000, P_CAMCC_PLL0_OUT_MAIN, 2, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_ife_2_csid_clk_src = {
@@ -984,6 +996,7 @@ static const struct freq_tbl ftbl_camcc_ife_3_clk_src[] = {
        F(558000000, P_CAMCC_PLL6_OUT_EVEN, 1, 0, 0),
        F(637000000, P_CAMCC_PLL6_OUT_EVEN, 1, 0, 0),
        F(760000000, P_CAMCC_PLL6_OUT_EVEN, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_ife_3_clk_src = {
@@ -1020,6 +1033,7 @@ static const struct freq_tbl ftbl_camcc_ife_lite_0_clk_src[] = {
        F(400000000, P_CAMCC_PLL0_OUT_ODD, 1, 0, 0),
        F(480000000, P_CAMCC_PLL7_OUT_EVEN, 1, 0, 0),
        F(600000000, P_CAMCC_PLL0_OUT_MAIN, 2, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_ife_lite_0_clk_src = {
@@ -1140,6 +1154,7 @@ static const struct freq_tbl ftbl_camcc_ipe_0_clk_src[] = {
        F(475000000, P_CAMCC_PLL1_OUT_EVEN, 1, 0, 0),
        F(520000000, P_CAMCC_PLL1_OUT_EVEN, 1, 0, 0),
        F(600000000, P_CAMCC_PLL1_OUT_EVEN, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_ipe_0_clk_src = {
@@ -1163,6 +1178,7 @@ static const struct freq_tbl ftbl_camcc_jpeg_clk_src[] = {
        F(400000000, P_CAMCC_PLL0_OUT_ODD, 1, 0, 0),
        F(480000000, P_CAMCC_PLL7_OUT_EVEN, 1, 0, 0),
        F(600000000, P_CAMCC_PLL0_OUT_MAIN, 2, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_jpeg_clk_src = {
@@ -1184,6 +1200,7 @@ static const struct freq_tbl ftbl_camcc_lrme_clk_src[] = {
        F(300000000, P_CAMCC_PLL0_OUT_EVEN, 2, 0, 0),
        F(320000000, P_CAMCC_PLL7_OUT_ODD, 1, 0, 0),
        F(400000000, P_CAMCC_PLL0_OUT_MAIN, 3, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_lrme_clk_src = {
@@ -1204,6 +1221,7 @@ static const struct freq_tbl ftbl_camcc_mclk0_clk_src[] = {
        F(19200000, P_BI_TCXO, 1, 0, 0),
        F(24000000, P_CAMCC_PLL2_OUT_EARLY, 10, 1, 4),
        F(64000000, P_CAMCC_PLL2_OUT_EARLY, 15, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_mclk0_clk_src = {
@@ -1320,6 +1338,7 @@ static struct clk_rcg2 camcc_mclk7_clk_src = {
 
 static const struct freq_tbl ftbl_camcc_sleep_clk_src[] = {
        F(32000, P_SLEEP_CLK, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_sleep_clk_src = {
@@ -1339,6 +1358,7 @@ static struct clk_rcg2 camcc_sleep_clk_src = {
 static const struct freq_tbl ftbl_camcc_slow_ahb_clk_src[] = {
        F(19200000, P_BI_TCXO, 1, 0, 0),
        F(80000000, P_CAMCC_PLL7_OUT_EVEN, 6, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_slow_ahb_clk_src = {
@@ -1357,6 +1377,7 @@ static struct clk_rcg2 camcc_slow_ahb_clk_src = {
 
 static const struct freq_tbl ftbl_camcc_xo_clk_src[] = {
        F(19200000, P_BI_TCXO, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 camcc_xo_clk_src = {
@@ -3010,10 +3031,8 @@ static int camcc_sc8280xp_probe(struct platform_device *pdev)
        clk_lucid_pll_configure(&camcc_pll6, regmap, &camcc_pll6_config);
        clk_lucid_pll_configure(&camcc_pll7, regmap, &camcc_pll7_config);
 
-       /*
-        * Keep camcc_gdsc_clk always enabled:
-        */
-       regmap_update_bits(regmap, 0xc1e4, BIT(0), 1);
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0xc1e4); /* CAMCC_GDSC_CLK */
 
        ret = qcom_cc_really_probe(pdev, &camcc_sc8280xp_desc, regmap);
        if (ret)
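
The { } entries added throughout this file are zeroed sentinels terminating each freq_tbl; the rate-lookup helpers walk the table until they hit an entry whose frequency is 0, so an unterminated array reads past its end. A minimal sketch of such a walk (a hypothetical helper in the spirit of qcom_find_freq(), not the exact implementation):

    static const struct freq_tbl *find_freq(const struct freq_tbl *f,
                                            unsigned long rate)
    {
            for (; f->freq; f++)    /* the { } sentinel has freq == 0 */
                    if (rate <= f->freq)
                            return f;
            return NULL;            /* simplified; the real helper falls back to the last entry */
    }
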
index 27d44188a7abb07311e222c66a78e6ffa5bac67d..8466d03e0d058e60cae8c77219257d5ffdf2e6ae 100644 (file)
@@ -1746,17 +1746,7 @@ static struct platform_driver cam_cc_sdm845_driver = {
        },
 };
 
-static int __init cam_cc_sdm845_init(void)
-{
-       return platform_driver_register(&cam_cc_sdm845_driver);
-}
-subsys_initcall(cam_cc_sdm845_init);
-
-static void __exit cam_cc_sdm845_exit(void)
-{
-       platform_driver_unregister(&cam_cc_sdm845_driver);
-}
-module_exit(cam_cc_sdm845_exit);
+module_platform_driver(cam_cc_sdm845_driver);
 
 MODULE_DESCRIPTION("QTI CAM_CC SDM845 Driver");
 MODULE_LICENSE("GPL v2");
index acba9f99d960c72db49604716ca8c077a73d2a9c..e4e7b308ecf161d82470f072cf1ab3c96dfe611d 100644 (file)
@@ -1890,17 +1890,7 @@ static struct platform_driver camcc_sm6350_driver = {
        },
 };
 
-static int __init camcc_sm6350_init(void)
-{
-       return platform_driver_register(&camcc_sm6350_driver);
-}
-subsys_initcall(camcc_sm6350_init);
-
-static void __exit camcc_sm6350_exit(void)
-{
-       platform_driver_unregister(&camcc_sm6350_driver);
-}
-module_exit(camcc_sm6350_exit);
+module_platform_driver(camcc_sm6350_driver);
 
 MODULE_DESCRIPTION("QTI CAMCC SM6350 Driver");
 MODULE_LICENSE("GPL");
index dd51ba4ea757be676b4f7b6a68c0b4981ba7a99f..1ef59a96f664f6919fef313d4cb5ec25326f03b4 100644 (file)
@@ -3536,13 +3536,9 @@ static int cam_cc_sm8550_probe(struct platform_device *pdev)
        clk_lucid_ole_pll_configure(&cam_cc_pll11, regmap, &cam_cc_pll11_config);
        clk_lucid_ole_pll_configure(&cam_cc_pll12, regmap, &cam_cc_pll12_config);
 
-       /*
-        * Keep clocks always enabled:
-        *      cam_cc_gdsc_clk
-        *      cam_cc_sleep_clk
-        */
-       regmap_update_bits(regmap, 0x1419c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x142cc, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x1419c); /* CAM_CC_GDSC_CLK */
+       qcom_branch_set_clk_en(regmap, 0x142cc); /* CAM_CC_SLEEP_CLK */
 
        ret = qcom_cc_really_probe(pdev, &cam_cc_sm8550_desc, regmap);
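
Here, and in the sc8280xp hunk above, the open-coded regmap_update_bits() calls become qcom_branch_set_clk_en(), which conceptually performs the same single-bit write on the branch's CBCR register:

    regmap_update_bits(regmap, reg, BIT(0), BIT(0));

The sc8280xp version wrote the value as a literal 1, which is equivalent for bit 0; the helper makes the intent explicit and keeps the always-on comments uniform across drivers.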
 
diff --git a/drivers/clk/qcom/camcc-x1e80100.c b/drivers/clk/qcom/camcc-x1e80100.c
new file mode 100644 (file)
index 0000000..46bb225
--- /dev/null
@@ -0,0 +1,2487 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,x1e80100-camcc.h>
+
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-rcg.h"
+#include "clk-regmap.h"
+#include "common.h"
+#include "gdsc.h"
+#include "reset.h"
+
+enum {
+       DT_IFACE,
+       DT_BI_TCXO,
+       DT_BI_TCXO_AO,
+       DT_SLEEP_CLK,
+};
+
+enum {
+       P_BI_TCXO,
+       P_BI_TCXO_AO,
+       P_CAM_CC_PLL0_OUT_EVEN,
+       P_CAM_CC_PLL0_OUT_MAIN,
+       P_CAM_CC_PLL0_OUT_ODD,
+       P_CAM_CC_PLL1_OUT_EVEN,
+       P_CAM_CC_PLL2_OUT_EVEN,
+       P_CAM_CC_PLL2_OUT_MAIN,
+       P_CAM_CC_PLL3_OUT_EVEN,
+       P_CAM_CC_PLL4_OUT_EVEN,
+       P_CAM_CC_PLL6_OUT_EVEN,
+       P_CAM_CC_PLL8_OUT_EVEN,
+       P_SLEEP_CLK,
+};
+
+static const struct pll_vco lucid_ole_vco[] = {
+       { 249600000, 2300000000, 0 },
+};
+
+static const struct pll_vco rivian_ole_vco[] = {
+       { 777000000, 1285000000, 0 },
+};
+
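+/*
+ * Illustrative arithmetic, not from the sources: with a 19.2 MHz reference,
+ * L = 0x3e (62) plus ALPHA = 0x8000 (0.5, assuming a 16-bit fractional
+ * field) puts the Lucid OLE VCO at 19.2 MHz * 62.5 = 1200 MHz, inside the
+ * 249.6 MHz - 2.3 GHz range declared above. The frequency tables below are
+ * consistent with that (even tap = 1200 / 2, odd tap = 1200 / 3).
+ */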
+static const struct alpha_pll_config cam_cc_pll0_config = {
+       .l = 0x3e,
+       .alpha = 0x8000,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00182261,
+       .config_ctl_hi1_val = 0x82aa299c,
+       .test_ctl_val = 0x00000000,
+       .test_ctl_hi_val = 0x00000003,
+       .test_ctl_hi1_val = 0x00009000,
+       .test_ctl_hi2_val = 0x00000034,
+       .user_ctl_val = 0x00008400,
+       .user_ctl_hi_val = 0x00000005,
+};
+
+static struct clk_alpha_pll cam_cc_pll0 = {
+       .offset = 0x0,
+       .vco_table = lucid_ole_vco,
+       .num_vco = ARRAY_SIZE(lucid_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_pll0",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_lucid_evo_ops,
+               },
+       },
+};
+
+static const struct clk_div_table post_div_table_cam_cc_pll0_out_even[] = {
+       { 0x1, 2 },
+       { }
+};
+
+static struct clk_alpha_pll_postdiv cam_cc_pll0_out_even = {
+       .offset = 0x0,
+       .post_div_shift = 10,
+       .post_div_table = post_div_table_cam_cc_pll0_out_even,
+       .num_post_div = ARRAY_SIZE(post_div_table_cam_cc_pll0_out_even),
+       .width = 4,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_pll0_out_even",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &cam_cc_pll0.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_alpha_pll_postdiv_lucid_ole_ops,
+       },
+};
+
+static const struct clk_div_table post_div_table_cam_cc_pll0_out_odd[] = {
+       { 0x2, 3 },
+       { }
+};
+
+static struct clk_alpha_pll_postdiv cam_cc_pll0_out_odd = {
+       .offset = 0x0,
+       .post_div_shift = 14,
+       .post_div_table = post_div_table_cam_cc_pll0_out_odd,
+       .num_post_div = ARRAY_SIZE(post_div_table_cam_cc_pll0_out_odd),
+       .width = 4,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_pll0_out_odd",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &cam_cc_pll0.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_alpha_pll_postdiv_lucid_ole_ops,
+       },
+};
+
+static const struct alpha_pll_config cam_cc_pll1_config = {
+       .l = 0x1f,
+       .alpha = 0xaaaa,
+       .config_ctl_val = 0x20485699,
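+/*
+ * Branch clocks gate via BIT(0) of the CBCR; halt_reg and enable_reg point
+ * at the same register, and BRANCH_HALT polls the status bit until the
+ * branch reports running after enable (or stopped after disable).
+ */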
+       .config_ctl_hi_val = 0x00182261,
+       .config_ctl_hi1_val = 0x82aa299c,
+       .test_ctl_val = 0x00000000,
+       .test_ctl_hi_val = 0x00000003,
+       .test_ctl_hi1_val = 0x00009000,
+       .test_ctl_hi2_val = 0x00000034,
+       .user_ctl_val = 0x00000400,
+       .user_ctl_hi_val = 0x00000005,
+};
+
+static struct clk_alpha_pll cam_cc_pll1 = {
+       .offset = 0x1000,
+       .vco_table = lucid_ole_vco,
+       .num_vco = ARRAY_SIZE(lucid_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_pll1",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_lucid_evo_ops,
+               },
+       },
+};
+
+static const struct clk_div_table post_div_table_cam_cc_pll1_out_even[] = {
+       { 0x1, 2 },
+       { }
+};
+
+static struct clk_alpha_pll_postdiv cam_cc_pll1_out_even = {
+       .offset = 0x1000,
+       .post_div_shift = 10,
+       .post_div_table = post_div_table_cam_cc_pll1_out_even,
+       .num_post_div = ARRAY_SIZE(post_div_table_cam_cc_pll1_out_even),
+       .width = 4,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_pll1_out_even",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &cam_cc_pll1.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_alpha_pll_postdiv_lucid_ole_ops,
+       },
+};
+
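+/*
+ * Illustrative: L = 0x32 (50) with no fractional part gives
+ * 19.2 MHz * 50 = 960 MHz, within the Rivian OLE VCO range above; the
+ * MCLK table below (960 / 14 = ~68.57 MHz) is consistent with that.
+ */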
+static const struct alpha_pll_config cam_cc_pll2_config = {
+       .l = 0x32,
+       .alpha = 0x0,
+       .config_ctl_val = 0x10000030,
+       .config_ctl_hi_val = 0x80890263,
+       .config_ctl_hi1_val = 0x00000217,
+       .user_ctl_val = 0x00000001,
+       .user_ctl_hi_val = 0x00000000,
+};
+
+static struct clk_alpha_pll cam_cc_pll2 = {
+       .offset = 0x2000,
+       .vco_table = rivian_ole_vco,
+       .num_vco = ARRAY_SIZE(rivian_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_RIVIAN_EVO],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_pll2",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_rivian_evo_ops,
+               },
+       },
+};
+
+static const struct alpha_pll_config cam_cc_pll3_config = {
+       .l = 0x24,
+       .alpha = 0x0,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00182261,
+       .config_ctl_hi1_val = 0x82aa299c,
+       .test_ctl_val = 0x00000000,
+       .test_ctl_hi_val = 0x00000003,
+       .test_ctl_hi1_val = 0x00009000,
+       .test_ctl_hi2_val = 0x00000034,
+       .user_ctl_val = 0x00000400,
+       .user_ctl_hi_val = 0x00000005,
+};
+
+static struct clk_alpha_pll cam_cc_pll3 = {
+       .offset = 0x3000,
+       .vco_table = lucid_ole_vco,
+       .num_vco = ARRAY_SIZE(lucid_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_pll3",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_lucid_evo_ops,
+               },
+       },
+};
+
+static const struct clk_div_table post_div_table_cam_cc_pll3_out_even[] = {
+       { 0x1, 2 },
+       { }
+};
+
+static struct clk_alpha_pll_postdiv cam_cc_pll3_out_even = {
+       .offset = 0x3000,
+       .post_div_shift = 10,
+       .post_div_table = post_div_table_cam_cc_pll3_out_even,
+       .num_post_div = ARRAY_SIZE(post_div_table_cam_cc_pll3_out_even),
+       .width = 4,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_pll3_out_even",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &cam_cc_pll3.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_alpha_pll_postdiv_lucid_ole_ops,
+       },
+};
+
+static const struct alpha_pll_config cam_cc_pll4_config = {
+       .l = 0x24,
+       .alpha = 0x0,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00182261,
+       .config_ctl_hi1_val = 0x82aa299c,
+       .test_ctl_val = 0x00000000,
+       .test_ctl_hi_val = 0x00000003,
+       .test_ctl_hi1_val = 0x00009000,
+       .test_ctl_hi2_val = 0x00000034,
+       .user_ctl_val = 0x00000400,
+       .user_ctl_hi_val = 0x00000005,
+};
+
+static struct clk_alpha_pll cam_cc_pll4 = {
+       .offset = 0x4000,
+       .vco_table = lucid_ole_vco,
+       .num_vco = ARRAY_SIZE(lucid_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_pll4",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_lucid_evo_ops,
+               },
+       },
+};
+
+static const struct clk_div_table post_div_table_cam_cc_pll4_out_even[] = {
+       { 0x1, 2 },
+       { }
+};
+
+static struct clk_alpha_pll_postdiv cam_cc_pll4_out_even = {
+       .offset = 0x4000,
+       .post_div_shift = 10,
+       .post_div_table = post_div_table_cam_cc_pll4_out_even,
+       .num_post_div = ARRAY_SIZE(post_div_table_cam_cc_pll4_out_even),
+       .width = 4,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_pll4_out_even",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &cam_cc_pll4.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_alpha_pll_postdiv_lucid_ole_ops,
+       },
+};
+
+static const struct alpha_pll_config cam_cc_pll6_config = {
+       .l = 0x24,
+       .alpha = 0x0,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00182261,
+       .config_ctl_hi1_val = 0x82aa299c,
+       .test_ctl_val = 0x00000000,
+       .test_ctl_hi_val = 0x00000003,
+       .test_ctl_hi1_val = 0x00009000,
+       .test_ctl_hi2_val = 0x00000034,
+       .user_ctl_val = 0x00000400,
+       .user_ctl_hi_val = 0x00000005,
+};
+
+static struct clk_alpha_pll cam_cc_pll6 = {
+       .offset = 0x6000,
+       .vco_table = lucid_ole_vco,
+       .num_vco = ARRAY_SIZE(lucid_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_pll6",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_lucid_evo_ops,
+               },
+       },
+};
+
+static const struct clk_div_table post_div_table_cam_cc_pll6_out_even[] = {
+       { 0x1, 2 },
+       { }
+};
+
+static struct clk_alpha_pll_postdiv cam_cc_pll6_out_even = {
+       .offset = 0x6000,
+       .post_div_shift = 10,
+       .post_div_table = post_div_table_cam_cc_pll6_out_even,
+       .num_post_div = ARRAY_SIZE(post_div_table_cam_cc_pll6_out_even),
+       .width = 4,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_pll6_out_even",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &cam_cc_pll6.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_alpha_pll_postdiv_lucid_ole_ops,
+       },
+};
+
+static const struct alpha_pll_config cam_cc_pll8_config = {
+       .l = 0x32,
+       .alpha = 0x0,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00182261,
+       .config_ctl_hi1_val = 0x82aa299c,
+       .test_ctl_val = 0x00000000,
+       .test_ctl_hi_val = 0x00000003,
+       .test_ctl_hi1_val = 0x00009000,
+       .test_ctl_hi2_val = 0x00000034,
+       .user_ctl_val = 0x00000400,
+       .user_ctl_hi_val = 0x00000005,
+};
+
+static struct clk_alpha_pll cam_cc_pll8 = {
+       .offset = 0x8000,
+       .vco_table = lucid_ole_vco,
+       .num_vco = ARRAY_SIZE(lucid_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_pll8",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_lucid_evo_ops,
+               },
+       },
+};
+
+static const struct clk_div_table post_div_table_cam_cc_pll8_out_even[] = {
+       { 0x1, 2 },
+       { }
+};
+
+static struct clk_alpha_pll_postdiv cam_cc_pll8_out_even = {
+       .offset = 0x8000,
+       .post_div_shift = 10,
+       .post_div_table = post_div_table_cam_cc_pll8_out_even,
+       .num_post_div = ARRAY_SIZE(post_div_table_cam_cc_pll8_out_even),
+       .width = 4,
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_pll8_out_even",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &cam_cc_pll8.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_alpha_pll_postdiv_lucid_ole_ops,
+       },
+};
+
+static const struct parent_map cam_cc_parent_map_0[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL0_OUT_MAIN, 1 },
+       { P_CAM_CC_PLL0_OUT_EVEN, 2 },
+       { P_CAM_CC_PLL0_OUT_ODD, 3 },
+       { P_CAM_CC_PLL8_OUT_EVEN, 5 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_0[] = {
+       { .index = DT_BI_TCXO },
+       { .hw = &cam_cc_pll0.clkr.hw },
+       { .hw = &cam_cc_pll0_out_even.clkr.hw },
+       { .hw = &cam_cc_pll0_out_odd.clkr.hw },
+       { .hw = &cam_cc_pll8_out_even.clkr.hw },
+};
+
+static const struct parent_map cam_cc_parent_map_1[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL2_OUT_EVEN, 3 },
+       { P_CAM_CC_PLL2_OUT_MAIN, 5 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_1[] = {
+       { .index = DT_BI_TCXO },
+       { .hw = &cam_cc_pll2.clkr.hw },
+       { .hw = &cam_cc_pll2.clkr.hw },
+};
+
+static const struct parent_map cam_cc_parent_map_2[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL3_OUT_EVEN, 6 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_2[] = {
+       { .index = DT_BI_TCXO },
+       { .hw = &cam_cc_pll3_out_even.clkr.hw },
+};
+
+static const struct parent_map cam_cc_parent_map_3[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL4_OUT_EVEN, 6 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_3[] = {
+       { .index = DT_BI_TCXO },
+       { .hw = &cam_cc_pll4_out_even.clkr.hw },
+};
+
+static const struct parent_map cam_cc_parent_map_4[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL1_OUT_EVEN, 4 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_4[] = {
+       { .index = DT_BI_TCXO },
+       { .hw = &cam_cc_pll1_out_even.clkr.hw },
+};
+
+static const struct parent_map cam_cc_parent_map_5[] = {
+       { P_BI_TCXO, 0 },
+       { P_CAM_CC_PLL6_OUT_EVEN, 6 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_5[] = {
+       { .index = DT_BI_TCXO },
+       { .hw = &cam_cc_pll6_out_even.clkr.hw },
+};
+
+static const struct parent_map cam_cc_parent_map_6[] = {
+       { P_SLEEP_CLK, 0 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_6_ao[] = {
+       { .index = DT_SLEEP_CLK },
+};
+
+static const struct parent_map cam_cc_parent_map_7[] = {
+       { P_BI_TCXO, 0 },
+};
+
+static const struct clk_parent_data cam_cc_parent_data_7_ao[] = {
+       { .index = DT_BI_TCXO_AO },
+};
+
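+/*
+ * F(rate, src, pre_div, m, n): target rate, mux input (resolved through the
+ * parent_map), RCG pre-divider and M/N counter values (0/0 when the MND
+ * stage is unused). For example, the 600 MHz row below selects PLL0's even
+ * tap (1200 MHz / 2) with a pre-divider of 1.
+ */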
+static const struct freq_tbl ftbl_cam_cc_bps_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(160000000, P_CAM_CC_PLL0_OUT_ODD, 2.5, 0, 0),
+       F(200000000, P_CAM_CC_PLL0_OUT_ODD, 2, 0, 0),
+       F(400000000, P_CAM_CC_PLL0_OUT_ODD, 1, 0, 0),
+       F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
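+/*
+ * RCGs using clk_rcg2_shared_ops are parked on the XO when disabled rather
+ * than having their configuration torn down; the plain clk_rcg2_ops RCGs
+ * (CCI, PHY timers, MCLKs) keep whatever source was last programmed.
+ */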
+static struct clk_rcg2 cam_cc_bps_clk_src = {
+       .cmd_rcgr = 0x10278,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_bps_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_bps_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_camnoc_axi_rt_clk_src[] = {
+       F(240000000, P_CAM_CC_PLL0_OUT_EVEN, 2.5, 0, 0),
+       F(300000000, P_CAM_CC_PLL0_OUT_EVEN, 2, 0, 0),
+       F(400000000, P_CAM_CC_PLL0_OUT_ODD, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_camnoc_axi_rt_clk_src = {
+       .cmd_rcgr = 0x138f8,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_camnoc_axi_rt_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_camnoc_axi_rt_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_cci_0_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(30000000, P_CAM_CC_PLL8_OUT_EVEN, 16, 0, 0),
+       F(37500000, P_CAM_CC_PLL0_OUT_EVEN, 16, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_cci_0_clk_src = {
+       .cmd_rcgr = 0x1365c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_cci_0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_cci_0_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_cci_1_clk_src = {
+       .cmd_rcgr = 0x1378c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_cci_0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_cci_1_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_cphy_rx_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(300000000, P_CAM_CC_PLL0_OUT_MAIN, 4, 0, 0),
+       F(400000000, P_CAM_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       F(480000000, P_CAM_CC_PLL0_OUT_MAIN, 2.5, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_cphy_rx_clk_src = {
+       .cmd_rcgr = 0x11164,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_cphy_rx_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_cphy_rx_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_csi0phytimer_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(266666667, P_CAM_CC_PLL0_OUT_ODD, 1.5, 0, 0),
+       F(400000000, P_CAM_CC_PLL0_OUT_ODD, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_csi0phytimer_clk_src = {
+       .cmd_rcgr = 0x150e0,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_csi0phytimer_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_csi1phytimer_clk_src = {
+       .cmd_rcgr = 0x15104,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_csi1phytimer_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_csi2phytimer_clk_src = {
+       .cmd_rcgr = 0x15124,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_csi2phytimer_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_csi3phytimer_clk_src = {
+       .cmd_rcgr = 0x15258,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_csi3phytimer_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_csi4phytimer_clk_src = {
+       .cmd_rcgr = 0x1538c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_csi4phytimer_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_csi5phytimer_clk_src = {
+       .cmd_rcgr = 0x154c0,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csi0phytimer_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_csi5phytimer_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_csid_clk_src[] = {
+       F(300000000, P_CAM_CC_PLL0_OUT_MAIN, 4, 0, 0),
+       F(400000000, P_CAM_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       F(480000000, P_CAM_CC_PLL0_OUT_MAIN, 2.5, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_csid_clk_src = {
+       .cmd_rcgr = 0x138d4,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_csid_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_csid_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_fast_ahb_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(80000000, P_CAM_CC_PLL0_OUT_EVEN, 7.5, 0, 0),
+       F(100000000, P_CAM_CC_PLL0_OUT_EVEN, 6, 0, 0),
+       F(200000000, P_CAM_CC_PLL0_OUT_EVEN, 3, 0, 0),
+       F(300000000, P_CAM_CC_PLL0_OUT_MAIN, 4, 0, 0),
+       F(400000000, P_CAM_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_fast_ahb_clk_src = {
+       .cmd_rcgr = 0x10018,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_fast_ahb_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_fast_ahb_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_icp_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(300000000, P_CAM_CC_PLL0_OUT_EVEN, 2, 0, 0),
+       F(400000000, P_CAM_CC_PLL0_OUT_ODD, 1, 0, 0),
+       F(480000000, P_CAM_CC_PLL8_OUT_EVEN, 1, 0, 0),
+       F(600000000, P_CAM_CC_PLL0_OUT_MAIN, 2, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_icp_clk_src = {
+       .cmd_rcgr = 0x13520,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_icp_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_icp_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_ife_0_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(345600000, P_CAM_CC_PLL3_OUT_EVEN, 1, 0, 0),
+       F(432000000, P_CAM_CC_PLL3_OUT_EVEN, 1, 0, 0),
+       F(594000000, P_CAM_CC_PLL3_OUT_EVEN, 1, 0, 0),
+       F(675000000, P_CAM_CC_PLL3_OUT_EVEN, 1, 0, 0),
+       F(727000000, P_CAM_CC_PLL3_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_ife_0_clk_src = {
+       .cmd_rcgr = 0x11018,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_2,
+       .freq_tbl = ftbl_cam_cc_ife_0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_ife_0_clk_src",
+               .parent_data = cam_cc_parent_data_2,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_2),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_ife_1_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(345600000, P_CAM_CC_PLL4_OUT_EVEN, 1, 0, 0),
+       F(432000000, P_CAM_CC_PLL4_OUT_EVEN, 1, 0, 0),
+       F(594000000, P_CAM_CC_PLL4_OUT_EVEN, 1, 0, 0),
+       F(675000000, P_CAM_CC_PLL4_OUT_EVEN, 1, 0, 0),
+       F(727000000, P_CAM_CC_PLL4_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_ife_1_clk_src = {
+       .cmd_rcgr = 0x12018,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_3,
+       .freq_tbl = ftbl_cam_cc_ife_1_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_ife_1_clk_src",
+               .parent_data = cam_cc_parent_data_3,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_3),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_ife_lite_clk_src[] = {
+       F(266666667, P_CAM_CC_PLL0_OUT_ODD, 1.5, 0, 0),
+       F(400000000, P_CAM_CC_PLL0_OUT_ODD, 1, 0, 0),
+       F(480000000, P_CAM_CC_PLL8_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_ife_lite_clk_src = {
+       .cmd_rcgr = 0x13000,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_ife_lite_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_ife_lite_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_ife_lite_csid_clk_src = {
+       .cmd_rcgr = 0x1313c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_ife_lite_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_ife_lite_csid_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_ipe_nps_clk_src[] = {
+       F(304000000, P_CAM_CC_PLL1_OUT_EVEN, 1, 0, 0),
+       F(364000000, P_CAM_CC_PLL1_OUT_EVEN, 1, 0, 0),
+       F(500000000, P_CAM_CC_PLL1_OUT_EVEN, 1, 0, 0),
+       F(600000000, P_CAM_CC_PLL1_OUT_EVEN, 1, 0, 0),
+       F(700000000, P_CAM_CC_PLL1_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_ipe_nps_clk_src = {
+       .cmd_rcgr = 0x103cc,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_4,
+       .freq_tbl = ftbl_cam_cc_ipe_nps_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_ipe_nps_clk_src",
+               .parent_data = cam_cc_parent_data_4,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_4),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_jpeg_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(160000000, P_CAM_CC_PLL0_OUT_ODD, 2.5, 0, 0),
+       F(200000000, P_CAM_CC_PLL0_OUT_ODD, 2, 0, 0),
+       F(400000000, P_CAM_CC_PLL0_OUT_ODD, 1, 0, 0),
+       F(480000000, P_CAM_CC_PLL8_OUT_EVEN, 1, 0, 0),
+       F(600000000, P_CAM_CC_PLL0_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_jpeg_clk_src = {
+       .cmd_rcgr = 0x133dc,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_jpeg_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_jpeg_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_mclk0_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(24000000, P_CAM_CC_PLL2_OUT_MAIN, 10, 1, 4),
+       F(68571429, P_CAM_CC_PLL2_OUT_MAIN, 14, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_mclk0_clk_src = {
+       .cmd_rcgr = 0x15000,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_mclk0_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk1_clk_src = {
+       .cmd_rcgr = 0x1501c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_mclk1_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk2_clk_src = {
+       .cmd_rcgr = 0x15038,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_mclk2_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk3_clk_src = {
+       .cmd_rcgr = 0x15054,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_mclk3_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk4_clk_src = {
+       .cmd_rcgr = 0x15070,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_mclk4_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk5_clk_src = {
+       .cmd_rcgr = 0x1508c,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_mclk5_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk6_clk_src = {
+       .cmd_rcgr = 0x150a8,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_mclk6_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 cam_cc_mclk7_clk_src = {
+       .cmd_rcgr = 0x150c4,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_1,
+       .freq_tbl = ftbl_cam_cc_mclk0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_mclk7_clk_src",
+               .parent_data = cam_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_sfe_0_clk_src[] = {
+       F(345600000, P_CAM_CC_PLL6_OUT_EVEN, 1, 0, 0),
+       F(432000000, P_CAM_CC_PLL6_OUT_EVEN, 1, 0, 0),
+       F(594000000, P_CAM_CC_PLL6_OUT_EVEN, 1, 0, 0),
+       F(675000000, P_CAM_CC_PLL6_OUT_EVEN, 1, 0, 0),
+       F(727000000, P_CAM_CC_PLL6_OUT_EVEN, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_sfe_0_clk_src = {
+       .cmd_rcgr = 0x13294,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_5,
+       .freq_tbl = ftbl_cam_cc_sfe_0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_sfe_0_clk_src",
+               .parent_data = cam_cc_parent_data_5,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_5),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_sleep_clk_src[] = {
+       F(32000, P_SLEEP_CLK, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_sleep_clk_src = {
+       .cmd_rcgr = 0x13aa0,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_6,
+       .freq_tbl = ftbl_cam_cc_sleep_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_sleep_clk_src",
+               .parent_data = cam_cc_parent_data_6_ao,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_6_ao),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_slow_ahb_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(64000000, P_CAM_CC_PLL8_OUT_EVEN, 7.5, 0, 0),
+       F(80000000, P_CAM_CC_PLL0_OUT_EVEN, 7.5, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_slow_ahb_clk_src = {
+       .cmd_rcgr = 0x10148,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_0,
+       .freq_tbl = ftbl_cam_cc_slow_ahb_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_slow_ahb_clk_src",
+               .parent_data = cam_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_cam_cc_xo_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 cam_cc_xo_clk_src = {
+       .cmd_rcgr = 0x13a84,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = cam_cc_parent_map_7,
+       .freq_tbl = ftbl_cam_cc_xo_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "cam_cc_xo_clk_src",
+               .parent_data = cam_cc_parent_data_7_ao,
+               .num_parents = ARRAY_SIZE(cam_cc_parent_data_7_ao),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
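+/* Branch (gate) clocks: each one gates the output of its parent clock source above. */
+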
+static struct clk_branch cam_cc_bps_ahb_clk = {
+       .halt_reg = 0x10274,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10274,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_bps_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_slow_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_bps_clk = {
+       .halt_reg = 0x103a4,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x103a4,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_bps_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_bps_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_bps_fast_ahb_clk = {
+       .halt_reg = 0x10144,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10144,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_bps_fast_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_fast_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_camnoc_axi_nrt_clk = {
+       .halt_reg = 0x13920,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13920,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_camnoc_axi_nrt_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_camnoc_axi_rt_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_camnoc_axi_rt_clk = {
+       .halt_reg = 0x13910,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13910,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_camnoc_axi_rt_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_camnoc_axi_rt_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_camnoc_dcd_xo_clk = {
+       .halt_reg = 0x1392c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1392c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_camnoc_dcd_xo_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_xo_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_camnoc_xo_clk = {
+       .halt_reg = 0x13930,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13930,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_camnoc_xo_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_xo_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cci_0_clk = {
+       .halt_reg = 0x13788,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13788,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_cci_0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_cci_0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cci_1_clk = {
+       .halt_reg = 0x138b8,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x138b8,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_cci_1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_cci_1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_core_ahb_clk = {
+       .halt_reg = 0x13a80,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x13a80,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_core_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_slow_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cpas_ahb_clk = {
+       .halt_reg = 0x138bc,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x138bc,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_cpas_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_slow_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cpas_bps_clk = {
+       .halt_reg = 0x103b0,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x103b0,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_cpas_bps_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_bps_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cpas_fast_ahb_clk = {
+       .halt_reg = 0x138c8,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x138c8,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_cpas_fast_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_fast_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cpas_ife_0_clk = {
+       .halt_reg = 0x11150,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x11150,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_cpas_ife_0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ife_0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cpas_ife_1_clk = {
+       .halt_reg = 0x1203c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1203c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_cpas_ife_1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ife_1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cpas_ife_lite_clk = {
+       .halt_reg = 0x13138,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13138,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_cpas_ife_lite_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ife_lite_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cpas_ipe_nps_clk = {
+       .halt_reg = 0x10504,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10504,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_cpas_ipe_nps_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ipe_nps_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_cpas_sfe_0_clk = {
+       .halt_reg = 0x133cc,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x133cc,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_cpas_sfe_0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_sfe_0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csi0phytimer_clk = {
+       .halt_reg = 0x150f8,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x150f8,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csi0phytimer_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_csi0phytimer_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csi1phytimer_clk = {
+       .halt_reg = 0x1511c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1511c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csi1phytimer_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_csi1phytimer_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csi2phytimer_clk = {
+       .halt_reg = 0x15250,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15250,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csi2phytimer_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_csi2phytimer_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csi3phytimer_clk = {
+       .halt_reg = 0x15384,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15384,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csi3phytimer_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_csi3phytimer_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csi4phytimer_clk = {
+       .halt_reg = 0x154b8,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x154b8,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csi4phytimer_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_csi4phytimer_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csi5phytimer_clk = {
+       .halt_reg = 0x155ec,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x155ec,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csi5phytimer_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_csi5phytimer_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csid_clk = {
+       .halt_reg = 0x138ec,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x138ec,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csid_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_csid_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csid_csiphy_rx_clk = {
+       .halt_reg = 0x15100,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15100,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csid_csiphy_rx_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csiphy0_clk = {
+       .halt_reg = 0x150fc,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x150fc,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csiphy0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csiphy1_clk = {
+       .halt_reg = 0x15120,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15120,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csiphy1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csiphy2_clk = {
+       .halt_reg = 0x15254,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15254,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csiphy2_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csiphy3_clk = {
+       .halt_reg = 0x15388,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15388,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csiphy3_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csiphy4_clk = {
+       .halt_reg = 0x154bc,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x154bc,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csiphy4_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_csiphy5_clk = {
+       .halt_reg = 0x155f0,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x155f0,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_csiphy5_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_icp_ahb_clk = {
+       .halt_reg = 0x13658,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13658,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_icp_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_slow_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_icp_clk = {
+       .halt_reg = 0x1364c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1364c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_icp_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_icp_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_0_clk = {
+       .halt_reg = 0x11144,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x11144,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ife_0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ife_0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_0_dsp_clk = {
+       .halt_reg = 0x11154,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x11154,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ife_0_dsp_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ife_0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_0_fast_ahb_clk = {
+       .halt_reg = 0x11160,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x11160,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ife_0_fast_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_fast_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_1_clk = {
+       .halt_reg = 0x12030,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x12030,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ife_1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ife_1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_1_dsp_clk = {
+       .halt_reg = 0x12040,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x12040,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ife_1_dsp_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ife_1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_1_fast_ahb_clk = {
+       .halt_reg = 0x1204c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1204c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ife_1_fast_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_fast_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_lite_ahb_clk = {
+       .halt_reg = 0x13278,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13278,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ife_lite_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_slow_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_lite_clk = {
+       .halt_reg = 0x1312c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1312c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ife_lite_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ife_lite_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_lite_cphy_rx_clk = {
+       .halt_reg = 0x13274,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13274,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ife_lite_cphy_rx_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_cphy_rx_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ife_lite_csid_clk = {
+       .halt_reg = 0x13268,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13268,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ife_lite_csid_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ife_lite_csid_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ipe_nps_ahb_clk = {
+       .halt_reg = 0x1051c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1051c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ipe_nps_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_slow_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ipe_nps_clk = {
+       .halt_reg = 0x104f8,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x104f8,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ipe_nps_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ipe_nps_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ipe_nps_fast_ahb_clk = {
+       .halt_reg = 0x10520,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10520,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ipe_nps_fast_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_fast_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ipe_pps_clk = {
+       .halt_reg = 0x10508,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10508,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ipe_pps_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_ipe_nps_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_ipe_pps_fast_ahb_clk = {
+       .halt_reg = 0x10524,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x10524,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_ipe_pps_fast_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_fast_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_jpeg_clk = {
+       .halt_reg = 0x13508,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x13508,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_jpeg_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_jpeg_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk0_clk = {
+       .halt_reg = 0x15018,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15018,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_mclk0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_mclk0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk1_clk = {
+       .halt_reg = 0x15034,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15034,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_mclk1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_mclk1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk2_clk = {
+       .halt_reg = 0x15050,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15050,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_mclk2_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_mclk2_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk3_clk = {
+       .halt_reg = 0x1506c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x1506c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_mclk3_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_mclk3_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk4_clk = {
+       .halt_reg = 0x15088,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x15088,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_mclk4_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_mclk4_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk5_clk = {
+       .halt_reg = 0x150a4,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x150a4,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_mclk5_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_mclk5_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk6_clk = {
+       .halt_reg = 0x150c0,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x150c0,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_mclk6_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_mclk6_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_mclk7_clk = {
+       .halt_reg = 0x150dc,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x150dc,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_mclk7_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_mclk7_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_sfe_0_clk = {
+       .halt_reg = 0x133c0,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x133c0,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_sfe_0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_sfe_0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch cam_cc_sfe_0_fast_ahb_clk = {
+       .halt_reg = 0x133d8,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x133d8,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "cam_cc_sfe_0_fast_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &cam_cc_fast_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
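+/* GDSCs (globally distributed switch controllers): power domains for the camera blocks. */
+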
+static struct gdsc cam_cc_bps_gdsc = {
+       .gdscr = 0x10004,
+       .en_rest_wait_val = 0x2,
+       .en_few_wait_val = 0x2,
+       .clk_dis_wait_val = 0xf,
+       .pd = {
+               .name = "cam_cc_bps_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = POLL_CFG_GDSCR | RETAIN_FF_ENABLE,
+};
+
+static struct gdsc cam_cc_ife_0_gdsc = {
+       .gdscr = 0x11004,
+       .en_rest_wait_val = 0x2,
+       .en_few_wait_val = 0x2,
+       .clk_dis_wait_val = 0xf,
+       .pd = {
+               .name = "cam_cc_ife_0_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = POLL_CFG_GDSCR | RETAIN_FF_ENABLE,
+};
+
+static struct gdsc cam_cc_ife_1_gdsc = {
+       .gdscr = 0x12004,
+       .en_rest_wait_val = 0x2,
+       .en_few_wait_val = 0x2,
+       .clk_dis_wait_val = 0xf,
+       .pd = {
+               .name = "cam_cc_ife_1_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = POLL_CFG_GDSCR | RETAIN_FF_ENABLE,
+};
+
+static struct gdsc cam_cc_ipe_0_gdsc = {
+       .gdscr = 0x103b8,
+       .en_rest_wait_val = 0x2,
+       .en_few_wait_val = 0x2,
+       .clk_dis_wait_val = 0xf,
+       .pd = {
+               .name = "cam_cc_ipe_0_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = POLL_CFG_GDSCR | RETAIN_FF_ENABLE,
+};
+
+static struct gdsc cam_cc_sfe_0_gdsc = {
+       .gdscr = 0x13280,
+       .en_rest_wait_val = 0x2,
+       .en_few_wait_val = 0x2,
+       .clk_dis_wait_val = 0xf,
+       .pd = {
+               .name = "cam_cc_sfe_0_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = POLL_CFG_GDSCR | RETAIN_FF_ENABLE,
+};
+
+static struct gdsc cam_cc_titan_top_gdsc = {
+       .gdscr = 0x13a6c,
+       .en_rest_wait_val = 0x2,
+       .en_few_wait_val = 0x2,
+       .clk_dis_wait_val = 0xf,
+       .pd = {
+               .name = "cam_cc_titan_top_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = POLL_CFG_GDSCR | RETAIN_FF_ENABLE,
+};
+
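+/* Tables exposing the clocks, GDSCs and block resets to the common qcom_cc framework. */
+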
+static struct clk_regmap *cam_cc_x1e80100_clocks[] = {
+       [CAM_CC_BPS_AHB_CLK] = &cam_cc_bps_ahb_clk.clkr,
+       [CAM_CC_BPS_CLK] = &cam_cc_bps_clk.clkr,
+       [CAM_CC_BPS_CLK_SRC] = &cam_cc_bps_clk_src.clkr,
+       [CAM_CC_BPS_FAST_AHB_CLK] = &cam_cc_bps_fast_ahb_clk.clkr,
+       [CAM_CC_CAMNOC_AXI_NRT_CLK] = &cam_cc_camnoc_axi_nrt_clk.clkr,
+       [CAM_CC_CAMNOC_AXI_RT_CLK] = &cam_cc_camnoc_axi_rt_clk.clkr,
+       [CAM_CC_CAMNOC_AXI_RT_CLK_SRC] = &cam_cc_camnoc_axi_rt_clk_src.clkr,
+       [CAM_CC_CAMNOC_DCD_XO_CLK] = &cam_cc_camnoc_dcd_xo_clk.clkr,
+       [CAM_CC_CAMNOC_XO_CLK] = &cam_cc_camnoc_xo_clk.clkr,
+       [CAM_CC_CCI_0_CLK] = &cam_cc_cci_0_clk.clkr,
+       [CAM_CC_CCI_0_CLK_SRC] = &cam_cc_cci_0_clk_src.clkr,
+       [CAM_CC_CCI_1_CLK] = &cam_cc_cci_1_clk.clkr,
+       [CAM_CC_CCI_1_CLK_SRC] = &cam_cc_cci_1_clk_src.clkr,
+       [CAM_CC_CORE_AHB_CLK] = &cam_cc_core_ahb_clk.clkr,
+       [CAM_CC_CPAS_AHB_CLK] = &cam_cc_cpas_ahb_clk.clkr,
+       [CAM_CC_CPAS_BPS_CLK] = &cam_cc_cpas_bps_clk.clkr,
+       [CAM_CC_CPAS_FAST_AHB_CLK] = &cam_cc_cpas_fast_ahb_clk.clkr,
+       [CAM_CC_CPAS_IFE_0_CLK] = &cam_cc_cpas_ife_0_clk.clkr,
+       [CAM_CC_CPAS_IFE_1_CLK] = &cam_cc_cpas_ife_1_clk.clkr,
+       [CAM_CC_CPAS_IFE_LITE_CLK] = &cam_cc_cpas_ife_lite_clk.clkr,
+       [CAM_CC_CPAS_IPE_NPS_CLK] = &cam_cc_cpas_ipe_nps_clk.clkr,
+       [CAM_CC_CPAS_SFE_0_CLK] = &cam_cc_cpas_sfe_0_clk.clkr,
+       [CAM_CC_CPHY_RX_CLK_SRC] = &cam_cc_cphy_rx_clk_src.clkr,
+       [CAM_CC_CSI0PHYTIMER_CLK] = &cam_cc_csi0phytimer_clk.clkr,
+       [CAM_CC_CSI0PHYTIMER_CLK_SRC] = &cam_cc_csi0phytimer_clk_src.clkr,
+       [CAM_CC_CSI1PHYTIMER_CLK] = &cam_cc_csi1phytimer_clk.clkr,
+       [CAM_CC_CSI1PHYTIMER_CLK_SRC] = &cam_cc_csi1phytimer_clk_src.clkr,
+       [CAM_CC_CSI2PHYTIMER_CLK] = &cam_cc_csi2phytimer_clk.clkr,
+       [CAM_CC_CSI2PHYTIMER_CLK_SRC] = &cam_cc_csi2phytimer_clk_src.clkr,
+       [CAM_CC_CSI3PHYTIMER_CLK] = &cam_cc_csi3phytimer_clk.clkr,
+       [CAM_CC_CSI3PHYTIMER_CLK_SRC] = &cam_cc_csi3phytimer_clk_src.clkr,
+       [CAM_CC_CSI4PHYTIMER_CLK] = &cam_cc_csi4phytimer_clk.clkr,
+       [CAM_CC_CSI4PHYTIMER_CLK_SRC] = &cam_cc_csi4phytimer_clk_src.clkr,
+       [CAM_CC_CSI5PHYTIMER_CLK] = &cam_cc_csi5phytimer_clk.clkr,
+       [CAM_CC_CSI5PHYTIMER_CLK_SRC] = &cam_cc_csi5phytimer_clk_src.clkr,
+       [CAM_CC_CSID_CLK] = &cam_cc_csid_clk.clkr,
+       [CAM_CC_CSID_CLK_SRC] = &cam_cc_csid_clk_src.clkr,
+       [CAM_CC_CSID_CSIPHY_RX_CLK] = &cam_cc_csid_csiphy_rx_clk.clkr,
+       [CAM_CC_CSIPHY0_CLK] = &cam_cc_csiphy0_clk.clkr,
+       [CAM_CC_CSIPHY1_CLK] = &cam_cc_csiphy1_clk.clkr,
+       [CAM_CC_CSIPHY2_CLK] = &cam_cc_csiphy2_clk.clkr,
+       [CAM_CC_CSIPHY3_CLK] = &cam_cc_csiphy3_clk.clkr,
+       [CAM_CC_CSIPHY4_CLK] = &cam_cc_csiphy4_clk.clkr,
+       [CAM_CC_CSIPHY5_CLK] = &cam_cc_csiphy5_clk.clkr,
+       [CAM_CC_FAST_AHB_CLK_SRC] = &cam_cc_fast_ahb_clk_src.clkr,
+       [CAM_CC_ICP_AHB_CLK] = &cam_cc_icp_ahb_clk.clkr,
+       [CAM_CC_ICP_CLK] = &cam_cc_icp_clk.clkr,
+       [CAM_CC_ICP_CLK_SRC] = &cam_cc_icp_clk_src.clkr,
+       [CAM_CC_IFE_0_CLK] = &cam_cc_ife_0_clk.clkr,
+       [CAM_CC_IFE_0_CLK_SRC] = &cam_cc_ife_0_clk_src.clkr,
+       [CAM_CC_IFE_0_DSP_CLK] = &cam_cc_ife_0_dsp_clk.clkr,
+       [CAM_CC_IFE_0_FAST_AHB_CLK] = &cam_cc_ife_0_fast_ahb_clk.clkr,
+       [CAM_CC_IFE_1_CLK] = &cam_cc_ife_1_clk.clkr,
+       [CAM_CC_IFE_1_CLK_SRC] = &cam_cc_ife_1_clk_src.clkr,
+       [CAM_CC_IFE_1_DSP_CLK] = &cam_cc_ife_1_dsp_clk.clkr,
+       [CAM_CC_IFE_1_FAST_AHB_CLK] = &cam_cc_ife_1_fast_ahb_clk.clkr,
+       [CAM_CC_IFE_LITE_AHB_CLK] = &cam_cc_ife_lite_ahb_clk.clkr,
+       [CAM_CC_IFE_LITE_CLK] = &cam_cc_ife_lite_clk.clkr,
+       [CAM_CC_IFE_LITE_CLK_SRC] = &cam_cc_ife_lite_clk_src.clkr,
+       [CAM_CC_IFE_LITE_CPHY_RX_CLK] = &cam_cc_ife_lite_cphy_rx_clk.clkr,
+       [CAM_CC_IFE_LITE_CSID_CLK] = &cam_cc_ife_lite_csid_clk.clkr,
+       [CAM_CC_IFE_LITE_CSID_CLK_SRC] = &cam_cc_ife_lite_csid_clk_src.clkr,
+       [CAM_CC_IPE_NPS_AHB_CLK] = &cam_cc_ipe_nps_ahb_clk.clkr,
+       [CAM_CC_IPE_NPS_CLK] = &cam_cc_ipe_nps_clk.clkr,
+       [CAM_CC_IPE_NPS_CLK_SRC] = &cam_cc_ipe_nps_clk_src.clkr,
+       [CAM_CC_IPE_NPS_FAST_AHB_CLK] = &cam_cc_ipe_nps_fast_ahb_clk.clkr,
+       [CAM_CC_IPE_PPS_CLK] = &cam_cc_ipe_pps_clk.clkr,
+       [CAM_CC_IPE_PPS_FAST_AHB_CLK] = &cam_cc_ipe_pps_fast_ahb_clk.clkr,
+       [CAM_CC_JPEG_CLK] = &cam_cc_jpeg_clk.clkr,
+       [CAM_CC_JPEG_CLK_SRC] = &cam_cc_jpeg_clk_src.clkr,
+       [CAM_CC_MCLK0_CLK] = &cam_cc_mclk0_clk.clkr,
+       [CAM_CC_MCLK0_CLK_SRC] = &cam_cc_mclk0_clk_src.clkr,
+       [CAM_CC_MCLK1_CLK] = &cam_cc_mclk1_clk.clkr,
+       [CAM_CC_MCLK1_CLK_SRC] = &cam_cc_mclk1_clk_src.clkr,
+       [CAM_CC_MCLK2_CLK] = &cam_cc_mclk2_clk.clkr,
+       [CAM_CC_MCLK2_CLK_SRC] = &cam_cc_mclk2_clk_src.clkr,
+       [CAM_CC_MCLK3_CLK] = &cam_cc_mclk3_clk.clkr,
+       [CAM_CC_MCLK3_CLK_SRC] = &cam_cc_mclk3_clk_src.clkr,
+       [CAM_CC_MCLK4_CLK] = &cam_cc_mclk4_clk.clkr,
+       [CAM_CC_MCLK4_CLK_SRC] = &cam_cc_mclk4_clk_src.clkr,
+       [CAM_CC_MCLK5_CLK] = &cam_cc_mclk5_clk.clkr,
+       [CAM_CC_MCLK5_CLK_SRC] = &cam_cc_mclk5_clk_src.clkr,
+       [CAM_CC_MCLK6_CLK] = &cam_cc_mclk6_clk.clkr,
+       [CAM_CC_MCLK6_CLK_SRC] = &cam_cc_mclk6_clk_src.clkr,
+       [CAM_CC_MCLK7_CLK] = &cam_cc_mclk7_clk.clkr,
+       [CAM_CC_MCLK7_CLK_SRC] = &cam_cc_mclk7_clk_src.clkr,
+       [CAM_CC_PLL0] = &cam_cc_pll0.clkr,
+       [CAM_CC_PLL0_OUT_EVEN] = &cam_cc_pll0_out_even.clkr,
+       [CAM_CC_PLL0_OUT_ODD] = &cam_cc_pll0_out_odd.clkr,
+       [CAM_CC_PLL1] = &cam_cc_pll1.clkr,
+       [CAM_CC_PLL1_OUT_EVEN] = &cam_cc_pll1_out_even.clkr,
+       [CAM_CC_PLL2] = &cam_cc_pll2.clkr,
+       [CAM_CC_PLL3] = &cam_cc_pll3.clkr,
+       [CAM_CC_PLL3_OUT_EVEN] = &cam_cc_pll3_out_even.clkr,
+       [CAM_CC_PLL4] = &cam_cc_pll4.clkr,
+       [CAM_CC_PLL4_OUT_EVEN] = &cam_cc_pll4_out_even.clkr,
+       [CAM_CC_PLL6] = &cam_cc_pll6.clkr,
+       [CAM_CC_PLL6_OUT_EVEN] = &cam_cc_pll6_out_even.clkr,
+       [CAM_CC_PLL8] = &cam_cc_pll8.clkr,
+       [CAM_CC_PLL8_OUT_EVEN] = &cam_cc_pll8_out_even.clkr,
+       [CAM_CC_SFE_0_CLK] = &cam_cc_sfe_0_clk.clkr,
+       [CAM_CC_SFE_0_CLK_SRC] = &cam_cc_sfe_0_clk_src.clkr,
+       [CAM_CC_SFE_0_FAST_AHB_CLK] = &cam_cc_sfe_0_fast_ahb_clk.clkr,
+       [CAM_CC_SLEEP_CLK_SRC] = &cam_cc_sleep_clk_src.clkr,
+       [CAM_CC_SLOW_AHB_CLK_SRC] = &cam_cc_slow_ahb_clk_src.clkr,
+       [CAM_CC_XO_CLK_SRC] = &cam_cc_xo_clk_src.clkr,
+};
+
+static struct gdsc *cam_cc_x1e80100_gdscs[] = {
+       [CAM_CC_BPS_GDSC] = &cam_cc_bps_gdsc,
+       [CAM_CC_IFE_0_GDSC] = &cam_cc_ife_0_gdsc,
+       [CAM_CC_IFE_1_GDSC] = &cam_cc_ife_1_gdsc,
+       [CAM_CC_IPE_0_GDSC] = &cam_cc_ipe_0_gdsc,
+       [CAM_CC_SFE_0_GDSC] = &cam_cc_sfe_0_gdsc,
+       [CAM_CC_TITAN_TOP_GDSC] = &cam_cc_titan_top_gdsc,
+};
+
+static const struct qcom_reset_map cam_cc_x1e80100_resets[] = {
+       [CAM_CC_BPS_BCR] = { 0x10000 },
+       [CAM_CC_ICP_BCR] = { 0x1351c },
+       [CAM_CC_IFE_0_BCR] = { 0x11000 },
+       [CAM_CC_IFE_1_BCR] = { 0x12000 },
+       [CAM_CC_IPE_0_BCR] = { 0x103b4 },
+       [CAM_CC_SFE_0_BCR] = { 0x1327c },
+};
+
+static const struct regmap_config cam_cc_x1e80100_regmap_config = {
+       .reg_bits = 32,
+       .reg_stride = 4,
+       .val_bits = 32,
+       .max_register = 0x1603c,
+       .fast_io = true,
+};
+
+static const struct qcom_cc_desc cam_cc_x1e80100_desc = {
+       .config = &cam_cc_x1e80100_regmap_config,
+       .clks = cam_cc_x1e80100_clocks,
+       .num_clks = ARRAY_SIZE(cam_cc_x1e80100_clocks),
+       .resets = cam_cc_x1e80100_resets,
+       .num_resets = ARRAY_SIZE(cam_cc_x1e80100_resets),
+       .gdscs = cam_cc_x1e80100_gdscs,
+       .num_gdscs = ARRAY_SIZE(cam_cc_x1e80100_gdscs),
+};
+
+static const struct of_device_id cam_cc_x1e80100_match_table[] = {
+       { .compatible = "qcom,x1e80100-camcc" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, cam_cc_x1e80100_match_table);
+
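+/*
+ * Resume the controller, program the PLLs, force the always-on branches,
+ * then register the clocks, resets and GDSCs with the framework.
+ */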
+static int cam_cc_x1e80100_probe(struct platform_device *pdev)
+{
+       struct regmap *regmap;
+       int ret;
+
+       ret = devm_pm_runtime_enable(&pdev->dev);
+       if (ret)
+               return ret;
+
+       ret = pm_runtime_resume_and_get(&pdev->dev);
+       if (ret)
+               return ret;
+
+       regmap = qcom_cc_map(pdev, &cam_cc_x1e80100_desc);
+       if (IS_ERR(regmap)) {
+               pm_runtime_put(&pdev->dev);
+               return PTR_ERR(regmap);
+       }
+
+       clk_lucid_ole_pll_configure(&cam_cc_pll0, regmap, &cam_cc_pll0_config);
+       clk_lucid_ole_pll_configure(&cam_cc_pll1, regmap, &cam_cc_pll1_config);
+       clk_rivian_evo_pll_configure(&cam_cc_pll2, regmap, &cam_cc_pll2_config);
+       clk_lucid_ole_pll_configure(&cam_cc_pll3, regmap, &cam_cc_pll3_config);
+       clk_lucid_ole_pll_configure(&cam_cc_pll4, regmap, &cam_cc_pll4_config);
+       clk_lucid_ole_pll_configure(&cam_cc_pll6, regmap, &cam_cc_pll6_config);
+       clk_lucid_ole_pll_configure(&cam_cc_pll8, regmap, &cam_cc_pll8_config);
+
+       /* Keep clocks always enabled */
+       qcom_branch_set_clk_en(regmap, 0x13a9c); /* CAM_CC_GDSC_CLK */
+       qcom_branch_set_clk_en(regmap, 0x13ab8); /* CAM_CC_SLEEP_CLK */
+
+       ret = qcom_cc_really_probe(pdev, &cam_cc_x1e80100_desc, regmap);
+
+       pm_runtime_put(&pdev->dev);
+
+       return ret;
+}
+
+static struct platform_driver cam_cc_x1e80100_driver = {
+       .probe = cam_cc_x1e80100_probe,
+       .driver = {
+               .name = "camcc-x1e80100",
+               .of_match_table = cam_cc_x1e80100_match_table,
+       },
+};
+
+module_platform_driver(cam_cc_x1e80100_driver);
+
+MODULE_DESCRIPTION("QTI Camera Clock Controller X1E80100 Driver");
+MODULE_LICENSE("GPL");
index 05898d2a8b22cd35460722bb63586eec25fc0691..8a412ef47e1631705326c7380042c8c3c4c93526 100644
@@ -52,6 +52,7 @@
 #define PLL_CONFIG_CTL(p)      ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL])
 #define PLL_CONFIG_CTL_U(p)    ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL_U])
 #define PLL_CONFIG_CTL_U1(p)   ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL_U1])
+#define PLL_CONFIG_CTL_U2(p)   ((p)->offset + (p)->regs[PLL_OFF_CONFIG_CTL_U2])
 #define PLL_TEST_CTL(p)                ((p)->offset + (p)->regs[PLL_OFF_TEST_CTL])
 #define PLL_TEST_CTL_U(p)      ((p)->offset + (p)->regs[PLL_OFF_TEST_CTL_U])
 #define PLL_TEST_CTL_U1(p)     ((p)->offset + (p)->regs[PLL_OFF_TEST_CTL_U1])
@@ -228,6 +229,21 @@ const u8 clk_alpha_pll_regs[][PLL_OFF_MAX_REGS] = {
                [PLL_OFF_ALPHA_VAL] = 0x24,
                [PLL_OFF_ALPHA_VAL_U] = 0x28,
        },
+       [CLK_ALPHA_PLL_TYPE_ZONDA_OLE] =  {
+               [PLL_OFF_L_VAL] = 0x04,
+               [PLL_OFF_ALPHA_VAL] = 0x08,
+               [PLL_OFF_USER_CTL] = 0x0c,
+               [PLL_OFF_USER_CTL_U] = 0x10,
+               [PLL_OFF_CONFIG_CTL] = 0x14,
+               [PLL_OFF_CONFIG_CTL_U] = 0x18,
+               [PLL_OFF_CONFIG_CTL_U1] = 0x1c,
+               [PLL_OFF_CONFIG_CTL_U2] = 0x20,
+               [PLL_OFF_TEST_CTL] = 0x24,
+               [PLL_OFF_TEST_CTL_U] = 0x28,
+               [PLL_OFF_TEST_CTL_U1] = 0x2c,
+               [PLL_OFF_OPMODE] = 0x30,
+               [PLL_OFF_STATUS] = 0x3c,
+       },
 };
 EXPORT_SYMBOL_GPL(clk_alpha_pll_regs);
 
index a1a75bb12fe88bb5466d0ccce7561a6a788466ae..fb6d50263bb9df489198cab3798c2601e83c6d00 100644
@@ -21,6 +21,7 @@ enum {
        CLK_ALPHA_PLL_TYPE_LUCID = CLK_ALPHA_PLL_TYPE_TRION,
        CLK_ALPHA_PLL_TYPE_AGERA,
        CLK_ALPHA_PLL_TYPE_ZONDA,
+       CLK_ALPHA_PLL_TYPE_ZONDA_OLE,
        CLK_ALPHA_PLL_TYPE_LUCID_EVO,
        CLK_ALPHA_PLL_TYPE_LUCID_OLE,
        CLK_ALPHA_PLL_TYPE_RIVIAN_EVO,
@@ -42,6 +43,7 @@ enum {
        PLL_OFF_CONFIG_CTL,
        PLL_OFF_CONFIG_CTL_U,
        PLL_OFF_CONFIG_CTL_U1,
+       PLL_OFF_CONFIG_CTL_U2,
        PLL_OFF_TEST_CTL,
        PLL_OFF_TEST_CTL_U,
        PLL_OFF_TEST_CTL_U1,
@@ -119,6 +121,7 @@ struct alpha_pll_config {
        u32 config_ctl_val;
        u32 config_ctl_hi_val;
        u32 config_ctl_hi1_val;
+       u32 config_ctl_hi2_val;
        u32 user_ctl_val;
        u32 user_ctl_hi_val;
        u32 user_ctl_hi1_val;
@@ -173,6 +176,7 @@ extern const struct clk_ops clk_alpha_pll_postdiv_lucid_5lpe_ops;
 
 extern const struct clk_ops clk_alpha_pll_zonda_ops;
 #define clk_alpha_pll_postdiv_zonda_ops clk_alpha_pll_postdiv_fabia_ops
+#define clk_alpha_pll_zonda_ole_ops clk_alpha_pll_zonda_ops
 
 extern const struct clk_ops clk_alpha_pll_lucid_evo_ops;
 extern const struct clk_ops clk_alpha_pll_reset_lucid_evo_ops;
index 8ffed603c050b370f7beec5842decadf95072a53..f1b3b635ff3248e8c6174b14e5725bbc31a272fa 100644
@@ -64,6 +64,7 @@ struct clk_mem_branch {
 #define CBCR_FORCE_MEM_PERIPH_OFF      BIT(12)
 #define CBCR_WAKEUP                    GENMASK(11, 8)
 #define CBCR_SLEEP                     GENMASK(7, 4)
+#define CBCR_CLOCK_ENABLE              BIT(0)
 
 static inline void qcom_branch_set_force_mem_core(struct regmap *regmap,
                                                  struct clk_branch clk, bool on)
@@ -98,6 +99,11 @@ static inline void qcom_branch_set_sleep(struct regmap *regmap, struct clk_branc
                           FIELD_PREP(CBCR_SLEEP, val));
 }
 
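+/* Force-enable a branch clock by setting CLK_ENABLE (bit 0) in its CBCR register. */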
+static inline void qcom_branch_set_clk_en(struct regmap *regmap, u32 cbcr)
+{
+       regmap_update_bits(regmap, cbcr, CBCR_CLOCK_ENABLE, CBCR_CLOCK_ENABLE);
+}
+
 extern const struct clk_ops clk_branch_ops;
 extern const struct clk_ops clk_branch2_ops;
 extern const struct clk_ops clk_branch_simple_ops;
index 9206f0eed44620506533001071b9ac5d36928aac..654a10d53e5c5b58c6c12d0ffade1ad4ffba98a4 100644
@@ -519,8 +519,8 @@ static int disp_cc_qcm2290_probe(struct platform_device *pdev)
 
        clk_alpha_pll_configure(&disp_cc_pll0, regmap, &disp_cc_pll0_config);
 
-       /* Keep DISP_CC_XO_CLK always-ON */
-       regmap_update_bits(regmap, 0x604c, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x604c); /* DISP_CC_XO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &disp_cc_qcm2290_desc, regmap);
        if (ret) {
@@ -539,17 +539,7 @@ static struct platform_driver disp_cc_qcm2290_driver = {
        },
 };
 
-static int __init disp_cc_qcm2290_init(void)
-{
-       return platform_driver_register(&disp_cc_qcm2290_driver);
-}
-subsys_initcall(disp_cc_qcm2290_init);
-
-static void __exit disp_cc_qcm2290_exit(void)
-{
-       platform_driver_unregister(&disp_cc_qcm2290_driver);
-}
-module_exit(disp_cc_qcm2290_exit);
+module_platform_driver(disp_cc_qcm2290_driver);
 
 MODULE_DESCRIPTION("QTI DISP_CC qcm2290 Driver");
 MODULE_LICENSE("GPL v2");
index 9536bfc72a43cf2c9f2855c62a2210d08fdea348..38d7859981c7d00b84b262e8a8832b63e587f66e 100644
@@ -724,17 +724,7 @@ static struct platform_driver disp_cc_sc7180_driver = {
        },
 };
 
-static int __init disp_cc_sc7180_init(void)
-{
-       return platform_driver_register(&disp_cc_sc7180_driver);
-}
-subsys_initcall(disp_cc_sc7180_init);
-
-static void __exit disp_cc_sc7180_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sc7180_driver);
-}
-module_exit(disp_cc_sc7180_exit);
+module_platform_driver(disp_cc_sc7180_driver);
 
 MODULE_DESCRIPTION("QTI DISP_CC SC7180 Driver");
 MODULE_LICENSE("GPL v2");
index ad596d567f6ab7f5e04d109b50d741f2a4ce1ae7..fbeb8fccb99af6e5bdaae61e4037d9a58b232d98 100644
@@ -878,11 +878,8 @@ static int disp_cc_sc7280_probe(struct platform_device *pdev)
 
        clk_lucid_pll_configure(&disp_cc_pll0, regmap, &disp_cc_pll0_config);
 
-       /*
-        * Keep the clocks always-ON
-        * DISP_CC_XO_CLK
-        */
-       regmap_update_bits(regmap, 0x5008, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x5008); /* DISP_CC_XO_CLK */
 
        return qcom_cc_really_probe(pdev, &disp_cc_sc7280_desc, regmap);
 }
@@ -895,17 +892,7 @@ static struct platform_driver disp_cc_sc7280_driver = {
        },
 };
 
-static int __init disp_cc_sc7280_init(void)
-{
-       return platform_driver_register(&disp_cc_sc7280_driver);
-}
-subsys_initcall(disp_cc_sc7280_init);
-
-static void __exit disp_cc_sc7280_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sc7280_driver);
-}
-module_exit(disp_cc_sc7280_exit);
+module_platform_driver(disp_cc_sc7280_driver);
 
 MODULE_DESCRIPTION("QTI DISP_CC sc7280 Driver");
 MODULE_LICENSE("GPL v2");
index 30f636b9f0ec893dca5e4fc81d328dd3e4e80d0c..91172f5b2f15b1d8a7b582ac582abd57b7b83c4e 100644
@@ -3178,8 +3178,8 @@ static int disp_cc_sc8280xp_probe(struct platform_device *pdev)
                goto out_pm_runtime_put;
        }
 
-       /* DISP_CC_XO_CLK always-on */
-       regmap_update_bits(regmap, 0x605c, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x605c); /* DISP_CC_XO_CLK */
 
 out_pm_runtime_put:
        pm_runtime_put_sync(&pdev->dev);
@@ -3202,17 +3202,7 @@ static struct platform_driver disp_cc_sc8280xp_driver = {
        },
 };
 
-static int __init disp_cc_sc8280xp_init(void)
-{
-       return platform_driver_register(&disp_cc_sc8280xp_driver);
-}
-subsys_initcall(disp_cc_sc8280xp_init);
-
-static void __exit disp_cc_sc8280xp_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sc8280xp_driver);
-}
-module_exit(disp_cc_sc8280xp_exit);
+module_platform_driver(disp_cc_sc8280xp_driver);
 
 MODULE_DESCRIPTION("Qualcomm SC8280XP dispcc driver");
 MODULE_LICENSE("GPL");
index 735adfefc37983b0929b2133b6ff5235a53a68d4..b84fdd17c3d8c699341123a34c71dbcb9972da34 100644
@@ -759,6 +759,8 @@ static struct clk_branch disp_cc_mdss_vsync_clk = {
 
 static struct gdsc mdss_gdsc = {
        .gdscr = 0x3000,
+       .en_few_wait_val = 0x6,
+       .en_rest_wait_val = 0x5,
        .pd = {
                .name = "mdss_gdsc",
        },
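The two new fields tune the power-switch handshake delays for SDM845's MDSS power domain; the gdsc core programs them into wait-count fields of the GDSCR. A hedged, self-contained sketch of that kind of write (the field positions here are illustrative assumptions, not copied from gdsc.c):

#include <linux/bitfield.h>
#include <linux/regmap.h>

#define EXAMPLE_EN_REST_WAIT	GENMASK(23, 20)	/* assumed field position */
#define EXAMPLE_EN_FEW_WAIT	GENMASK(19, 16)	/* assumed field position */

static void example_program_gdsc_waits(struct regmap *regmap, u32 gdscr,
				       u32 rest_wait, u32 few_wait)
{
	/* update only the wait-count fields, leave the rest of GDSCR alone */
	regmap_update_bits(regmap, gdscr,
			   EXAMPLE_EN_REST_WAIT | EXAMPLE_EN_FEW_WAIT,
			   FIELD_PREP(EXAMPLE_EN_REST_WAIT, rest_wait) |
			   FIELD_PREP(EXAMPLE_EN_FEW_WAIT, few_wait));
}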
@@ -872,17 +874,7 @@ static struct platform_driver disp_cc_sdm845_driver = {
        },
 };
 
-static int __init disp_cc_sdm845_init(void)
-{
-       return platform_driver_register(&disp_cc_sdm845_driver);
-}
-subsys_initcall(disp_cc_sdm845_init);
-
-static void __exit disp_cc_sdm845_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sdm845_driver);
-}
-module_exit(disp_cc_sdm845_exit);
+module_platform_driver(disp_cc_sdm845_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("QTI DISPCC SDM845 Driver");
index 1fab43f08e737f55738f5535f2031ca36e78c626..bd07f26af35a2bf24900bbfae2967fa708b57c5c 100644
@@ -583,8 +583,8 @@ static int disp_cc_sm6115_probe(struct platform_device *pdev)
 
        clk_alpha_pll_configure(&disp_cc_pll0, regmap, &disp_cc_pll0_config);
 
-       /* Keep DISP_CC_XO_CLK always-ON */
-       regmap_update_bits(regmap, 0x604c, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x604c); /* DISP_CC_XO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &disp_cc_sm6115_desc, regmap);
        if (ret) {
index 87b27053ddb621a300fca5e0a077617e90ba13f1..1cc5f220a3c4988b9d3444962831ed7d13ac2c3b 100644
@@ -693,17 +693,7 @@ static struct platform_driver disp_cc_sm6125_driver = {
        },
 };
 
-static int __init disp_cc_sm6125_init(void)
-{
-       return platform_driver_register(&disp_cc_sm6125_driver);
-}
-subsys_initcall(disp_cc_sm6125_init);
-
-static void __exit disp_cc_sm6125_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sm6125_driver);
-}
-module_exit(disp_cc_sm6125_exit);
+module_platform_driver(disp_cc_sm6125_driver);
 
 MODULE_DESCRIPTION("QTI DISPCC SM6125 Driver");
 MODULE_LICENSE("GPL v2");
index ea6f54ed846ece18b9c493cf26b2bce88e98a21f..839435362010eeed6eca5aba2ba8899f69ffc6de 100644
@@ -781,17 +781,7 @@ static struct platform_driver disp_cc_sm6350_driver = {
        },
 };
 
-static int __init disp_cc_sm6350_init(void)
-{
-       return platform_driver_register(&disp_cc_sm6350_driver);
-}
-subsys_initcall(disp_cc_sm6350_init);
-
-static void __exit disp_cc_sm6350_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sm6350_driver);
-}
-module_exit(disp_cc_sm6350_exit);
+module_platform_driver(disp_cc_sm6350_driver);
 
 MODULE_DESCRIPTION("QTI DISP_CC SM6350 Driver");
 MODULE_LICENSE("GPL v2");
index caa1b90a5ff2da8597675e4c32969efa9542c047..d81d4e3c0b0debcfc10b1bb659afc582cfcd4ef3 100644
@@ -594,17 +594,7 @@ static struct platform_driver disp_cc_sm6375_driver = {
        },
 };
 
-static int __init disp_cc_sm6375_init(void)
-{
-       return platform_driver_register(&disp_cc_sm6375_driver);
-}
-subsys_initcall(disp_cc_sm6375_init);
-
-static void __exit disp_cc_sm6375_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sm6375_driver);
-}
-module_exit(disp_cc_sm6375_exit);
+module_platform_driver(disp_cc_sm6375_driver);
 
 MODULE_DESCRIPTION("QTI DISPCC SM6375 Driver");
 MODULE_LICENSE("GPL");
index e17bb8b543b51b38bc06816d098621f4448ac7f2..43307c8a342caeac08a1523a36feb67b3845c88c 100644
@@ -39,11 +39,11 @@ enum {
        P_DSI1_PHY_PLL_OUT_DSICLK,
 };
 
-static struct pll_vco vco_table[] = {
+static const struct pll_vco vco_table[] = {
        { 249600000, 2000000000, 0 },
 };
 
-static struct pll_vco lucid_5lpe_vco[] = {
+static const struct pll_vco lucid_5lpe_vco[] = {
        { 249600000, 1750000000, 0 },
 };
 
@@ -214,7 +214,7 @@ static struct clk_rcg2 disp_cc_mdss_ahb_clk_src = {
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_3,
        .freq_tbl = ftbl_disp_cc_mdss_ahb_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_ahb_clk_src",
                .parent_data = disp_cc_parent_data_3,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_3),
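This hunk and the many that follow are one mechanical change: the anonymous clk_init_data compound literals become const-qualified. The clk core only reads init data while registering a clock, and a const-qualified compound literal at file scope may be placed in read-only memory, so nothing needs to stay writable. In miniature, the difference is just storage qualification:

struct point { int x, y; };

/* mutable object, placed in writable data */
static struct point *p_rw = &(struct point) { 1, 2 };

/* const-qualified object, eligible for .rodata */
static const struct point *p_ro = &(const struct point) { 1, 2 };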
@@ -233,7 +233,7 @@ static struct clk_rcg2 disp_cc_mdss_byte0_clk_src = {
        .mnd_width = 0,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_2,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_byte0_clk_src",
                .parent_data = disp_cc_parent_data_2,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_2),
@@ -247,7 +247,7 @@ static struct clk_rcg2 disp_cc_mdss_byte1_clk_src = {
        .mnd_width = 0,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_2,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_byte1_clk_src",
                .parent_data = disp_cc_parent_data_2,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_2),
@@ -262,7 +262,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_aux1_clk_src = {
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_1,
        .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_dp_aux1_clk_src",
                .parent_data = disp_cc_parent_data_1,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
@@ -277,7 +277,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_aux_clk_src = {
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_1,
        .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_dp_aux_clk_src",
                .parent_data = disp_cc_parent_data_1,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
@@ -291,7 +291,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_link1_clk_src = {
        .mnd_width = 0,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_0,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_dp_link1_clk_src",
                .parent_data = disp_cc_parent_data_0,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
@@ -304,7 +304,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_link_clk_src = {
        .mnd_width = 0,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_0,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_dp_link_clk_src",
                .parent_data = disp_cc_parent_data_0,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
@@ -317,7 +317,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_pixel1_clk_src = {
        .mnd_width = 16,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_0,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_dp_pixel1_clk_src",
                .parent_data = disp_cc_parent_data_0,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
@@ -330,7 +330,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_pixel2_clk_src = {
        .mnd_width = 16,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_0,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_dp_pixel2_clk_src",
                .parent_data = disp_cc_parent_data_0,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
@@ -343,7 +343,7 @@ static struct clk_rcg2 disp_cc_mdss_dp_pixel_clk_src = {
        .mnd_width = 16,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_0,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_dp_pixel_clk_src",
                .parent_data = disp_cc_parent_data_0,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
@@ -357,7 +357,7 @@ static struct clk_rcg2 disp_cc_mdss_edp_aux_clk_src = {
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_1,
        .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_edp_aux_clk_src",
                .parent_data = disp_cc_parent_data_1,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
@@ -372,7 +372,7 @@ static struct clk_rcg2 disp_cc_mdss_edp_gtc_clk_src = {
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_7,
        .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_edp_gtc_clk_src",
                .parent_data = disp_cc_parent_data_7,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_7),
@@ -386,7 +386,7 @@ static struct clk_rcg2 disp_cc_mdss_edp_link_clk_src = {
        .mnd_width = 0,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_4,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_edp_link_clk_src",
                .parent_data = disp_cc_parent_data_4,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_4),
@@ -400,7 +400,7 @@ static struct clk_rcg2 disp_cc_mdss_edp_pixel_clk_src = {
        .mnd_width = 16,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_4,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_edp_pixel_clk_src",
                .parent_data = disp_cc_parent_data_4,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_4),
@@ -414,7 +414,7 @@ static struct clk_branch disp_cc_mdss_edp_aux_clk = {
        .clkr = {
                .enable_reg = 0x2078,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_edp_aux_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_edp_aux_clk_src.clkr.hw,
@@ -432,7 +432,7 @@ static struct clk_branch disp_cc_mdss_edp_gtc_clk = {
        .clkr = {
                .enable_reg = 0x207c,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_edp_gtc_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_edp_gtc_clk_src.clkr.hw,
@@ -450,7 +450,7 @@ static struct clk_branch disp_cc_mdss_edp_link_clk = {
        .clkr = {
                .enable_reg = 0x2070,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_edp_link_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_edp_link_clk_src.clkr.hw,
@@ -466,7 +466,7 @@ static struct clk_regmap_div disp_cc_mdss_edp_link_div_clk_src = {
        .reg = 0x2288,
        .shift = 0,
        .width = 2,
-       .clkr.hw.init = &(struct clk_init_data) {
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_edp_link_div_clk_src",
                .parent_hws = (const struct clk_hw*[]){
                        &disp_cc_mdss_edp_link_clk_src.clkr.hw,
@@ -482,7 +482,7 @@ static struct clk_branch disp_cc_mdss_edp_link_intf_clk = {
        .clkr = {
                .enable_reg = 0x2074,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_edp_link_intf_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_edp_link_div_clk_src.clkr.hw,
@@ -500,7 +500,7 @@ static struct clk_branch disp_cc_mdss_edp_pixel_clk = {
        .clkr = {
                .enable_reg = 0x206c,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_edp_pixel_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_edp_pixel_clk_src.clkr.hw,
@@ -518,7 +518,7 @@ static struct clk_rcg2 disp_cc_mdss_esc0_clk_src = {
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_2,
        .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_esc0_clk_src",
                .parent_data = disp_cc_parent_data_2,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_2),
@@ -533,7 +533,7 @@ static struct clk_rcg2 disp_cc_mdss_esc1_clk_src = {
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_2,
        .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_esc1_clk_src",
                .parent_data = disp_cc_parent_data_2,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_2),
@@ -560,7 +560,7 @@ static struct clk_rcg2 disp_cc_mdss_mdp_clk_src = {
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_5,
        .freq_tbl = ftbl_disp_cc_mdss_mdp_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_mdp_clk_src",
                .parent_data = disp_cc_parent_data_5,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_5),
@@ -574,7 +574,7 @@ static struct clk_rcg2 disp_cc_mdss_pclk0_clk_src = {
        .mnd_width = 8,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_6,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_pclk0_clk_src",
                .parent_data = disp_cc_parent_data_6,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_6),
@@ -588,7 +588,7 @@ static struct clk_rcg2 disp_cc_mdss_pclk1_clk_src = {
        .mnd_width = 8,
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_6,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_pclk1_clk_src",
                .parent_data = disp_cc_parent_data_6,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_6),
@@ -612,7 +612,7 @@ static struct clk_rcg2 disp_cc_mdss_rot_clk_src = {
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_5,
        .freq_tbl = ftbl_disp_cc_mdss_rot_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_rot_clk_src",
                .parent_data = disp_cc_parent_data_5,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_5),
@@ -627,7 +627,7 @@ static struct clk_rcg2 disp_cc_mdss_vsync_clk_src = {
        .hid_width = 5,
        .parent_map = disp_cc_parent_map_1,
        .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_vsync_clk_src",
                .parent_data = disp_cc_parent_data_1,
                .num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
@@ -640,7 +640,7 @@ static struct clk_regmap_div disp_cc_mdss_byte0_div_clk_src = {
        .reg = 0x2128,
        .shift = 0,
        .width = 2,
-       .clkr.hw.init = &(struct clk_init_data) {
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_byte0_div_clk_src",
                .parent_hws = (const struct clk_hw*[]){
                        &disp_cc_mdss_byte0_clk_src.clkr.hw,
@@ -655,7 +655,7 @@ static struct clk_regmap_div disp_cc_mdss_byte1_div_clk_src = {
        .reg = 0x2144,
        .shift = 0,
        .width = 2,
-       .clkr.hw.init = &(struct clk_init_data) {
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_byte1_div_clk_src",
                .parent_hws = (const struct clk_hw*[]){
                        &disp_cc_mdss_byte1_clk_src.clkr.hw,
@@ -665,12 +665,11 @@ static struct clk_regmap_div disp_cc_mdss_byte1_div_clk_src = {
        },
 };
 
-
 static struct clk_regmap_div disp_cc_mdss_dp_link1_div_clk_src = {
        .reg = 0x2224,
        .shift = 0,
        .width = 2,
-       .clkr.hw.init = &(struct clk_init_data) {
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_dp_link1_div_clk_src",
                .parent_hws = (const struct clk_hw*[]){
                        &disp_cc_mdss_dp_link1_clk_src.clkr.hw,
@@ -680,12 +679,11 @@ static struct clk_regmap_div disp_cc_mdss_dp_link1_div_clk_src = {
        },
 };
 
-
 static struct clk_regmap_div disp_cc_mdss_dp_link_div_clk_src = {
        .reg = 0x2190,
        .shift = 0,
        .width = 2,
-       .clkr.hw.init = &(struct clk_init_data) {
+       .clkr.hw.init = &(const struct clk_init_data) {
                .name = "disp_cc_mdss_dp_link_div_clk_src",
                .parent_hws = (const struct clk_hw*[]){
                        &disp_cc_mdss_dp_link_clk_src.clkr.hw,
@@ -701,7 +699,7 @@ static struct clk_branch disp_cc_mdss_ahb_clk = {
        .clkr = {
                .enable_reg = 0x2080,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_ahb_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_ahb_clk_src.clkr.hw,
@@ -719,7 +717,7 @@ static struct clk_branch disp_cc_mdss_byte0_clk = {
        .clkr = {
                .enable_reg = 0x2028,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_byte0_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_byte0_clk_src.clkr.hw,
@@ -737,7 +735,7 @@ static struct clk_branch disp_cc_mdss_byte0_intf_clk = {
        .clkr = {
                .enable_reg = 0x202c,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_byte0_intf_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_byte0_div_clk_src.clkr.hw,
@@ -755,7 +753,7 @@ static struct clk_branch disp_cc_mdss_byte1_clk = {
        .clkr = {
                .enable_reg = 0x2030,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_byte1_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_byte1_clk_src.clkr.hw,
@@ -773,7 +771,7 @@ static struct clk_branch disp_cc_mdss_byte1_intf_clk = {
        .clkr = {
                .enable_reg = 0x2034,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_byte1_intf_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_byte1_div_clk_src.clkr.hw,
@@ -791,7 +789,7 @@ static struct clk_branch disp_cc_mdss_dp_aux1_clk = {
        .clkr = {
                .enable_reg = 0x2068,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_dp_aux1_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_dp_aux1_clk_src.clkr.hw,
@@ -809,7 +807,7 @@ static struct clk_branch disp_cc_mdss_dp_aux_clk = {
        .clkr = {
                .enable_reg = 0x2054,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_dp_aux_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_dp_aux_clk_src.clkr.hw,
@@ -827,7 +825,7 @@ static struct clk_branch disp_cc_mdss_dp_link1_clk = {
        .clkr = {
                .enable_reg = 0x205c,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_dp_link1_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_dp_link1_clk_src.clkr.hw,
@@ -845,7 +843,7 @@ static struct clk_branch disp_cc_mdss_dp_link1_intf_clk = {
        .clkr = {
                .enable_reg = 0x2060,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_dp_link1_intf_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_dp_link1_div_clk_src.clkr.hw,
@@ -862,7 +860,7 @@ static struct clk_branch disp_cc_mdss_dp_link_clk = {
        .clkr = {
                .enable_reg = 0x2040,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_dp_link_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_dp_link_clk_src.clkr.hw,
@@ -880,7 +878,7 @@ static struct clk_branch disp_cc_mdss_dp_link_intf_clk = {
        .clkr = {
                .enable_reg = 0x2044,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_dp_link_intf_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_dp_link_div_clk_src.clkr.hw,
@@ -897,7 +895,7 @@ static struct clk_branch disp_cc_mdss_dp_pixel1_clk = {
        .clkr = {
                .enable_reg = 0x2050,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_dp_pixel1_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_dp_pixel1_clk_src.clkr.hw,
@@ -915,7 +913,7 @@ static struct clk_branch disp_cc_mdss_dp_pixel2_clk = {
        .clkr = {
                .enable_reg = 0x2058,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_dp_pixel2_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_dp_pixel2_clk_src.clkr.hw,
@@ -933,7 +931,7 @@ static struct clk_branch disp_cc_mdss_dp_pixel_clk = {
        .clkr = {
                .enable_reg = 0x204c,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_dp_pixel_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_dp_pixel_clk_src.clkr.hw,
@@ -951,7 +949,7 @@ static struct clk_branch disp_cc_mdss_esc0_clk = {
        .clkr = {
                .enable_reg = 0x2038,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_esc0_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_esc0_clk_src.clkr.hw,
@@ -969,7 +967,7 @@ static struct clk_branch disp_cc_mdss_esc1_clk = {
        .clkr = {
                .enable_reg = 0x203c,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_esc1_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_esc1_clk_src.clkr.hw,
@@ -987,7 +985,7 @@ static struct clk_branch disp_cc_mdss_mdp_clk = {
        .clkr = {
                .enable_reg = 0x200c,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_mdp_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_mdp_clk_src.clkr.hw,
@@ -1005,7 +1003,7 @@ static struct clk_branch disp_cc_mdss_mdp_lut_clk = {
        .clkr = {
                .enable_reg = 0x201c,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_mdp_lut_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_mdp_clk_src.clkr.hw,
@@ -1022,7 +1020,7 @@ static struct clk_branch disp_cc_mdss_non_gdsc_ahb_clk = {
        .clkr = {
                .enable_reg = 0x4004,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_non_gdsc_ahb_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_ahb_clk_src.clkr.hw,
@@ -1040,7 +1038,7 @@ static struct clk_branch disp_cc_mdss_pclk0_clk = {
        .clkr = {
                .enable_reg = 0x2004,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_pclk0_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_pclk0_clk_src.clkr.hw,
@@ -1058,7 +1056,7 @@ static struct clk_branch disp_cc_mdss_pclk1_clk = {
        .clkr = {
                .enable_reg = 0x2008,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_pclk1_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_pclk1_clk_src.clkr.hw,
@@ -1076,7 +1074,7 @@ static struct clk_branch disp_cc_mdss_rot_clk = {
        .clkr = {
                .enable_reg = 0x2014,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_rot_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_rot_clk_src.clkr.hw,
@@ -1094,7 +1092,7 @@ static struct clk_branch disp_cc_mdss_rscc_ahb_clk = {
        .clkr = {
                .enable_reg = 0x400c,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_rscc_ahb_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_ahb_clk_src.clkr.hw,
@@ -1112,7 +1110,7 @@ static struct clk_branch disp_cc_mdss_rscc_vsync_clk = {
        .clkr = {
                .enable_reg = 0x4008,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_rscc_vsync_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_vsync_clk_src.clkr.hw,
@@ -1130,7 +1128,7 @@ static struct clk_branch disp_cc_mdss_vsync_clk = {
        .clkr = {
                .enable_reg = 0x2024,
                .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
+               .hw.init = &(const struct clk_init_data) {
                        .name = "disp_cc_mdss_vsync_clk",
                        .parent_hws = (const struct clk_hw*[]){
                                &disp_cc_mdss_vsync_clk_src.clkr.hw,
@@ -1365,8 +1363,8 @@ static int disp_cc_sm8250_probe(struct platform_device *pdev)
        /* Enable clock gating for MDP clocks */
        regmap_update_bits(regmap, 0x8000, 0x10, 0x10);
 
-       /* DISP_CC_XO_CLK always-on */
-       regmap_update_bits(regmap, 0x605c, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x605c); /* DISP_CC_XO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &disp_cc_sm8250_desc, regmap);
 
@@ -1383,17 +1381,7 @@ static struct platform_driver disp_cc_sm8250_driver = {
        },
 };
 
-static int __init disp_cc_sm8250_init(void)
-{
-       return platform_driver_register(&disp_cc_sm8250_driver);
-}
-subsys_initcall(disp_cc_sm8250_init);
-
-static void __exit disp_cc_sm8250_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sm8250_driver);
-}
-module_exit(disp_cc_sm8250_exit);
+module_platform_driver(disp_cc_sm8250_driver);
 
 MODULE_DESCRIPTION("QTI DISPCC SM8250 Driver");
 MODULE_LICENSE("GPL v2");
index 2c4aecd75186b0d6e777bd280204076d4af1c843..92e9c4e7b13dcc95932b4c16ca6561910edb4d4c 100644
@@ -1787,11 +1787,8 @@ static int disp_cc_sm8450_probe(struct platform_device *pdev)
        /* Enable clock gating for MDP clocks */
        regmap_update_bits(regmap, DISP_CC_MISC_CMD, 0x10, 0x10);
 
-       /*
-        * Keep clocks always enabled:
-        *      disp_cc_xo_clk
-        */
-       regmap_update_bits(regmap, 0xe05c, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0xe05c); /* DISP_CC_XO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &disp_cc_sm8450_desc, regmap);
        if (ret)
@@ -1815,17 +1812,7 @@ static struct platform_driver disp_cc_sm8450_driver = {
        },
 };
 
-static int __init disp_cc_sm8450_init(void)
-{
-       return platform_driver_register(&disp_cc_sm8450_driver);
-}
-subsys_initcall(disp_cc_sm8450_init);
-
-static void __exit disp_cc_sm8450_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sm8450_driver);
-}
-module_exit(disp_cc_sm8450_exit);
+module_platform_driver(disp_cc_sm8450_driver);
 
 MODULE_DESCRIPTION("QTI DISPCC SM8450 Driver");
 MODULE_LICENSE("GPL");
index f96d8b81fd9adbadacd81f91933c633c84f8fc2d..3672c73ac11c692f8c133dabc4cd978bd6fafbae 100644
@@ -1780,11 +1780,8 @@ static int disp_cc_sm8550_probe(struct platform_device *pdev)
        /* Enable clock gating for MDP clocks */
        regmap_update_bits(regmap, DISP_CC_MISC_CMD, 0x10, 0x10);
 
-       /*
-        * Keep clocks always enabled:
-        *      disp_cc_xo_clk
-        */
-       regmap_update_bits(regmap, 0xe054, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0xe054); /* DISP_CC_XO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &disp_cc_sm8550_desc, regmap);
        if (ret)
@@ -1808,17 +1805,7 @@ static struct platform_driver disp_cc_sm8550_driver = {
        },
 };
 
-static int __init disp_cc_sm8550_init(void)
-{
-       return platform_driver_register(&disp_cc_sm8550_driver);
-}
-subsys_initcall(disp_cc_sm8550_init);
-
-static void __exit disp_cc_sm8550_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sm8550_driver);
-}
-module_exit(disp_cc_sm8550_exit);
+module_platform_driver(disp_cc_sm8550_driver);
 
 MODULE_DESCRIPTION("QTI DISPCC SM8550 Driver");
 MODULE_LICENSE("GPL");
index f3b1d9d16baeab189efaa7387b1f4970cb711150..9539db0d91145c4a101263d1a2d4f84a877055ca 100644
@@ -1777,8 +1777,8 @@ static int disp_cc_sm8650_probe(struct platform_device *pdev)
        /* Enable clock gating for MDP clocks */
        regmap_update_bits(regmap, DISP_CC_MISC_CMD, 0x10, 0x10);
 
-       /* Keep clocks always enabled */
-       regmap_update_bits(regmap, 0xe054, BIT(0), BIT(0)); /* disp_cc_xo_clk */
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0xe054); /* DISP_CC_XO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &disp_cc_sm8650_desc, regmap);
        if (ret)
@@ -1802,17 +1802,7 @@ static struct platform_driver disp_cc_sm8650_driver = {
        },
 };
 
-static int __init disp_cc_sm8650_init(void)
-{
-       return platform_driver_register(&disp_cc_sm8650_driver);
-}
-subsys_initcall(disp_cc_sm8650_init);
-
-static void __exit disp_cc_sm8650_exit(void)
-{
-       platform_driver_unregister(&disp_cc_sm8650_driver);
-}
-module_exit(disp_cc_sm8650_exit);
+module_platform_driver(disp_cc_sm8650_driver);
 
 MODULE_DESCRIPTION("QTI DISPCC SM8650 Driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/clk/qcom/dispcc-x1e80100.c b/drivers/clk/qcom/dispcc-x1e80100.c
new file mode 100644
index 0000000..0b2ee64
--- /dev/null
@@ -0,0 +1,1718 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,x1e80100-dispcc.h>
+
+#include "common.h"
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-pll.h"
+#include "clk-rcg.h"
+#include "clk-regmap.h"
+#include "clk-regmap-divider.h"
+#include "reset.h"
+#include "gdsc.h"
+
+/* Need to match the order of clocks in DT binding */
+enum {
+       DT_BI_TCXO,
+       DT_BI_TCXO_AO,
+       DT_AHB_CLK,
+       DT_SLEEP_CLK,
+
+       DT_DSI0_PHY_PLL_OUT_BYTECLK,
+       DT_DSI0_PHY_PLL_OUT_DSICLK,
+       DT_DSI1_PHY_PLL_OUT_BYTECLK,
+       DT_DSI1_PHY_PLL_OUT_DSICLK,
+
+       DT_DP0_PHY_PLL_LINK_CLK,
+       DT_DP0_PHY_PLL_VCO_DIV_CLK,
+       DT_DP1_PHY_PLL_LINK_CLK,
+       DT_DP1_PHY_PLL_VCO_DIV_CLK,
+       DT_DP2_PHY_PLL_LINK_CLK,
+       DT_DP2_PHY_PLL_VCO_DIV_CLK,
+       DT_DP3_PHY_PLL_LINK_CLK,
+       DT_DP3_PHY_PLL_VCO_DIV_CLK,
+};
+
+#define DISP_CC_MISC_CMD       0xF000
+
+enum {
+       P_BI_TCXO,
+       P_BI_TCXO_AO,
+       P_DISP_CC_PLL0_OUT_MAIN,
+       P_DISP_CC_PLL1_OUT_EVEN,
+       P_DISP_CC_PLL1_OUT_MAIN,
+       P_DP0_PHY_PLL_LINK_CLK,
+       P_DP0_PHY_PLL_VCO_DIV_CLK,
+       P_DP1_PHY_PLL_LINK_CLK,
+       P_DP1_PHY_PLL_VCO_DIV_CLK,
+       P_DP2_PHY_PLL_LINK_CLK,
+       P_DP2_PHY_PLL_VCO_DIV_CLK,
+       P_DP3_PHY_PLL_LINK_CLK,
+       P_DP3_PHY_PLL_VCO_DIV_CLK,
+       P_DSI0_PHY_PLL_OUT_BYTECLK,
+       P_DSI0_PHY_PLL_OUT_DSICLK,
+       P_DSI1_PHY_PLL_OUT_BYTECLK,
+       P_DSI1_PHY_PLL_OUT_DSICLK,
+       P_SLEEP_CLK,
+};
+
+static const struct pll_vco lucid_ole_vco[] = {
+       { 249600000, 2300000000, 0 },
+};
+
+static const struct alpha_pll_config disp_cc_pll0_config = {
+       .l = 0xd,
+       .alpha = 0x6492,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00182261,
+       .config_ctl_hi1_val = 0x82aa299c,
+       .test_ctl_val = 0x00000000,
+       .test_ctl_hi_val = 0x00000003,
+       .test_ctl_hi1_val = 0x00009000,
+       .test_ctl_hi2_val = 0x00000034,
+       .user_ctl_val = 0x00000000,
+       .user_ctl_hi_val = 0x00000005,
+};
+
+static struct clk_alpha_pll disp_cc_pll0 = {
+       .offset = 0x0,
+       .vco_table = lucid_ole_vco,
+       .num_vco = ARRAY_SIZE(lucid_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_pll0",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_reset_lucid_ole_ops,
+               },
+       },
+};
+
+static const struct alpha_pll_config disp_cc_pll1_config = {
+       .l = 0x1f,
+       .alpha = 0x4000,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00182261,
+       .config_ctl_hi1_val = 0x82aa299c,
+       .test_ctl_val = 0x00000000,
+       .test_ctl_hi_val = 0x00000003,
+       .test_ctl_hi1_val = 0x00009000,
+       .test_ctl_hi2_val = 0x00000034,
+       .user_ctl_val = 0x00000000,
+       .user_ctl_hi_val = 0x00000005,
+};
+
+static struct clk_alpha_pll disp_cc_pll1 = {
+       .offset = 0x1000,
+       .vco_table = lucid_ole_vco,
+       .num_vco = ARRAY_SIZE(lucid_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_pll1",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_reset_lucid_ole_ops,
+               },
+       },
+};
+
+static const struct parent_map disp_cc_parent_map_0[] = {
+       { P_BI_TCXO, 0 },
+       { P_DP0_PHY_PLL_LINK_CLK, 1 },
+       { P_DP0_PHY_PLL_VCO_DIV_CLK, 2 },
+       { P_DP3_PHY_PLL_VCO_DIV_CLK, 3 },
+       { P_DP1_PHY_PLL_VCO_DIV_CLK, 4 },
+       { P_DP2_PHY_PLL_VCO_DIV_CLK, 6 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_0[] = {
+       { .index = DT_BI_TCXO },
+       { .index = DT_DP0_PHY_PLL_LINK_CLK },
+       { .index = DT_DP0_PHY_PLL_VCO_DIV_CLK },
+       { .index = DT_DP3_PHY_PLL_VCO_DIV_CLK },
+       { .index = DT_DP1_PHY_PLL_VCO_DIV_CLK },
+       { .index = DT_DP2_PHY_PLL_VCO_DIV_CLK },
+};
+
+static const struct parent_map disp_cc_parent_map_1[] = {
+       { P_BI_TCXO, 0 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_1[] = {
+       { .index = DT_BI_TCXO },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_1_ao[] = {
+       { .index = DT_BI_TCXO_AO },
+};
+
+static const struct parent_map disp_cc_parent_map_2[] = {
+       { P_BI_TCXO, 0 },
+       { P_DSI0_PHY_PLL_OUT_DSICLK, 1 },
+       { P_DSI0_PHY_PLL_OUT_BYTECLK, 2 },
+       { P_DSI1_PHY_PLL_OUT_DSICLK, 3 },
+       { P_DSI1_PHY_PLL_OUT_BYTECLK, 4 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_2[] = {
+       { .index = DT_BI_TCXO },
+       { .index = DT_DSI0_PHY_PLL_OUT_DSICLK },
+       { .index = DT_DSI0_PHY_PLL_OUT_BYTECLK },
+       { .index = DT_DSI1_PHY_PLL_OUT_DSICLK },
+       { .index = DT_DSI1_PHY_PLL_OUT_BYTECLK },
+};
+
+static const struct parent_map disp_cc_parent_map_3[] = {
+       { P_BI_TCXO, 0 },
+       { P_DP0_PHY_PLL_LINK_CLK, 1 },
+       { P_DP1_PHY_PLL_LINK_CLK, 2 },
+       { P_DP2_PHY_PLL_LINK_CLK, 3 },
+       { P_DP3_PHY_PLL_LINK_CLK, 4 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_3[] = {
+       { .index = DT_BI_TCXO },
+       { .index = DT_DP0_PHY_PLL_LINK_CLK },
+       { .index = DT_DP1_PHY_PLL_LINK_CLK },
+       { .index = DT_DP2_PHY_PLL_LINK_CLK },
+       { .index = DT_DP3_PHY_PLL_LINK_CLK },
+};
+
+static const struct parent_map disp_cc_parent_map_4[] = {
+       { P_BI_TCXO, 0 },
+       { P_DSI0_PHY_PLL_OUT_BYTECLK, 2 },
+       { P_DSI1_PHY_PLL_OUT_BYTECLK, 4 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_4[] = {
+       { .index = DT_BI_TCXO },
+       { .index = DT_DSI0_PHY_PLL_OUT_BYTECLK },
+       { .index = DT_DSI1_PHY_PLL_OUT_BYTECLK },
+};
+
+static const struct parent_map disp_cc_parent_map_5[] = {
+       { P_BI_TCXO, 0 },
+       { P_DISP_CC_PLL1_OUT_MAIN, 4 },
+       { P_DISP_CC_PLL1_OUT_EVEN, 6 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_5[] = {
+       { .index = DT_BI_TCXO },
+       { .hw = &disp_cc_pll1.clkr.hw },
+       { .hw = &disp_cc_pll1.clkr.hw },
+};
+
+static const struct parent_map disp_cc_parent_map_6[] = {
+       { P_BI_TCXO, 0 },
+       { P_DISP_CC_PLL0_OUT_MAIN, 1 },
+       { P_DISP_CC_PLL1_OUT_MAIN, 4 },
+       { P_DISP_CC_PLL1_OUT_EVEN, 6 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_6[] = {
+       { .index = DT_BI_TCXO },
+       { .hw = &disp_cc_pll0.clkr.hw },
+       { .hw = &disp_cc_pll1.clkr.hw },
+       { .hw = &disp_cc_pll1.clkr.hw },
+};
+
+static const struct parent_map disp_cc_parent_map_7[] = {
+       { P_SLEEP_CLK, 0 },
+};
+
+static const struct clk_parent_data disp_cc_parent_data_7[] = {
+       { .index = DT_SLEEP_CLK },
+};
+
+static const struct freq_tbl ftbl_disp_cc_mdss_ahb_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(37500000, P_DISP_CC_PLL1_OUT_MAIN, 16, 0, 0),
+       F(75000000, P_DISP_CC_PLL1_OUT_MAIN, 8, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 disp_cc_mdss_ahb_clk_src = {
+       .cmd_rcgr = 0x82ec,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_5,
+       .freq_tbl = ftbl_disp_cc_mdss_ahb_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_ahb_clk_src",
+               .parent_data = disp_cc_parent_data_5,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_5),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_disp_cc_mdss_byte0_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 disp_cc_mdss_byte0_clk_src = {
+       .cmd_rcgr = 0x810c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_2,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_byte0_clk_src",
+               .parent_data = disp_cc_parent_data_2,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_2),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_byte2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_byte1_clk_src = {
+       .cmd_rcgr = 0x8128,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_2,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_byte1_clk_src",
+               .parent_data = disp_cc_parent_data_2,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_2),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_byte2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx0_aux_clk_src = {
+       .cmd_rcgr = 0x81c0,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_1,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx0_aux_clk_src",
+               .parent_data = disp_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx0_link_clk_src = {
+       .cmd_rcgr = 0x8174,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_3,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx0_link_clk_src",
+               .parent_data = disp_cc_parent_data_3,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_3),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_byte2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx0_pixel0_clk_src = {
+       .cmd_rcgr = 0x8190,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_0,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx0_pixel0_clk_src",
+               .parent_data = disp_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_dp_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx0_pixel1_clk_src = {
+       .cmd_rcgr = 0x81a8,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_0,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx0_pixel1_clk_src",
+               .parent_data = disp_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_dp_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx1_aux_clk_src = {
+       .cmd_rcgr = 0x8224,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_1,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx1_aux_clk_src",
+               .parent_data = disp_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx1_link_clk_src = {
+       .cmd_rcgr = 0x8208,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_3,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx1_link_clk_src",
+               .parent_data = disp_cc_parent_data_3,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_3),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_byte2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx1_pixel0_clk_src = {
+       .cmd_rcgr = 0x81d8,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_0,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx1_pixel0_clk_src",
+               .parent_data = disp_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_dp_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx1_pixel1_clk_src = {
+       .cmd_rcgr = 0x81f0,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_0,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx1_pixel1_clk_src",
+               .parent_data = disp_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_dp_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx2_aux_clk_src = {
+       .cmd_rcgr = 0x8288,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_1,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx2_aux_clk_src",
+               .parent_data = disp_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx2_link_clk_src = {
+       .cmd_rcgr = 0x823c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_3,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx2_link_clk_src",
+               .parent_data = disp_cc_parent_data_3,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_3),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_byte2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx2_pixel0_clk_src = {
+       .cmd_rcgr = 0x8258,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_0,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx2_pixel0_clk_src",
+               .parent_data = disp_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_dp_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx2_pixel1_clk_src = {
+       .cmd_rcgr = 0x8270,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_0,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx2_pixel1_clk_src",
+               .parent_data = disp_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_dp_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx3_aux_clk_src = {
+       .cmd_rcgr = 0x82d4,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_1,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx3_aux_clk_src",
+               .parent_data = disp_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx3_link_clk_src = {
+       .cmd_rcgr = 0x82b8,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_3,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx3_link_clk_src",
+               .parent_data = disp_cc_parent_data_3,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_3),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_byte2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_dptx3_pixel0_clk_src = {
+       .cmd_rcgr = 0x82a0,
+       .mnd_width = 16,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_0,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx3_pixel0_clk_src",
+               .parent_data = disp_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_dp_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_esc0_clk_src = {
+       .cmd_rcgr = 0x8144,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_4,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_esc0_clk_src",
+               .parent_data = disp_cc_parent_data_4,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_4),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_esc1_clk_src = {
+       .cmd_rcgr = 0x815c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_4,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_esc1_clk_src",
+               .parent_data = disp_cc_parent_data_4,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_4),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_disp_cc_mdss_mdp_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(85714286, P_DISP_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       F(100000000, P_DISP_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       F(150000000, P_DISP_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       F(172000000, P_DISP_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       F(200000000, P_DISP_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       F(325000000, P_DISP_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       F(375000000, P_DISP_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       F(514000000, P_DISP_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       F(575000000, P_DISP_CC_PLL0_OUT_MAIN, 3, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 disp_cc_mdss_mdp_clk_src = {
+       .cmd_rcgr = 0x80dc,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_6,
+       .freq_tbl = ftbl_disp_cc_mdss_mdp_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_mdp_clk_src",
+               .parent_data = disp_cc_parent_data_6,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_6),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_pclk0_clk_src = {
+       .cmd_rcgr = 0x80ac,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_2,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_pclk0_clk_src",
+               .parent_data = disp_cc_parent_data_2,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_2),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_pixel_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_pclk1_clk_src = {
+       .cmd_rcgr = 0x80c4,
+       .mnd_width = 8,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_2,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_pclk1_clk_src",
+               .parent_data = disp_cc_parent_data_2,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_2),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_pixel_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_mdss_vsync_clk_src = {
+       .cmd_rcgr = 0x80f4,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_1,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_vsync_clk_src",
+               .parent_data = disp_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_disp_cc_sleep_clk_src[] = {
+       F(32000, P_SLEEP_CLK, 1, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 disp_cc_sleep_clk_src = {
+       .cmd_rcgr = 0xe05c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_7,
+       .freq_tbl = ftbl_disp_cc_sleep_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_sleep_clk_src",
+               .parent_data = disp_cc_parent_data_7,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_7),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 disp_cc_xo_clk_src = {
+       .cmd_rcgr = 0xe03c,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = disp_cc_parent_map_1,
+       .freq_tbl = ftbl_disp_cc_mdss_byte0_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_xo_clk_src",
+               .parent_data = disp_cc_parent_data_1_ao,
+               .num_parents = ARRAY_SIZE(disp_cc_parent_data_1_ao),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
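+/*
+ * Post-RCG dividers. clk_regmap_div_ro_ops makes these read-only: the kernel
+ * reports whatever ratio is already programmed and, via CLK_SET_RATE_PARENT,
+ * forwards rate requests to the parent RCG instead of touching the divider.
+ */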
+static struct clk_regmap_div disp_cc_mdss_byte0_div_clk_src = {
+       .reg = 0x8124,
+       .shift = 0,
+       .width = 4,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_byte0_div_clk_src",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &disp_cc_mdss_byte0_clk_src.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_regmap_div_ro_ops,
+       },
+};
+
+static struct clk_regmap_div disp_cc_mdss_byte1_div_clk_src = {
+       .reg = 0x8140,
+       .shift = 0,
+       .width = 4,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_byte1_div_clk_src",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &disp_cc_mdss_byte1_clk_src.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_regmap_div_ro_ops,
+       },
+};
+
+static struct clk_regmap_div disp_cc_mdss_dptx0_link_div_clk_src = {
+       .reg = 0x818c,
+       .shift = 0,
+       .width = 4,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx0_link_div_clk_src",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &disp_cc_mdss_dptx0_link_clk_src.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_regmap_div_ro_ops,
+       },
+};
+
+static struct clk_regmap_div disp_cc_mdss_dptx1_link_div_clk_src = {
+       .reg = 0x8220,
+       .shift = 0,
+       .width = 4,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx1_link_div_clk_src",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &disp_cc_mdss_dptx1_link_clk_src.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_regmap_div_ro_ops,
+       },
+};
+
+static struct clk_regmap_div disp_cc_mdss_dptx2_link_div_clk_src = {
+       .reg = 0x8254,
+       .shift = 0,
+       .width = 4,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx2_link_div_clk_src",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &disp_cc_mdss_dptx2_link_clk_src.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_regmap_div_ro_ops,
+       },
+};
+
+static struct clk_regmap_div disp_cc_mdss_dptx3_link_div_clk_src = {
+       .reg = 0x82d0,
+       .shift = 0,
+       .width = 4,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "disp_cc_mdss_dptx3_link_div_clk_src",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &disp_cc_mdss_dptx3_link_clk_src.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_regmap_div_ro_ops,
+       },
+};
+
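+/*
+ * Branch (gate) clocks. enable_reg and halt_reg name the same CBCR register:
+ * BIT(0) turns the branch on, and the halt status is polled afterwards to
+ * confirm the clock really started (or stopped) ticking.
+ */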
+static struct clk_branch disp_cc_mdss_accu_clk = {
+       .halt_reg = 0xe058,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0xe058,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_accu_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_xo_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_ahb1_clk = {
+       .halt_reg = 0xa020,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xa020,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_ahb1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_ahb_clk = {
+       .halt_reg = 0x80a8,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x80a8,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_byte0_clk = {
+       .halt_reg = 0x8028,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8028,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_byte0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_byte0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_byte0_intf_clk = {
+       .halt_reg = 0x802c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x802c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_byte0_intf_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_byte0_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_byte1_clk = {
+       .halt_reg = 0x8030,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8030,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_byte1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_byte1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_byte1_intf_clk = {
+       .halt_reg = 0x8034,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8034,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_byte1_intf_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_byte1_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx0_aux_clk = {
+       .halt_reg = 0x8058,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8058,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx0_aux_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx0_aux_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx0_link_clk = {
+       .halt_reg = 0x8040,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8040,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx0_link_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx0_link_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx0_link_intf_clk = {
+       .halt_reg = 0x8048,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8048,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx0_link_intf_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx0_link_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx0_pixel0_clk = {
+       .halt_reg = 0x8050,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8050,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx0_pixel0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx0_pixel0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx0_pixel1_clk = {
+       .halt_reg = 0x8054,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8054,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx0_pixel1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx0_pixel1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx0_usb_router_link_intf_clk = {
+       .halt_reg = 0x8044,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8044,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx0_usb_router_link_intf_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx0_link_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx1_aux_clk = {
+       .halt_reg = 0x8074,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8074,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx1_aux_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx1_aux_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx1_link_clk = {
+       .halt_reg = 0x8064,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8064,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx1_link_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx1_link_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx1_link_intf_clk = {
+       .halt_reg = 0x806c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x806c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx1_link_intf_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx1_link_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx1_pixel0_clk = {
+       .halt_reg = 0x805c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x805c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx1_pixel0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx1_pixel0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx1_pixel1_clk = {
+       .halt_reg = 0x8060,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8060,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx1_pixel1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx1_pixel1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx1_usb_router_link_intf_clk = {
+       .halt_reg = 0x8068,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8068,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx1_usb_router_link_intf_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx1_link_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx2_aux_clk = {
+       .halt_reg = 0x8090,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8090,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx2_aux_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx2_aux_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx2_link_clk = {
+       .halt_reg = 0x8080,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8080,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx2_link_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx2_link_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx2_link_intf_clk = {
+       .halt_reg = 0x8084,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8084,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx2_link_intf_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx2_link_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx2_pixel0_clk = {
+       .halt_reg = 0x8078,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8078,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx2_pixel0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx2_pixel0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx2_pixel1_clk = {
+       .halt_reg = 0x807c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x807c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx2_pixel1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx2_pixel1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx2_usb_router_link_intf_clk = {
+       .halt_reg = 0x8088,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8088,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx2_usb_router_link_intf_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx2_link_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx3_aux_clk = {
+       .halt_reg = 0x80a0,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x80a0,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx3_aux_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx3_aux_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx3_link_clk = {
+       .halt_reg = 0x8098,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8098,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx3_link_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx3_link_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx3_link_intf_clk = {
+       .halt_reg = 0x809c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x809c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx3_link_intf_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx3_link_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_dptx3_pixel0_clk = {
+       .halt_reg = 0x8094,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8094,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_dptx3_pixel0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_dptx3_pixel0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_esc0_clk = {
+       .halt_reg = 0x8038,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8038,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_esc0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_esc0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_esc1_clk = {
+       .halt_reg = 0x803c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x803c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_esc1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_esc1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_mdp1_clk = {
+       .halt_reg = 0xa004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xa004,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_mdp1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_mdp_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_mdp_clk = {
+       .halt_reg = 0x800c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x800c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_mdp_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_mdp_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_mdp_lut1_clk = {
+       .halt_reg = 0xa010,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xa010,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_mdp_lut1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_mdp_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_mdp_lut_clk = {
+       .halt_reg = 0x8018,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x8018,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_mdp_lut_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_mdp_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_non_gdsc_ahb_clk = {
+       .halt_reg = 0xc004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0xc004,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_non_gdsc_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_pclk0_clk = {
+       .halt_reg = 0x8004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8004,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_pclk0_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_pclk0_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_pclk1_clk = {
+       .halt_reg = 0x8008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8008,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_pclk1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_pclk1_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_rscc_ahb_clk = {
+       .halt_reg = 0xc00c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xc00c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_rscc_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_ahb_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_rscc_vsync_clk = {
+       .halt_reg = 0xc008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xc008,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_rscc_vsync_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_vsync_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_vsync1_clk = {
+       .halt_reg = 0xa01c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0xa01c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_vsync1_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_vsync_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch disp_cc_mdss_vsync_clk = {
+       .halt_reg = 0x8024,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8024,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "disp_cc_mdss_vsync_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &disp_cc_mdss_vsync_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
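+/*
+ * MDSS power domains. HW_CTRL hands collapse/restore to hardware when idle,
+ * and RETAIN_FF_ENABLE keeps register state across a power collapse.
+ */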
+static struct gdsc mdss_gdsc = {
+       .gdscr = 0x9000,
+       .en_rest_wait_val = 0x2,
+       .en_few_wait_val = 0x2,
+       .clk_dis_wait_val = 0xf,
+       .pd = {
+               .name = "mdss_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = HW_CTRL | RETAIN_FF_ENABLE,
+};
+
+static struct gdsc mdss_int2_gdsc = {
+       .gdscr = 0xb000,
+       .en_rest_wait_val = 0x2,
+       .en_few_wait_val = 0x2,
+       .clk_dis_wait_val = 0xf,
+       .pd = {
+               .name = "mdss_int2_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = HW_CTRL | RETAIN_FF_ENABLE,
+};
+
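+/* Indices below come from the dt-bindings header matching this compatible. */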
+static struct clk_regmap *disp_cc_x1e80100_clocks[] = {
+       [DISP_CC_MDSS_ACCU_CLK] = &disp_cc_mdss_accu_clk.clkr,
+       [DISP_CC_MDSS_AHB1_CLK] = &disp_cc_mdss_ahb1_clk.clkr,
+       [DISP_CC_MDSS_AHB_CLK] = &disp_cc_mdss_ahb_clk.clkr,
+       [DISP_CC_MDSS_AHB_CLK_SRC] = &disp_cc_mdss_ahb_clk_src.clkr,
+       [DISP_CC_MDSS_BYTE0_CLK] = &disp_cc_mdss_byte0_clk.clkr,
+       [DISP_CC_MDSS_BYTE0_CLK_SRC] = &disp_cc_mdss_byte0_clk_src.clkr,
+       [DISP_CC_MDSS_BYTE0_DIV_CLK_SRC] = &disp_cc_mdss_byte0_div_clk_src.clkr,
+       [DISP_CC_MDSS_BYTE0_INTF_CLK] = &disp_cc_mdss_byte0_intf_clk.clkr,
+       [DISP_CC_MDSS_BYTE1_CLK] = &disp_cc_mdss_byte1_clk.clkr,
+       [DISP_CC_MDSS_BYTE1_CLK_SRC] = &disp_cc_mdss_byte1_clk_src.clkr,
+       [DISP_CC_MDSS_BYTE1_DIV_CLK_SRC] = &disp_cc_mdss_byte1_div_clk_src.clkr,
+       [DISP_CC_MDSS_BYTE1_INTF_CLK] = &disp_cc_mdss_byte1_intf_clk.clkr,
+       [DISP_CC_MDSS_DPTX0_AUX_CLK] = &disp_cc_mdss_dptx0_aux_clk.clkr,
+       [DISP_CC_MDSS_DPTX0_AUX_CLK_SRC] = &disp_cc_mdss_dptx0_aux_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX0_LINK_CLK] = &disp_cc_mdss_dptx0_link_clk.clkr,
+       [DISP_CC_MDSS_DPTX0_LINK_CLK_SRC] = &disp_cc_mdss_dptx0_link_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX0_LINK_DIV_CLK_SRC] = &disp_cc_mdss_dptx0_link_div_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX0_LINK_INTF_CLK] = &disp_cc_mdss_dptx0_link_intf_clk.clkr,
+       [DISP_CC_MDSS_DPTX0_PIXEL0_CLK] = &disp_cc_mdss_dptx0_pixel0_clk.clkr,
+       [DISP_CC_MDSS_DPTX0_PIXEL0_CLK_SRC] = &disp_cc_mdss_dptx0_pixel0_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX0_PIXEL1_CLK] = &disp_cc_mdss_dptx0_pixel1_clk.clkr,
+       [DISP_CC_MDSS_DPTX0_PIXEL1_CLK_SRC] = &disp_cc_mdss_dptx0_pixel1_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX0_USB_ROUTER_LINK_INTF_CLK] =
+               &disp_cc_mdss_dptx0_usb_router_link_intf_clk.clkr,
+       [DISP_CC_MDSS_DPTX1_AUX_CLK] = &disp_cc_mdss_dptx1_aux_clk.clkr,
+       [DISP_CC_MDSS_DPTX1_AUX_CLK_SRC] = &disp_cc_mdss_dptx1_aux_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX1_LINK_CLK] = &disp_cc_mdss_dptx1_link_clk.clkr,
+       [DISP_CC_MDSS_DPTX1_LINK_CLK_SRC] = &disp_cc_mdss_dptx1_link_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX1_LINK_DIV_CLK_SRC] = &disp_cc_mdss_dptx1_link_div_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX1_LINK_INTF_CLK] = &disp_cc_mdss_dptx1_link_intf_clk.clkr,
+       [DISP_CC_MDSS_DPTX1_PIXEL0_CLK] = &disp_cc_mdss_dptx1_pixel0_clk.clkr,
+       [DISP_CC_MDSS_DPTX1_PIXEL0_CLK_SRC] = &disp_cc_mdss_dptx1_pixel0_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX1_PIXEL1_CLK] = &disp_cc_mdss_dptx1_pixel1_clk.clkr,
+       [DISP_CC_MDSS_DPTX1_PIXEL1_CLK_SRC] = &disp_cc_mdss_dptx1_pixel1_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX1_USB_ROUTER_LINK_INTF_CLK] =
+               &disp_cc_mdss_dptx1_usb_router_link_intf_clk.clkr,
+       [DISP_CC_MDSS_DPTX2_AUX_CLK] = &disp_cc_mdss_dptx2_aux_clk.clkr,
+       [DISP_CC_MDSS_DPTX2_AUX_CLK_SRC] = &disp_cc_mdss_dptx2_aux_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX2_LINK_CLK] = &disp_cc_mdss_dptx2_link_clk.clkr,
+       [DISP_CC_MDSS_DPTX2_LINK_CLK_SRC] = &disp_cc_mdss_dptx2_link_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX2_LINK_DIV_CLK_SRC] = &disp_cc_mdss_dptx2_link_div_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX2_LINK_INTF_CLK] = &disp_cc_mdss_dptx2_link_intf_clk.clkr,
+       [DISP_CC_MDSS_DPTX2_PIXEL0_CLK] = &disp_cc_mdss_dptx2_pixel0_clk.clkr,
+       [DISP_CC_MDSS_DPTX2_PIXEL0_CLK_SRC] = &disp_cc_mdss_dptx2_pixel0_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX2_PIXEL1_CLK] = &disp_cc_mdss_dptx2_pixel1_clk.clkr,
+       [DISP_CC_MDSS_DPTX2_PIXEL1_CLK_SRC] = &disp_cc_mdss_dptx2_pixel1_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX2_USB_ROUTER_LINK_INTF_CLK] =
+               &disp_cc_mdss_dptx2_usb_router_link_intf_clk.clkr,
+       [DISP_CC_MDSS_DPTX3_AUX_CLK] = &disp_cc_mdss_dptx3_aux_clk.clkr,
+       [DISP_CC_MDSS_DPTX3_AUX_CLK_SRC] = &disp_cc_mdss_dptx3_aux_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX3_LINK_CLK] = &disp_cc_mdss_dptx3_link_clk.clkr,
+       [DISP_CC_MDSS_DPTX3_LINK_CLK_SRC] = &disp_cc_mdss_dptx3_link_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX3_LINK_DIV_CLK_SRC] = &disp_cc_mdss_dptx3_link_div_clk_src.clkr,
+       [DISP_CC_MDSS_DPTX3_LINK_INTF_CLK] = &disp_cc_mdss_dptx3_link_intf_clk.clkr,
+       [DISP_CC_MDSS_DPTX3_PIXEL0_CLK] = &disp_cc_mdss_dptx3_pixel0_clk.clkr,
+       [DISP_CC_MDSS_DPTX3_PIXEL0_CLK_SRC] = &disp_cc_mdss_dptx3_pixel0_clk_src.clkr,
+       [DISP_CC_MDSS_ESC0_CLK] = &disp_cc_mdss_esc0_clk.clkr,
+       [DISP_CC_MDSS_ESC0_CLK_SRC] = &disp_cc_mdss_esc0_clk_src.clkr,
+       [DISP_CC_MDSS_ESC1_CLK] = &disp_cc_mdss_esc1_clk.clkr,
+       [DISP_CC_MDSS_ESC1_CLK_SRC] = &disp_cc_mdss_esc1_clk_src.clkr,
+       [DISP_CC_MDSS_MDP1_CLK] = &disp_cc_mdss_mdp1_clk.clkr,
+       [DISP_CC_MDSS_MDP_CLK] = &disp_cc_mdss_mdp_clk.clkr,
+       [DISP_CC_MDSS_MDP_CLK_SRC] = &disp_cc_mdss_mdp_clk_src.clkr,
+       [DISP_CC_MDSS_MDP_LUT1_CLK] = &disp_cc_mdss_mdp_lut1_clk.clkr,
+       [DISP_CC_MDSS_MDP_LUT_CLK] = &disp_cc_mdss_mdp_lut_clk.clkr,
+       [DISP_CC_MDSS_NON_GDSC_AHB_CLK] = &disp_cc_mdss_non_gdsc_ahb_clk.clkr,
+       [DISP_CC_MDSS_PCLK0_CLK] = &disp_cc_mdss_pclk0_clk.clkr,
+       [DISP_CC_MDSS_PCLK0_CLK_SRC] = &disp_cc_mdss_pclk0_clk_src.clkr,
+       [DISP_CC_MDSS_PCLK1_CLK] = &disp_cc_mdss_pclk1_clk.clkr,
+       [DISP_CC_MDSS_PCLK1_CLK_SRC] = &disp_cc_mdss_pclk1_clk_src.clkr,
+       [DISP_CC_MDSS_RSCC_AHB_CLK] = &disp_cc_mdss_rscc_ahb_clk.clkr,
+       [DISP_CC_MDSS_RSCC_VSYNC_CLK] = &disp_cc_mdss_rscc_vsync_clk.clkr,
+       [DISP_CC_MDSS_VSYNC1_CLK] = &disp_cc_mdss_vsync1_clk.clkr,
+       [DISP_CC_MDSS_VSYNC_CLK] = &disp_cc_mdss_vsync_clk.clkr,
+       [DISP_CC_MDSS_VSYNC_CLK_SRC] = &disp_cc_mdss_vsync_clk_src.clkr,
+       [DISP_CC_PLL0] = &disp_cc_pll0.clkr,
+       [DISP_CC_PLL1] = &disp_cc_pll1.clkr,
+       [DISP_CC_SLEEP_CLK_SRC] = &disp_cc_sleep_clk_src.clkr,
+       [DISP_CC_XO_CLK_SRC] = &disp_cc_xo_clk_src.clkr,
+};
+
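+/* Block resets: each entry is a BCR register; bit 0 asserts the block reset. */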
+static const struct qcom_reset_map disp_cc_x1e80100_resets[] = {
+       [DISP_CC_MDSS_CORE_BCR] = { 0x8000 },
+       [DISP_CC_MDSS_CORE_INT2_BCR] = { 0xa000 },
+       [DISP_CC_MDSS_RSCC_BCR] = { 0xc000 },
+};
+
+static struct gdsc *disp_cc_x1e80100_gdscs[] = {
+       [MDSS_GDSC] = &mdss_gdsc,
+       [MDSS_INT2_GDSC] = &mdss_int2_gdsc,
+};
+
+static const struct regmap_config disp_cc_x1e80100_regmap_config = {
+       .reg_bits = 32,
+       .reg_stride = 4,
+       .val_bits = 32,
+       .max_register = 0x11008,
+       .fast_io = true,
+};
+
+static const struct qcom_cc_desc disp_cc_x1e80100_desc = {
+       .config = &disp_cc_x1e80100_regmap_config,
+       .clks = disp_cc_x1e80100_clocks,
+       .num_clks = ARRAY_SIZE(disp_cc_x1e80100_clocks),
+       .resets = disp_cc_x1e80100_resets,
+       .num_resets = ARRAY_SIZE(disp_cc_x1e80100_resets),
+       .gdscs = disp_cc_x1e80100_gdscs,
+       .num_gdscs = ARRAY_SIZE(disp_cc_x1e80100_gdscs),
+};
+
+static const struct of_device_id disp_cc_x1e80100_match_table[] = {
+       { .compatible = "qcom,x1e80100-dispcc" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, disp_cc_x1e80100_match_table);
+
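+/*
+ * Probe order matters here: hold a runtime-PM reference while the regmap is
+ * live, configure both display PLLs, latch the always-on branches, then let
+ * qcom_cc_really_probe() register the clocks, resets and power domains.
+ */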
+static int disp_cc_x1e80100_probe(struct platform_device *pdev)
+{
+       struct regmap *regmap;
+       int ret;
+
+       ret = devm_pm_runtime_enable(&pdev->dev);
+       if (ret)
+               return ret;
+
+       ret = pm_runtime_resume_and_get(&pdev->dev);
+       if (ret)
+               return ret;
+
+       regmap = qcom_cc_map(pdev, &disp_cc_x1e80100_desc);
+       if (IS_ERR(regmap)) {
+               ret = PTR_ERR(regmap);
+               goto err_put_rpm;
+       }
+
+       clk_lucid_evo_pll_configure(&disp_cc_pll0, regmap, &disp_cc_pll0_config);
+       clk_lucid_evo_pll_configure(&disp_cc_pll1, regmap, &disp_cc_pll1_config);
+
+       /* Enable clock gating for MDP clocks */
+       regmap_update_bits(regmap, DISP_CC_MISC_CMD, 0x10, 0x10);
+
+       /* Keep clocks always enabled */
+       qcom_branch_set_clk_en(regmap, 0xe074); /* DISP_CC_SLEEP_CLK */
+       qcom_branch_set_clk_en(regmap, 0xe054); /* DISP_CC_XO_CLK */
+
+       ret = qcom_cc_really_probe(pdev, &disp_cc_x1e80100_desc, regmap);
+       if (ret)
+               goto err_put_rpm;
+
+       pm_runtime_put(&pdev->dev);
+
+       return 0;
+
+err_put_rpm:
+       pm_runtime_put_sync(&pdev->dev);
+
+       return ret;
+}
+
+static struct platform_driver disp_cc_x1e80100_driver = {
+       .probe = disp_cc_x1e80100_probe,
+       .driver = {
+               .name = "dispcc-x1e80100",
+               .of_match_table = disp_cc_x1e80100_match_table,
+       },
+};
+
+static int __init disp_cc_x1e80100_init(void)
+{
+       return platform_driver_register(&disp_cc_x1e80100_driver);
+}
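+/* Register early (subsys_initcall) so dependent drivers find these clocks. */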
+subsys_initcall(disp_cc_x1e80100_init);
+
+static void __exit disp_cc_x1e80100_exit(void)
+{
+       platform_driver_unregister(&disp_cc_x1e80100_driver);
+}
+module_exit(disp_cc_x1e80100_exit);
+
+MODULE_DESCRIPTION("QTI Display Clock Controller X1E80100 Driver");
+MODULE_LICENSE("GPL");
index 4aba47e8700d2723ea7613b3a5fbc70fd7a16e12..c1732d70e3a2357091355e6f577f5e74ccab3b83 100644
@@ -857,6 +857,7 @@ static struct clk_rcg2 lpass_sway_clk_src = {
 
 static const struct freq_tbl ftbl_pcie0_aux_clk_src[] = {
        F(2000000, P_XO, 12, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 pcie0_aux_clk_src = {
@@ -1099,6 +1100,7 @@ static const struct freq_tbl ftbl_qpic_io_macro_clk_src[] = {
        F(100000000, P_GPLL0, 8, 0, 0),
        F(200000000, P_GPLL0, 4, 0, 0),
        F(320000000, P_GPLL0, 2.5, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 qpic_io_macro_clk_src = {
@@ -1194,6 +1196,7 @@ static struct clk_rcg2 ubi0_axi_clk_src = {
 static const struct freq_tbl ftbl_ubi0_core_clk_src[] = {
        F(850000000, P_UBI32_PLL, 1, 0, 0),
        F(1000000000, P_UBI32_PLL, 1, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 ubi0_core_clk_src = {
@@ -1754,7 +1757,7 @@ static struct clk_branch gcc_gmac0_sys_clk = {
        .halt_check = BRANCH_HALT_DELAY,
        .halt_bit = 31,
        .clkr = {
-               .enable_reg = 0x683190,
+               .enable_reg = 0x68190,
                .enable_mask = BIT(0),
                .hw.init = &(struct clk_init_data) {
                        .name = "gcc_gmac0_sys_clk",
@@ -2180,7 +2183,7 @@ static struct clk_branch gcc_pcie1_axi_s_clk = {
 };
 
 static struct clk_branch gcc_pcie1_pipe_clk = {
-       .halt_reg = 8,
+       .halt_reg = 0x76018,
        .halt_check = BRANCH_HALT_DELAY,
        .halt_bit = 31,
        .clkr = {
@@ -3632,7 +3635,7 @@ static const struct qcom_reset_map gcc_ipq5018_resets[] = {
        [GCC_SYSTEM_NOC_BCR] = { 0x26000, 0 },
        [GCC_TCSR_BCR] = { 0x28000, 0 },
        [GCC_TLMM_BCR] = { 0x34000, 0 },
-       [GCC_UBI0_AXI_ARES] = { 0x680},
+       [GCC_UBI0_AXI_ARES] = { 0x68010, 0 },
        [GCC_UBI0_AHB_ARES] = { 0x68010, 1 },
        [GCC_UBI0_NC_AXI_ARES] = { 0x68010, 2 },
        [GCC_UBI0_DBG_ARES] = { 0x68010, 3 },
index b366912cd6480e181f4903bb2170202b9546ab85..7e69de34c310c27715ba61c7444527182e364ad5 100644
@@ -1554,6 +1554,7 @@ static struct clk_regmap_div nss_ubi0_div_clk_src = {
 
 static const struct freq_tbl ftbl_pcie_aux_clk_src[] = {
        F(24000000, P_XO, 1, 0, 0),
+       { }
 };
 
 static const struct clk_parent_data gcc_xo_gpll0_core_pi_sleep_clk[] = {
@@ -1734,6 +1735,7 @@ static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = {
        F(160000000, P_GPLL0, 5, 0, 0),
        F(216000000, P_GPLL6, 5, 0, 0),
        F(308570000, P_GPLL6, 3.5, 0, 0),
+       { }
 };
 
 static const struct clk_parent_data gcc_xo_gpll0_gpll6_gpll0_div2[] = {
@@ -3522,6 +3524,22 @@ static struct clk_branch gcc_prng_ahb_clk = {
        },
 };
 
+static struct clk_branch gcc_qdss_at_clk = {
+       .halt_reg = 0x29024,
+       .clkr = {
+               .enable_reg = 0x29024,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "gcc_qdss_at_clk",
+                       .parent_hws = (const struct clk_hw *[]){
+                               &qdss_at_clk_src.clkr.hw },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
 static struct clk_branch gcc_qdss_dap_clk = {
        .halt_reg = 0x29084,
        .clkr = {
@@ -4361,6 +4379,7 @@ static struct clk_regmap *gcc_ipq6018_clks[] = {
        [GCC_SYS_NOC_PCIE0_AXI_CLK] = &gcc_sys_noc_pcie0_axi_clk.clkr,
        [GCC_PCIE0_PIPE_CLK] = &gcc_pcie0_pipe_clk.clkr,
        [GCC_PRNG_AHB_CLK] = &gcc_prng_ahb_clk.clkr,
+       [GCC_QDSS_AT_CLK] = &gcc_qdss_at_clk.clkr,
        [GCC_QDSS_DAP_CLK] = &gcc_qdss_dap_clk.clkr,
        [GCC_QPIC_AHB_CLK] = &gcc_qpic_ahb_clk.clkr,
        [GCC_QPIC_CLK] = &gcc_qpic_clk.clkr,
index b7faf12a511a16f786d81643a1f90296212886f2..7bc679871f324f5354fca91ac11657da447a8cff 100644
@@ -644,6 +644,7 @@ static struct clk_rcg2 pcie0_axi_clk_src = {
 
 static const struct freq_tbl ftbl_pcie_aux_clk_src[] = {
        F(19200000, P_XO, 1, 0, 0),
+       { }
 };
 
 static const struct clk_parent_data gcc_xo_gpll0_sleep_clk[] = {
@@ -795,6 +796,7 @@ static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = {
        F(19200000, P_XO, 1, 0, 0),
        F(160000000, P_GPLL0, 5, 0, 0),
        F(308570000, P_GPLL6, 3.5, 0, 0),
+       { }
 };
 
 static const struct clk_parent_data gcc_xo_gpll0_gpll6_gpll0_div2[] = {
index e8190108e1aef394eb546981b1cea8a6f6f0e2ba..0a3f846695b803fe4b93d394f96de393b1a55f1d 100644
@@ -2082,6 +2082,7 @@ static struct clk_branch gcc_sdcc1_apps_clk = {
 static const struct freq_tbl ftbl_sdcc_ice_core_clk_src[] = {
        F(150000000, P_GPLL4, 8, 0, 0),
        F(300000000, P_GPLL4, 4, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 sdcc1_ice_core_clk_src = {
index 3e5a8cb14d4df1bc34d0bcbb93618f15d44f89fc..68359534ff2575dc57ef7a14118932c98dbb2fc2 100644
@@ -4171,6 +4171,10 @@ static const struct qcom_reset_map gcc_msm8953_resets[] = {
        [GCC_USB3PHY_PHY_BCR]   = { 0x3f03c },
        [GCC_USB3_PHY_BCR]      = { 0x3f034 },
        [GCC_USB_30_BCR]        = { 0x3f070 },
+       [GCC_MDSS_BCR]          = { 0x4d074 },
+       [GCC_CRYPTO_BCR]        = { 0x16000 },
+       [GCC_SDCC1_BCR]         = { 0x42000 },
+       [GCC_SDCC2_BCR]         = { 0x43000 },
 };
 
 static const struct regmap_config gcc_msm8953_regmap_config = {
index 8171d23c96e64d57da0203d3098b0572eefa7bd0..5bcbfbf52cb9e5691ac4106ab6b7173c3037a6f0 100644
@@ -4662,8 +4662,8 @@ static const struct qcom_reset_map gcc_sa8775p_resets[] = {
        [GCC_USB3UNIPHY_PHY_MP0_BCR] = { 0x5c020 },
        [GCC_USB3UNIPHY_PHY_MP1_BCR] = { 0x5c024 },
        [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x76000 },
-       [GCC_VIDEO_AXI0_CLK_ARES] = { 0x34014, 2 },
-       [GCC_VIDEO_AXI1_CLK_ARES] = { 0x3401c, 2 },
+       [GCC_VIDEO_AXI0_CLK_ARES] = { .reg = 0x34014, .bit = 2, .udelay = 400 },
+       [GCC_VIDEO_AXI1_CLK_ARES] = { .reg = 0x3401c, .bit = 2, .udelay = 400 },
        [GCC_VIDEO_BCR] = { 0x34000 },
 };
 
@@ -4742,21 +4742,16 @@ static int gcc_sa8775p_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       /*
-        * Keep the clocks always-ON
-        * GCC_CAMERA_AHB_CLK, GCC_CAMERA_XO_CLK, GCC_DISP1_AHB_CLK,
-        * GCC_DISP1_XO_CLK, GCC_DISP_AHB_CLK, GCC_DISP_XO_CLK,
-        * GCC_GPU_CFG_AHB_CLK, GCC_VIDEO_AHB_CLK, GCC_VIDEO_XO_CLK.
-        */
-       regmap_update_bits(regmap, 0x32004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x32020, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0xc7004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0xc7018, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x33004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x33018, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x7d004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x34004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x34024, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x32004); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x32020); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0xc7004); /* GCC_DISP1_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0xc7018); /* GCC_DISP1_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x33004); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x33018); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x7d004); /* GCC_GPU_CFG_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x34004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x34024); /* GCC_VIDEO_XO_CLK */
 
        return qcom_cc_really_probe(pdev, &gcc_sa8775p_desc, regmap);
 }
index a3406aadbd17076d85f2558af0f1bf6bf3cd4acc..6a5f785c0ced0084f95a5d1ba2becf8e17e2a89e 100644
@@ -2443,19 +2443,15 @@ static int gcc_sc7180_probe(struct platform_device *pdev)
        regmap_update_bits(regmap, 0x4d110, 0x3, 0x3);
        regmap_update_bits(regmap, 0x71028, 0x3, 0x3);
 
-       /*
-        * Keep the clocks always-ON
-        * GCC_CPUSS_GNOC_CLK, GCC_VIDEO_AHB_CLK, GCC_CAMERA_AHB_CLK,
-        * GCC_DISP_AHB_CLK, GCC_GPU_CFG_AHB_CLK
-        */
-       regmap_update_bits(regmap, 0x48004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b008, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b00c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b02c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b028, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b030, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x48004); /* GCC_CPUSS_GNOC_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b008); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b00c); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b02c); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b028); /* GCC_VIDEO_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b030); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x71004); /* GCC_GPU_CFG_AHB_CLK */
 
        ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks,
                                        ARRAY_SIZE(gcc_dfs_clocks));
index 2b661df5de2660bf554656fb573477a86ede8ad5..f45a8318900c5ff8e96ec0fed67e13a396b5df79 100644
@@ -3453,18 +3453,14 @@ static int gcc_sc7280_probe(struct platform_device *pdev)
        if (IS_ERR(regmap))
                return PTR_ERR(regmap);
 
-       /*
-        * Keep the clocks always-ON
-        * GCC_CAMERA_AHB_CLK/XO_CLK, GCC_DISP_AHB_CLK/XO_CLK
-        * GCC_VIDEO_AHB_CLK/XO_CLK, GCC_GPU_CFG_AHB_CLK
-        */
-       regmap_update_bits(regmap, 0x26004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x26028, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x27004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x2701C, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x28004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x28014, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x26004); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x26028); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x27004); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x2701c); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x28004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x28014); /* GCC_VIDEO_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x71004); /* GCC_GPU_CFG_AHB_CLK */
        regmap_update_bits(regmap, 0x7100C, BIT(13), BIT(13));
 
        ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks,
index ae21473815596d55bfabd2e5532350274b7cc150..5261bfc92b3dc3d6c5c2b09d7b7e6a4b6d500d50 100644
@@ -3347,6 +3347,19 @@ static struct clk_branch gcc_ufs_card_2_unipro_core_clk = {
        },
 };
 
+static struct clk_branch gcc_ufs_card_clkref_en = {
+       .halt_reg = 0x8c004,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8c004,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gcc_ufs_card_clkref_en",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
 static struct clk_branch gcc_ufs_card_ahb_clk = {
        .halt_reg = 0x75014,
        .halt_check = BRANCH_HALT,
@@ -3561,6 +3574,19 @@ static struct clk_branch gcc_ufs_card_unipro_core_hw_ctl_clk = {
        },
 };
 
+static struct clk_branch gcc_ufs_mem_clkref_en = {
+       .halt_reg = 0x8c000,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x8c000,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gcc_ufs_mem_clkref_en",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
 static struct clk_branch gcc_ufs_phy_ahb_clk = {
        .halt_reg = 0x77014,
        .halt_check = BRANCH_HALT,
@@ -4413,6 +4439,7 @@ static struct clk_regmap *gcc_sc8180x_clocks[] = {
        [GCC_UFS_CARD_2_TX_SYMBOL_0_CLK] = &gcc_ufs_card_2_tx_symbol_0_clk.clkr,
        [GCC_UFS_CARD_2_UNIPRO_CORE_CLK] = &gcc_ufs_card_2_unipro_core_clk.clkr,
        [GCC_UFS_CARD_2_UNIPRO_CORE_CLK_SRC] = &gcc_ufs_card_2_unipro_core_clk_src.clkr,
+       [GCC_UFS_CARD_CLKREF_EN] = &gcc_ufs_card_clkref_en.clkr,
        [GCC_UFS_CARD_AHB_CLK] = &gcc_ufs_card_ahb_clk.clkr,
        [GCC_UFS_CARD_AXI_CLK] = &gcc_ufs_card_axi_clk.clkr,
        [GCC_UFS_CARD_AXI_CLK_SRC] = &gcc_ufs_card_axi_clk_src.clkr,
@@ -4429,6 +4456,7 @@ static struct clk_regmap *gcc_sc8180x_clocks[] = {
        [GCC_UFS_CARD_UNIPRO_CORE_CLK] = &gcc_ufs_card_unipro_core_clk.clkr,
        [GCC_UFS_CARD_UNIPRO_CORE_CLK_SRC] = &gcc_ufs_card_unipro_core_clk_src.clkr,
        [GCC_UFS_CARD_UNIPRO_CORE_HW_CTL_CLK] = &gcc_ufs_card_unipro_core_hw_ctl_clk.clkr,
+       [GCC_UFS_MEM_CLKREF_EN] = &gcc_ufs_mem_clkref_en.clkr,
        [GCC_UFS_PHY_AHB_CLK] = &gcc_ufs_phy_ahb_clk.clkr,
        [GCC_UFS_PHY_AXI_CLK] = &gcc_ufs_phy_axi_clk.clkr,
        [GCC_UFS_PHY_AXI_CLK_SRC] = &gcc_ufs_phy_axi_clk_src.clkr,
@@ -4528,9 +4556,9 @@ static const struct qcom_reset_map gcc_sc8180x_resets[] = {
        [GCC_USB30_PRIM_BCR] = { 0xf000 },
        [GCC_USB30_SEC_BCR] = { 0x10000 },
        [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 },
-       [GCC_VIDEO_AXIC_CLK_BCR] = { 0xb02c, 2 },
-       [GCC_VIDEO_AXI0_CLK_BCR] = { 0xb024, 2 },
-       [GCC_VIDEO_AXI1_CLK_BCR] = { 0xb028, 2 },
+       [GCC_VIDEO_AXIC_CLK_BCR] = { .reg = 0xb02c, .bit = 2, .udelay = 150 },
+       [GCC_VIDEO_AXI0_CLK_BCR] = { .reg = 0xb024, .bit = 2, .udelay = 150 },
+       [GCC_VIDEO_AXI1_CLK_BCR] = { .reg = 0xb028, .bit = 2, .udelay = 150 },
 };
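
[Editor's note] The reset-map entries above move from positional { reg, bit } initializers to designated ones so the new udelay member can be filled in. The value is presumably consumed by the qcom reset controller's reset() op, which asserts the line, waits, then deasserts; a hedged sketch (field widths and the sleep primitive are assumptions):

        /* Assumed shape of the entry in drivers/clk/qcom/reset.h */
        struct qcom_reset_map {
                unsigned int reg;
                u8 bit;
                unsigned int udelay;    /* microseconds to hold the reset asserted */
        };

        /* and, roughly, in the reset() op: assert; fsleep(map->udelay ?: 1); deassert */

The per-SoC values (150 us here, 400 us or 1000 us on newer parts) presumably reflect how long the video AXI resets need to take effect.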
 
 static struct gdsc *gcc_sc8180x_gdscs[] = {
@@ -4579,23 +4607,17 @@ static int gcc_sc8180x_probe(struct platform_device *pdev)
        if (IS_ERR(regmap))
                return PTR_ERR(regmap);
 
-       /*
-        * Enable the following always-on clocks:
-        * GCC_VIDEO_AHB_CLK, GCC_CAMERA_AHB_CLK, GCC_DISP_AHB_CLK,
-        * GCC_VIDEO_XO_CLK, GCC_CAMERA_XO_CLK, GCC_DISP_XO_CLK,
-        * GCC_CPUSS_GNOC_CLK, GCC_CPUSS_DVM_BUS_CLK, GCC_NPU_CFG_AHB_CLK and
-        * GCC_GPU_CFG_AHB_CLK
-        */
-       regmap_update_bits(regmap, 0xb004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0xb008, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0xb00c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0xb040, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0xb044, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0xb048, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x48004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x48190, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x4d004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0xb004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0xb008); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0xb00c); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0xb040); /* GCC_VIDEO_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0xb044); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0xb048); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x48004); /* GCC_CPUSS_GNOC_CLK */
+       qcom_branch_set_clk_en(regmap, 0x48190); /* GCC_CPUSS_DVM_BUS_CLK */
+       qcom_branch_set_clk_en(regmap, 0x4d004); /* GCC_NPU_CFG_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x71004); /* GCC_GPU_CFG_AHB_CLK */
 
        /* Disable the GPLL0 active input to NPU and GPU via MISC registers */
        regmap_update_bits(regmap, 0x4d110, 0x3, 0x3);
index bfb77931e8686d8258854cdd131792aa9c82e715..082d7b5504ebba373e79030b57ca477064f283ac 100644 (file)
@@ -7448,8 +7448,8 @@ static const struct qcom_reset_map gcc_sc8280xp_resets[] = {
        [GCC_USB4PHY_PHY_PRIM_BCR] = { 0x4a004 },
        [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 },
        [GCC_VIDEO_BCR] = { 0x28000 },
-       [GCC_VIDEO_AXI0_CLK_ARES] = { 0x28010, 2 },
-       [GCC_VIDEO_AXI1_CLK_ARES] = { 0x28018, 2 },
+       [GCC_VIDEO_AXI0_CLK_ARES] = { .reg = 0x28010, .bit = 2, .udelay = 400 },
+       [GCC_VIDEO_AXI1_CLK_ARES] = { .reg = 0x28018, .bit = 2, .udelay = 400 },
 };
 
 static struct gdsc *gcc_sc8280xp_gdscs[] = {
@@ -7543,21 +7543,16 @@ static int gcc_sc8280xp_probe(struct platform_device *pdev)
                goto err_put_rpm;
        }
 
-       /*
-        * Keep the clocks always-ON
-        * GCC_CAMERA_AHB_CLK, GCC_CAMERA_XO_CLK, GCC_DISP_AHB_CLK,
-        * GCC_DISP_XO_CLK, GCC_GPU_CFG_AHB_CLK, GCC_VIDEO_AHB_CLK,
-        * GCC_VIDEO_XO_CLK, GCC_DISP1_AHB_CLK, GCC_DISP1_XO_CLK
-        */
-       regmap_update_bits(regmap, 0x26004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x26020, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x27004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x27028, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x28004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x28028, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0xbb004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0xbb028, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x26004); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x26020); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x27004); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x27028); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x71004); /* GCC_GPU_CFG_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x28004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x28028); /* GCC_VIDEO_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0xbb004); /* GCC_DISP1_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0xbb028); /* GCC_DISP1_XO_CLK */
 
        ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks, ARRAY_SIZE(gcc_dfs_clocks));
        if (ret)
index 725cd52d2398ede3b4bcf42a7c1690dc86663cfc..ea4c3bf4fb9bf7f35d73db4a690099b4df3f7286 100644 (file)
@@ -4037,3 +4037,4 @@ module_exit(gcc_sdm845_exit);
 MODULE_DESCRIPTION("QTI GCC SDM845 Driver");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:gcc-sdm845");
+MODULE_SOFTDEP("pre: rpmhpd");
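
[Editor's note] The soft dependency asks modprobe to load the rpmhpd power-domain driver before gcc-sdm845, so the GDSCs' parent domain is available when this driver probes instead of forcing repeated probe deferrals; it is a hint to userspace module loading, not a hard symbol dependency.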
index d5e17122698cda6b46e7760fd64dfc9538aeabc4..26279b8d321a3091caa8f93cea00fb86e08a8ee9 100644 (file)
@@ -1611,14 +1611,10 @@ static int gcc_sdx55_probe(struct platform_device *pdev)
        if (IS_ERR(regmap))
                return PTR_ERR(regmap);
 
-       /*
-        * Keep the clocks always-ON as they are critical to the functioning
-        * of the system:
-        * GCC_SYS_NOC_CPUSS_AHB_CLK, GCC_CPUSS_AHB_CLK, GCC_CPUSS_GNOC_CLK
-        */
-       regmap_update_bits(regmap, 0x6d008, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x6d008, BIT(21), BIT(21));
-       regmap_update_bits(regmap, 0x6d008, BIT(22), BIT(22));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x6d008); /* GCC_SYS_NOC_CPUSS_AHB_CLK */
+       regmap_update_bits(regmap, 0x6d008, BIT(21), BIT(21)); /* GCC_CPUSS_AHB_CLK */
+       regmap_update_bits(regmap, 0x6d008, BIT(22), BIT(22)); /* GCC_CPUSS_GNOC_CLK */
 
        return qcom_cc_really_probe(pdev, &gcc_sdx55_desc, regmap);
 }
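
[Editor's note] Only the BIT(0) write maps onto the new helper here: qcom_branch_set_clk_en() targets the branch-enable bit, while bits 21 and 22 of the same 0x6d008 register appear to be separate always-on enables for the CPUSS clocks and therefore stay as explicit regmap_update_bits() calls.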
index ffddbed5a6dba64e988a74d84f6cb31bba4e7d97..8fde6463574b92a5cef5169061205d85f2da75c4 100644 (file)
@@ -1574,14 +1574,11 @@ static int gcc_sdx65_probe(struct platform_device *pdev)
        regmap = qcom_cc_map(pdev, &gcc_sdx65_desc);
        if (IS_ERR(regmap))
                return PTR_ERR(regmap);
-       /*
-        * Keep the clocks always-ON as they are critical to the functioning
-        * of the system:
-        * GCC_SYS_NOC_CPUSS_AHB_CLK, GCC_CPUSS_AHB_CLK, GCC_CPUSS_GNOC_CLK
-        */
-       regmap_update_bits(regmap, 0x6d008, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x6d008, BIT(21), BIT(21));
-       regmap_update_bits(regmap, 0x6d008, BIT(22), BIT(22));
+
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x6d008); /* GCC_SYS_NOC_CPUSS_AHB_CLK */
+       regmap_update_bits(regmap, 0x6d008, BIT(21), BIT(21)); /* GCC_CPUSS_AHB_CLK */
+       regmap_update_bits(regmap, 0x6d008, BIT(22), BIT(22)); /* GCC_CPUSS_GNOC_CLK */
 
        return qcom_cc_really_probe(pdev, &gcc_sdx65_desc, regmap);
 }
index 573af17bd24caa3b7f4520a22687e15a8a0e25da..c51338f08ef16f29afddb91b84787d377cf1c2b8 100644 (file)
@@ -2936,13 +2936,9 @@ static int gcc_sdx75_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       /*
-        * Keep clocks always enabled:
-        * gcc_ahb_pcie_link_clk
-        * gcc_xo_pcie_link_clk
-        */
-       regmap_update_bits(regmap, 0x3e004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x3e008, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x3e004); /* GCC_AHB_PCIE_LINK_CLK */
+       qcom_branch_set_clk_en(regmap, 0x3e008); /* GCC_XO_PCIE_LINK_CLK */
 
        return qcom_cc_really_probe(pdev, &gcc_sdx75_desc, regmap);
 }
index 31abe2775fc8391fd577db68d3699981e0b7a112..062e55e981569bafb00dfefc2b50cc93a010c30c 100644 (file)
@@ -2791,8 +2791,8 @@ static const struct qcom_reset_map gcc_sm4450_resets[] = {
        [GCC_VENUS_BCR] = { 0xb601c },
        [GCC_VIDEO_BCR] = { 0x42000 },
        [GCC_VIDEO_VENUS_BCR] = { 0xb6000 },
-       [GCC_VENUS_CTL_AXI_CLK_ARES] = { 0x4201c, 2 },
-       [GCC_VIDEO_VENUS_CTL_CLK_ARES] = { 0xb6038, 2 },
+       [GCC_VENUS_CTL_AXI_CLK_ARES] = { .reg = 0x4201c, .bit = 2, .udelay = 400 },
+       [GCC_VIDEO_VENUS_CTL_CLK_ARES] = { .reg = 0xb6038, .bit = 2, .udelay = 400 },
 };
 
 static const struct clk_rcg_dfs_data gcc_dfs_clocks[] = {
@@ -2849,25 +2849,15 @@ static int gcc_sm4450_probe(struct platform_device *pdev)
 
        qcom_branch_set_force_mem_core(regmap, gcc_ufs_phy_ice_core_clk, true);
 
-       /*
-        * Keep clocks always enabled:
-        * gcc_camera_ahb_clk
-        * gcc_camera_sleep_clk
-        * gcc_camera_xo_clk
-        * gcc_disp_ahb_clk
-        * gcc_disp_xo_clk
-        * gcc_gpu_cfg_ahb_clk
-        * gcc_video_ahb_clk
-        * gcc_video_xo_clk
-        */
-       regmap_update_bits(regmap, 0x36004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x36018, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x3601c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x37004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x37014, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x81004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x42004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x42018, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x36004); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x36018); /* GCC_CAMERA_SLEEP_CLK */
+       qcom_branch_set_clk_en(regmap, 0x3601c); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x37004); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x37014); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x81004); /* GCC_GPU_CFG_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x42004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x42018); /* GCC_VIDEO_XO_CLK */
 
        regmap_update_bits(regmap, 0x4201c, BIT(21), BIT(21));
 
index 3dd15d765b22e1efc2feb8be8acca56d91927f75..84639d5b89bfb7e2a672aac5fb7aff70ac609882 100644 (file)
@@ -3882,13 +3882,10 @@ static int gcc_sm6375_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       /*
-        * Keep the following clocks always on:
-        * GCC_CAMERA_XO_CLK, GCC_CPUSS_GNOC_CLK, GCC_DISP_XO_CLK
-        */
-       regmap_update_bits(regmap, 0x17028, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x2b004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x1702c, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x17028); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x2b004); /* GCC_CPUSS_GNOC_CLK */
+       qcom_branch_set_clk_en(regmap, 0x1702c); /* GCC_DISP_XO_CLK */
 
        clk_lucid_pll_configure(&gpll10, regmap, &gpll10_config);
        clk_lucid_pll_configure(&gpll11, regmap, &gpll11_config);
index d9983bb27475648944c329a121d57dca80fc3f58..44b49f7cd178351ac32a6eac94664e7361a67057 100644 (file)
@@ -2918,7 +2918,7 @@ static const struct qcom_reset_map gcc_sm7150_resets[] = {
        [GCC_USB3_PHY_PRIM_BCR] = { 0x50000 },
        [GCC_USB3_PHY_SEC_BCR] = { 0x5000c },
        [GCC_QUSB2PHY_PRIM_BCR] = { 0x26000 },
-       [GCC_VIDEO_AXI_CLK_BCR] = { 0xb01c, 2 },
+       [GCC_VIDEO_AXI_CLK_BCR] = { .reg = 0xb01c, .bit = 2, .udelay = 150 },
 };
 
 static const struct clk_rcg_dfs_data gcc_sm7150_dfs_desc[] = {
@@ -3002,20 +3002,15 @@ static int gcc_sm7150_probe(struct platform_device *pdev)
        regmap_update_bits(regmap, 0x4d110, 0x3, 0x3);
        regmap_update_bits(regmap, 0x71028, 0x3, 0x3);
 
-       /*
-        * Keep the critical clocks always-ON
-        * GCC_CPUSS_GNOC_CLK, GCC_VIDEO_AHB_CLK, GCC_CAMERA_AHB_CLK,
-        * GCC_DISP_AHB_CLK, GCC_CAMERA_XO_CLK, GCC_VIDEO_XO_CLK,
-        * GCC_DISP_XO_CLK, GCC_GPU_CFG_AHB_CLK
-        */
-       regmap_update_bits(regmap, 0x48004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b008, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b00c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b02c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b028, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b030, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x48004); /* GCC_CPUSS_GNOC_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b008); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b00c); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b02c); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b028); /* GCC_VIDEO_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b030); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x71004); /* GCC_GPU_CFG_AHB_CLK */
 
        ret = qcom_cc_register_rcg_dfs(regmap, gcc_sm7150_dfs_desc,
                                        ARRAY_SIZE(gcc_sm7150_dfs_desc));
index 05d115c52dfebbb4f2d6101ba2f580031326d4f4..a47ef9dfa8080e06b51309fab45c7b687728fdda 100644 (file)
@@ -453,19 +453,29 @@ static const struct freq_tbl ftbl_gcc_qupv3_wrap0_s0_clk_src[] = {
        { }
 };
 
+static struct clk_init_data gcc_qupv3_wrap0_s0_clk_src_init = {
+       .name = "gcc_qupv3_wrap0_s0_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
+};
+
 static struct clk_rcg2 gcc_qupv3_wrap0_s0_clk_src = {
        .cmd_rcgr = 0x17148,
        .mnd_width = 16,
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap0_s0_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap0_s0_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s1_clk_src_init = {
+       .name = "gcc_qupv3_wrap0_s1_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
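
[Editor's note] The pattern repeated through this file pulls each anonymous compound-literal clk_init_data out into a named, non-const *_init variable. That is what the DFS conversion at the bottom of the file needs: the DEFINE_RCG_DFS() entries must be able to name the init data so the clock's ops can be swapped for the DFS-aware variants at registration time. A rough sketch of the glue, assuming the definitions in drivers/clk/qcom/clk-rcg.h:

        /* Assumed pairing consumed by qcom_cc_register_rcg_dfs() */
        struct clk_rcg_dfs_data {
                struct clk_rcg2 *rcg;
                struct clk_init_data *init;
        };

        #define DEFINE_RCG_DFS(r) \
                { .rcg = &r, .init = &r##_init }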
 
 static struct clk_rcg2 gcc_qupv3_wrap0_s1_clk_src = {
@@ -474,13 +484,15 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s1_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap0_s1_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap0_s1_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s2_clk_src_init = {
+       .name = "gcc_qupv3_wrap0_s2_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap0_s2_clk_src = {
@@ -489,13 +501,15 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s2_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap0_s2_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap0_s2_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s3_clk_src_init = {
+       .name = "gcc_qupv3_wrap0_s3_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap0_s3_clk_src = {
@@ -504,13 +518,15 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s3_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap0_s3_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap0_s3_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s4_clk_src_init = {
+       .name = "gcc_qupv3_wrap0_s4_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap0_s4_clk_src = {
@@ -519,13 +535,15 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s4_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap0_s4_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap0_s4_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s5_clk_src_init = {
+       .name = "gcc_qupv3_wrap0_s5_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap0_s5_clk_src = {
@@ -534,13 +552,15 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s5_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap0_s5_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap0_s5_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s6_clk_src_init = {
+       .name = "gcc_qupv3_wrap0_s6_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap0_s6_clk_src = {
@@ -549,13 +569,15 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s6_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap0_s6_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap0_s6_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap0_s7_clk_src_init = {
+       .name = "gcc_qupv3_wrap0_s7_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap0_s7_clk_src = {
@@ -564,13 +586,15 @@ static struct clk_rcg2 gcc_qupv3_wrap0_s7_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap0_s7_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap0_s7_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s0_clk_src_init = {
+       .name = "gcc_qupv3_wrap1_s0_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap1_s0_clk_src = {
@@ -579,13 +603,15 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s0_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap1_s0_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap1_s0_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s1_clk_src_init = {
+       .name = "gcc_qupv3_wrap1_s1_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap1_s1_clk_src = {
@@ -594,13 +620,15 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s1_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap1_s1_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap1_s1_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s2_clk_src_init = {
+       .name = "gcc_qupv3_wrap1_s2_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap1_s2_clk_src = {
@@ -609,13 +637,15 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s2_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap1_s2_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap1_s2_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s3_clk_src_init = {
+       .name = "gcc_qupv3_wrap1_s3_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap1_s3_clk_src = {
@@ -624,13 +654,15 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s3_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap1_s3_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap1_s3_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s4_clk_src_init = {
+       .name = "gcc_qupv3_wrap1_s4_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap1_s4_clk_src = {
@@ -639,13 +671,15 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s4_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap1_s4_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap1_s4_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap1_s5_clk_src_init = {
+       .name = "gcc_qupv3_wrap1_s5_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap1_s5_clk_src = {
@@ -654,13 +688,15 @@ static struct clk_rcg2 gcc_qupv3_wrap1_s5_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap1_s5_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap1_s5_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap2_s0_clk_src_init = {
+       .name = "gcc_qupv3_wrap2_s0_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap2_s0_clk_src = {
@@ -669,13 +705,15 @@ static struct clk_rcg2 gcc_qupv3_wrap2_s0_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap2_s0_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap2_s0_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap2_s1_clk_src_init = {
+       .name = "gcc_qupv3_wrap2_s1_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap2_s1_clk_src = {
@@ -684,13 +722,15 @@ static struct clk_rcg2 gcc_qupv3_wrap2_s1_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap2_s1_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap2_s1_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap2_s2_clk_src_init = {
+       .name = "gcc_qupv3_wrap2_s2_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap2_s2_clk_src = {
@@ -699,13 +739,15 @@ static struct clk_rcg2 gcc_qupv3_wrap2_s2_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap2_s2_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap2_s2_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap2_s3_clk_src_init = {
+       .name = "gcc_qupv3_wrap2_s3_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap2_s3_clk_src = {
@@ -714,13 +756,15 @@ static struct clk_rcg2 gcc_qupv3_wrap2_s3_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap2_s3_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap2_s3_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap2_s4_clk_src_init = {
+       .name = "gcc_qupv3_wrap2_s4_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap2_s4_clk_src = {
@@ -729,13 +773,15 @@ static struct clk_rcg2 gcc_qupv3_wrap2_s4_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap2_s4_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap2_s4_clk_src_init,
+};
+
+static struct clk_init_data gcc_qupv3_wrap2_s5_clk_src_init = {
+       .name = "gcc_qupv3_wrap2_s5_clk_src",
+       .parent_data = gcc_parents_0,
+       .num_parents = ARRAY_SIZE(gcc_parents_0),
+       .flags = CLK_SET_RATE_PARENT,
+       .ops = &clk_rcg2_ops,
 };
 
 static struct clk_rcg2 gcc_qupv3_wrap2_s5_clk_src = {
@@ -744,13 +790,7 @@ static struct clk_rcg2 gcc_qupv3_wrap2_s5_clk_src = {
        .hid_width = 5,
        .parent_map = gcc_parent_map_0,
        .freq_tbl = ftbl_gcc_qupv3_wrap0_s0_clk_src,
-       .clkr.hw.init = &(struct clk_init_data){
-               .name = "gcc_qupv3_wrap2_s5_clk_src",
-               .parent_data = gcc_parents_0,
-               .num_parents = ARRAY_SIZE(gcc_parents_0),
-               .flags = CLK_SET_RATE_PARENT,
-               .ops = &clk_rcg2_ops,
-       },
+       .clkr.hw.init = &gcc_qupv3_wrap2_s5_clk_src_init,
 };
 
 static const struct freq_tbl ftbl_gcc_sdcc2_apps_clk_src[] = {
@@ -3738,6 +3778,9 @@ static const struct qcom_reset_map gcc_sm8150_resets[] = {
        [GCC_USB30_PRIM_BCR] = { 0xf000 },
        [GCC_USB30_SEC_BCR] = { 0x10000 },
        [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 },
+       [GCC_VIDEO_AXIC_CLK_BCR] = { 0xb02c, 2 },
+       [GCC_VIDEO_AXI0_CLK_BCR] = { 0xb024, 2 },
+       [GCC_VIDEO_AXI1_CLK_BCR] = { 0xb028, 2 },
 };
 
 static struct gdsc *gcc_sm8150_gdscs[] = {
@@ -3750,6 +3793,29 @@ static struct gdsc *gcc_sm8150_gdscs[] = {
        [USB30_SEC_GDSC] = &usb30_sec_gdsc,
 };
 
+static const struct clk_rcg_dfs_data gcc_dfs_clocks[] = {
+       DEFINE_RCG_DFS(gcc_qupv3_wrap0_s0_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap0_s1_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap0_s2_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap0_s3_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap0_s4_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap0_s5_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap0_s6_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap0_s7_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap1_s0_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap1_s1_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap1_s2_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap1_s3_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap1_s4_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap1_s5_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap2_s0_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap2_s1_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap2_s2_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap2_s3_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap2_s4_clk_src),
+       DEFINE_RCG_DFS(gcc_qupv3_wrap2_s5_clk_src),
+};
+
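[Editor's note] When registered, each of these QUP RCGs is probed for hardware DFS (dynamic frequency switching) support: where the firmware has handed the serial-engine clock over to DFS, the driver is expected to read the performance levels back from the DFS registers and switch the clock to the DFS-aware ops, which is why the init data above had to become writable named variables.
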
 static const struct regmap_config gcc_sm8150_regmap_config = {
        .reg_bits       = 32,
        .reg_stride     = 4,
@@ -3777,6 +3843,7 @@ MODULE_DEVICE_TABLE(of, gcc_sm8150_match_table);
 static int gcc_sm8150_probe(struct platform_device *pdev)
 {
        struct regmap *regmap;
+       int ret;
 
        regmap = qcom_cc_map(pdev, &gcc_sm8150_desc);
        if (IS_ERR(regmap))
@@ -3786,6 +3853,11 @@ static int gcc_sm8150_probe(struct platform_device *pdev)
        regmap_update_bits(regmap, 0x4d110, 0x3, 0x3);
        regmap_update_bits(regmap, 0x71028, 0x3, 0x3);
 
+       ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks,
+                                      ARRAY_SIZE(gcc_dfs_clocks));
+       if (ret)
+               return dev_err_probe(&pdev->dev, ret, "Failed to register with DFS!\n");
+
        return qcom_cc_really_probe(pdev, &gcc_sm8150_desc, regmap);
 }
 
index c6c5261264f118f691dbb65f08b9b7a169bfdc9b..e630bfa2d0c17957f4188c0478a8dbe3d5bea27a 100644 (file)
@@ -3576,8 +3576,8 @@ static const struct qcom_reset_map gcc_sm8250_resets[] = {
        [GCC_USB3PHY_PHY_PRIM_BCR] = { 0x50004 },
        [GCC_USB3PHY_PHY_SEC_BCR] = { 0x50010 },
        [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 },
-       [GCC_VIDEO_AXI0_CLK_ARES] = { 0xb024, 2 },
-       [GCC_VIDEO_AXI1_CLK_ARES] = { 0xb028, 2 },
+       [GCC_VIDEO_AXI0_CLK_ARES] = { .reg = 0xb024, .bit = 2, .udelay = 150 },
+       [GCC_VIDEO_AXI1_CLK_ARES] = { .reg = 0xb028, .bit = 2, .udelay = 150 },
 };
 
 static const struct clk_rcg_dfs_data gcc_dfs_clocks[] = {
@@ -3643,18 +3643,13 @@ static int gcc_sm8250_probe(struct platform_device *pdev)
        regmap_update_bits(regmap, 0x4d110, 0x3, 0x3);
        regmap_update_bits(regmap, 0x71028, 0x3, 0x3);
 
-       /*
-        * Keep the clocks always-ON
-        * GCC_VIDEO_AHB_CLK, GCC_CAMERA_AHB_CLK, GCC_DISP_AHB_CLK,
-        * GCC_CPUSS_DVM_BUS_CLK, GCC_GPU_CFG_AHB_CLK,
-        * GCC_SYS_NOC_CPUSS_AHB_CLK
-        */
-       regmap_update_bits(regmap, 0x0b004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b008, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x0b00c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x4818c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x52000, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x0b004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b008); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x0b00c); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x4818c); /* GCC_CPUSS_DVM_BUS_CLK */
+       qcom_branch_set_clk_en(regmap, 0x71004); /* GCC_GPU_CFG_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x52000); /* GCC_SYS_NOC_CPUSS_AHB_CLK */
 
        ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks,
                                       ARRAY_SIZE(gcc_dfs_clocks));
index 1385a98eb3bbebf08743a3eab92f2a762d35814b..fc0402e8a2a75d6b0b7f9449341fb888568389e2 100644 (file)
@@ -3743,8 +3743,8 @@ static const struct qcom_reset_map gcc_sm8350_resets[] = {
        [GCC_USB3PHY_PHY_PRIM_BCR] = { 0x50004 },
        [GCC_USB3PHY_PHY_SEC_BCR] = { 0x50010 },
        [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 },
-       [GCC_VIDEO_AXI0_CLK_ARES] = { 0x28010, 2 },
-       [GCC_VIDEO_AXI1_CLK_ARES] = { 0x28018, 2 },
+       [GCC_VIDEO_AXI0_CLK_ARES] = { .reg = 0x28010, .bit = 2, .udelay = 400 },
+       [GCC_VIDEO_AXI1_CLK_ARES] = { .reg = 0x28018, .bit = 2, .udelay = 400 },
        [GCC_VIDEO_BCR] = { 0x28000 },
 };
 
@@ -3806,18 +3806,14 @@ static int gcc_sm8350_probe(struct platform_device *pdev)
                return PTR_ERR(regmap);
        }
 
-       /*
-        * Keep the critical clock always-On
-        * GCC_CAMERA_AHB_CLK, GCC_CAMERA_XO_CLK, GCC_DISP_AHB_CLK, GCC_DISP_XO_CLK,
-        * GCC_GPU_CFG_AHB_CLK, GCC_VIDEO_AHB_CLK, GCC_VIDEO_XO_CLK
-        */
-       regmap_update_bits(regmap, 0x26004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x26018, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x27004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x2701c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x28004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x28020, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x26004); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x26018); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x27004); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x2701c); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x71004); /* GCC_GPU_CFG_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x28004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x28020); /* GCC_VIDEO_XO_CLK */
 
        ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks, ARRAY_SIZE(gcc_dfs_clocks));
        if (ret)
index 563542982551601cd2bb1acda4b0e8e38eec6f85..e86c58bc5e48bc89aaa06cb07d6412b5d31d0f88 100644 (file)
@@ -3202,8 +3202,8 @@ static const struct qcom_reset_map gcc_sm8450_resets[] = {
        [GCC_USB3PHY_PHY_PRIM_BCR] = { 0x60004 },
        [GCC_USB3PHY_PHY_SEC_BCR] = { 0x60010 },
        [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x7a000 },
-       [GCC_VIDEO_AXI0_CLK_ARES] = { 0x42018, 2 },
-       [GCC_VIDEO_AXI1_CLK_ARES] = { 0x42020, 2 },
+       [GCC_VIDEO_AXI0_CLK_ARES] = { .reg = 0x42018, .bit = 2, .udelay = 1000 },
+       [GCC_VIDEO_AXI1_CLK_ARES] = { .reg = 0x42020, .bit = 2, .udelay = 1000 },
        [GCC_VIDEO_BCR] = { 0x42000 },
 };
 
@@ -3280,19 +3280,14 @@ static int gcc_sm8450_probe(struct platform_device *pdev)
        /* FORCE_MEM_CORE_ON for ufs phy ice core clocks */
        regmap_update_bits(regmap, gcc_ufs_phy_ice_core_clk.halt_reg, BIT(14), BIT(14));
 
-       /*
-        * Keep the critical clock always-On
-        * gcc_camera_ahb_clk, gcc_camera_xo_clk, gcc_disp_ahb_clk,
-        * gcc_disp_xo_clk, gcc_gpu_cfg_ahb_clk, gcc_video_ahb_clk,
-        * gcc_video_xo_clk
-        */
-       regmap_update_bits(regmap, 0x36004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x36020, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x37004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x3701c, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x81004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x42004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x42028, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x36004); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x36020); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x37004); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x3701c); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x81004); /* GCC_GPU_CFG_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x42004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x42028); /* GCC_VIDEO_XO_CLK */
 
        return qcom_cc_really_probe(pdev, &gcc_sm8450_desc, regmap);
 }
index b883dffe5f7aaa86ad903839ba2e42a3c4c5be70..26d7349e7642483366f6cb8e5345cac0cb90a176 100644 (file)
@@ -3276,8 +3276,8 @@ static const struct qcom_reset_map gcc_sm8550_resets[] = {
        [GCC_USB3PHY_PHY_PRIM_BCR] = { 0x50004 },
        [GCC_USB3PHY_PHY_SEC_BCR] = { 0x50010 },
        [GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 },
-       [GCC_VIDEO_AXI0_CLK_ARES] = { 0x32018, 2 },
-       [GCC_VIDEO_AXI1_CLK_ARES] = { 0x32024, 2 },
+       [GCC_VIDEO_AXI0_CLK_ARES] = { .reg = 0x32018, .bit = 2, .udelay = 1000 },
+       [GCC_VIDEO_AXI1_CLK_ARES] = { .reg = 0x32024, .bit = 2, .udelay = 1000 },
        [GCC_VIDEO_BCR] = { 0x32000 },
 };
 
@@ -3352,19 +3352,14 @@ static int gcc_sm8550_probe(struct platform_device *pdev)
        /* FORCE_MEM_CORE_ON for ufs phy ice core clocks */
        regmap_update_bits(regmap, gcc_ufs_phy_ice_core_clk.halt_reg, BIT(14), BIT(14));
 
-       /*
-        * Keep the critical clock always-On
-        * gcc_camera_ahb_clk, gcc_camera_xo_clk, gcc_disp_ahb_clk,
-        * gcc_disp_xo_clk, gcc_gpu_cfg_ahb_clk, gcc_video_ahb_clk,
-        * gcc_video_xo_clk
-        */
-       regmap_update_bits(regmap, 0x26004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x26028, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x27004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x27018, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x32004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x32030, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x26004); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x26028); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x27004); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x27018); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x71004); /* GCC_GPU_CFG_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x32004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x32030); /* GCC_VIDEO_XO_CLK */
 
        /* Clear GDSC_SLEEP_ENA_VOTE to stop votes being auto-removed in sleep. */
        regmap_write(regmap, 0x52024, 0x0);
index 9174dd82308c2b3efb10eec188744069228c6629..9d1cbdf860fb3b6d0a5ee308ebbb985029e1c779 100644 (file)
@@ -3734,8 +3734,8 @@ static const struct qcom_reset_map gcc_sm8650_resets[] = {
        [GCC_USB3_PHY_SEC_BCR] = { 0x5000c },
        [GCC_USB3PHY_PHY_PRIM_BCR] = { 0x50004 },
        [GCC_USB3PHY_PHY_SEC_BCR] = { 0x50010 },
-       [GCC_VIDEO_AXI0_CLK_ARES] = { 0x32018, 2 },
-       [GCC_VIDEO_AXI1_CLK_ARES] = { 0x32024, 2 },
+       [GCC_VIDEO_AXI0_CLK_ARES] = { .reg = 0x32018, .bit = 2, .udelay = 1000 },
+       [GCC_VIDEO_AXI1_CLK_ARES] = { .reg = 0x32024, .bit = 2, .udelay = 1000 },
        [GCC_VIDEO_BCR] = { 0x32000 },
 };
 
@@ -3808,14 +3808,14 @@ static int gcc_sm8650_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       /* Keep the critical clock always-On */
-       regmap_update_bits(regmap, 0x26004, BIT(0), BIT(0)); /* gcc_camera_ahb_clk */
-       regmap_update_bits(regmap, 0x26028, BIT(0), BIT(0)); /* gcc_camera_xo_clk */
-       regmap_update_bits(regmap, 0x27004, BIT(0), BIT(0)); /* gcc_disp_ahb_clk */
-       regmap_update_bits(regmap, 0x27018, BIT(0), BIT(0)); /* gcc_disp_xo_clk */
-       regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0)); /* gcc_gpu_cfg_ahb_clk */
-       regmap_update_bits(regmap, 0x32004, BIT(0), BIT(0)); /* gcc_video_ahb_clk */
-       regmap_update_bits(regmap, 0x32030, BIT(0), BIT(0)); /* gcc_video_xo_clk */
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x26004); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x26028); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x27004); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x27018); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x71004); /* GCC_GPU_CFG_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x32004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x32030); /* GCC_VIDEO_XO_CLK */
 
        qcom_branch_set_force_mem_core(regmap, gcc_ufs_phy_ice_core_clk, true);
 
index d7182d6e978372ce467da5d0c7b4fd0613eb0d8b..1404017be9180aa78c534f912396018059eb8f53 100644 (file)
@@ -6769,14 +6769,14 @@ static int gcc_x1e80100_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       /* Keep the critical clock always-On */
-       regmap_update_bits(regmap, 0x26004, BIT(0), BIT(0)); /* gcc_camera_ahb_clk */
-       regmap_update_bits(regmap, 0x26028, BIT(0), BIT(0)); /* gcc_camera_xo_clk */
-       regmap_update_bits(regmap, 0x27004, BIT(0), BIT(0)); /* gcc_disp_ahb_clk */
-       regmap_update_bits(regmap, 0x27018, BIT(0), BIT(0)); /* gcc_disp_xo_clk */
-       regmap_update_bits(regmap, 0x32004, BIT(0), BIT(0)); /* gcc_video_ahb_clk */
-       regmap_update_bits(regmap, 0x32030, BIT(0), BIT(0)); /* gcc_video_xo_clk */
-       regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0)); /* gcc_gpu_cfg_ahb_clk */
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x26004); /* GCC_CAMERA_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x26028); /* GCC_CAMERA_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x27004); /* GCC_DISP_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x27018); /* GCC_DISP_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x32004); /* GCC_VIDEO_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x32030); /* GCC_VIDEO_XO_CLK */
+       qcom_branch_set_clk_en(regmap, 0x71004); /* GCC_GPU_CFG_AHB_CLK */
 
        /* Clear GDSC_SLEEP_ENA_VOTE to stop votes being auto-removed in sleep. */
        regmap_write(regmap, 0x52224, 0x0);
index 5358e28122abe48d2b07d05a3794c96069750766..e7a4068b9f3906f99d9d5ca444fe903b237811b6 100644 (file)
@@ -557,7 +557,15 @@ void gdsc_unregister(struct gdsc_desc *desc)
  */
 int gdsc_gx_do_nothing_enable(struct generic_pm_domain *domain)
 {
-       /* Do nothing but give genpd the impression that we were successful */
-       return 0;
+       struct gdsc *sc = domain_to_gdsc(domain);
+       int ret = 0;
+
+       /* Enable the parent supply when it is controlled through the regulator framework. */
+       if (sc->rsupply)
+               ret = regulator_enable(sc->rsupply);
+
+       /* Do nothing with the GDSC itself */
+
+       return ret;
 }
 EXPORT_SYMBOL_GPL(gdsc_gx_do_nothing_enable);
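
[Editor's note] The GX "do nothing" enable still leaves the GDSC register itself untouched, since the GMU firmware manages the actual GX collapse, but it now powers up the external rail on platforms where the GDSC is fed by a real regulator (see the vdd-gfx supply added to the sc8280xp GX GDSC below). domain_to_gdsc() is assumed to be the usual container_of() helper:

        /* Assumed helper in drivers/clk/qcom/gdsc.c */
        #define domain_to_gdsc(domain) container_of(domain, struct gdsc, pd)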
index 26ecfa63be1939b5d877310af47c1135c3c1613f..1167c42da39dba157a4a5cc469cc3bd02358b7dd 100644 (file)
@@ -609,17 +609,7 @@ static struct platform_driver gpu_cc_sa8775p_driver = {
        },
 };
 
-static int __init gpu_cc_sa8775p_init(void)
-{
-       return platform_driver_register(&gpu_cc_sa8775p_driver);
-}
-subsys_initcall(gpu_cc_sa8775p_init);
-
-static void __exit gpu_cc_sa8775p_exit(void)
-{
-       platform_driver_unregister(&gpu_cc_sa8775p_driver);
-}
-module_exit(gpu_cc_sa8775p_exit);
+module_platform_driver(gpu_cc_sa8775p_driver);
 
 MODULE_DESCRIPTION("SA8775P GPUCC driver");
 MODULE_LICENSE("GPL");
index 3f92f0b43be608b904f8f9ee21158649c5c1bad8..66f5b48cbf87926320d57e4c6d8b1b30fd9a4b99 100644 (file)
@@ -252,17 +252,7 @@ static struct platform_driver gpu_cc_sc7180_driver = {
        },
 };
 
-static int __init gpu_cc_sc7180_init(void)
-{
-       return platform_driver_register(&gpu_cc_sc7180_driver);
-}
-subsys_initcall(gpu_cc_sc7180_init);
-
-static void __exit gpu_cc_sc7180_exit(void)
-{
-       platform_driver_unregister(&gpu_cc_sc7180_driver);
-}
-module_exit(gpu_cc_sc7180_exit);
+module_platform_driver(gpu_cc_sc7180_driver);
 
 MODULE_DESCRIPTION("QTI GPU_CC SC7180 Driver");
 MODULE_LICENSE("GPL v2");
index 1490cd45a654acf87383b42e06fb38724ab6cac1..35b394feb68da03b88a390ec40312a62c09e95cc 100644 (file)
@@ -457,12 +457,9 @@ static int gpu_cc_sc7280_probe(struct platform_device *pdev)
 
        clk_lucid_pll_configure(&gpu_cc_pll1, regmap, &gpu_cc_pll1_config);
 
-       /*
-        * Keep the clocks always-ON
-        * GPU_CC_CB_CLK, GPUCC_CX_GMU_CLK
-        */
-       regmap_update_bits(regmap, 0x1170, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x1098, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x1170); /* GPU_CC_CB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x1098); /* GPU_CC_CX_GMU_CLK */
        regmap_update_bits(regmap, 0x1098, BIT(13), BIT(13));
 
        return qcom_cc_really_probe(pdev, &gpu_cc_sc7280_desc, regmap);
@@ -476,17 +473,7 @@ static struct platform_driver gpu_cc_sc7280_driver = {
        },
 };
 
-static int __init gpu_cc_sc7280_init(void)
-{
-       return platform_driver_register(&gpu_cc_sc7280_driver);
-}
-subsys_initcall(gpu_cc_sc7280_init);
-
-static void __exit gpu_cc_sc7280_exit(void)
-{
-       platform_driver_unregister(&gpu_cc_sc7280_driver);
-}
-module_exit(gpu_cc_sc7280_exit);
+module_platform_driver(gpu_cc_sc7280_driver);
 
 MODULE_DESCRIPTION("QTI GPU_CC SC7280 Driver");
 MODULE_LICENSE("GPL v2");
index 8e147ee294eefcef7e40a53079c1c26b9edff431..3611d2d1823db00f6a60debe8883ea128f51320f 100644 (file)
@@ -399,6 +399,7 @@ static struct gdsc gx_gdsc = {
        },
        .pwrsts = PWRSTS_OFF_ON,
        .flags = CLAMP_IO | RETAIN_FF_ENABLE,
+       .supply = "vdd-gfx",
 };
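
[Editor's note] The new supply string ties the GX GDSC to its regulator. At registration time the gdsc core is assumed to resolve it roughly as below, populating the sc->rsupply that gdsc_gx_do_nothing_enable() above now consumes (exact API use is an assumption):

        /* Hedged sketch of the lookup during gdsc_register() */
        if (sc->supply) {
                sc->rsupply = devm_regulator_get_optional(dev, sc->supply);
                if (IS_ERR(sc->rsupply)) {
                        if (PTR_ERR(sc->rsupply) != -ENODEV)
                                return PTR_ERR(sc->rsupply);
                        sc->rsupply = NULL;     /* no regulator described in DT */
                }
        }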
 
 static struct gdsc *gpu_cc_sc8280xp_gdscs[] = {
@@ -444,12 +445,9 @@ static int gpu_cc_sc8280xp_probe(struct platform_device *pdev)
        clk_lucid_pll_configure(&gpu_cc_pll0, regmap, &gpu_cc_pll0_config);
        clk_lucid_pll_configure(&gpu_cc_pll1, regmap, &gpu_cc_pll1_config);
 
-       /*
-        * Keep the clocks always-ON
-        * GPU_CC_CB_CLK, GPU_CC_CXO_CLK
-        */
-       regmap_update_bits(regmap, 0x1170, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x109c, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x1170); /* GPU_CC_CB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x109c); /* GPU_CC_CXO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &gpu_cc_sc8280xp_desc, regmap);
        pm_runtime_put(&pdev->dev);
index 970d7414bdf0e18a2d7ebfdb993627cac2ae5637..c87c3215dfe3579a3b91861d0d8b0260ebeb019f 100644 (file)
@@ -203,17 +203,7 @@ static struct platform_driver gpu_cc_sdm845_driver = {
        },
 };
 
-static int __init gpu_cc_sdm845_init(void)
-{
-       return platform_driver_register(&gpu_cc_sdm845_driver);
-}
-subsys_initcall(gpu_cc_sdm845_init);
-
-static void __exit gpu_cc_sdm845_exit(void)
-{
-       platform_driver_unregister(&gpu_cc_sdm845_driver);
-}
-module_exit(gpu_cc_sdm845_exit);
+module_platform_driver(gpu_cc_sdm845_driver);
 
 MODULE_DESCRIPTION("QTI GPUCC SDM845 Driver");
 MODULE_LICENSE("GPL v2");
index c89a5b59ddb7c244272112cc2851c0dbcda6f30a..135601629cba7139e011191a0b94ed5d0d043e87 100644 (file)
@@ -315,17 +315,7 @@ static struct platform_driver gpu_cc_sm8150_driver = {
        },
 };
 
-static int __init gpu_cc_sm8150_init(void)
-{
-       return platform_driver_register(&gpu_cc_sm8150_driver);
-}
-subsys_initcall(gpu_cc_sm8150_init);
-
-static void __exit gpu_cc_sm8150_exit(void)
-{
-       platform_driver_unregister(&gpu_cc_sm8150_driver);
-}
-module_exit(gpu_cc_sm8150_exit);
+module_platform_driver(gpu_cc_sm8150_driver);
 
 MODULE_DESCRIPTION("QTI GPUCC SM8150 Driver");
 MODULE_LICENSE("GPL v2");
index 9c1f8ce32da49be336c8111b400db3bd0b1e5b07..84f7f65c8d4289b83aaf8e59074c54d2ffce9d1c 100644 (file)
@@ -331,17 +331,7 @@ static struct platform_driver gpu_cc_sm8250_driver = {
        },
 };
 
-static int __init gpu_cc_sm8250_init(void)
-{
-       return platform_driver_register(&gpu_cc_sm8250_driver);
-}
-subsys_initcall(gpu_cc_sm8250_init);
-
-static void __exit gpu_cc_sm8250_exit(void)
-{
-       platform_driver_unregister(&gpu_cc_sm8250_driver);
-}
-module_exit(gpu_cc_sm8250_exit);
+module_platform_driver(gpu_cc_sm8250_driver);
 
 MODULE_DESCRIPTION("QTI GPU_CC SM8250 Driver");
 MODULE_LICENSE("GPL v2");
index 8dc54dff983f3a9cd590e57028462a52eefbdde7..38505d1388b672e4e4450eb8b9361389cf093acf 100644 (file)
@@ -621,17 +621,7 @@ static struct platform_driver gpu_cc_sm8350_driver = {
        },
 };
 
-static int __init gpu_cc_sm8350_init(void)
-{
-       return platform_driver_register(&gpu_cc_sm8350_driver);
-}
-subsys_initcall(gpu_cc_sm8350_init);
-
-static void __exit gpu_cc_sm8350_exit(void)
-{
-       platform_driver_unregister(&gpu_cc_sm8350_driver);
-}
-module_exit(gpu_cc_sm8350_exit);
+module_platform_driver(gpu_cc_sm8350_driver);
 
 MODULE_DESCRIPTION("QTI GPU_CC SM8350 Driver");
 MODULE_LICENSE("GPL v2");
index 2fa8673424d782b619741d574394eb954019c997..4fc69c6026e5e18cf7eb537faede4fb3b72d7acf 100644 (file)
@@ -575,13 +575,9 @@ static int gpu_cc_sm8550_probe(struct platform_device *pdev)
        clk_lucid_ole_pll_configure(&gpu_cc_pll0, regmap, &gpu_cc_pll0_config);
        clk_lucid_ole_pll_configure(&gpu_cc_pll1, regmap, &gpu_cc_pll1_config);
 
-       /*
-        * Keep clocks always enabled:
-        *      gpu_cc_cxo_aon_clk
-        *      gpu_cc_demet_clk
-        */
-       regmap_update_bits(regmap, 0x9004, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x900c, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x9004); /* GPU_CC_CXO_AON_CLK */
+       qcom_branch_set_clk_en(regmap, 0x900c); /* GPU_CC_DEMET_CLK */
 
        return qcom_cc_really_probe(pdev, &gpu_cc_sm8550_desc, regmap);
 }
@@ -594,17 +590,7 @@ static struct platform_driver gpu_cc_sm8550_driver = {
        },
 };
 
-static int __init gpu_cc_sm8550_init(void)
-{
-       return platform_driver_register(&gpu_cc_sm8550_driver);
-}
-subsys_initcall(gpu_cc_sm8550_init);
-
-static void __exit gpu_cc_sm8550_exit(void)
-{
-       platform_driver_unregister(&gpu_cc_sm8550_driver);
-}
-module_exit(gpu_cc_sm8550_exit);
+module_platform_driver(gpu_cc_sm8550_driver);
 
 MODULE_DESCRIPTION("QTI GPUCC SM8550 Driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/clk/qcom/gpucc-x1e80100.c b/drivers/clk/qcom/gpucc-x1e80100.c
new file mode 100644 (file)
index 0000000..b7e79d1
--- /dev/null
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,x1e80100-gpucc.h>
+#include <dt-bindings/reset/qcom,x1e80100-gpucc.h>
+
+#include "clk-alpha-pll.h"
+#include "clk-branch.h"
+#include "clk-rcg.h"
+#include "clk-regmap.h"
+#include "clk-regmap-divider.h"
+#include "clk-regmap-mux.h"
+#include "gdsc.h"
+#include "reset.h"
+
+enum {
+       DT_BI_TCXO,
+       DT_GPLL0_OUT_MAIN,
+       DT_GPLL0_OUT_MAIN_DIV,
+};
+
+enum {
+       P_BI_TCXO,
+       P_GPLL0_OUT_MAIN,
+       P_GPLL0_OUT_MAIN_DIV,
+       P_GPU_CC_PLL0_OUT_MAIN,
+       P_GPU_CC_PLL1_OUT_MAIN,
+};
+
+static const struct pll_vco lucid_ole_vco[] = {
+       { 249600000, 2300000000, 0 },
+};
+
+static const struct pll_vco zonda_ole_vco[] = {
+       { 700000000, 3600000000, 0 },
+};
+
+static const struct alpha_pll_config gpu_cc_pll0_config = {
+       .l = 0x29,
+       .alpha = 0xa000,
+       .config_ctl_val = 0x08240800,
+       .config_ctl_hi_val = 0x05008001,
+       .config_ctl_hi1_val = 0x00000000,
+       .config_ctl_hi2_val = 0x00000000,
+       .user_ctl_val = 0x00000000,
+       .user_ctl_hi_val = 0x02000000,
+};
+
+static struct clk_alpha_pll gpu_cc_pll0 = {
+       .offset = 0x0,
+       .vco_table = zonda_ole_vco,
+       .num_vco = ARRAY_SIZE(zonda_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_ZONDA_OLE],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_pll0",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_zonda_ole_ops,
+               },
+       },
+};
+
+static const struct alpha_pll_config gpu_cc_pll1_config = {
+       .l = 0x16,
+       .alpha = 0xeaaa,
+       .config_ctl_val = 0x20485699,
+       .config_ctl_hi_val = 0x00182261,
+       .config_ctl_hi1_val = 0x82aa299c,
+       .test_ctl_val = 0x00000000,
+       .test_ctl_hi_val = 0x00000003,
+       .test_ctl_hi1_val = 0x00009000,
+       .test_ctl_hi2_val = 0x00000034,
+       .user_ctl_val = 0x00000000,
+       .user_ctl_hi_val = 0x00000005,
+};
+
+static struct clk_alpha_pll gpu_cc_pll1 = {
+       .offset = 0x1000,
+       .vco_table = lucid_ole_vco,
+       .num_vco = ARRAY_SIZE(lucid_ole_vco),
+       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_LUCID_OLE],
+       .clkr = {
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_pll1",
+                       .parent_data = &(const struct clk_parent_data) {
+                               .index = DT_BI_TCXO,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_alpha_pll_lucid_evo_ops,
+               },
+       },
+};
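As a rough sanity check on these settings, assuming the usual Qualcomm alpha-PLL relation f_out = f_ref * (L + alpha / 2^16) and a 19.2 MHz TCXO reference:

    /* Back-of-envelope only; a 16-bit fractional field is assumed.
     *   gpu_cc_pll0: 19.2 MHz * (0x29 + 0xa000 / 0x10000)
     *              = 19.2 MHz * 41.625  ~= 799.2 MHz
     *   gpu_cc_pll1: 19.2 MHz * (0x16 + 0xeaaa / 0x10000)
     *              ~= 19.2 MHz * 22.917 ~= 440 MHz
     * The pll1 figure lines up with the 220 MHz GMU table entry
     * below, which selects gpu_cc_pll1 with a divide-by-2. */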
+
+static const struct parent_map gpu_cc_parent_map_0[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPLL0_OUT_MAIN, 5 },
+       { P_GPLL0_OUT_MAIN_DIV, 6 },
+};
+
+static const struct clk_parent_data gpu_cc_parent_data_0[] = {
+       { .index = DT_BI_TCXO },
+       { .index = DT_GPLL0_OUT_MAIN },
+       { .index = DT_GPLL0_OUT_MAIN_DIV },
+};
+
+static const struct parent_map gpu_cc_parent_map_1[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPU_CC_PLL0_OUT_MAIN, 1 },
+       { P_GPU_CC_PLL1_OUT_MAIN, 3 },
+       { P_GPLL0_OUT_MAIN, 5 },
+       { P_GPLL0_OUT_MAIN_DIV, 6 },
+};
+
+static const struct clk_parent_data gpu_cc_parent_data_1[] = {
+       { .index = DT_BI_TCXO },
+       { .hw = &gpu_cc_pll0.clkr.hw },
+       { .hw = &gpu_cc_pll1.clkr.hw },
+       { .index = DT_GPLL0_OUT_MAIN },
+       { .index = DT_GPLL0_OUT_MAIN_DIV },
+};
+
+static const struct parent_map gpu_cc_parent_map_2[] = {
+       { P_BI_TCXO, 0 },
+       { P_GPU_CC_PLL1_OUT_MAIN, 3 },
+       { P_GPLL0_OUT_MAIN, 5 },
+       { P_GPLL0_OUT_MAIN_DIV, 6 },
+};
+
+static const struct clk_parent_data gpu_cc_parent_data_2[] = {
+       { .index = DT_BI_TCXO },
+       { .hw = &gpu_cc_pll1.clkr.hw },
+       { .index = DT_GPLL0_OUT_MAIN },
+       { .index = DT_GPLL0_OUT_MAIN_DIV },
+};
+
+static const struct parent_map gpu_cc_parent_map_3[] = {
+       { P_BI_TCXO, 0 },
+};
+
+static const struct clk_parent_data gpu_cc_parent_data_3[] = {
+       { .index = DT_BI_TCXO },
+};
+
+static const struct freq_tbl ftbl_gpu_cc_ff_clk_src[] = {
+       F(200000000, P_GPLL0_OUT_MAIN, 3, 0, 0),
+       { }
+};
+
+static struct clk_rcg2 gpu_cc_ff_clk_src = {
+       .cmd_rcgr = 0x9474,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gpu_cc_parent_map_0,
+       .freq_tbl = ftbl_gpu_cc_ff_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "gpu_cc_ff_clk_src",
+               .parent_data = gpu_cc_parent_data_0,
+               .num_parents = ARRAY_SIZE(gpu_cc_parent_data_0),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static const struct freq_tbl ftbl_gpu_cc_gmu_clk_src[] = {
+       F(19200000, P_BI_TCXO, 1, 0, 0),
+       F(220000000, P_GPU_CC_PLL1_OUT_MAIN, 2, 0, 0),
+       F(550000000, P_GPU_CC_PLL1_OUT_MAIN, 2, 0, 0),
+       { }
+};
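Each F() entry packs (rate, source, pre-divider, M, N), and the pre-divider is stored in hardware encoding. From drivers/clk/qcom/clk-rcg.h:

    #define F(f, s, h, m, n) { (f), (s), (2 * (h) - 1), (m), (n) }

so F(220000000, P_GPU_CC_PLL1_OUT_MAIN, 2, 0, 0) records a raw divider field of 3, which the RCG interprets as divide-by-2 of the 440 MHz PLL1 output.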
+
+static struct clk_rcg2 gpu_cc_gmu_clk_src = {
+       .cmd_rcgr = 0x9318,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gpu_cc_parent_map_1,
+       .freq_tbl = ftbl_gpu_cc_gmu_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "gpu_cc_gmu_clk_src",
+               .parent_data = gpu_cc_parent_data_1,
+               .num_parents = ARRAY_SIZE(gpu_cc_parent_data_1),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_shared_ops,
+       },
+};
+
+static struct clk_rcg2 gpu_cc_hub_clk_src = {
+       .cmd_rcgr = 0x93ec,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gpu_cc_parent_map_2,
+       .freq_tbl = ftbl_gpu_cc_ff_clk_src,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "gpu_cc_hub_clk_src",
+               .parent_data = gpu_cc_parent_data_2,
+               .num_parents = ARRAY_SIZE(gpu_cc_parent_data_2),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_rcg2 gpu_cc_xo_clk_src = {
+       .cmd_rcgr = 0x9010,
+       .mnd_width = 0,
+       .hid_width = 5,
+       .parent_map = gpu_cc_parent_map_3,
+       .freq_tbl = NULL,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "gpu_cc_xo_clk_src",
+               .parent_data = gpu_cc_parent_data_3,
+               .num_parents = ARRAY_SIZE(gpu_cc_parent_data_3),
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_rcg2_ops,
+       },
+};
+
+static struct clk_regmap_div gpu_cc_demet_div_clk_src = {
+       .reg = 0x9054,
+       .shift = 0,
+       .width = 4,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "gpu_cc_demet_div_clk_src",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &gpu_cc_xo_clk_src.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_regmap_div_ro_ops,
+       },
+};
+
+static struct clk_regmap_div gpu_cc_xo_div_clk_src = {
+       .reg = 0x9050,
+       .shift = 0,
+       .width = 4,
+       .clkr.hw.init = &(const struct clk_init_data) {
+               .name = "gpu_cc_xo_div_clk_src",
+               .parent_hws = (const struct clk_hw*[]) {
+                       &gpu_cc_xo_clk_src.clkr.hw,
+               },
+               .num_parents = 1,
+               .flags = CLK_SET_RATE_PARENT,
+               .ops = &clk_regmap_div_ro_ops,
+       },
+};
+
+static struct clk_branch gpu_cc_ahb_clk = {
+       .halt_reg = 0x911c,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x911c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_hub_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_crc_ahb_clk = {
+       .halt_reg = 0x9120,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x9120,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_crc_ahb_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_hub_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_cx_ff_clk = {
+       .halt_reg = 0x914c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x914c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_cx_ff_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_ff_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_cx_gmu_clk = {
+       .halt_reg = 0x913c,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x913c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_cx_gmu_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_gmu_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_aon_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_cxo_aon_clk = {
+       .halt_reg = 0x9004,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x9004,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_cxo_aon_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_xo_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_cxo_clk = {
+       .halt_reg = 0x9144,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x9144,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_cxo_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_xo_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_demet_clk = {
+       .halt_reg = 0x900c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x900c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_demet_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_demet_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_aon_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_freq_measure_clk = {
+       .halt_reg = 0x9008,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x9008,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_freq_measure_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_xo_div_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_hlos1_vote_gpu_smmu_clk = {
+       .halt_reg = 0x7000,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x7000,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_hlos1_vote_gpu_smmu_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_gx_gmu_clk = {
+       .halt_reg = 0x90bc,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x90bc,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_gx_gmu_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_gmu_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_gx_vsense_clk = {
+       .halt_reg = 0x90b0,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x90b0,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_gx_vsense_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_hub_aon_clk = {
+       .halt_reg = 0x93e8,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x93e8,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_hub_aon_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_hub_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_aon_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_hub_cx_int_clk = {
+       .halt_reg = 0x9148,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x9148,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_hub_cx_int_clk",
+                       .parent_hws = (const struct clk_hw*[]) {
+                               &gpu_cc_hub_clk_src.clkr.hw,
+                       },
+                       .num_parents = 1,
+                       .flags = CLK_SET_RATE_PARENT,
+                       .ops = &clk_branch2_aon_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_memnoc_gfx_clk = {
+       .halt_reg = 0x9150,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x9150,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_memnoc_gfx_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_mnd1x_0_gfx3d_clk = {
+       .halt_reg = 0x9288,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x9288,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_mnd1x_0_gfx3d_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_mnd1x_1_gfx3d_clk = {
+       .halt_reg = 0x928c,
+       .halt_check = BRANCH_HALT,
+       .clkr = {
+               .enable_reg = 0x928c,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_mnd1x_1_gfx3d_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch gpu_cc_sleep_clk = {
+       .halt_reg = 0x9134,
+       .halt_check = BRANCH_HALT_VOTED,
+       .clkr = {
+               .enable_reg = 0x9134,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "gpu_cc_sleep_clk",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct gdsc gpu_cx_gdsc = {
+       .gdscr = 0x9108,
+       .gds_hw_ctrl = 0x953c,
+       .en_rest_wait_val = 0x2,
+       .en_few_wait_val = 0x2,
+       .clk_dis_wait_val = 0xf,
+       .pd = {
+               .name = "gpu_cx_gdsc",
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = VOTABLE | RETAIN_FF_ENABLE,
+};
+
+static struct gdsc gpu_gx_gdsc = {
+       .gdscr = 0x905c,
+       .clamp_io_ctrl = 0x9504,
+       .en_rest_wait_val = 0x2,
+       .en_few_wait_val = 0x2,
+       .clk_dis_wait_val = 0xf,
+       .pd = {
+               .name = "gpu_gx_gdsc",
+               .power_on = gdsc_gx_do_nothing_enable,
+       },
+       .pwrsts = PWRSTS_OFF_ON,
+       .flags = CLAMP_IO | AON_RESET | SW_RESET | POLL_CFG_GDSCR,
+};
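Note that gpu_gx_gdsc wires .power_on to gdsc_gx_do_nothing_enable. On these Adreno GPUs the GX domain is sequenced by the GMU and the GPU driver rather than by genpd, so the enable callback intentionally performs no register write and only satisfies the power-domain framework, while flags like CLAMP_IO, AON_RESET and SW_RESET still let the common GDSC code clean the domain up on power-down.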
+
+static struct clk_regmap *gpu_cc_x1e80100_clocks[] = {
+       [GPU_CC_AHB_CLK] = &gpu_cc_ahb_clk.clkr,
+       [GPU_CC_CRC_AHB_CLK] = &gpu_cc_crc_ahb_clk.clkr,
+       [GPU_CC_CX_FF_CLK] = &gpu_cc_cx_ff_clk.clkr,
+       [GPU_CC_CX_GMU_CLK] = &gpu_cc_cx_gmu_clk.clkr,
+       [GPU_CC_CXO_AON_CLK] = &gpu_cc_cxo_aon_clk.clkr,
+       [GPU_CC_CXO_CLK] = &gpu_cc_cxo_clk.clkr,
+       [GPU_CC_DEMET_CLK] = &gpu_cc_demet_clk.clkr,
+       [GPU_CC_DEMET_DIV_CLK_SRC] = &gpu_cc_demet_div_clk_src.clkr,
+       [GPU_CC_FF_CLK_SRC] = &gpu_cc_ff_clk_src.clkr,
+       [GPU_CC_FREQ_MEASURE_CLK] = &gpu_cc_freq_measure_clk.clkr,
+       [GPU_CC_GMU_CLK_SRC] = &gpu_cc_gmu_clk_src.clkr,
+       [GPU_CC_GX_GMU_CLK] = &gpu_cc_gx_gmu_clk.clkr,
+       [GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK] = &gpu_cc_hlos1_vote_gpu_smmu_clk.clkr,
+       [GPU_CC_GX_VSENSE_CLK] = &gpu_cc_gx_vsense_clk.clkr,
+       [GPU_CC_HUB_AON_CLK] = &gpu_cc_hub_aon_clk.clkr,
+       [GPU_CC_HUB_CLK_SRC] = &gpu_cc_hub_clk_src.clkr,
+       [GPU_CC_HUB_CX_INT_CLK] = &gpu_cc_hub_cx_int_clk.clkr,
+       [GPU_CC_MEMNOC_GFX_CLK] = &gpu_cc_memnoc_gfx_clk.clkr,
+       [GPU_CC_MND1X_0_GFX3D_CLK] = &gpu_cc_mnd1x_0_gfx3d_clk.clkr,
+       [GPU_CC_MND1X_1_GFX3D_CLK] = &gpu_cc_mnd1x_1_gfx3d_clk.clkr,
+       [GPU_CC_PLL0] = &gpu_cc_pll0.clkr,
+       [GPU_CC_PLL1] = &gpu_cc_pll1.clkr,
+       [GPU_CC_SLEEP_CLK] = &gpu_cc_sleep_clk.clkr,
+       [GPU_CC_XO_CLK_SRC] = &gpu_cc_xo_clk_src.clkr,
+       [GPU_CC_XO_DIV_CLK_SRC] = &gpu_cc_xo_div_clk_src.clkr,
+};
+
+static const struct qcom_reset_map gpu_cc_x1e80100_resets[] = {
+       [GPUCC_GPU_CC_XO_BCR] = { 0x9000 },
+       [GPUCC_GPU_CC_GX_BCR] = { 0x9058 },
+       [GPUCC_GPU_CC_CX_BCR] = { 0x9104 },
+       [GPUCC_GPU_CC_GFX3D_AON_BCR] = { 0x9198 },
+       [GPUCC_GPU_CC_ACD_BCR] = { 0x9358 },
+       [GPUCC_GPU_CC_FAST_HUB_BCR] = { 0x93e4 },
+       [GPUCC_GPU_CC_FF_BCR] = { 0x9470 },
+       [GPUCC_GPU_CC_GMU_BCR] = { 0x9314 },
+       [GPUCC_GPU_CC_CB_BCR] = { 0x93a0 },
+};
+
+static struct gdsc *gpu_cc_x1e80100_gdscs[] = {
+       [GPU_CX_GDSC] = &gpu_cx_gdsc,
+       [GPU_GX_GDSC] = &gpu_gx_gdsc,
+};
+
+static const struct regmap_config gpu_cc_x1e80100_regmap_config = {
+       .reg_bits = 32,
+       .reg_stride = 4,
+       .val_bits = 32,
+       .max_register = 0x9988,
+       .fast_io = true,
+};
+
+static const struct qcom_cc_desc gpu_cc_x1e80100_desc = {
+       .config = &gpu_cc_x1e80100_regmap_config,
+       .clks = gpu_cc_x1e80100_clocks,
+       .num_clks = ARRAY_SIZE(gpu_cc_x1e80100_clocks),
+       .resets = gpu_cc_x1e80100_resets,
+       .num_resets = ARRAY_SIZE(gpu_cc_x1e80100_resets),
+       .gdscs = gpu_cc_x1e80100_gdscs,
+       .num_gdscs = ARRAY_SIZE(gpu_cc_x1e80100_gdscs),
+};
+
+static const struct of_device_id gpu_cc_x1e80100_match_table[] = {
+       { .compatible = "qcom,x1e80100-gpucc" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, gpu_cc_x1e80100_match_table);
+
+static int gpu_cc_x1e80100_probe(struct platform_device *pdev)
+{
+       struct regmap *regmap;
+
+       regmap = qcom_cc_map(pdev, &gpu_cc_x1e80100_desc);
+       if (IS_ERR(regmap))
+               return PTR_ERR(regmap);
+
+       clk_zonda_pll_configure(&gpu_cc_pll0, regmap, &gpu_cc_pll0_config);
+       clk_lucid_evo_pll_configure(&gpu_cc_pll1, regmap, &gpu_cc_pll1_config);
+
+       /* Keep clocks always enabled */
+       qcom_branch_set_clk_en(regmap, 0x93a4); /* GPU_CC_CB_CLK */
+
+       return qcom_cc_really_probe(pdev, &gpu_cc_x1e80100_desc, regmap);
+}
+
+static struct platform_driver gpu_cc_x1e80100_driver = {
+       .probe = gpu_cc_x1e80100_probe,
+       .driver = {
+               .name = "gpucc-x1e80100",
+               .of_match_table = gpu_cc_x1e80100_match_table,
+       },
+};
+module_platform_driver(gpu_cc_x1e80100_driver);
+
+MODULE_DESCRIPTION("QTI GPU Clock Controller X1E80100 Driver");
+MODULE_LICENSE("GPL");
index 9051fd567112521224c320febe18e9f7ba134fd3..fd9cd2e3f95651caa1f4573de0c5115738868344 100644
@@ -401,11 +401,8 @@ static int lpass_core_cc_sc7180_probe(struct platform_device *pdev)
                goto exit;
        }
 
-       /*
-        * Keep the CLK always-ON
-        * LPASS_AUDIO_CORE_SYSNOC_SWAY_CORE_CLK
-        */
-       regmap_update_bits(regmap, 0x24000, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x24000); /* LPASS_AUDIO_CORE_SYSNOC_SWAY_CORE_CLK */
 
        /* PLL settings */
        regmap_write(regmap, 0x1008, 0x20);
index 02fc21208dd14b9bcfd7ae641e34f6411d0f34e2..c89700ab93f9c6468c0b509305059a852228bbe0 100644
@@ -348,6 +348,7 @@ static struct freq_tbl ftbl_mmss_axi_clk[] = {
        F(333430000, P_MMPLL1, 3.5, 0, 0),
        F(400000000, P_MMPLL0, 2, 0, 0),
        F(466800000, P_MMPLL1, 2.5, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 mmss_axi_clk_src = {
@@ -372,6 +373,7 @@ static struct freq_tbl ftbl_ocmemnoc_clk[] = {
        F(150000000, P_GPLL0, 4, 0, 0),
        F(228570000, P_MMPLL0, 3.5, 0, 0),
        F(320000000, P_MMPLL0, 2.5, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 ocmemnoc_clk_src = {
index a31f6cf0c4e0cef1a1e2ce31bc28ccedd3394219..36f460b78be2ccbee26fead56028626944a973db 100644
@@ -290,6 +290,7 @@ static struct freq_tbl ftbl_mmss_axi_clk[] = {
        F(291750000, P_MMPLL1, 4, 0, 0),
        F(400000000, P_MMPLL0, 2, 0, 0),
        F(466800000, P_MMPLL1, 2.5, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 mmss_axi_clk_src = {
@@ -314,6 +315,7 @@ static struct freq_tbl ftbl_ocmemnoc_clk[] = {
        F(150000000, P_GPLL0, 4, 0, 0),
        F(291750000, P_MMPLL1, 4, 0, 0),
        F(400000000, P_MMPLL0, 2, 0, 0),
+       { }
 };
 
 static struct clk_rcg2 ocmemnoc_clk_src = {
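The added { } entries terminate these frequency tables. The lookup helper walks entries until it hits a zero frequency, so a table without the sentinel can be scanned past its end. A sketch of the walker, essentially qcom_find_freq() from drivers/clk/qcom/common.c:

    const struct freq_tbl *qcom_find_freq(const struct freq_tbl *f,
                                          unsigned long rate)
    {
            if (!f)
                    return NULL;

            if (!f->freq)
                    return f;

            /* Stop at the first entry that can satisfy the request;
             * the zero-frequency sentinel ends the scan. */
            for (; f->freq; f++)
                    if (rate <= f->freq)
                            return f;

            /* Default to our fastest rate */
            return f - 1;
    }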
diff --git a/drivers/clk/qcom/mss-sc7180.c b/drivers/clk/qcom/mss-sc7180.c
deleted file mode 100644
index d106bc6..0000000
+++ /dev/null
@@ -1,140 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2020, The Linux Foundation. All rights reserved.
- */
-
-#include <linux/clk-provider.h>
-#include <linux/platform_device.h>
-#include <linux/module.h>
-#include <linux/pm_clock.h>
-#include <linux/pm_runtime.h>
-#include <linux/regmap.h>
-
-#include <dt-bindings/clock/qcom,mss-sc7180.h>
-
-#include "clk-regmap.h"
-#include "clk-branch.h"
-#include "common.h"
-
-static struct clk_branch mss_axi_nav_clk = {
-       .halt_reg = 0x20bc,
-       .halt_check = BRANCH_HALT,
-       .clkr = {
-               .enable_reg = 0x20bc,
-               .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
-                       .name = "mss_axi_nav_clk",
-                       .parent_data = &(const struct clk_parent_data){
-                               .fw_name = "gcc_mss_nav_axi",
-                       },
-                       .num_parents = 1,
-                       .ops = &clk_branch2_ops,
-               },
-       },
-};
-
-static struct clk_branch mss_axi_crypto_clk = {
-       .halt_reg = 0x20cc,
-       .halt_check = BRANCH_HALT,
-       .clkr = {
-               .enable_reg = 0x20cc,
-               .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
-                       .name = "mss_axi_crypto_clk",
-                       .parent_data = &(const struct clk_parent_data){
-                               .fw_name = "gcc_mss_mfab_axis",
-                       },
-                       .num_parents = 1,
-                       .ops = &clk_branch2_ops,
-               },
-       },
-};
-
-static const struct regmap_config mss_regmap_config = {
-       .reg_bits       = 32,
-       .reg_stride     = 4,
-       .val_bits       = 32,
-       .fast_io        = true,
-       .max_register   = 0x41aa0cc,
-};
-
-static struct clk_regmap *mss_sc7180_clocks[] = {
-       [MSS_AXI_CRYPTO_CLK] = &mss_axi_crypto_clk.clkr,
-       [MSS_AXI_NAV_CLK] = &mss_axi_nav_clk.clkr,
-};
-
-static const struct qcom_cc_desc mss_sc7180_desc = {
-       .config = &mss_regmap_config,
-       .clks = mss_sc7180_clocks,
-       .num_clks = ARRAY_SIZE(mss_sc7180_clocks),
-};
-
-static int mss_sc7180_probe(struct platform_device *pdev)
-{
-       int ret;
-
-       ret = devm_pm_runtime_enable(&pdev->dev);
-       if (ret)
-               return ret;
-
-       ret = devm_pm_clk_create(&pdev->dev);
-       if (ret)
-               return ret;
-
-       ret = pm_clk_add(&pdev->dev, "cfg_ahb");
-       if (ret < 0) {
-               dev_err(&pdev->dev, "failed to acquire iface clock\n");
-               return ret;
-       }
-
-       ret = pm_runtime_resume_and_get(&pdev->dev);
-       if (ret)
-               return ret;
-
-       ret = qcom_cc_probe(pdev, &mss_sc7180_desc);
-       if (ret < 0)
-               goto err_put_rpm;
-
-       pm_runtime_put(&pdev->dev);
-
-       return 0;
-
-err_put_rpm:
-       pm_runtime_put_sync(&pdev->dev);
-
-       return ret;
-}
-
-static const struct dev_pm_ops mss_sc7180_pm_ops = {
-       SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL)
-};
-
-static const struct of_device_id mss_sc7180_match_table[] = {
-       { .compatible = "qcom,sc7180-mss" },
-       { }
-};
-MODULE_DEVICE_TABLE(of, mss_sc7180_match_table);
-
-static struct platform_driver mss_sc7180_driver = {
-       .probe          = mss_sc7180_probe,
-       .driver         = {
-               .name           = "sc7180-mss",
-               .of_match_table = mss_sc7180_match_table,
-               .pm = &mss_sc7180_pm_ops,
-       },
-};
-
-static int __init mss_sc7180_init(void)
-{
-       return platform_driver_register(&mss_sc7180_driver);
-}
-subsys_initcall(mss_sc7180_init);
-
-static void __exit mss_sc7180_exit(void)
-{
-       platform_driver_unregister(&mss_sc7180_driver);
-}
-module_exit(mss_sc7180_exit);
-
-MODULE_DESCRIPTION("QTI MSS SC7180 Driver");
-MODULE_LICENSE("GPL v2");
index e45e32804d2c75aa5e5a1bf11be76e3d422b140b..d96c96a9089f40b3a111067434d4d19632505a16 100644
@@ -22,8 +22,8 @@ static int qcom_reset(struct reset_controller_dev *rcdev, unsigned long id)
        return 0;
 }
 
-static int
-qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id)
+static int qcom_reset_set_assert(struct reset_controller_dev *rcdev,
+                                unsigned long id, bool assert)
 {
        struct qcom_reset_controller *rst;
        const struct qcom_reset_map *map;
@@ -33,21 +33,22 @@ qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id)
        map = &rst->reset_map[id];
        mask = map->bitmask ? map->bitmask : BIT(map->bit);
 
-       return regmap_update_bits(rst->regmap, map->reg, mask, mask);
+       regmap_update_bits(rst->regmap, map->reg, mask, assert ? mask : 0);
+
+       /* Read back the register to ensure write completion, ignore the value */
+       regmap_read(rst->regmap, map->reg, &mask);
+
+       return 0;
 }
 
-static int
-qcom_reset_deassert(struct reset_controller_dev *rcdev, unsigned long id)
+static int qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id)
 {
-       struct qcom_reset_controller *rst;
-       const struct qcom_reset_map *map;
-       u32 mask;
-
-       rst = to_qcom_reset_controller(rcdev);
-       map = &rst->reset_map[id];
-       mask = map->bitmask ? map->bitmask : BIT(map->bit);
+       return qcom_reset_set_assert(rcdev, id, true);
+}
 
-       return regmap_update_bits(rst->regmap, map->reg, mask, 0);
+static int qcom_reset_deassert(struct reset_controller_dev *rcdev, unsigned long id)
+{
+       return qcom_reset_set_assert(rcdev, id, false);
 }
 
 const struct reset_control_ops qcom_reset_ops = {
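Collapsing assert and deassert into qcom_reset_set_assert() removes a duplicated map lookup; the functional change is the read-back. regmap_update_bits() on MMIO can return while the write is still posted in a bus write buffer, so reading the same register back forces the write to complete before the caller's per-reset delay starts (see the udelay widening below); without it, the delay between assert and deassert could partly elapse while the assert write is still in flight. Reusing the local mask variable as the throwaway read destination is deliberate; as the comment says, the value is ignored.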
index 9a47c838d9b1ba0433e38033733ee116e11a1a10..fe0561bf53d492da1dac71dd7067e84e46776bab 100644
@@ -11,7 +11,7 @@
 struct qcom_reset_map {
        unsigned int reg;
        u8 bit;
-       u8 udelay;
+       u16 udelay;
        u32 bitmask;
 };
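The widening matters for the delays added later in this series: several video clock controller resets below set .udelay = 1000, which does not fit in a u8 and would silently truncate to 1000 % 256 = 232 microseconds; u16 accommodates delays up to 65535 us.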
 
diff --git a/drivers/clk/qcom/tcsrcc-x1e80100.c b/drivers/clk/qcom/tcsrcc-x1e80100.c
new file mode 100644
index 0000000..ff61769
--- /dev/null
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2023, Linaro Limited
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,x1e80100-tcsr.h>
+
+#include "clk-branch.h"
+#include "clk-regmap.h"
+#include "common.h"
+#include "reset.h"
+
+enum {
+       DT_BI_TCXO_PAD,
+};
+
+static struct clk_branch tcsr_edp_clkref_en = {
+       .halt_reg = 0x15130,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x15130,
+               .enable_mask = BIT(0),
+               .hw.init = &(const struct clk_init_data) {
+                       .name = "tcsr_edp_clkref_en",
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_pcie_2l_4_clkref_en = {
+       .halt_reg = 0x15100,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x15100,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_pcie_2l_4_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_pcie_2l_5_clkref_en = {
+       .halt_reg = 0x15104,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x15104,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_pcie_2l_5_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_pcie_8l_clkref_en = {
+       .halt_reg = 0x15108,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x15108,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_pcie_8l_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_usb3_mp0_clkref_en = {
+       .halt_reg = 0x1510c,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x1510c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_usb3_mp0_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_usb3_mp1_clkref_en = {
+       .halt_reg = 0x15110,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x15110,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_usb3_mp1_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_usb2_1_clkref_en = {
+       .halt_reg = 0x15114,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x15114,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_usb2_1_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_ufs_phy_clkref_en = {
+       .halt_reg = 0x15118,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x15118,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_ufs_phy_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_usb4_1_clkref_en = {
+       .halt_reg = 0x15120,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x15120,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_usb4_1_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_usb4_2_clkref_en = {
+       .halt_reg = 0x15124,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x15124,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_usb4_2_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_usb2_2_clkref_en = {
+       .halt_reg = 0x15128,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x15128,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_usb2_2_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_branch tcsr_pcie_4l_clkref_en = {
+       .halt_reg = 0x1512c,
+       .halt_check = BRANCH_HALT_DELAY,
+       .clkr = {
+               .enable_reg = 0x1512c,
+               .enable_mask = BIT(0),
+               .hw.init = &(struct clk_init_data){
+                       .name = "tcsr_pcie_4l_clkref_en",
+                       .parent_data = &(const struct clk_parent_data){
+                               .index = DT_BI_TCXO_PAD,
+                       },
+                       .num_parents = 1,
+                       .ops = &clk_branch2_ops,
+               },
+       },
+};
+
+static struct clk_regmap *tcsr_cc_x1e80100_clocks[] = {
+       [TCSR_EDP_CLKREF_EN] = &tcsr_edp_clkref_en.clkr,
+       [TCSR_PCIE_2L_4_CLKREF_EN] = &tcsr_pcie_2l_4_clkref_en.clkr,
+       [TCSR_PCIE_2L_5_CLKREF_EN] = &tcsr_pcie_2l_5_clkref_en.clkr,
+       [TCSR_PCIE_8L_CLKREF_EN] = &tcsr_pcie_8l_clkref_en.clkr,
+       [TCSR_USB3_MP0_CLKREF_EN] = &tcsr_usb3_mp0_clkref_en.clkr,
+       [TCSR_USB3_MP1_CLKREF_EN] = &tcsr_usb3_mp1_clkref_en.clkr,
+       [TCSR_USB2_1_CLKREF_EN] = &tcsr_usb2_1_clkref_en.clkr,
+       [TCSR_UFS_PHY_CLKREF_EN] = &tcsr_ufs_phy_clkref_en.clkr,
+       [TCSR_USB4_1_CLKREF_EN] = &tcsr_usb4_1_clkref_en.clkr,
+       [TCSR_USB4_2_CLKREF_EN] = &tcsr_usb4_2_clkref_en.clkr,
+       [TCSR_USB2_2_CLKREF_EN] = &tcsr_usb2_2_clkref_en.clkr,
+       [TCSR_PCIE_4L_CLKREF_EN] = &tcsr_pcie_4l_clkref_en.clkr,
+};
+
+static const struct regmap_config tcsr_cc_x1e80100_regmap_config = {
+       .reg_bits = 32,
+       .reg_stride = 4,
+       .val_bits = 32,
+       .max_register = 0x2f000,
+       .fast_io = true,
+};
+
+static const struct qcom_cc_desc tcsr_cc_x1e80100_desc = {
+       .config = &tcsr_cc_x1e80100_regmap_config,
+       .clks = tcsr_cc_x1e80100_clocks,
+       .num_clks = ARRAY_SIZE(tcsr_cc_x1e80100_clocks),
+};
+
+static const struct of_device_id tcsr_cc_x1e80100_match_table[] = {
+       { .compatible = "qcom,x1e80100-tcsr" },
+       { }
+};
+MODULE_DEVICE_TABLE(of, tcsr_cc_x1e80100_match_table);
+
+static int tcsr_cc_x1e80100_probe(struct platform_device *pdev)
+{
+       return qcom_cc_probe(pdev, &tcsr_cc_x1e80100_desc);
+}
+
+static struct platform_driver tcsr_cc_x1e80100_driver = {
+       .probe = tcsr_cc_x1e80100_probe,
+       .driver = {
+               .name = "tcsrcc-x1e80100",
+               .of_match_table = tcsr_cc_x1e80100_match_table,
+       },
+};
+
+static int __init tcsr_cc_x1e80100_init(void)
+{
+       return platform_driver_register(&tcsr_cc_x1e80100_driver);
+}
+subsys_initcall(tcsr_cc_x1e80100_init);
+
+static void __exit tcsr_cc_x1e80100_exit(void)
+{
+       platform_driver_unregister(&tcsr_cc_x1e80100_driver);
+}
+module_exit(tcsr_cc_x1e80100_exit);
+
+MODULE_DESCRIPTION("QTI TCSR Clock Controller X1E80100 Driver");
+MODULE_LICENSE("GPL");
index 5b9b54f616b8570b89e69e76bf259b947f7bab9e..ae0f812f83e8ed3d73cb2df15dc95e1e6a9577d2 100644
@@ -237,17 +237,7 @@ static struct platform_driver video_cc_sc7180_driver = {
        },
 };
 
-static int __init video_cc_sc7180_init(void)
-{
-       return platform_driver_register(&video_cc_sc7180_driver);
-}
-subsys_initcall(video_cc_sc7180_init);
-
-static void __exit video_cc_sc7180_exit(void)
-{
-       platform_driver_unregister(&video_cc_sc7180_driver);
-}
-module_exit(video_cc_sc7180_exit);
+module_platform_driver(video_cc_sc7180_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("QTI VIDEOCC SC7180 Driver");
index 615695d82319073e49a3cb3cb5e45c294e39ec7d..cdd59c6f60df819f6fe4f4f027169fcd79f03af3 100644
@@ -309,17 +309,7 @@ static struct platform_driver video_cc_sc7280_driver = {
        },
 };
 
-static int __init video_cc_sc7280_init(void)
-{
-       return platform_driver_register(&video_cc_sc7280_driver);
-}
-subsys_initcall(video_cc_sc7280_init);
-
-static void __exit video_cc_sc7280_exit(void)
-{
-       platform_driver_unregister(&video_cc_sc7280_driver);
-}
-module_exit(video_cc_sc7280_exit);
+module_platform_driver(video_cc_sc7280_driver);
 
 MODULE_DESCRIPTION("QTI VIDEO_CC sc7280 Driver");
 MODULE_LICENSE("GPL v2");
index c77a4dd5d39c9cd343b4ea79744718ff8e25f409..b7f21ecad9612f789ad9eebc3cdbd7f3bab2fc4f 100644
@@ -340,16 +340,6 @@ static struct platform_driver video_cc_sdm845_driver = {
        },
 };
 
-static int __init video_cc_sdm845_init(void)
-{
-       return platform_driver_register(&video_cc_sdm845_driver);
-}
-subsys_initcall(video_cc_sdm845_init);
-
-static void __exit video_cc_sdm845_exit(void)
-{
-       platform_driver_unregister(&video_cc_sdm845_driver);
-}
-module_exit(video_cc_sdm845_exit);
+module_platform_driver(video_cc_sdm845_driver);
 
 MODULE_LICENSE("GPL v2");
index f1456eaa87c401ec1a2cee9deb1b604333116882..a0329260157a09caf9e63ce9c2a6e6615ae08f41 100644
@@ -215,7 +215,7 @@ static const struct regmap_config video_cc_sm8150_regmap_config = {
 };
 
 static const struct qcom_reset_map video_cc_sm8150_resets[] = {
-       [VIDEO_CC_MVSC_CORE_CLK_BCR] = { 0x850, 2 },
+       [VIDEO_CC_MVSC_CORE_CLK_BCR] = { .reg = 0x850, .bit = 2, .udelay = 150 },
        [VIDEO_CC_INTERFACE_BCR] = { 0x8f0 },
        [VIDEO_CC_MVS0_BCR] = { 0x870 },
        [VIDEO_CC_MVS1_BCR] = { 0x8b0 },
@@ -277,17 +277,7 @@ static struct platform_driver video_cc_sm8150_driver = {
        },
 };
 
-static int __init video_cc_sm8150_init(void)
-{
-       return platform_driver_register(&video_cc_sm8150_driver);
-}
-subsys_initcall(video_cc_sm8150_init);
-
-static void __exit video_cc_sm8150_exit(void)
-{
-       platform_driver_unregister(&video_cc_sm8150_driver);
-}
-module_exit(video_cc_sm8150_exit);
+module_platform_driver(video_cc_sm8150_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("QTI VIDEOCC SM8150 Driver");
index ad46c4014a405146140f4442c3a3d48ea2e20608..016b596e03b300d84e7c3d69be459934c9756647 100644
@@ -323,10 +323,10 @@ static struct clk_regmap *video_cc_sm8250_clocks[] = {
 static const struct qcom_reset_map video_cc_sm8250_resets[] = {
        [VIDEO_CC_CVP_INTERFACE_BCR] = { 0xe54 },
        [VIDEO_CC_CVP_MVS0_BCR] = { 0xd14 },
-       [VIDEO_CC_MVS0C_CLK_ARES] = { 0xc34, 2 },
+       [VIDEO_CC_MVS0C_CLK_ARES] = { .reg = 0xc34, .bit = 2, .udelay = 150 },
        [VIDEO_CC_CVP_MVS0C_BCR] = { 0xbf4 },
        [VIDEO_CC_CVP_MVS1_BCR] = { 0xd94 },
-       [VIDEO_CC_MVS1C_CLK_ARES] = { 0xcd4, 2 },
+       [VIDEO_CC_MVS1C_CLK_ARES] = { .reg = 0xcd4, .bit = 2, .udelay = 150 },
        [VIDEO_CC_CVP_MVS1C_BCR] = { 0xc94 },
 };
 
@@ -383,9 +383,9 @@ static int video_cc_sm8250_probe(struct platform_device *pdev)
        clk_lucid_pll_configure(&video_pll0, regmap, &video_pll0_config);
        clk_lucid_pll_configure(&video_pll1, regmap, &video_pll1_config);
 
-       /* Keep VIDEO_CC_AHB_CLK and VIDEO_CC_XO_CLK ALWAYS-ON */
-       regmap_update_bits(regmap, 0xe58, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0xeec, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0xe58); /* VIDEO_CC_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0xeec); /* VIDEO_CC_XO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &video_cc_sm8250_desc, regmap);
 
@@ -402,17 +402,7 @@ static struct platform_driver video_cc_sm8250_driver = {
        },
 };
 
-static int __init video_cc_sm8250_init(void)
-{
-       return platform_driver_register(&video_cc_sm8250_driver);
-}
-subsys_initcall(video_cc_sm8250_init);
-
-static void __exit video_cc_sm8250_exit(void)
-{
-       platform_driver_unregister(&video_cc_sm8250_driver);
-}
-module_exit(video_cc_sm8250_exit);
+module_platform_driver(video_cc_sm8250_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("QTI VIDEOCC SM8250 Driver");
index 7246f3c994922ec34464b2745c29bb684939cd5e..f7aec28d4c872e5380580e23857aadd82ee95350 100644
@@ -488,10 +488,10 @@ static struct clk_regmap *video_cc_sm8350_clocks[] = {
 static const struct qcom_reset_map video_cc_sm8350_resets[] = {
        [VIDEO_CC_CVP_INTERFACE_BCR] = { 0xe54 },
        [VIDEO_CC_CVP_MVS0_BCR] = { 0xd14 },
-       [VIDEO_CC_MVS0C_CLK_ARES] = { 0xc34, 2 },
+       [VIDEO_CC_MVS0C_CLK_ARES] = { .reg = 0xc34, .bit = 2, .udelay = 400 },
        [VIDEO_CC_CVP_MVS0C_BCR] = { 0xbf4 },
        [VIDEO_CC_CVP_MVS1_BCR] = { 0xd94 },
-       [VIDEO_CC_MVS1C_CLK_ARES] = { 0xcd4, 2 },
+       [VIDEO_CC_MVS1C_CLK_ARES] = { .reg = 0xcd4, .bit = 2, .udelay = 400 },
        [VIDEO_CC_CVP_MVS1C_BCR] = { 0xc94 },
 };
 
@@ -558,13 +558,9 @@ static int video_cc_sm8350_probe(struct platform_device *pdev)
        clk_lucid_pll_configure(&video_pll0, regmap, &video_pll0_config);
        clk_lucid_pll_configure(&video_pll1, regmap, &video_pll1_config);
 
-       /*
-        * Keep clocks always enabled:
-        *      video_cc_ahb_clk
-        *      video_cc_xo_clk
-        */
-       regmap_update_bits(regmap, 0xe58, BIT(0), BIT(0));
-       regmap_update_bits(regmap, video_cc_xo_clk_cbcr, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0xe58); /* VIDEO_CC_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, video_cc_xo_clk_cbcr); /* VIDEO_CC_XO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &video_cc_sm8350_desc, regmap);
        pm_runtime_put(&pdev->dev);
index 16a61146e61957218f5079e3a7565bca300322e0..67df40f16423152e8c7e9544b43a4f01e7d03aaf 100644
@@ -373,8 +373,8 @@ static const struct qcom_reset_map video_cc_sm8450_resets[] = {
        [CVP_VIDEO_CC_MVS0C_BCR] = { 0x8048 },
        [CVP_VIDEO_CC_MVS1_BCR] = { 0x80bc },
        [CVP_VIDEO_CC_MVS1C_BCR] = { 0x8070 },
-       [VIDEO_CC_MVS0C_CLK_ARES] = { 0x8064, 2 },
-       [VIDEO_CC_MVS1C_CLK_ARES] = { 0x808c, 2 },
+       [VIDEO_CC_MVS0C_CLK_ARES] = { .reg = 0x8064, .bit = 2, .udelay = 1000 },
+       [VIDEO_CC_MVS1C_CLK_ARES] = { .reg = 0x808c, .bit = 2, .udelay = 1000 },
 };
 
 static const struct regmap_config video_cc_sm8450_regmap_config = {
@@ -423,15 +423,10 @@ static int video_cc_sm8450_probe(struct platform_device *pdev)
        clk_lucid_evo_pll_configure(&video_cc_pll0, regmap, &video_cc_pll0_config);
        clk_lucid_evo_pll_configure(&video_cc_pll1, regmap, &video_cc_pll1_config);
 
-       /*
-        * Keep clocks always enabled:
-        *      video_cc_ahb_clk
-        *      video_cc_sleep_clk
-        *      video_cc_xo_clk
-        */
-       regmap_update_bits(regmap, 0x80e4, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x8130, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x8114, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x80e4); /* VIDEO_CC_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x8130); /* VIDEO_CC_SLEEP_CLK */
+       qcom_branch_set_clk_en(regmap, 0x8114); /* VIDEO_CC_XO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &video_cc_sm8450_desc, regmap);
 
@@ -448,17 +443,7 @@ static struct platform_driver video_cc_sm8450_driver = {
        },
 };
 
-static int __init video_cc_sm8450_init(void)
-{
-       return platform_driver_register(&video_cc_sm8450_driver);
-}
-subsys_initcall(video_cc_sm8450_init);
-
-static void __exit video_cc_sm8450_exit(void)
-{
-       platform_driver_unregister(&video_cc_sm8450_driver);
-}
-module_exit(video_cc_sm8450_exit);
+module_platform_driver(video_cc_sm8450_driver);
 
 MODULE_DESCRIPTION("QTI VIDEOCC SM8450 Driver");
 MODULE_LICENSE("GPL");
index f3c9dfaee968fb73ea1a5366c95f24c51a212a2c..d73f747d2474009b7afb67e876d92d3a71529ce1 100644
@@ -378,8 +378,8 @@ static const struct qcom_reset_map video_cc_sm8550_resets[] = {
        [CVP_VIDEO_CC_MVS0C_BCR] = { 0x8048 },
        [CVP_VIDEO_CC_MVS1_BCR] = { 0x80c8 },
        [CVP_VIDEO_CC_MVS1C_BCR] = { 0x8074 },
-       [VIDEO_CC_MVS0C_CLK_ARES] = { 0x8064, 2 },
-       [VIDEO_CC_MVS1C_CLK_ARES] = { 0x8090, 2 },
+       [VIDEO_CC_MVS0C_CLK_ARES] = { .reg = 0x8064, .bit = 2, .udelay = 1000 },
+       [VIDEO_CC_MVS1C_CLK_ARES] = { .reg = 0x8090, .bit = 2, .udelay = 1000 },
 };
 
 static const struct regmap_config video_cc_sm8550_regmap_config = {
@@ -428,15 +428,10 @@ static int video_cc_sm8550_probe(struct platform_device *pdev)
        clk_lucid_ole_pll_configure(&video_cc_pll0, regmap, &video_cc_pll0_config);
        clk_lucid_ole_pll_configure(&video_cc_pll1, regmap, &video_cc_pll1_config);
 
-       /*
-        * Keep clocks always enabled:
-        *      video_cc_ahb_clk
-        *      video_cc_sleep_clk
-        *      video_cc_xo_clk
-        */
-       regmap_update_bits(regmap, 0x80f4, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x8140, BIT(0), BIT(0));
-       regmap_update_bits(regmap, 0x8124, BIT(0), BIT(0));
+       /* Keep some clocks always-on */
+       qcom_branch_set_clk_en(regmap, 0x80f4); /* VIDEO_CC_AHB_CLK */
+       qcom_branch_set_clk_en(regmap, 0x8140); /* VIDEO_CC_SLEEP_CLK */
+       qcom_branch_set_clk_en(regmap, 0x8124); /* VIDEO_CC_XO_CLK */
 
        ret = qcom_cc_really_probe(pdev, &video_cc_sm8550_desc, regmap);
 
@@ -453,17 +448,7 @@ static struct platform_driver video_cc_sm8550_driver = {
        },
 };
 
-static int __init video_cc_sm8550_init(void)
-{
-       return platform_driver_register(&video_cc_sm8550_driver);
-}
-subsys_initcall(video_cc_sm8550_init);
-
-static void __exit video_cc_sm8550_exit(void)
-{
-       platform_driver_unregister(&video_cc_sm8550_driver);
-}
-module_exit(video_cc_sm8550_exit);
+module_platform_driver(video_cc_sm8550_driver);
 
 MODULE_DESCRIPTION("QTI VIDEOCC SM8550 Driver");
 MODULE_LICENSE("GPL");
index 69396e19795903e8abb3a59e51c7ad6383bd6a64..d252150402e86366396fabe96524d1a766d6a283 100644
@@ -33,6 +33,7 @@ config CLK_RENESAS
        select CLK_R8A779A0 if ARCH_R8A779A0
        select CLK_R8A779F0 if ARCH_R8A779F0
        select CLK_R8A779G0 if ARCH_R8A779G0
+       select CLK_R8A779H0 if ARCH_R8A779H0
        select CLK_R9A06G032 if ARCH_R9A06G032
        select CLK_R9A07G043 if ARCH_R9A07G043
        select CLK_R9A07G044 if ARCH_R9A07G044
@@ -165,6 +166,10 @@ config CLK_R8A779G0
        bool "R-Car V4H clock support" if COMPILE_TEST
        select CLK_RCAR_GEN4_CPG
 
+config CLK_R8A779H0
+       bool "R-Car V4M clock support" if COMPILE_TEST
+       select CLK_RCAR_GEN4_CPG
+
 config CLK_R9A06G032
        bool "RZ/N1D clock support" if COMPILE_TEST
 
index 879a07d445f9056a2776c7c7865a7656f379bc97..f7e18679c3b81b97a4f30487634c20ae5d2a6435 100644
@@ -30,6 +30,7 @@ obj-$(CONFIG_CLK_R8A77995)            += r8a77995-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A779A0)             += r8a779a0-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A779F0)             += r8a779f0-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A779G0)             += r8a779g0-cpg-mssr.o
+obj-$(CONFIG_CLK_R8A779H0)             += r8a779h0-cpg-mssr.o
 obj-$(CONFIG_CLK_R9A06G032)            += r9a06g032-clocks.o
 obj-$(CONFIG_CLK_R9A07G043)            += r9a07g043-cpg.o
 obj-$(CONFIG_CLK_R9A07G044)            += r9a07g044-cpg.o
index 6280f4dfed714e6914dea8bc5dd63b55ee3432b8..5304c977562fc6850ba8046ea8c027eaa5ff55ef 100644
@@ -10,7 +10,6 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
-#include <linux/clkdev.h>
 #include <linux/clk/renesas.h>
 #include <linux/device.h>
 #include <linux/io.h>
@@ -19,6 +18,7 @@
 #include <linux/of_address.h>
 #include <linux/pm_clock.h>
 #include <linux/pm_domain.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 
 /*
@@ -237,22 +237,12 @@ static void __init cpg_mstp_clocks_init(struct device_node *np)
 
                clks[clkidx] = cpg_mstp_clock_register(name, parent_name,
                                                       clkidx, group);
-               if (!IS_ERR(clks[clkidx])) {
+               if (!IS_ERR(clks[clkidx]))
                        group->data.clk_num = max(group->data.clk_num,
                                                  clkidx + 1);
-                       /*
-                        * Register a clkdev to let board code retrieve the
-                        * clock by name and register aliases for non-DT
-                        * devices.
-                        *
-                        * FIXME: Remove this when all devices that require a
-                        * clock will be instantiated from DT.
-                        */
-                       clk_register_clkdev(clks[clkidx], name, NULL);
-               } else {
+               else
                        pr_err("%s: failed to register %pOFn %s clock (%ld)\n",
                               __func__, np, name, PTR_ERR(clks[clkidx]));
-               }
        }
 
        of_clk_add_provider(np, of_clk_src_onecell_get, &group->data);
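This hunk resolves the FIXME it deletes: with every consumer of these MSTP clocks now instantiated from DT, the clkdev name-based lookup is unnecessary, so clk_register_clkdev() and the <linux/clkdev.h> include go away and the success path shrinks to a braceless if/else. The new <linux/slab.h> include presumably replaces one previously reached transitively through clkdev.h, since this file allocates its clock group with kzalloc().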
index f721835c7e21248b64d096e1124f5ded814bb33f..cc06127406ab57375696e702045dd80589460fd0 100644
@@ -161,7 +161,7 @@ static const struct mssr_mod_clk r8a779f0_mod_clks[] __initconst = {
        DEF_MOD("cmt1",         911,    R8A779F0_CLK_R),
        DEF_MOD("cmt2",         912,    R8A779F0_CLK_R),
        DEF_MOD("cmt3",         913,    R8A779F0_CLK_R),
-       DEF_MOD("pfc0",         915,    R8A779F0_CLK_CL16M),
+       DEF_MOD("pfc0",         915,    R8A779F0_CLK_CPEX),
        DEF_MOD("tsc",          919,    R8A779F0_CLK_CL16M),
        DEF_MOD("rswitch2",     1505,   R8A779F0_CLK_RSW2),
        DEF_MOD("ether-serdes", 1506,   R8A779F0_CLK_S0D2_HSC),
index 5974adcef3eda1947dfd6476b32b5c70b82e9162..c4b1938db76b35f4932e2b04103f0773be151909 100644
@@ -22,7 +22,7 @@
 
 enum clk_ids {
        /* Core Clock Outputs exported to DT */
-       LAST_DT_CORE_CLK = R8A779G0_CLK_R,
+       LAST_DT_CORE_CLK = R8A779G0_CLK_CP,
 
        /* External Input Clocks */
        CLK_EXTAL,
@@ -141,6 +141,7 @@ static const struct cpg_core_clk r8a779g0_core_clks[] __initconst = {
        DEF_FIXED("svd2_vip",   R8A779G0_CLK_SVD2_VIP,  CLK_SV_VIP,     2, 1),
        DEF_FIXED("cbfusa",     R8A779G0_CLK_CBFUSA,    CLK_EXTAL,      2, 1),
        DEF_FIXED("cpex",       R8A779G0_CLK_CPEX,      CLK_EXTAL,      2, 1),
+       DEF_FIXED("cp",         R8A779G0_CLK_CP,        CLK_EXTAL,      2, 1),
        DEF_FIXED("viobus",     R8A779G0_CLK_VIOBUS,    CLK_VIO,        1, 1),
        DEF_FIXED("viobusd2",   R8A779G0_CLK_VIOBUSD2,  CLK_VIO,        2, 1),
        DEF_FIXED("vcbus",      R8A779G0_CLK_VCBUS,     CLK_VC,         1, 1),
@@ -193,7 +194,7 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = {
        DEF_MOD("msi4",         622,    R8A779G0_CLK_MSO),
        DEF_MOD("msi5",         623,    R8A779G0_CLK_MSO),
        DEF_MOD("pciec0",       624,    R8A779G0_CLK_S0D2_HSC),
-       DEF_MOD("pscie1",       625,    R8A779G0_CLK_S0D2_HSC),
+       DEF_MOD("pciec1",       625,    R8A779G0_CLK_S0D2_HSC),
        DEF_MOD("pwm",          628,    R8A779G0_CLK_SASYNCPERD4),
        DEF_MOD("rpc-if",       629,    R8A779G0_CLK_RPCD2),
        DEF_MOD("scif0",        702,    R8A779G0_CLK_SASYNCPERD4),
@@ -232,10 +233,10 @@ static const struct mssr_mod_clk r8a779g0_mod_clks[] __initconst = {
        DEF_MOD("cmt1",         911,    R8A779G0_CLK_R),
        DEF_MOD("cmt2",         912,    R8A779G0_CLK_R),
        DEF_MOD("cmt3",         913,    R8A779G0_CLK_R),
-       DEF_MOD("pfc0",         915,    R8A779G0_CLK_CL16M),
-       DEF_MOD("pfc1",         916,    R8A779G0_CLK_CL16M),
-       DEF_MOD("pfc2",         917,    R8A779G0_CLK_CL16M),
-       DEF_MOD("pfc3",         918,    R8A779G0_CLK_CL16M),
+       DEF_MOD("pfc0",         915,    R8A779G0_CLK_CP),
+       DEF_MOD("pfc1",         916,    R8A779G0_CLK_CP),
+       DEF_MOD("pfc2",         917,    R8A779G0_CLK_CP),
+       DEF_MOD("pfc3",         918,    R8A779G0_CLK_CP),
        DEF_MOD("tsc",          919,    R8A779G0_CLK_CL16M),
        DEF_MOD("tsn",          2723,   R8A779G0_CLK_S0D4_HSC),
        DEF_MOD("ssiu",         2926,   R8A779G0_CLK_S0D6_PER),
diff --git a/drivers/clk/renesas/r8a779h0-cpg-mssr.c b/drivers/clk/renesas/r8a779h0-cpg-mssr.c
new file mode 100644 (file)
index 0000000..71f67a1
--- /dev/null
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * r8a779h0 Clock Pulse Generator / Module Standby and Software Reset
+ *
+ * Copyright (C) 2023 Renesas Electronics Corp.
+ *
+ * Based on r8a779g0-cpg-mssr.c
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/soc/renesas/rcar-rst.h>
+
+#include <dt-bindings/clock/renesas,r8a779h0-cpg-mssr.h>
+
+#include "renesas-cpg-mssr.h"
+#include "rcar-gen4-cpg.h"
+
+enum clk_ids {
+       /* Core Clock Outputs exported to DT */
+       LAST_DT_CORE_CLK = R8A779H0_CLK_R,
+
+       /* External Input Clocks */
+       CLK_EXTAL,
+       CLK_EXTALR,
+
+       /* Internal Core Clocks */
+       CLK_MAIN,
+       CLK_PLL1,
+       CLK_PLL2,
+       CLK_PLL3,
+       CLK_PLL4,
+       CLK_PLL5,
+       CLK_PLL6,
+       CLK_PLL1_DIV2,
+       CLK_PLL2_DIV2,
+       CLK_PLL3_DIV2,
+       CLK_PLL4_DIV2,
+       CLK_PLL4_DIV5,
+       CLK_PLL5_DIV2,
+       CLK_PLL5_DIV4,
+       CLK_PLL6_DIV2,
+       CLK_S0,
+       CLK_S0_VIO,
+       CLK_S0_VC,
+       CLK_S0_HSC,
+       CLK_SASYNCPER,
+       CLK_SV_VIP,
+       CLK_SV_IR,
+       CLK_IMPASRC,
+       CLK_IMPBSRC,
+       CLK_VIOSRC,
+       CLK_VCSRC,
+       CLK_SDSRC,
+       CLK_RPCSRC,
+       CLK_OCO,
+
+       /* Module Clocks */
+       MOD_CLK_BASE
+};
+
+static const struct cpg_core_clk r8a779h0_core_clks[] = {
+       /* External Clock Inputs */
+       DEF_INPUT("extal",      CLK_EXTAL),
+       DEF_INPUT("extalr",     CLK_EXTALR),
+
+       /* Internal Core Clocks */
+       DEF_BASE(".main", CLK_MAIN,     CLK_TYPE_GEN4_MAIN,     CLK_EXTAL),
+       DEF_BASE(".pll1", CLK_PLL1,     CLK_TYPE_GEN4_PLL1,     CLK_MAIN),
+       DEF_BASE(".pll2", CLK_PLL2,     CLK_TYPE_GEN4_PLL2,     CLK_MAIN),
+       DEF_BASE(".pll3", CLK_PLL3,     CLK_TYPE_GEN4_PLL3,     CLK_MAIN),
+       DEF_BASE(".pll4", CLK_PLL4,     CLK_TYPE_GEN4_PLL4,     CLK_MAIN),
+       DEF_BASE(".pll5", CLK_PLL5,     CLK_TYPE_GEN4_PLL5,     CLK_MAIN),
+       DEF_BASE(".pll6", CLK_PLL6,     CLK_TYPE_GEN4_PLL6,     CLK_MAIN),
+
+       DEF_FIXED(".pll1_div2", CLK_PLL1_DIV2,  CLK_PLL1,       2, 1),
+       DEF_FIXED(".pll2_div2", CLK_PLL2_DIV2,  CLK_PLL2,       2, 1),
+       DEF_FIXED(".pll3_div2", CLK_PLL3_DIV2,  CLK_PLL3,       2, 1),
+       DEF_FIXED(".pll4_div2", CLK_PLL4_DIV2,  CLK_PLL4,       2, 1),
+       DEF_FIXED(".pll4_div5", CLK_PLL4_DIV5,  CLK_PLL4,       5, 1),
+       DEF_FIXED(".pll5_div2", CLK_PLL5_DIV2,  CLK_PLL5,       2, 1),
+       DEF_FIXED(".pll5_div4", CLK_PLL5_DIV4,  CLK_PLL5_DIV2,  2, 1),
+       DEF_FIXED(".pll6_div2", CLK_PLL6_DIV2,  CLK_PLL6,       2, 1),
+       DEF_FIXED(".s0",        CLK_S0,         CLK_PLL1_DIV2,  2, 1),
+       DEF_FIXED(".s0_vio",    CLK_S0_VIO,     CLK_PLL1_DIV2,  2, 1),
+       DEF_FIXED(".s0_vc",     CLK_S0_VC,      CLK_PLL1_DIV2,  2, 1),
+       DEF_FIXED(".s0_hsc",    CLK_S0_HSC,     CLK_PLL1_DIV2,  2, 1),
+       DEF_FIXED(".sasyncper", CLK_SASYNCPER,  CLK_PLL5_DIV4,  3, 1),
+       DEF_FIXED(".sv_vip",    CLK_SV_VIP,     CLK_PLL1,       5, 1),
+       DEF_FIXED(".sv_ir",     CLK_SV_IR,      CLK_PLL1,       5, 1),
+       DEF_FIXED(".impasrc",   CLK_IMPASRC,    CLK_PLL1_DIV2,  2, 1),
+       DEF_FIXED(".impbsrc",   CLK_IMPBSRC,    CLK_PLL1,       4, 1),
+       DEF_FIXED(".viosrc",    CLK_VIOSRC,     CLK_PLL1,       6, 1),
+       DEF_FIXED(".vcsrc",     CLK_VCSRC,      CLK_PLL1,       6, 1),
+       DEF_BASE(".sdsrc",      CLK_SDSRC,      CLK_TYPE_GEN4_SDSRC, CLK_PLL5),
+       DEF_BASE(".rpcsrc",     CLK_RPCSRC,     CLK_TYPE_GEN4_RPCSRC, CLK_PLL5),
+       DEF_RATE(".oco",        CLK_OCO,        32768),
+
+       /* Core Clock Outputs */
+       DEF_GEN4_Z("zc0",       R8A779H0_CLK_ZC0,       CLK_TYPE_GEN4_Z,        CLK_PLL2_DIV2,  2, 0),
+       DEF_GEN4_Z("zc1",       R8A779H0_CLK_ZC1,       CLK_TYPE_GEN4_Z,        CLK_PLL2_DIV2,  2, 8),
+       DEF_GEN4_Z("zc2",       R8A779H0_CLK_ZC2,       CLK_TYPE_GEN4_Z,        CLK_PLL2_DIV2,  2, 32),
+       DEF_GEN4_Z("zc3",       R8A779H0_CLK_ZC3,       CLK_TYPE_GEN4_Z,        CLK_PLL2_DIV2,  2, 40),
+       DEF_FIXED("s0d2",       R8A779H0_CLK_S0D2,      CLK_S0,         2, 1),
+       DEF_FIXED("s0d3",       R8A779H0_CLK_S0D3,      CLK_S0,         3, 1),
+       DEF_FIXED("s0d4",       R8A779H0_CLK_S0D4,      CLK_S0,         4, 1),
+       DEF_FIXED("cl16m",      R8A779H0_CLK_CL16M,     CLK_S0,         48, 1),
+       DEF_FIXED("s0d2_rt",    R8A779H0_CLK_S0D2_RT,   CLK_S0,         2, 1),
+       DEF_FIXED("s0d3_rt",    R8A779H0_CLK_S0D3_RT,   CLK_S0,         3, 1),
+       DEF_FIXED("s0d4_rt",    R8A779H0_CLK_S0D4_RT,   CLK_S0,         4, 1),
+       DEF_FIXED("s0d6_rt",    R8A779H0_CLK_S0D6_RT,   CLK_S0,         6, 1),
+       DEF_FIXED("cl16m_rt",   R8A779H0_CLK_CL16M_RT,  CLK_S0,         48, 1),
+       DEF_FIXED("s0d2_per",   R8A779H0_CLK_S0D2_PER,  CLK_S0,         2, 1),
+       DEF_FIXED("s0d3_per",   R8A779H0_CLK_S0D3_PER,  CLK_S0,         3, 1),
+       DEF_FIXED("s0d4_per",   R8A779H0_CLK_S0D4_PER,  CLK_S0,         4, 1),
+       DEF_FIXED("s0d6_per",   R8A779H0_CLK_S0D6_PER,  CLK_S0,         6, 1),
+       DEF_FIXED("s0d12_per",  R8A779H0_CLK_S0D12_PER, CLK_S0,         12, 1),
+       DEF_FIXED("s0d24_per",  R8A779H0_CLK_S0D24_PER, CLK_S0,         24, 1),
+       DEF_FIXED("cl16m_per",  R8A779H0_CLK_CL16M_PER, CLK_S0,         48, 1),
+       DEF_FIXED("s0d2_mm",    R8A779H0_CLK_S0D2_MM,   CLK_S0,         2, 1),
+       DEF_FIXED("s0d4_mm",    R8A779H0_CLK_S0D4_MM,   CLK_S0,         4, 1),
+       DEF_FIXED("cl16m_mm",   R8A779H0_CLK_CL16M_MM,  CLK_S0,         48, 1),
+       DEF_FIXED("s0d2_u3dg",  R8A779H0_CLK_S0D2_U3DG, CLK_S0,         2, 1),
+       DEF_FIXED("s0d4_u3dg",  R8A779H0_CLK_S0D4_U3DG, CLK_S0,         4, 1),
+       DEF_FIXED("s0d1_vio",   R8A779H0_CLK_S0D1_VIO,  CLK_S0_VIO,     1, 1),
+       DEF_FIXED("s0d2_vio",   R8A779H0_CLK_S0D2_VIO,  CLK_S0_VIO,     2, 1),
+       DEF_FIXED("s0d4_vio",   R8A779H0_CLK_S0D4_VIO,  CLK_S0_VIO,     4, 1),
+       DEF_FIXED("s0d8_vio",   R8A779H0_CLK_S0D8_VIO,  CLK_S0_VIO,     8, 1),
+       DEF_FIXED("s0d1_vc",    R8A779H0_CLK_S0D1_VC,   CLK_S0_VC,      1, 1),
+       DEF_FIXED("s0d2_vc",    R8A779H0_CLK_S0D2_VC,   CLK_S0_VC,      2, 1),
+       DEF_FIXED("s0d4_vc",    R8A779H0_CLK_S0D4_VC,   CLK_S0_VC,      4, 1),
+       DEF_FIXED("s0d1_hsc",   R8A779H0_CLK_S0D1_HSC,  CLK_S0_HSC,     1, 1),
+       DEF_FIXED("s0d2_hsc",   R8A779H0_CLK_S0D2_HSC,  CLK_S0_HSC,     2, 1),
+       DEF_FIXED("s0d4_hsc",   R8A779H0_CLK_S0D4_HSC,  CLK_S0_HSC,     4, 1),
+       DEF_FIXED("s0d8_hsc",   R8A779H0_CLK_S0D8_HSC,  CLK_S0_HSC,     8, 1),
+       DEF_FIXED("cl16m_hsc",  R8A779H0_CLK_CL16M_HSC, CLK_S0_HSC,     48, 1),
+       DEF_FIXED("sasyncrt",   R8A779H0_CLK_SASYNCRT,  CLK_PLL5_DIV4,  48, 1),
+       DEF_FIXED("sasyncperd1", R8A779H0_CLK_SASYNCPERD1, CLK_SASYNCPER, 1, 1),
+       DEF_FIXED("sasyncperd2", R8A779H0_CLK_SASYNCPERD2, CLK_SASYNCPER, 2, 1),
+       DEF_FIXED("sasyncperd4", R8A779H0_CLK_SASYNCPERD4, CLK_SASYNCPER, 4, 1),
+       DEF_FIXED("svd1_vip",   R8A779H0_CLK_SVD1_VIP,  CLK_SV_VIP,     1, 1),
+       DEF_FIXED("svd2_vip",   R8A779H0_CLK_SVD2_VIP,  CLK_SV_VIP,     2, 1),
+       DEF_FIXED("svd1_ir",    R8A779H0_CLK_SVD1_IR,   CLK_SV_IR,      1, 1),
+       DEF_FIXED("svd2_ir",    R8A779H0_CLK_SVD2_IR,   CLK_SV_IR,      2, 1),
+       DEF_FIXED("cbfusa",     R8A779H0_CLK_CBFUSA,    CLK_EXTAL,      2, 1),
+       DEF_FIXED("cpex",       R8A779H0_CLK_CPEX,      CLK_EXTAL,      2, 1),
+       DEF_FIXED("cp",         R8A779H0_CLK_CP,        CLK_EXTAL,      2, 1),
+       DEF_FIXED("impad1",     R8A779H0_CLK_IMPAD1,    CLK_IMPASRC,    1, 1),
+       DEF_FIXED("impad4",     R8A779H0_CLK_IMPAD4,    CLK_IMPASRC,    4, 1),
+       DEF_FIXED("impb",       R8A779H0_CLK_IMPB,      CLK_IMPBSRC,    1, 1),
+       DEF_FIXED("viobusd1",   R8A779H0_CLK_VIOBUSD1,  CLK_VIOSRC,     1, 1),
+       DEF_FIXED("viobusd2",   R8A779H0_CLK_VIOBUSD2,  CLK_VIOSRC,     2, 1),
+       DEF_FIXED("vcbusd1",    R8A779H0_CLK_VCBUSD1,   CLK_VCSRC,      1, 1),
+       DEF_FIXED("vcbusd2",    R8A779H0_CLK_VCBUSD2,   CLK_VCSRC,      2, 1),
+       DEF_DIV6P1("canfd",     R8A779H0_CLK_CANFD,     CLK_PLL5_DIV4,  0x878),
+       DEF_DIV6P1("csi",       R8A779H0_CLK_CSI,       CLK_PLL5_DIV4,  0x880),
+       DEF_FIXED("dsiref",     R8A779H0_CLK_DSIREF,    CLK_PLL5_DIV4,  48, 1),
+       DEF_DIV6P1("dsiext",    R8A779H0_CLK_DSIEXT,    CLK_PLL5_DIV4,  0x884),
+       DEF_DIV6P1("mso",       R8A779H0_CLK_MSO,       CLK_PLL5_DIV4,  0x87c),
+
+       DEF_GEN4_SDH("sd0h",    R8A779H0_CLK_SD0H,      CLK_SDSRC,         0x870),
+       DEF_GEN4_SD("sd0",      R8A779H0_CLK_SD0,       R8A779H0_CLK_SD0H, 0x870),
+
+       DEF_BASE("rpc",         R8A779H0_CLK_RPC,       CLK_TYPE_GEN4_RPC, CLK_RPCSRC),
+       DEF_BASE("rpcd2",       R8A779H0_CLK_RPCD2,     CLK_TYPE_GEN4_RPCD2, R8A779H0_CLK_RPC),
+
+       DEF_GEN4_OSC("osc",     R8A779H0_CLK_OSC,       CLK_EXTAL,      8),
+       DEF_GEN4_MDSEL("r",     R8A779H0_CLK_R, 29, CLK_EXTALR, 1, CLK_OCO, 1),
+};
+
+static const struct mssr_mod_clk r8a779h0_mod_clks[] = {
+       DEF_MOD("avb0:rgmii0",  211,    R8A779H0_CLK_S0D8_HSC),
+       DEF_MOD("avb1:rgmii1",  212,    R8A779H0_CLK_S0D8_HSC),
+       DEF_MOD("avb2:rgmii2",  213,    R8A779H0_CLK_S0D8_HSC),
+       DEF_MOD("hscif0",       514,    R8A779H0_CLK_SASYNCPERD1),
+       DEF_MOD("hscif1",       515,    R8A779H0_CLK_SASYNCPERD1),
+       DEF_MOD("hscif2",       516,    R8A779H0_CLK_SASYNCPERD1),
+       DEF_MOD("hscif3",       517,    R8A779H0_CLK_SASYNCPERD1),
+       DEF_MOD("i2c0",         518,    R8A779H0_CLK_S0D6_PER),
+       DEF_MOD("i2c1",         519,    R8A779H0_CLK_S0D6_PER),
+       DEF_MOD("i2c2",         520,    R8A779H0_CLK_S0D6_PER),
+       DEF_MOD("i2c3",         521,    R8A779H0_CLK_S0D6_PER),
+       DEF_MOD("rpc-if",       629,    R8A779H0_CLK_RPCD2),
+       DEF_MOD("sdhi0",        706,    R8A779H0_CLK_SD0),
+       DEF_MOD("sydm1",        709,    R8A779H0_CLK_S0D6_PER),
+       DEF_MOD("sydm2",        710,    R8A779H0_CLK_S0D6_PER),
+       DEF_MOD("wdt1:wdt0",    907,    R8A779H0_CLK_R),
+       DEF_MOD("pfc0",         915,    R8A779H0_CLK_CP),
+       DEF_MOD("pfc1",         916,    R8A779H0_CLK_CP),
+       DEF_MOD("pfc2",         917,    R8A779H0_CLK_CP),
+};
+
+/*
+ * CPG Clock Data
+ */
+/*
+ *   MD         EXTAL          PLL1    PLL2    PLL3    PLL4    PLL5    PLL6    OSC
+ * 14 13 (MHz)
+ * ------------------------------------------------------------------------
+ * 0  0         16.66 / 1      x192    x204    x192    x144    x192    x168    /16
+ * 0  1         20    / 1      x160    x170    x160    x120    x160    x140    /19
+ * 1  0         Prohibited setting
+ * 1  1         33.33 / 2      x192    x204    x192    x144    x192    x168    /32
+ */
+#define CPG_PLL_CONFIG_INDEX(md)       ((((md) & BIT(14)) >> 13) | \
+                                        (((md) & BIT(13)) >> 13))
+
+static const struct rcar_gen4_cpg_pll_config cpg_pll_configs[4] = {
+       /* EXTAL div    PLL1 mult/div   PLL2 mult/div   PLL3 mult/div   PLL4 mult/div   PLL5 mult/div   PLL6 mult/div   OSC prediv */
+       { 1,            192,    1,      240,    1,      192,    1,      240,    1,      192,    1,      168,    1,      16,     },
+       { 1,            160,    1,      200,    1,      160,    1,      200,    1,      160,    1,      140,    1,      19,     },
+       { 0,            0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      0,      },
+       { 2,            192,    1,      240,    1,      192,    1,      240,    1,      192,    1,      168,    1,      32,     },
+};
+
+static int __init r8a779h0_cpg_mssr_init(struct device *dev)
+{
+       const struct rcar_gen4_cpg_pll_config *cpg_pll_config;
+       u32 cpg_mode;
+       int error;
+
+       error = rcar_rst_read_mode_pins(&cpg_mode);
+       if (error)
+               return error;
+
+       cpg_pll_config = &cpg_pll_configs[CPG_PLL_CONFIG_INDEX(cpg_mode)];
+       if (!cpg_pll_config->extal_div) {
+               dev_err(dev, "Prohibited setting (cpg_mode=0x%x)\n", cpg_mode);
+               return -EINVAL;
+       }
+
+       return rcar_gen4_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
+}
+
+const struct cpg_mssr_info r8a779h0_cpg_mssr_info __initconst = {
+       /* Core Clocks */
+       .core_clks = r8a779h0_core_clks,
+       .num_core_clks = ARRAY_SIZE(r8a779h0_core_clks),
+       .last_dt_core_clk = LAST_DT_CORE_CLK,
+       .num_total_core_clks = MOD_CLK_BASE,
+
+       /* Module Clocks */
+       .mod_clks = r8a779h0_mod_clks,
+       .num_mod_clks = ARRAY_SIZE(r8a779h0_mod_clks),
+       .num_hw_mod_clks = 30 * 32,
+
+       /* Callbacks */
+       .init = r8a779h0_cpg_mssr_init,
+       .cpg_clk_register = rcar_gen4_cpg_clk_register,
+
+       .reg_layout = CLK_REG_LAYOUT_RCAR_GEN4,
+};
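
The new driver picks its PLL parameters by decoding the MD14/MD13 mode pins: CPG_PLL_CONFIG_INDEX() packs MD14 into bit 1 and MD13 into bit 0 of the table index, and an extal_div of 0 marks the prohibited setting. A standalone check of that decoding (BIT() redefined locally for illustration; the extal_div values come from cpg_pll_configs above):

#include <stdio.h>

#define BIT(n)	(1u << (n))

/* Same packing as the driver: index = (MD14 << 1) | MD13. */
#define CPG_PLL_CONFIG_INDEX(md)	((((md) & BIT(14)) >> 13) | \
					 (((md) & BIT(13)) >> 13))

int main(void)
{
	/* extal_div column of cpg_pll_configs[]; 0 = prohibited MD14/13=1/0 */
	static const unsigned int extal_div[4] = { 1, 1, 0, 2 };
	unsigned int md;

	for (md = 0; md < 4; md++) {
		unsigned int mode = ((md & 2) ? BIT(14) : 0) |
				    ((md & 1) ? BIT(13) : 0);
		unsigned int idx = CPG_PLL_CONFIG_INDEX(mode);

		printf("MD14/13=%u%u -> index %u%s\n", (md >> 1) & 1, md & 1,
		       idx, extal_div[idx] ? "" : " (prohibited)");
	}
	return 0;
}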
index b70bb378ab469bf6002c1358e1dc71f27a53076c..33532673d25d736b22d6c97c73d79a1bf5ee2d05 100644 (file)
@@ -48,6 +48,7 @@ enum clk_ids {
        CLK_SEL_PLL3_3,
        CLK_DIV_PLL3_C,
 #ifdef CONFIG_ARM64
+       CLK_M2_DIV2,
        CLK_PLL5,
        CLK_PLL5_500,
        CLK_PLL5_250,
@@ -87,7 +88,7 @@ static const struct clk_div_table dtable_1_32[] = {
 /* Mux clock tables */
 static const char * const sel_pll3_3[] = { ".pll3_533", ".pll3_400" };
 static const char * const sel_pll6_2[] = { ".pll6_250", ".pll5_250" };
-static const char * const sel_shdi[] = { ".clk_533", ".clk_400", ".clk_266" };
+static const char * const sel_sdhi[] = { ".clk_533", ".clk_400", ".clk_266" };
 
 static const u32 mtable_sdhi[] = { 1, 2, 3 };
 
@@ -136,12 +137,16 @@ static const struct cpg_core_clk r9a07g043_core_clks[] __initconst = {
        DEF_MUX("HP", R9A07G043_CLK_HP, SEL_PLL6_2, sel_pll6_2),
        DEF_FIXED("SPI0", R9A07G043_CLK_SPI0, CLK_DIV_PLL3_C, 1, 2),
        DEF_FIXED("SPI1", R9A07G043_CLK_SPI1, CLK_DIV_PLL3_C, 1, 4),
-       DEF_SD_MUX("SD0", R9A07G043_CLK_SD0, SEL_SDHI0, SEL_SDHI0_STS, sel_shdi,
+       DEF_SD_MUX("SD0", R9A07G043_CLK_SD0, SEL_SDHI0, SEL_SDHI0_STS, sel_sdhi,
                   mtable_sdhi, 0, rzg2l_cpg_sd_clk_mux_notifier),
-       DEF_SD_MUX("SD1", R9A07G043_CLK_SD1, SEL_SDHI1, SEL_SDHI0_STS, sel_shdi,
+       DEF_SD_MUX("SD1", R9A07G043_CLK_SD1, SEL_SDHI1, SEL_SDHI1_STS, sel_sdhi,
                   mtable_sdhi, 0, rzg2l_cpg_sd_clk_mux_notifier),
        DEF_FIXED("SD0_DIV4", CLK_SD0_DIV4, R9A07G043_CLK_SD0, 1, 4),
        DEF_FIXED("SD1_DIV4", CLK_SD1_DIV4, R9A07G043_CLK_SD1, 1, 4),
+#ifdef CONFIG_ARM64
+       DEF_FIXED("M2", R9A07G043_CLK_M2, CLK_PLL3_533, 1, 2),
+       DEF_FIXED("M2_DIV2", CLK_M2_DIV2, R9A07G043_CLK_M2, 1, 2),
+#endif
 };
 
 static struct rzg2l_mod_clk r9a07g043_mod_clks[] = {
@@ -195,6 +200,16 @@ static struct rzg2l_mod_clk r9a07g043_mod_clks[] = {
                                0x554, 6),
        DEF_MOD("sdhi1_aclk",   R9A07G043_SDHI1_ACLK, R9A07G043_CLK_P1,
                                0x554, 7),
+#ifdef CONFIG_ARM64
+       DEF_MOD("cru_sysclk",   R9A07G043_CRU_SYSCLK, CLK_M2_DIV2,
+                               0x564, 0),
+       DEF_MOD("cru_vclk",     R9A07G043_CRU_VCLK, R9A07G043_CLK_M2,
+                               0x564, 1),
+       DEF_MOD("cru_pclk",     R9A07G043_CRU_PCLK, R9A07G043_CLK_ZT,
+                               0x564, 2),
+       DEF_MOD("cru_aclk",     R9A07G043_CRU_ACLK, R9A07G043_CLK_M0,
+                               0x564, 3),
+#endif
        DEF_MOD("ssi0_pclk",    R9A07G043_SSI0_PCLK2, R9A07G043_CLK_P0,
                                0x570, 0),
        DEF_MOD("ssi0_sfr",     R9A07G043_SSI0_PCLK_SFR, R9A07G043_CLK_P0,
@@ -286,6 +301,11 @@ static struct rzg2l_reset r9a07g043_resets[] = {
        DEF_RST(R9A07G043_SPI_RST, 0x850, 0),
        DEF_RST(R9A07G043_SDHI0_IXRST, 0x854, 0),
        DEF_RST(R9A07G043_SDHI1_IXRST, 0x854, 1),
+#ifdef CONFIG_ARM64
+       DEF_RST(R9A07G043_CRU_CMN_RSTB, 0x864, 0),
+       DEF_RST(R9A07G043_CRU_PRESETN, 0x864, 1),
+       DEF_RST(R9A07G043_CRU_ARESETN, 0x864, 2),
+#endif
        DEF_RST(R9A07G043_SSI0_RST_M2_REG, 0x870, 0),
        DEF_RST(R9A07G043_SSI1_RST_M2_REG, 0x870, 1),
        DEF_RST(R9A07G043_SSI2_RST_M2_REG, 0x870, 2),
@@ -331,6 +351,13 @@ static const unsigned int r9a07g043_crit_mod_clks[] __initconst = {
        MOD_CLK_BASE + R9A07G043_DMAC_ACLK,
 };
 
+#ifdef CONFIG_ARM64
+static const unsigned int r9a07g043_no_pm_mod_clks[] = {
+       MOD_CLK_BASE + R9A07G043_CRU_SYSCLK,
+       MOD_CLK_BASE + R9A07G043_CRU_VCLK,
+};
+#endif
+
 const struct rzg2l_cpg_info r9a07g043_cpg_info = {
        /* Core Clocks */
        .core_clks = r9a07g043_core_clks,
@@ -347,6 +374,10 @@ const struct rzg2l_cpg_info r9a07g043_cpg_info = {
        .num_mod_clks = ARRAY_SIZE(r9a07g043_mod_clks),
 #ifdef CONFIG_ARM64
        .num_hw_mod_clks = R9A07G043_TSU_PCLK + 1,
+
+       /* No PM Module Clocks */
+       .no_pm_mod_clks = r9a07g043_no_pm_mod_clks,
+       .num_no_pm_mod_clks = ARRAY_SIZE(r9a07g043_no_pm_mod_clks),
 #endif
 #ifdef CONFIG_RISCV
        .num_hw_mod_clks = R9A07G043_IAX45_PCLK + 1,
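
The CRU clocks are listed in no_pm_mod_clks so they are kept out of the PM clock domain. Conceptually this is a membership test against the table before a module clock is attached to runtime PM; a hedged sketch of such a lookup (IDs and base value hypothetical, not the driver's):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MOD_CLK_BASE	1000	/* hypothetical; the driver's enum supplies it */

/* Stand-ins for MOD_CLK_BASE + R9A07G043_CRU_{SYSCLK,VCLK} from the table */
static const unsigned int no_pm_mod_clks[] = {
	MOD_CLK_BASE + 6, MOD_CLK_BASE + 7,
};

/* True if a module clock must stay out of the PM clock domain. */
static bool clk_is_no_pm(unsigned int id)
{
	size_t i;

	for (i = 0; i < sizeof(no_pm_mod_clks) / sizeof(no_pm_mod_clks[0]); i++)
		if (no_pm_mod_clks[i] == id)
			return true;
	return false;
}

int main(void)
{
	printf("%d %d\n", clk_is_no_pm(MOD_CLK_BASE + 6),
	       clk_is_no_pm(MOD_CLK_BASE + 1));	/* 1 0 */
	return 0;
}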
index 1047278c9079a78b66a41b3fc8fa06bd896907a1..48404cafea3f51d3b01c86835b9cae8032c383fa 100644 (file)
@@ -106,7 +106,7 @@ static const struct clk_div_table dtable_16_128[] = {
 static const char * const sel_pll3_3[] = { ".pll3_533", ".pll3_400" };
 static const char * const sel_pll5_4[] = { ".pll5_foutpostdiv", ".pll5_fout1ph0" };
 static const char * const sel_pll6_2[] = { ".pll6_250", ".pll5_250" };
-static const char * const sel_shdi[] = { ".clk_533", ".clk_400", ".clk_266" };
+static const char * const sel_sdhi[] = { ".clk_533", ".clk_400", ".clk_266" };
 static const char * const sel_gpu2[] = { ".pll6", ".pll3_div2_2" };
 
 static const u32 mtable_sdhi[] = { 1, 2, 3 };
@@ -176,9 +176,9 @@ static const struct {
                DEF_MUX("HP", R9A07G044_CLK_HP, SEL_PLL6_2, sel_pll6_2),
                DEF_FIXED("SPI0", R9A07G044_CLK_SPI0, CLK_DIV_PLL3_C, 1, 2),
                DEF_FIXED("SPI1", R9A07G044_CLK_SPI1, CLK_DIV_PLL3_C, 1, 4),
-               DEF_SD_MUX("SD0", R9A07G044_CLK_SD0, SEL_SDHI0, SEL_SDHI0_STS, sel_shdi,
+               DEF_SD_MUX("SD0", R9A07G044_CLK_SD0, SEL_SDHI0, SEL_SDHI0_STS, sel_sdhi,
                           mtable_sdhi, 0, rzg2l_cpg_sd_clk_mux_notifier),
-               DEF_SD_MUX("SD1", R9A07G044_CLK_SD1, SEL_SDHI1, SEL_SDHI0_STS, sel_shdi,
+               DEF_SD_MUX("SD1", R9A07G044_CLK_SD1, SEL_SDHI1, SEL_SDHI1_STS, sel_sdhi,
                           mtable_sdhi, 0, rzg2l_cpg_sd_clk_mux_notifier),
                DEF_FIXED("SD0_DIV4", CLK_SD0_DIV4, R9A07G044_CLK_SD0, 1, 4),
                DEF_FIXED("SD1_DIV4", CLK_SD1_DIV4, R9A07G044_CLK_SD1, 1, 4),
index 2582ba95256eadc8656ea5dde9b79e24c425c90c..c3e6da2de197f495fb7b0f0a66d767d487e27f3c 100644 (file)
@@ -193,6 +193,8 @@ static const struct rzg2l_mod_clk r9a08g045_mod_clks[] = {
        DEF_MOD("ia55_pclk",            R9A08G045_IA55_PCLK, R9A08G045_CLK_P2, 0x518, 0),
        DEF_MOD("ia55_clk",             R9A08G045_IA55_CLK, R9A08G045_CLK_P1, 0x518, 1),
        DEF_MOD("dmac_aclk",            R9A08G045_DMAC_ACLK, R9A08G045_CLK_P3, 0x52c, 0),
+       DEF_MOD("wdt0_pclk",            R9A08G045_WDT0_PCLK, R9A08G045_CLK_P0, 0x548, 0),
+       DEF_MOD("wdt0_clk",             R9A08G045_WDT0_CLK, R9A08G045_OSCCLK, 0x548, 1),
        DEF_MOD("sdhi0_imclk",          R9A08G045_SDHI0_IMCLK, CLK_SD0_DIV4, 0x554, 0),
        DEF_MOD("sdhi0_imclk2",         R9A08G045_SDHI0_IMCLK2, CLK_SD0_DIV4, 0x554, 1),
        DEF_MOD("sdhi0_clk_hs",         R9A08G045_SDHI0_CLK_HS, R9A08G045_CLK_SD0, 0x554, 2),
@@ -219,6 +221,7 @@ static const struct rzg2l_reset r9a08g045_resets[] = {
        DEF_RST(R9A08G045_GIC600_GICRESET_N, 0x814, 0),
        DEF_RST(R9A08G045_GIC600_DBG_GICRESET_N, 0x814, 1),
        DEF_RST(R9A08G045_IA55_RESETN, 0x818, 0),
+       DEF_RST(R9A08G045_WDT0_PRESETN, 0x848, 0),
        DEF_RST(R9A08G045_SDHI0_IXRST, 0x854, 0),
        DEF_RST(R9A08G045_SDHI1_IXRST, 0x854, 1),
        DEF_RST(R9A08G045_SDHI2_IXRST, 0x854, 2),
index c68d8b987054131b1e92610ac704e5a4e7130f28..a2bbdad021ed8e952e69d1dfc2573bae6d25d098 100644 (file)
@@ -179,7 +179,8 @@ static struct clk * __init cpg_pll_clk_register(const char *name,
  */
 #define CPG_FRQCRB                     0x00000804
 #define CPG_FRQCRB_KICK                        BIT(31)
-#define CPG_FRQCRC                     0x00000808
+#define CPG_FRQCRC0                    0x00000808
+#define CPG_FRQCRC1                    0x000008e0
 
 struct cpg_z_clk {
        struct clk_hw hw;
@@ -304,7 +305,12 @@ static struct clk * __init cpg_z_clk_register(const char *name,
        init.parent_names = &parent_name;
        init.num_parents = 1;
 
-       zclk->reg = reg + CPG_FRQCRC;
+       if (offset < 32) {
+               zclk->reg = reg + CPG_FRQCRC0;
+       } else {
+               zclk->reg = reg + CPG_FRQCRC1;
+               offset -= 32;
+       }
        zclk->kick_reg = reg + CPG_FRQCRB;
        zclk->hw.init = &init;
        zclk->mask = GENMASK(offset + 4, offset);
index cb80d1bf6c7c6a828aa4f8bfd0ea6d8c967b694b..1b421b8097965b70837eac19a05a981b1caa3acb 100644 (file)
@@ -142,6 +142,8 @@ static const u16 srstclr_for_gen4[] = {
  * @reset_clear_regs:  Pointer to reset clearing registers array
  * @smstpcr_saved: [].mask: Mask of SMSTPCR[] bits under our control
  *                 [].val: Saved values of SMSTPCR[]
+ * @reserved_ids: Temporarily used list of reserved clock IDs
+ * @num_reserved_ids: Number of entries in the reserved clock ID list
  * @clks: Array containing all Core and Module Clocks
  */
 struct cpg_mssr_priv {
@@ -168,6 +170,9 @@ struct cpg_mssr_priv {
                u32 val;
        } smstpcr_saved[ARRAY_SIZE(mstpsr_for_gen4)];
 
+       unsigned int *reserved_ids;
+       unsigned int num_reserved_ids;
+
        struct clk *clks[];
 };
 
@@ -453,6 +458,19 @@ static void __init cpg_mssr_register_mod_clk(const struct mssr_mod_clk *mod,
                        break;
                }
 
+       /*
+        * Ignore reserved devices; see cpg_mssr_reserved_init().
+        */
+       for (i = 0; i < priv->num_reserved_ids; i++) {
+               if (id == priv->reserved_ids[i]) {
+                       dev_info(dev, "Ignoring mod not assigned to Linux (%s)\n", mod->name);
+                       init.flags |= CLK_IGNORE_UNUSED;
+                       break;
+               }
+       }
+
        clk = clk_register(NULL, &clock->hw);
        if (IS_ERR(clk))
                goto fail;
@@ -853,6 +871,12 @@ static const struct of_device_id cpg_mssr_match[] = {
                .compatible = "renesas,r8a779g0-cpg-mssr",
                .data = &r8a779g0_cpg_mssr_info,
        },
+#endif
+#ifdef CONFIG_CLK_R8A779H0
+       {
+               .compatible = "renesas,r8a779h0-cpg-mssr",
+               .data = &r8a779h0_cpg_mssr_info,
+       },
 #endif
        { /* sentinel */ }
 };
@@ -949,6 +973,78 @@ static const struct dev_pm_ops cpg_mssr_pm = {
 #define DEV_PM_OPS     NULL
 #endif /* CONFIG_PM_SLEEP && CONFIG_ARM_PSCI_FW */
 
+static void __init cpg_mssr_reserved_exit(struct cpg_mssr_priv *priv)
+{
+       kfree(priv->reserved_ids);
+}
+
+static int __init cpg_mssr_reserved_init(struct cpg_mssr_priv *priv,
+                                        const struct cpg_mssr_info *info)
+{
+       struct device_node *soc = of_find_node_by_path("/soc");
+       struct device_node *node;
+       uint32_t args[MAX_PHANDLE_ARGS];
+       unsigned int *ids = NULL;
+       unsigned int num = 0;
+
+       /*
+        * Because clk_disable_unused() disables all unused clocks, a device
+        * assigned to a non-Linux system would have its clocks disabled when
+        * Linux boots.
+        *
+        * To avoid this, renesas-cpg-mssr assumes that a device with
+        * status = "reserved" is assigned to a non-Linux system, and adds the
+        * CLK_IGNORE_UNUSED flag to its CPG_MOD clocks; see also
+        *      cpg_mssr_register_mod_clk()
+        *
+        *      scif5: serial@e6f30000 {
+        *              ...
+        * =>           clocks = <&cpg CPG_MOD 202>,
+        *                       <&cpg CPG_CORE R8A7795_CLK_S3D1>,
+        *                       <&scif_clk>;
+        *                       ...
+        *               status = "reserved";
+        *      };
+        */
+       for_each_reserved_child_of_node(soc, node) {
+               struct of_phandle_iterator it;
+               int rc;
+
+               of_for_each_phandle(&it, rc, node, "clocks", "#clock-cells", -1) {
+                       int idx;
+
+                       if (it.node != priv->np)
+                               continue;
+
+                       if (of_phandle_iterator_args(&it, args, MAX_PHANDLE_ARGS) != 2)
+                               continue;
+
+                       if (args[0] != CPG_MOD)
+                               continue;
+
+                       ids = krealloc_array(ids, (num + 1), sizeof(*ids), GFP_KERNEL);
+                       if (!ids) {
+                               of_node_put(it.node);
+                               return -ENOMEM;
+                       }
+
+                       if (priv->reg_layout == CLK_REG_LAYOUT_RZ_A)
+                               idx = MOD_CLK_PACK_10(args[1]); /* for DEF_MOD_STB() */
+                       else
+                               idx = MOD_CLK_PACK(args[1]);    /* for DEF_MOD() */
+
+                       ids[num] = info->num_total_core_clks + idx;
+
+                       num++;
+               }
+       }
+
+       priv->num_reserved_ids  = num;
+       priv->reserved_ids      = ids;
+
+       return 0;
+}
+
 static int __init cpg_mssr_common_init(struct device *dev,
                                       struct device_node *np,
                                       const struct cpg_mssr_info *info)
@@ -1003,14 +1099,20 @@ static int __init cpg_mssr_common_init(struct device *dev,
        for (i = 0; i < nclks; i++)
                priv->clks[i] = ERR_PTR(-ENOENT);
 
-       error = of_clk_add_provider(np, cpg_mssr_clk_src_twocell_get, priv);
+       error = cpg_mssr_reserved_init(priv, info);
        if (error)
                goto out_err;
 
+       error = of_clk_add_provider(np, cpg_mssr_clk_src_twocell_get, priv);
+       if (error)
+               goto reserve_err;
+
        cpg_mssr_priv = priv;
 
        return 0;
 
+reserve_err:
+       cpg_mssr_reserved_exit(priv);
 out_err:
        if (priv->base)
                iounmap(priv->base);
@@ -1070,22 +1172,23 @@ static int __init cpg_mssr_probe(struct platform_device *pdev)
                                         cpg_mssr_del_clk_provider,
                                         np);
        if (error)
-               return error;
+               goto reserve_exit;
 
        error = cpg_mssr_add_clk_domain(dev, info->core_pm_clks,
                                        info->num_core_pm_clks);
        if (error)
-               return error;
+               goto reserve_exit;
 
        /* Reset Controller not supported for Standby Control SoCs */
        if (priv->reg_layout == CLK_REG_LAYOUT_RZ_A)
-               return 0;
+               goto reserve_exit;
 
        error = cpg_mssr_reset_controller_register(priv);
-       if (error)
-               return error;
 
-       return 0;
+reserve_exit:
+       cpg_mssr_reserved_exit(priv);
+
+       return error;
 }
 
 static struct platform_driver cpg_mssr_driver = {
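
The reserved-mod handling above stores IDs as num_total_core_clks plus a packed module index. Assuming the usual DEF_MOD() encoding, where an id like 915 means register 9, bit 15, the packed index works out to reg * 32 + bit; a small sketch of that arithmetic (base value hypothetical):

#include <stdio.h>

/*
 * CPG_MOD ids like 915 encode register 9, bit 15. MOD_CLK_PACK() turns
 * that into a flat index (reg * 32 + bit); the reserved-id list stores
 * num_total_core_clks + that index.
 */
static unsigned int mod_clk_pack(unsigned int id)
{
	return (id / 100) * 32 + (id % 100);
}

int main(void)
{
	unsigned int num_total_core_clks = 64;	/* hypothetical */
	unsigned int id = 915;			/* e.g. pfc0 above */

	printf("CPG_MOD %u -> reserved id %u\n", id,
	       num_total_core_clks + mod_clk_pack(id));
	return 0;
}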
index 80c5b462924ac34326b0fbfaaf4f9c2c339f594a..a1d6e0cbcff94813f441e518852ac22f4735ba6d 100644 (file)
@@ -180,6 +180,7 @@ extern const struct cpg_mssr_info r8a77995_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a779a0_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a779f0_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a779g0_cpg_mssr_info;
+extern const struct cpg_mssr_info r8a779h0_cpg_mssr_info;
 
 void __init cpg_mssr_early_init(struct device_node *np,
                                const struct cpg_mssr_info *info);
index 9316e5c8a0ea58bb2de806225cb1b1b4b4e89d36..4f1a5782c2308bd54ff0817b1a6f80ad8b4f5c61 100644 (file)
@@ -597,7 +597,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
        COMPOSITE(0, "clk_i2s0_div", mux_pll_src_cpll_gpll_p, 0,
                        RK3399_CLKSEL_CON(28), 7, 1, MFLAGS, 0, 7, DFLAGS,
                        RK3399_CLKGATE_CON(8), 3, GFLAGS),
-       COMPOSITE_FRACMUX(0, "clk_i2s0_frac", "clk_i2s0_div", 0,
+       COMPOSITE_FRACMUX(0, "clk_i2s0_frac", "clk_i2s0_div", CLK_SET_RATE_PARENT,
                        RK3399_CLKSEL_CON(96), 0,
                        RK3399_CLKGATE_CON(8), 4, GFLAGS,
                        &rk3399_i2s0_fracmux),
@@ -607,7 +607,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
        COMPOSITE(0, "clk_i2s1_div", mux_pll_src_cpll_gpll_p, 0,
                        RK3399_CLKSEL_CON(29), 7, 1, MFLAGS, 0, 7, DFLAGS,
                        RK3399_CLKGATE_CON(8), 6, GFLAGS),
-       COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_div", 0,
+       COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_div", CLK_SET_RATE_PARENT,
                        RK3399_CLKSEL_CON(97), 0,
                        RK3399_CLKGATE_CON(8), 7, GFLAGS,
                        &rk3399_i2s1_fracmux),
@@ -617,7 +617,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
        COMPOSITE(0, "clk_i2s2_div", mux_pll_src_cpll_gpll_p, 0,
                        RK3399_CLKSEL_CON(30), 7, 1, MFLAGS, 0, 7, DFLAGS,
                        RK3399_CLKGATE_CON(8), 9, GFLAGS),
-       COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_div", 0,
+       COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_div", CLK_SET_RATE_PARENT,
                        RK3399_CLKSEL_CON(98), 0,
                        RK3399_CLKGATE_CON(8), 10, GFLAGS,
                        &rk3399_i2s2_fracmux),
index b786ddc9af2af6ae48892558b73a8865bb1f117d..8cb21d10beca2a48a87738a5b11963570718a665 100644 (file)
@@ -78,6 +78,7 @@ static struct rockchip_pll_rate_table rk3568_pll_rates[] = {
        RK3036_PLL_RATE(200000000, 1, 100, 3, 4, 1, 0),
        RK3036_PLL_RATE(148500000, 1, 99, 4, 4, 1, 0),
        RK3036_PLL_RATE(135000000, 2, 45, 4, 1, 1, 0),
+       RK3036_PLL_RATE(128000000, 1, 16, 3, 1, 1, 0),
        RK3036_PLL_RATE(126400000, 1, 79, 5, 3, 1, 0),
        RK3036_PLL_RATE(119000000, 3, 119, 4, 2, 1, 0),
        RK3036_PLL_RATE(115200000, 1, 24, 5, 1, 1, 0),
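
The added 128 MHz row can be sanity-checked against the usual RK3036-style integer PLL relation, rate = 24 MHz * fbdiv / (refdiv * postdiv1 * postdiv2), with dsmpd = 1 meaning no fractional contribution (the parameter order used here is an assumption based on the macro's typical layout):

#include <stdio.h>

int main(void)
{
	unsigned long long xin24m = 24000000;
	unsigned int refdiv = 1, fbdiv = 16, postdiv1 = 3, postdiv2 = 1;

	/* Integer mode (dsmpd = 1): rate = xin24m * fbdiv / (refdiv * postdivs) */
	unsigned long long rate = xin24m * fbdiv / (refdiv * postdiv1 * postdiv2);

	printf("%llu Hz\n", rate);	/* 128000000 */
	return 0;
}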
index 0b60ae78f9d88601fa7939dc96ef583e142e8d4b..b30279a96dc8af430d8898c6d700f7ef11338d51 100644 (file)
@@ -29,7 +29,7 @@
  * power, but avoids leaking implementation details into DT or hanging the
  * system.
  */
-#define GATE_LINK(_id, cname, pname, linkname, f, o, b, gf) \
+#define GATE_LINK(_id, cname, pname, linkedclk, f, o, b, gf) \
        GATE(_id, cname, pname, f, o, b, gf)
 #define RK3588_LINKED_CLK              CLK_IS_CRITICAL
 
@@ -1004,7 +1004,7 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = {
        GATE(PCLK_MAILBOX1, "pclk_mailbox1", "pclk_top_root", 0,
                        RK3588_CLKGATE_CON(16), 12, GFLAGS),
        GATE(PCLK_MAILBOX2, "pclk_mailbox2", "pclk_top_root", 0,
-               RK3588_CLKGATE_CON(16), 13, GFLAGS),
+                       RK3588_CLKGATE_CON(16), 13, GFLAGS),
        GATE(PCLK_PMU2, "pclk_pmu2", "pclk_top_root", CLK_IS_CRITICAL,
                        RK3588_CLKGATE_CON(19), 3, GFLAGS),
        GATE(PCLK_PMUCM0_INTMUX, "pclk_pmucm0_intmux", "pclk_top_root", CLK_IS_CRITICAL,
@@ -1851,8 +1851,6 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = {
                        RK3588_CLKGATE_CON(56), 0, GFLAGS),
        GATE(PCLK_TRNG0, "pclk_trng0", "pclk_vo0_root", 0,
                        RK3588_CLKGATE_CON(56), 1, GFLAGS),
-       GATE(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", CLK_IGNORE_UNUSED,
-                       RK3588_CLKGATE_CON(55), 10, GFLAGS),
        COMPOSITE(CLK_I2S4_8CH_TX_SRC, "clk_i2s4_8ch_tx_src", gpll_aupll_p, 0,
                        RK3588_CLKSEL_CON(118), 5, 1, MFLAGS, 0, 5, DFLAGS,
                        RK3588_CLKGATE_CON(56), 11, GFLAGS),
@@ -1998,8 +1996,6 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = {
                        RK3588_CLKGATE_CON(60), 9, GFLAGS),
        GATE(PCLK_TRNG1, "pclk_trng1", "pclk_vo1_root", 0,
                        RK3588_CLKGATE_CON(60), 10, GFLAGS),
-       GATE(0, "pclk_vo1grf", "pclk_vo1_root", CLK_IGNORE_UNUSED,
-                       RK3588_CLKGATE_CON(59), 12, GFLAGS),
        GATE(PCLK_S_EDP0, "pclk_s_edp0", "pclk_vo1_s_root", 0,
                        RK3588_CLKGATE_CON(59), 14, GFLAGS),
        GATE(PCLK_S_EDP1, "pclk_s_edp1", "pclk_vo1_s_root", 0,
@@ -2433,26 +2429,28 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = {
        GATE(ACLK_AV1, "aclk_av1", "aclk_av1_pre", 0,
                        RK3588_CLKGATE_CON(68), 2, GFLAGS),
 
-       GATE_LINK(ACLK_ISP1_PRE, "aclk_isp1_pre", "aclk_isp1_root", "aclk_vi_root", 0, RK3588_CLKGATE_CON(26), 6, GFLAGS),
-       GATE_LINK(HCLK_ISP1_PRE, "hclk_isp1_pre", "hclk_isp1_root", "hclk_vi_root", 0, RK3588_CLKGATE_CON(26), 8, GFLAGS),
-       GATE_LINK(HCLK_NVM, "hclk_nvm", "hclk_nvm_root", "aclk_nvm_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(31), 2, GFLAGS),
-       GATE_LINK(ACLK_USB, "aclk_usb", "aclk_usb_root", "aclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(42), 2, GFLAGS),
-       GATE_LINK(HCLK_USB, "hclk_usb", "hclk_usb_root", "hclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(42), 3, GFLAGS),
-       GATE_LINK(ACLK_JPEG_DECODER_PRE, "aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(44), 7, GFLAGS),
-       GATE_LINK(ACLK_VDPU_LOW_PRE, "aclk_vdpu_low_pre", "aclk_vdpu_low_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(44), 5, GFLAGS),
-       GATE_LINK(ACLK_RKVENC1_PRE, "aclk_rkvenc1_pre", "aclk_rkvenc1_root", "aclk_rkvenc0", 0, RK3588_CLKGATE_CON(48), 3, GFLAGS),
-       GATE_LINK(HCLK_RKVENC1_PRE, "hclk_rkvenc1_pre", "hclk_rkvenc1_root", "hclk_rkvenc0", 0, RK3588_CLKGATE_CON(48), 2, GFLAGS),
-       GATE_LINK(HCLK_RKVDEC0_PRE, "hclk_rkvdec0_pre", "hclk_rkvdec0_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(40), 5, GFLAGS),
-       GATE_LINK(ACLK_RKVDEC0_PRE, "aclk_rkvdec0_pre", "aclk_rkvdec0_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(40), 6, GFLAGS),
-       GATE_LINK(HCLK_RKVDEC1_PRE, "hclk_rkvdec1_pre", "hclk_rkvdec1_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(41), 4, GFLAGS),
-       GATE_LINK(ACLK_RKVDEC1_PRE, "aclk_rkvdec1_pre", "aclk_rkvdec1_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(41), 5, GFLAGS),
-       GATE_LINK(ACLK_HDCP0_PRE, "aclk_hdcp0_pre", "aclk_vo0_root", "aclk_vop_low_root", 0, RK3588_CLKGATE_CON(55), 9, GFLAGS),
-       GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", "hclk_vop_root", 0, RK3588_CLKGATE_CON(55), 5, GFLAGS),
-       GATE_LINK(ACLK_HDCP1_PRE, "aclk_hdcp1_pre", "aclk_hdcp1_root", "aclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(59), 6, GFLAGS),
-       GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", "hclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(59), 9, GFLAGS),
-       GATE_LINK(ACLK_AV1_PRE, "aclk_av1_pre", "aclk_av1_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(68), 1, GFLAGS),
-       GATE_LINK(PCLK_AV1_PRE, "pclk_av1_pre", "pclk_av1_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(68), 4, GFLAGS),
-       GATE_LINK(HCLK_SDIO_PRE, "hclk_sdio_pre", "hclk_sdio_root", "hclk_nvm", 0, RK3588_CLKGATE_CON(75), 1, GFLAGS),
+       GATE_LINK(ACLK_ISP1_PRE, "aclk_isp1_pre", "aclk_isp1_root", ACLK_VI_ROOT, 0, RK3588_CLKGATE_CON(26), 6, GFLAGS),
+       GATE_LINK(HCLK_ISP1_PRE, "hclk_isp1_pre", "hclk_isp1_root", HCLK_VI_ROOT, 0, RK3588_CLKGATE_CON(26), 8, GFLAGS),
+       GATE_LINK(HCLK_NVM, "hclk_nvm", "hclk_nvm_root", ACLK_NVM_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(31), 2, GFLAGS),
+       GATE_LINK(ACLK_USB, "aclk_usb", "aclk_usb_root", ACLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(42), 2, GFLAGS),
+       GATE_LINK(HCLK_USB, "hclk_usb", "hclk_usb_root", HCLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(42), 3, GFLAGS),
+       GATE_LINK(ACLK_JPEG_DECODER_PRE, "aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(44), 7, GFLAGS),
+       GATE_LINK(ACLK_VDPU_LOW_PRE, "aclk_vdpu_low_pre", "aclk_vdpu_low_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(44), 5, GFLAGS),
+       GATE_LINK(ACLK_RKVENC1_PRE, "aclk_rkvenc1_pre", "aclk_rkvenc1_root", ACLK_RKVENC0, 0, RK3588_CLKGATE_CON(48), 3, GFLAGS),
+       GATE_LINK(HCLK_RKVENC1_PRE, "hclk_rkvenc1_pre", "hclk_rkvenc1_root", HCLK_RKVENC0, 0, RK3588_CLKGATE_CON(48), 2, GFLAGS),
+       GATE_LINK(HCLK_RKVDEC0_PRE, "hclk_rkvdec0_pre", "hclk_rkvdec0_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(40), 5, GFLAGS),
+       GATE_LINK(ACLK_RKVDEC0_PRE, "aclk_rkvdec0_pre", "aclk_rkvdec0_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(40), 6, GFLAGS),
+       GATE_LINK(HCLK_RKVDEC1_PRE, "hclk_rkvdec1_pre", "hclk_rkvdec1_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(41), 4, GFLAGS),
+       GATE_LINK(ACLK_RKVDEC1_PRE, "aclk_rkvdec1_pre", "aclk_rkvdec1_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(41), 5, GFLAGS),
+       GATE_LINK(ACLK_HDCP0_PRE, "aclk_hdcp0_pre", "aclk_vo0_root", ACLK_VOP_LOW_ROOT, 0, RK3588_CLKGATE_CON(55), 9, GFLAGS),
+       GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", HCLK_VOP_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(55), 5, GFLAGS),
+       GATE_LINK(ACLK_HDCP1_PRE, "aclk_hdcp1_pre", "aclk_hdcp1_root", ACLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(59), 6, GFLAGS),
+       GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", HCLK_VO1USB_TOP_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(59), 9, GFLAGS),
+       GATE_LINK(ACLK_AV1_PRE, "aclk_av1_pre", "aclk_av1_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(68), 1, GFLAGS),
+       GATE_LINK(PCLK_AV1_PRE, "pclk_av1_pre", "pclk_av1_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(68), 4, GFLAGS),
+       GATE_LINK(HCLK_SDIO_PRE, "hclk_sdio_pre", "hclk_sdio_root", HCLK_NVM, 0, RK3588_CLKGATE_CON(75), 1, GFLAGS),
+       GATE_LINK(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", HCLK_VO0, CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(55), 10, GFLAGS),
+       GATE_LINK(PCLK_VO1GRF, "pclk_vo1grf", "pclk_vo1_root", HCLK_VO1, CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(59), 12, GFLAGS),
 };
 
 static void __init rk3588_clk_init(struct device_node *np)
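
The GATE_LINK() users now pass the linked clock's ID instead of its name string, making the dependency machine-usable once linked-clock handling lands; today the macro still degrades to a plain gate. A toy expansion showing the ID being accepted but dropped (simplified signatures, not the driver's):

#include <stdio.h>

/* Simplified stand-ins: the real macros also carry offset/bit/flag args. */
#define GATE(id, cname, pname, f)		  { id, cname, pname, f }
#define GATE_LINK(id, cname, pname, linkedclk, f) GATE(id, cname, pname, f)

struct branch { int id; const char *cname, *pname; unsigned int flags; };

int main(void)
{
	/* The linked clock ID (2) is accepted but dropped, exactly like the
	 * driver's placeholder today. */
	struct branch b = GATE_LINK(1, "hclk_nvm", "hclk_nvm_root", 2, 0);

	printf("%s <- %s (linked id not yet used)\n", b.cname, b.pname);
	return 0;
}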
index 3e62ade120c5ddb68adac0c4e6f4f05cf36c5261..fbf4c4208e06ac312906571af5ec7585238d38f7 100644 (file)
  * of the SoC or supplied after the SoC characterization.
  *
  * The below implementation of the CPU clock allows the rate changes of the CPU
- * clock and the corresponding rate changes of the auxillary clocks of the CPU
+ * clock and the corresponding rate changes of the auxiliary clocks of the CPU
  * domain. The platform clock driver provides a clock register configuration
  * for each configurable rate which is then used to program the clock hardware
- * registers to acheive a fast co-oridinated rate change for all the CPU domain
+ * registers to achieve a fast coordinated rate change for all the CPU domain
  * clocks.
  *
  * On a rate change request for the CPU clock, the rate change is propagated
- * upto the PLL supplying the clock to the CPU domain clock blocks. While the
+ * up to the PLL supplying the clock to the CPU domain clock blocks. While the
  * CPU domain PLL is reconfigured, the CPU domain clocks are driven using an
  * alternate clock source. If required, the alternate clock source is divided
  * down in order to keep the output clock rate within the previous OPP limits.
-*/
+ */
 
+#include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+
+#include "clk.h"
 #include "clk-cpu.h"
 
-#define E4210_SRC_CPU          0x0
-#define E4210_STAT_CPU         0x200
-#define E4210_DIV_CPU0         0x300
-#define E4210_DIV_CPU1         0x304
-#define E4210_DIV_STAT_CPU0    0x400
-#define E4210_DIV_STAT_CPU1    0x404
-
-#define E5433_MUX_SEL2         0x008
-#define E5433_MUX_STAT2                0x208
-#define E5433_DIV_CPU0         0x400
-#define E5433_DIV_CPU1         0x404
-#define E5433_DIV_STAT_CPU0    0x500
-#define E5433_DIV_STAT_CPU1    0x504
-
-#define E4210_DIV0_RATIO0_MASK 0x7
-#define E4210_DIV1_HPM_MASK    (0x7 << 4)
-#define E4210_DIV1_COPY_MASK   (0x7 << 0)
-#define E4210_MUX_HPM_MASK     (1 << 20)
-#define E4210_DIV0_ATB_SHIFT   16
-#define E4210_DIV0_ATB_MASK    (DIV_MASK << E4210_DIV0_ATB_SHIFT)
+struct exynos_cpuclk;
+
+typedef int (*exynos_rate_change_fn_t)(struct clk_notifier_data *ndata,
+                                      struct exynos_cpuclk *cpuclk);
+
+/**
+ * struct exynos_cpuclk_regs - Register offsets for CPU related clocks
+ * @mux_sel: offset of CPU MUX_SEL register (for selecting MUX clock parent)
+ * @mux_stat: offset of CPU MUX_STAT register (for checking MUX clock status)
+ * @div_cpu0: offset of CPU DIV0 register (for modifying divider values)
+ * @div_cpu1: offset of CPU DIV1 register (for modifying divider values)
+ * @div_stat_cpu0: offset of CPU DIV0_STAT register (for checking DIV status)
+ * @div_stat_cpu1: offset of CPU DIV1_STAT register (for checking DIV status)
+ * @mux: offset of MUX register for choosing CPU clock source
+ * @divs: offsets of DIV registers (ACLK, ATCLK, PCLKDBG and PERIPHCLK)
+ */
+struct exynos_cpuclk_regs {
+       u32 mux_sel;
+       u32 mux_stat;
+       u32 div_cpu0;
+       u32 div_cpu1;
+       u32 div_stat_cpu0;
+       u32 div_stat_cpu1;
+
+       u32 mux;
+       u32 divs[4];
+};
+
+/**
+ * struct exynos_cpuclk_chip - Chip specific data for CPU clock
+ * @regs: register offsets for CPU related clocks
+ * @pre_rate_cb: callback to run before CPU clock rate change
+ * @post_rate_cb: callback to run after CPU clock rate change
+ */
+struct exynos_cpuclk_chip {
+       const struct exynos_cpuclk_regs         *regs;
+       exynos_rate_change_fn_t                 pre_rate_cb;
+       exynos_rate_change_fn_t                 post_rate_cb;
+};
+
+/**
+ * struct exynos_cpuclk - information about clock supplied to a CPU core
+ * @hw:                handle between CCF and CPU clock
+ * @alt_parent:        alternate parent clock to use when switching the speed
+ *             of the primary parent clock
+ * @base:      start address of the CPU clock registers block
+ * @lock:      cpu clock domain register access lock
+ * @cfg:       cpu clock rate configuration data
+ * @num_cfgs:  number of array elements in @cfg array
+ * @clk_nb:    clock notifier registered for changes in clock speed of the
+ *             primary parent clock
+ * @flags:     configuration flags for the CPU clock
+ * @chip:      chip-specific data for the CPU clock
+ *
+ * This structure holds information required for programming the CPU clock for
+ * various clock speeds.
+ */
+struct exynos_cpuclk {
+       struct clk_hw                           hw;
+       const struct clk_hw                     *alt_parent;
+       void __iomem                            *base;
+       spinlock_t                              *lock;
+       const struct exynos_cpuclk_cfg_data     *cfg;
+       const unsigned long                     num_cfgs;
+       struct notifier_block                   clk_nb;
+       unsigned long                           flags;
+       const struct exynos_cpuclk_chip         *chip;
+};
 
+/* ---- Common code --------------------------------------------------------- */
+
+/* Divider stabilization time, msec */
+#define MAX_STAB_TIME          10
 #define MAX_DIV                        8
-#define DIV_MASK               7
-#define DIV_MASK_ALL           0xffffffff
-#define MUX_MASK               7
+#define DIV_MASK               GENMASK(2, 0)
+#define DIV_MASK_ALL           GENMASK(31, 0)
+#define MUX_MASK               GENMASK(2, 0)
 
 /*
  * Helper function to wait until divider(s) have stabilized after the divider
  * value was changed.
  */
 static void wait_until_divider_stable(void __iomem *div_reg, unsigned long mask)
 {
-       unsigned long timeout = jiffies + msecs_to_jiffies(10);
+       unsigned long timeout = jiffies + msecs_to_jiffies(MAX_STAB_TIME);
 
        do {
                if (!(readl(div_reg) & mask))
@@ -86,72 +141,65 @@ static void wait_until_divider_stable(void __iomem *div_reg, unsigned long mask)
  * value was changed.
  */
 static void wait_until_mux_stable(void __iomem *mux_reg, u32 mux_pos,
-                                       unsigned long mux_value)
+                                 unsigned long mask, unsigned long mux_value)
 {
-       unsigned long timeout = jiffies + msecs_to_jiffies(10);
+       unsigned long timeout = jiffies + msecs_to_jiffies(MAX_STAB_TIME);
 
        do {
-               if (((readl(mux_reg) >> mux_pos) & MUX_MASK) == mux_value)
+               if (((readl(mux_reg) >> mux_pos) & mask) == mux_value)
                        return;
        } while (time_before(jiffies, timeout));
 
-       if (((readl(mux_reg) >> mux_pos) & MUX_MASK) == mux_value)
+       if (((readl(mux_reg) >> mux_pos) & mask) == mux_value)
                return;
 
        pr_err("%s: re-parenting mux timed-out\n", __func__);
 }
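
wait_until_mux_stable() now takes the field mask as a parameter instead of hard-coding MUX_MASK, since newer chips report mux status as a busy bit rather than a 3-bit value. The underlying shape is a bounded poll with a final re-read; a generic userspace sketch of the same pattern (monotonic clock standing in for jiffies):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Poll read_fn() until (value >> pos) & mask == want, or ~10 ms elapse. */
static bool wait_until_stable(unsigned int (*read_fn)(void), unsigned int pos,
			      unsigned int mask, unsigned int want)
{
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		if (((read_fn() >> pos) & mask) == want)
			return true;
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while ((now.tv_sec - start.tv_sec) * 1000000000L +
		 (now.tv_nsec - start.tv_nsec) < 10 * 1000000L);

	/* final check, mirroring the driver's re-read after timeout */
	return ((read_fn() >> pos) & mask) == want;
}

static unsigned int fake_status(void) { return 1 << 16; }

int main(void)
{
	printf("%s\n", wait_until_stable(fake_status, 16, 0x7, 1) ?
	       "stable" : "timed out");
	return 0;
}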
 
-/* common round rate callback useable for all types of CPU clocks */
-static long exynos_cpuclk_round_rate(struct clk_hw *hw,
-                       unsigned long drate, unsigned long *prate)
-{
-       struct clk_hw *parent = clk_hw_get_parent(hw);
-       *prate = clk_hw_round_rate(parent, drate);
-       return *prate;
-}
-
-/* common recalc rate callback useable for all types of CPU clocks */
-static unsigned long exynos_cpuclk_recalc_rate(struct clk_hw *hw,
-                       unsigned long parent_rate)
-{
-       /*
-        * The CPU clock output (armclk) rate is the same as its parent
-        * rate. Although there exist certain dividers inside the CPU
-        * clock block that could be used to divide the parent clock,
-        * the driver does not make use of them currently, except during
-        * frequency transitions.
-        */
-       return parent_rate;
-}
-
-static const struct clk_ops exynos_cpuclk_clk_ops = {
-       .recalc_rate = exynos_cpuclk_recalc_rate,
-       .round_rate = exynos_cpuclk_round_rate,
-};
-
 /*
  * Helper function to set the 'safe' dividers for the CPU clock. The parameters
  * div and mask contain the divider value and the register bit mask of the
  * dividers to be programmed.
  */
-static void exynos_set_safe_div(void __iomem *base, unsigned long div,
-                                       unsigned long mask)
+static void exynos_set_safe_div(struct exynos_cpuclk *cpuclk, unsigned long div,
+                               unsigned long mask)
 {
+       const struct exynos_cpuclk_regs * const regs = cpuclk->chip->regs;
+       void __iomem *base = cpuclk->base;
        unsigned long div0;
 
-       div0 = readl(base + E4210_DIV_CPU0);
+       div0 = readl(base + regs->div_cpu0);
        div0 = (div0 & ~mask) | (div & mask);
-       writel(div0, base + E4210_DIV_CPU0);
-       wait_until_divider_stable(base + E4210_DIV_STAT_CPU0, mask);
+       writel(div0, base + regs->div_cpu0);
+       wait_until_divider_stable(base + regs->div_stat_cpu0, mask);
 }
 
+/* ---- Exynos 3/4/5 -------------------------------------------------------- */
+
+#define E4210_DIV0_RATIO0_MASK GENMASK(2, 0)
+#define E4210_DIV1_HPM_MASK    GENMASK(6, 4)
+#define E4210_DIV1_COPY_MASK   GENMASK(2, 0)
+#define E4210_MUX_HPM_MASK     BIT(20)
+#define E4210_DIV0_ATB_SHIFT   16
+#define E4210_DIV0_ATB_MASK    (DIV_MASK << E4210_DIV0_ATB_SHIFT)
+
+static const struct exynos_cpuclk_regs e4210_cpuclk_regs = {
+       .mux_sel        = 0x200,
+       .mux_stat       = 0x400,
+       .div_cpu0       = 0x500,
+       .div_cpu1       = 0x504,
+       .div_stat_cpu0  = 0x600,
+       .div_stat_cpu1  = 0x604,
+};
+
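
Replacing the per-SoC E4210_*/E5433_* #defines with per-chip exynos_cpuclk_regs tables is what lets a single exynos_set_safe_div() serve every variant. A compressed illustration of the idea, using the DIV_CPU0/DIV_STAT_CPU0 offsets from the tables above and a fake register block (the kernel pairs the write with a wait on the status register):

#include <stdio.h>
#include <stdint.h>

/* Per-chip register layout, as in struct exynos_cpuclk_regs above. */
struct cpuclk_regs { uint32_t div_cpu0, div_stat_cpu0; };

static const struct cpuclk_regs e4210 = { .div_cpu0 = 0x500, .div_stat_cpu0 = 0x600 };
static const struct cpuclk_regs e5433 = { .div_cpu0 = 0x600, .div_stat_cpu0 = 0x700 };

/* One helper works for every chip once offsets come from a table. */
static void set_safe_div(uint32_t *mmio, const struct cpuclk_regs *regs,
			 uint32_t div, uint32_t mask)
{
	uint32_t *reg = &mmio[regs->div_cpu0 / 4];

	*reg = (*reg & ~mask) | (div & mask);	/* kernel uses readl/writel */
}

int main(void)
{
	static uint32_t mmio[0x800 / 4];	/* fake register block */

	set_safe_div(mmio, &e4210, 3, 0x7);
	set_safe_div(mmio, &e5433, 2, 0x7);
	printf("e4210 DIV_CPU0=0x%x e5433 DIV_CPU0=0x%x\n",
	       (unsigned)mmio[0x500 / 4], (unsigned)mmio[0x600 / 4]);
	return 0;
}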
 /* handler for pre-rate change notification from parent clock */
 static int exynos_cpuclk_pre_rate_change(struct clk_notifier_data *ndata,
-                       struct exynos_cpuclk *cpuclk, void __iomem *base)
+                                        struct exynos_cpuclk *cpuclk)
 {
        const struct exynos_cpuclk_cfg_data *cfg_data = cpuclk->cfg;
+       const struct exynos_cpuclk_regs * const regs = cpuclk->chip->regs;
+       void __iomem *base = cpuclk->base;
        unsigned long alt_prate = clk_hw_get_rate(cpuclk->alt_parent);
-       unsigned long alt_div = 0, alt_div_mask = DIV_MASK;
        unsigned long div0, div1 = 0, mux_reg;
        unsigned long flags;
 
@@ -172,8 +220,8 @@ static int exynos_cpuclk_pre_rate_change(struct clk_notifier_data *ndata,
        div0 = cfg_data->div0;
        if (cpuclk->flags & CLK_CPU_HAS_DIV1) {
                div1 = cfg_data->div1;
-               if (readl(base + E4210_SRC_CPU) & E4210_MUX_HPM_MASK)
-                       div1 = readl(base + E4210_DIV_CPU1) &
+               if (readl(base + regs->mux_sel) & E4210_MUX_HPM_MASK)
+                       div1 = readl(base + regs->div_cpu1) &
                                (E4210_DIV1_HPM_MASK | E4210_DIV1_COPY_MASK);
        }
 
@@ -187,6 +235,7 @@ static int exynos_cpuclk_pre_rate_change(struct clk_notifier_data *ndata,
         */
        if (alt_prate > ndata->old_rate || ndata->old_rate > ndata->new_rate) {
                unsigned long tmp_rate = min(ndata->old_rate, ndata->new_rate);
+               unsigned long alt_div, alt_div_mask = DIV_MASK;
 
                alt_div = DIV_ROUND_UP(alt_prate, tmp_rate) - 1;
                WARN_ON(alt_div >= MAX_DIV);
@@ -199,23 +248,23 @@ static int exynos_cpuclk_pre_rate_change(struct clk_notifier_data *ndata,
                        alt_div |= E4210_DIV0_ATB_MASK;
                        alt_div_mask |= E4210_DIV0_ATB_MASK;
                }
-               exynos_set_safe_div(base, alt_div, alt_div_mask);
+               exynos_set_safe_div(cpuclk, alt_div, alt_div_mask);
                div0 |= alt_div;
        }
 
        /* select sclk_mpll as the alternate parent */
-       mux_reg = readl(base + E4210_SRC_CPU);
-       writel(mux_reg | (1 << 16), base + E4210_SRC_CPU);
-       wait_until_mux_stable(base + E4210_STAT_CPU, 16, 2);
+       mux_reg = readl(base + regs->mux_sel);
+       writel(mux_reg | (1 << 16), base + regs->mux_sel);
+       wait_until_mux_stable(base + regs->mux_stat, 16, MUX_MASK, 2);
 
        /* alternate parent is active now. set the dividers */
-       writel(div0, base + E4210_DIV_CPU0);
-       wait_until_divider_stable(base + E4210_DIV_STAT_CPU0, DIV_MASK_ALL);
+       writel(div0, base + regs->div_cpu0);
+       wait_until_divider_stable(base + regs->div_stat_cpu0, DIV_MASK_ALL);
 
        if (cpuclk->flags & CLK_CPU_HAS_DIV1) {
-               writel(div1, base + E4210_DIV_CPU1);
-               wait_until_divider_stable(base + E4210_DIV_STAT_CPU1,
-                               DIV_MASK_ALL);
+               writel(div1, base + regs->div_cpu1);
+               wait_until_divider_stable(base + regs->div_stat_cpu1,
+                                         DIV_MASK_ALL);
        }
 
        spin_unlock_irqrestore(cpuclk->lock, flags);
@@ -224,9 +273,11 @@ static int exynos_cpuclk_pre_rate_change(struct clk_notifier_data *ndata,
 
 /* handler for post-rate change notification from parent clock */
 static int exynos_cpuclk_post_rate_change(struct clk_notifier_data *ndata,
-                       struct exynos_cpuclk *cpuclk, void __iomem *base)
+                                         struct exynos_cpuclk *cpuclk)
 {
        const struct exynos_cpuclk_cfg_data *cfg_data = cpuclk->cfg;
+       const struct exynos_cpuclk_regs * const regs = cpuclk->chip->regs;
+       void __iomem *base = cpuclk->base;
        unsigned long div = 0, div_mask = DIV_MASK;
        unsigned long mux_reg;
        unsigned long flags;
@@ -243,43 +294,39 @@ static int exynos_cpuclk_post_rate_change(struct clk_notifier_data *ndata,
        spin_lock_irqsave(cpuclk->lock, flags);
 
        /* select mout_apll as the alternate parent */
-       mux_reg = readl(base + E4210_SRC_CPU);
-       writel(mux_reg & ~(1 << 16), base + E4210_SRC_CPU);
-       wait_until_mux_stable(base + E4210_STAT_CPU, 16, 1);
+       mux_reg = readl(base + regs->mux_sel);
+       writel(mux_reg & ~(1 << 16), base + regs->mux_sel);
+       wait_until_mux_stable(base + regs->mux_stat, 16, MUX_MASK, 1);
 
        if (cpuclk->flags & CLK_CPU_NEEDS_DEBUG_ALT_DIV) {
                div |= (cfg_data->div0 & E4210_DIV0_ATB_MASK);
                div_mask |= E4210_DIV0_ATB_MASK;
        }
 
-       exynos_set_safe_div(base, div, div_mask);
+       exynos_set_safe_div(cpuclk, div, div_mask);
        spin_unlock_irqrestore(cpuclk->lock, flags);
        return 0;
 }
 
-/*
- * Helper function to set the 'safe' dividers for the CPU clock. The parameters
- * div and mask contain the divider value and the register bit mask of the
- * dividers to be programmed.
- */
-static void exynos5433_set_safe_div(void __iomem *base, unsigned long div,
-                                       unsigned long mask)
-{
-       unsigned long div0;
+/* ---- Exynos5433 ---------------------------------------------------------- */
 
-       div0 = readl(base + E5433_DIV_CPU0);
-       div0 = (div0 & ~mask) | (div & mask);
-       writel(div0, base + E5433_DIV_CPU0);
-       wait_until_divider_stable(base + E5433_DIV_STAT_CPU0, mask);
-}
+static const struct exynos_cpuclk_regs e5433_cpuclk_regs = {
+       .mux_sel        = 0x208,
+       .mux_stat       = 0x408,
+       .div_cpu0       = 0x600,
+       .div_cpu1       = 0x604,
+       .div_stat_cpu0  = 0x700,
+       .div_stat_cpu1  = 0x704,
+};
 
 /* handler for pre-rate change notification from parent clock */
 static int exynos5433_cpuclk_pre_rate_change(struct clk_notifier_data *ndata,
-                       struct exynos_cpuclk *cpuclk, void __iomem *base)
+                                            struct exynos_cpuclk *cpuclk)
 {
        const struct exynos_cpuclk_cfg_data *cfg_data = cpuclk->cfg;
+       const struct exynos_cpuclk_regs * const regs = cpuclk->chip->regs;
+       void __iomem *base = cpuclk->base;
        unsigned long alt_prate = clk_hw_get_rate(cpuclk->alt_parent);
-       unsigned long alt_div = 0, alt_div_mask = DIV_MASK;
        unsigned long div0, div1 = 0, mux_reg;
        unsigned long flags;
 
@@ -309,25 +356,26 @@ static int exynos5433_cpuclk_pre_rate_change(struct clk_notifier_data *ndata,
         */
        if (alt_prate > ndata->old_rate || ndata->old_rate > ndata->new_rate) {
                unsigned long tmp_rate = min(ndata->old_rate, ndata->new_rate);
+               unsigned long alt_div, alt_div_mask = DIV_MASK;
 
                alt_div = DIV_ROUND_UP(alt_prate, tmp_rate) - 1;
                WARN_ON(alt_div >= MAX_DIV);
 
-               exynos5433_set_safe_div(base, alt_div, alt_div_mask);
+               exynos_set_safe_div(cpuclk, alt_div, alt_div_mask);
                div0 |= alt_div;
        }
 
        /* select the alternate parent */
-       mux_reg = readl(base + E5433_MUX_SEL2);
-       writel(mux_reg | 1, base + E5433_MUX_SEL2);
-       wait_until_mux_stable(base + E5433_MUX_STAT2, 0, 2);
+       mux_reg = readl(base + regs->mux_sel);
+       writel(mux_reg | 1, base + regs->mux_sel);
+       wait_until_mux_stable(base + regs->mux_stat, 0, MUX_MASK, 2);
 
        /* alternate parent is active now. set the dividers */
-       writel(div0, base + E5433_DIV_CPU0);
-       wait_until_divider_stable(base + E5433_DIV_STAT_CPU0, DIV_MASK_ALL);
+       writel(div0, base + regs->div_cpu0);
+       wait_until_divider_stable(base + regs->div_stat_cpu0, DIV_MASK_ALL);
 
-       writel(div1, base + E5433_DIV_CPU1);
-       wait_until_divider_stable(base + E5433_DIV_STAT_CPU1, DIV_MASK_ALL);
+       writel(div1, base + regs->div_cpu1);
+       wait_until_divider_stable(base + regs->div_stat_cpu1, DIV_MASK_ALL);
 
        spin_unlock_irqrestore(cpuclk->lock, flags);
        return 0;
@@ -335,8 +383,10 @@ static int exynos5433_cpuclk_pre_rate_change(struct clk_notifier_data *ndata,
 
 /* handler for post-rate change notification from parent clock */
 static int exynos5433_cpuclk_post_rate_change(struct clk_notifier_data *ndata,
-                       struct exynos_cpuclk *cpuclk, void __iomem *base)
+                                             struct exynos_cpuclk *cpuclk)
 {
+       const struct exynos_cpuclk_regs * const regs = cpuclk->chip->regs;
+       void __iomem *base = cpuclk->base;
        unsigned long div = 0, div_mask = DIV_MASK;
        unsigned long mux_reg;
        unsigned long flags;
@@ -344,73 +394,265 @@ static int exynos5433_cpuclk_post_rate_change(struct clk_notifier_data *ndata,
        spin_lock_irqsave(cpuclk->lock, flags);
 
        /* select apll as the alternate parent */
-       mux_reg = readl(base + E5433_MUX_SEL2);
-       writel(mux_reg & ~1, base + E5433_MUX_SEL2);
-       wait_until_mux_stable(base + E5433_MUX_STAT2, 0, 1);
+       mux_reg = readl(base + regs->mux_sel);
+       writel(mux_reg & ~1, base + regs->mux_sel);
+       wait_until_mux_stable(base + regs->mux_stat, 0, MUX_MASK, 1);
 
-       exynos5433_set_safe_div(base, div, div_mask);
+       exynos_set_safe_div(cpuclk, div, div_mask);
        spin_unlock_irqrestore(cpuclk->lock, flags);
        return 0;
 }
 
+/* ---- Exynos850 ----------------------------------------------------------- */
+
+#define E850_DIV_RATIO_MASK    GENMASK(3, 0)
+#define E850_BUSY_MASK         BIT(16)
+
+/* Max time for divider or mux to stabilize, usec */
+#define E850_DIV_MUX_STAB_TIME 100
+/* OSCCLK clock rate, Hz */
+#define E850_OSCCLK            (26 * MHZ)
+
+static const struct exynos_cpuclk_regs e850cl0_cpuclk_regs = {
+       .mux    = 0x100c,
+       .divs   = { 0x1800, 0x1808, 0x180c, 0x1810 },
+};
+
+static const struct exynos_cpuclk_regs e850cl1_cpuclk_regs = {
+       .mux    = 0x1000,
+       .divs   = { 0x1800, 0x1808, 0x180c, 0x1810 },
+};
+
 /*
- * This notifier function is called for the pre-rate and post-rate change
- * notifications of the parent clock of cpuclk.
+ * Set the alternate parent rate to "rate" or less.
+ *
+ * rate: Desired alt_parent rate, or 0 for the maximum alt_parent rate
+ *
+ * Exynos850 doesn't have a CPU clock divider in the CMU_CPUCLx block (the
+ * CMUREF divider doesn't affect CPU speed), so the CPUCLx_SWITCH divider
+ * from CMU_TOP is used instead to adjust the alternate parent speed.
+ *
+ * It's possible to use clk_set_max_rate() instead of this function, but it
+ * would set overly pessimistic rate values for the alternate parent.
  */
-static int exynos_cpuclk_notifier_cb(struct notifier_block *nb,
-                               unsigned long event, void *data)
+static int exynos850_alt_parent_set_max_rate(const struct clk_hw *alt_parent,
+                                            unsigned long rate)
 {
-       struct clk_notifier_data *ndata = data;
-       struct exynos_cpuclk *cpuclk;
-       void __iomem *base;
-       int err = 0;
+       struct clk_hw *clk_div, *clk_divp;
+       unsigned long divp_rate, div_rate, div;
+       int ret;
+
+       /* Divider from CMU_TOP */
+       clk_div = clk_hw_get_parent(alt_parent);
+       if (!clk_div)
+               return -ENOENT;
+       /* Divider's parent from CMU_TOP */
+       clk_divp = clk_hw_get_parent(clk_div);
+       if (!clk_divp)
+               return -ENOENT;
+       /* Divider input rate */
+       divp_rate = clk_hw_get_rate(clk_divp);
+       if (!divp_rate)
+               return -EINVAL;
 
-       cpuclk = container_of(nb, struct exynos_cpuclk, clk_nb);
-       base = cpuclk->ctrl_base;
+       /* Calculate new alt_parent rate for integer divider value */
+       if (rate == 0)
+               div = 1;
+       else
+               div = DIV_ROUND_UP(divp_rate, rate);
+       div_rate = DIV_ROUND_UP(divp_rate, div);
+       WARN_ON(div >= MAX_DIV);
 
-       if (event == PRE_RATE_CHANGE)
-               err = exynos_cpuclk_pre_rate_change(ndata, cpuclk, base);
-       else if (event == POST_RATE_CHANGE)
-               err = exynos_cpuclk_post_rate_change(ndata, cpuclk, base);
+       /* alt_parent will propagate this change up to the divider */
+       ret = clk_set_rate(alt_parent->clk, div_rate);
+       if (ret)
+               return ret;
+       udelay(E850_DIV_MUX_STAB_TIME);
 
-       return notifier_from_errno(err);
+       return 0;
+}
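
A standalone sketch of the rounding this function performs: the integer
divider is derived from the divider's input rate, and the rate finally
requested through clk_set_rate() is the input rate divided by that value
(rate == 0 collapses to div == 1, i.e. the maximum alt_parent rate). The
numbers below are invented:

/* Standalone sketch of the exynos850 alt_parent rate rounding. */
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned long e850_round(unsigned long divp_rate, unsigned long rate)
{
	unsigned long div = rate ? DIV_ROUND_UP(divp_rate, rate) : 1;

	return DIV_ROUND_UP(divp_rate, div);	/* rate actually requested */
}

int main(void)
{
	printf("%lu\n", e850_round(800000000, 350000000));	/* 266666667 */
	printf("%lu\n", e850_round(800000000, 0));		/* 800000000 */
	return 0;
}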
+
+/* Handler for pre-rate change notification from parent clock */
+static int exynos850_cpuclk_pre_rate_change(struct clk_notifier_data *ndata,
+                                           struct exynos_cpuclk *cpuclk)
+{
+       const unsigned int shifts[4] = { 16, 12, 8, 4 }; /* E850_CPU_DIV0() */
+       const struct exynos_cpuclk_regs * const regs = cpuclk->chip->regs;
+       const struct exynos_cpuclk_cfg_data *cfg_data = cpuclk->cfg;
+       const struct clk_hw *alt_parent = cpuclk->alt_parent;
+       void __iomem *base = cpuclk->base;
+       unsigned long alt_prate = clk_hw_get_rate(alt_parent);
+       unsigned long flags;
+       u32 mux_reg;
+       size_t i;
+       int ret;
+
+       /* No actions are needed when switching to or from OSCCLK parent */
+       if (ndata->new_rate == E850_OSCCLK || ndata->old_rate == E850_OSCCLK)
+               return 0;
+
+       /* Find out the divider values to use for clock data */
+       while ((cfg_data->prate * 1000) != ndata->new_rate) {
+               if (cfg_data->prate == 0)
+                       return -EINVAL;
+               cfg_data++;
+       }
+
+       /*
+        * If the old parent clock speed is less than the speed of the
+        * alternate parent, it must be ensured that the armclk speed never
+        * exceeds the old parent rate until the dividers are set.  Also work
+        * around the issue of the dividers being set to lower values before
+        * the parent clock speed is set to the new lower speed (this can
+        * result in the armclk output clock running too fast).
+        */
+       if (alt_prate > ndata->old_rate || ndata->old_rate > ndata->new_rate) {
+               unsigned long tmp_rate = min(ndata->old_rate, ndata->new_rate);
+
+               ret = exynos850_alt_parent_set_max_rate(alt_parent, tmp_rate);
+               if (ret)
+                       return ret;
+       }
+
+       spin_lock_irqsave(cpuclk->lock, flags);
+
+       /* Select the alternate parent */
+       mux_reg = readl(base + regs->mux);
+       writel(mux_reg | 1, base + regs->mux);
+       wait_until_mux_stable(base + regs->mux, 16, 1, 0);
+
+       /* Alternate parent is active now. Set the dividers */
+       for (i = 0; i < ARRAY_SIZE(shifts); ++i) {
+               unsigned long div;
+               u32 val;
+
+               div = (cfg_data->div0 >> shifts[i]) & E850_DIV_RATIO_MASK;
+
+               val = readl(base + regs->divs[i]);
+               val = (val & ~E850_DIV_RATIO_MASK) | div;
+               writel(val, base + regs->divs[i]);
+               wait_until_divider_stable(base + regs->divs[i], E850_BUSY_MASK);
+       }
+
+       spin_unlock_irqrestore(cpuclk->lock, flags);
+
+       return 0;
+}
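
The cfg_data->div0 value consumed above packs one 4-bit ratio per divider
register at the bit positions listed in shifts[] (the E850_CPU_DIV0() packing
macro referenced in the comment is not shown in this diff). A standalone
sketch of the unpacking, with a hypothetical packed value:

/* Standalone sketch: unpacking four 4-bit ratios from div0; not kernel code. */
#include <stdio.h>

int main(void)
{
	const unsigned int shifts[4] = { 16, 12, 8, 4 };
	unsigned long div0 = 0x12340;	/* hypothetical packed value */
	size_t i;

	for (i = 0; i < 4; i++)		/* prints ratios 1, 2, 3, 4 */
		printf("divs[%zu] ratio = %lu\n", i, (div0 >> shifts[i]) & 0xf);
	return 0;
}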
+
+/* Handler for post-rate change notification from parent clock */
+static int exynos850_cpuclk_post_rate_change(struct clk_notifier_data *ndata,
+                                            struct exynos_cpuclk *cpuclk)
+{
+       const struct exynos_cpuclk_regs * const regs = cpuclk->chip->regs;
+       const struct clk_hw *alt_parent = cpuclk->alt_parent;
+       void __iomem *base = cpuclk->base;
+       unsigned long flags;
+       u32 mux_reg;
+
+       /* No actions are needed when switching to or from OSCCLK parent */
+       if (ndata->new_rate == E850_OSCCLK || ndata->old_rate == E850_OSCCLK)
+               return 0;
+
+       spin_lock_irqsave(cpuclk->lock, flags);
+
+       /* Select main parent (PLL) for mux */
+       mux_reg = readl(base + regs->mux);
+       writel(mux_reg & ~1, base + regs->mux);
+       wait_until_mux_stable(base + regs->mux, 16, 1, 0);
+
+       spin_unlock_irqrestore(cpuclk->lock, flags);
+
+       /* Set alt_parent rate back to max */
+       return exynos850_alt_parent_set_max_rate(alt_parent, 0);
+}
+
+/* -------------------------------------------------------------------------- */
+
+/* Common round rate callback usable for all types of CPU clocks */
+static long exynos_cpuclk_round_rate(struct clk_hw *hw, unsigned long drate,
+                                    unsigned long *prate)
+{
+       struct clk_hw *parent = clk_hw_get_parent(hw);
+       *prate = clk_hw_round_rate(parent, drate);
+       return *prate;
 }
 
+/* Common recalc rate callback usable for all types of CPU clocks */
+static unsigned long exynos_cpuclk_recalc_rate(struct clk_hw *hw,
+                                              unsigned long parent_rate)
+{
+       /*
+        * The CPU clock output (armclk) rate is the same as its parent
+        * rate. Although the CPU clock block contains dividers that could
+        * be used to divide the parent clock, the driver currently uses
+        * them only during frequency transitions.
+        */
+       return parent_rate;
+}
+
+static const struct clk_ops exynos_cpuclk_clk_ops = {
+       .recalc_rate = exynos_cpuclk_recalc_rate,
+       .round_rate = exynos_cpuclk_round_rate,
+};
+
 /*
  * This notifier function is called for the pre-rate and post-rate change
  * notifications of the parent clock of cpuclk.
  */
-static int exynos5433_cpuclk_notifier_cb(struct notifier_block *nb,
-                               unsigned long event, void *data)
+static int exynos_cpuclk_notifier_cb(struct notifier_block *nb,
+                                    unsigned long event, void *data)
 {
        struct clk_notifier_data *ndata = data;
        struct exynos_cpuclk *cpuclk;
-       void __iomem *base;
        int err = 0;
 
        cpuclk = container_of(nb, struct exynos_cpuclk, clk_nb);
-       base = cpuclk->ctrl_base;
 
        if (event == PRE_RATE_CHANGE)
-               err = exynos5433_cpuclk_pre_rate_change(ndata, cpuclk, base);
+               err = cpuclk->chip->pre_rate_cb(ndata, cpuclk);
        else if (event == POST_RATE_CHANGE)
-               err = exynos5433_cpuclk_post_rate_change(ndata, cpuclk, base);
+               err = cpuclk->chip->post_rate_cb(ndata, cpuclk);
 
        return notifier_from_errno(err);
 }
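
For reference, notifier_from_errno() folds a negative errno into
NOTIFY_STOP_MASK, so a failing pre-rate callback aborts the rate change,
while 0 maps to NOTIFY_OK. A standalone sketch of that mapping, with the
constants copied from include/linux/notifier.h:

/* Standalone sketch of the notifier_from_errno() contract. */
#include <stdio.h>

#define NOTIFY_OK		0x0001
#define NOTIFY_STOP_MASK	0x8000

static int notifier_from_errno(int err)
{
	if (err)
		return NOTIFY_STOP_MASK | (NOTIFY_OK - err);
	return NOTIFY_OK;
}

int main(void)
{
	printf("0   -> %#x\n", notifier_from_errno(0));		/* 0x1 */
	printf("-22 -> %#x\n", notifier_from_errno(-22));	/* 0x8017 */
	return 0;
}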
 
+static const struct exynos_cpuclk_chip exynos_clkcpu_chips[] = {
+       [CPUCLK_LAYOUT_E4210] = {
+               .regs           = &e4210_cpuclk_regs,
+               .pre_rate_cb    = exynos_cpuclk_pre_rate_change,
+               .post_rate_cb   = exynos_cpuclk_post_rate_change,
+       },
+       [CPUCLK_LAYOUT_E5433] = {
+               .regs           = &e5433_cpuclk_regs,
+               .pre_rate_cb    = exynos5433_cpuclk_pre_rate_change,
+               .post_rate_cb   = exynos5433_cpuclk_post_rate_change,
+       },
+       [CPUCLK_LAYOUT_E850_CL0] = {
+               .regs           = &e850cl0_cpuclk_regs,
+               .pre_rate_cb    = exynos850_cpuclk_pre_rate_change,
+               .post_rate_cb   = exynos850_cpuclk_post_rate_change,
+       },
+       [CPUCLK_LAYOUT_E850_CL1] = {
+               .regs           = &e850cl1_cpuclk_regs,
+               .pre_rate_cb    = exynos850_cpuclk_pre_rate_change,
+               .post_rate_cb   = exynos850_cpuclk_post_rate_change,
+       },
+};
+
 /* helper function to register a CPU clock */
 static int __init exynos_register_cpu_clock(struct samsung_clk_provider *ctx,
-               unsigned int lookup_id, const char *name,
-               const struct clk_hw *parent, const struct clk_hw *alt_parent,
-               unsigned long offset, const struct exynos_cpuclk_cfg_data *cfg,
-               unsigned long num_cfgs, unsigned long flags)
+                               const struct samsung_cpu_clock *clk_data)
 {
+       const struct clk_hw *parent, *alt_parent;
+       struct clk_hw **hws;
        struct exynos_cpuclk *cpuclk;
        struct clk_init_data init;
        const char *parent_name;
+       unsigned int num_cfgs;
        int ret = 0;
 
+       hws = ctx->clk_data.hws;
+       parent = hws[clk_data->parent_id];
+       alt_parent = hws[clk_data->alt_parent_id];
        if (IS_ERR(parent) || IS_ERR(alt_parent)) {
                pr_err("%s: invalid parent clock(s)\n", __func__);
                return -EINVAL;
@@ -422,7 +664,7 @@ static int __init exynos_register_cpu_clock(struct samsung_clk_provider *ctx,
 
        parent_name = clk_hw_get_name(parent);
 
-       init.name = name;
+       init.name = clk_data->name;
        init.flags = CLK_SET_RATE_PARENT;
        init.parent_names = &parent_name;
        init.num_parents = 1;
@@ -430,23 +672,25 @@ static int __init exynos_register_cpu_clock(struct samsung_clk_provider *ctx,
 
        cpuclk->alt_parent = alt_parent;
        cpuclk->hw.init = &init;
-       cpuclk->ctrl_base = ctx->reg_base + offset;
+       cpuclk->base = ctx->reg_base + clk_data->offset;
        cpuclk->lock = &ctx->lock;
-       cpuclk->flags = flags;
-       if (flags & CLK_CPU_HAS_E5433_REGS_LAYOUT)
-               cpuclk->clk_nb.notifier_call = exynos5433_cpuclk_notifier_cb;
-       else
-               cpuclk->clk_nb.notifier_call = exynos_cpuclk_notifier_cb;
-
+       cpuclk->flags = clk_data->flags;
+       cpuclk->clk_nb.notifier_call = exynos_cpuclk_notifier_cb;
+       cpuclk->chip = &exynos_clkcpu_chips[clk_data->reg_layout];
 
        ret = clk_notifier_register(parent->clk, &cpuclk->clk_nb);
        if (ret) {
                pr_err("%s: failed to register clock notifier for %s\n",
-                               __func__, name);
+                      __func__, clk_data->name);
                goto free_cpuclk;
        }
 
-       cpuclk->cfg = kmemdup(cfg, sizeof(*cfg) * num_cfgs, GFP_KERNEL);
+       /* Find count of configuration rates in cfg */
+       for (num_cfgs = 0; clk_data->cfg[num_cfgs].prate != 0; )
+               num_cfgs++;
+
+       cpuclk->cfg = kmemdup(clk_data->cfg, sizeof(*clk_data->cfg) * num_cfgs,
+                             GFP_KERNEL);
        if (!cpuclk->cfg) {
                ret = -ENOMEM;
                goto unregister_clk_nb;
@@ -454,11 +698,12 @@ static int __init exynos_register_cpu_clock(struct samsung_clk_provider *ctx,
 
        ret = clk_hw_register(NULL, &cpuclk->hw);
        if (ret) {
-               pr_err("%s: could not register cpuclk %s\n", __func__,  name);
+               pr_err("%s: could not register cpuclk %s\n", __func__,
+                      clk_data->name);
                goto free_cpuclk_data;
        }
 
-       samsung_clk_add_lookup(ctx, &cpuclk->hw, lookup_id);
+       samsung_clk_add_lookup(ctx, &cpuclk->hw, clk_data->id);
        return 0;
 
 free_cpuclk_data:
@@ -474,16 +719,7 @@ void __init samsung_clk_register_cpu(struct samsung_clk_provider *ctx,
                const struct samsung_cpu_clock *list, unsigned int nr_clk)
 {
        unsigned int idx;
-       unsigned int num_cfgs;
-       struct clk_hw **hws = ctx->clk_data.hws;
 
-       for (idx = 0; idx < nr_clk; idx++, list++) {
-               /* find count of configuration rates in cfg */
-               for (num_cfgs = 0; list->cfg[num_cfgs].prate != 0; )
-                       num_cfgs++;
-
-               exynos_register_cpu_clock(ctx, list->id, list->name, hws[list->parent_id],
-                               hws[list->alt_parent_id], list->offset, list->cfg, num_cfgs,
-                               list->flags);
-       }
+       for (idx = 0; idx < nr_clk; idx++)
+               exynos_register_cpu_clock(ctx, &list[idx]);
 }
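
exynos_register_cpu_clock() now derives num_cfgs itself by scanning for the
prate == 0 sentinel that terminates every cfg table. A hypothetical plain-C
sketch of that table shape and the counting loop (field names mirror
struct exynos_cpuclk_cfg_data, prate is in kHz, and all values are invented):

/* Standalone sketch of a sentinel-terminated cfg table; not kernel code. */
#include <stdio.h>

struct cfg { unsigned long prate, div0, div1; };

static const struct cfg sketch_armclk_d[] = {
	{ 1000000, 0x12340, 0x0 },	/* 1000 MHz */
	{  800000, 0x12340, 0x0 },	/*  800 MHz */
	{ 0 },				/* sentinel: ends the table */
};

int main(void)
{
	unsigned int num_cfgs = 0;

	while (sketch_armclk_d[num_cfgs].prate != 0)
		num_cfgs++;
	printf("num_cfgs = %u\n", num_cfgs);	/* 2 */
	return 0;
}
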
index 0164bd9ad021b915eb3efdcbb805b9210e580d26..892843611b0ac3438a3b71a6ca82d395adc85f92 100644 (file)
@@ -8,7 +8,24 @@
 #ifndef __SAMSUNG_CLK_CPU_H
 #define __SAMSUNG_CLK_CPU_H
 
-#include "clk.h"
+/* The CPU clock registers have DIV1 configuration register */
+#define CLK_CPU_HAS_DIV1               BIT(0)
+/* When ALT parent is active, debug clocks need safe divider values */
+#define CLK_CPU_NEEDS_DEBUG_ALT_DIV    BIT(1)
+
+/**
+ * enum exynos_cpuclk_layout - CPU clock registers layout compatibility
+ * @CPUCLK_LAYOUT_E4210: Exynos4210 compatible layout
+ * @CPUCLK_LAYOUT_E5433: Exynos5433 compatible layout
+ * @CPUCLK_LAYOUT_E850_CL0: Exynos850 cluster 0 compatible layout
+ * @CPUCLK_LAYOUT_E850_CL1: Exynos850 cluster 1 compatible layout
+ */
+enum exynos_cpuclk_layout {
+       CPUCLK_LAYOUT_E4210,
+       CPUCLK_LAYOUT_E5433,
+       CPUCLK_LAYOUT_E850_CL0,
+       CPUCLK_LAYOUT_E850_CL1,
+};
 
 /**
  * struct exynos_cpuclk_cfg_data - config data to setup cpu clocks
@@ -28,38 +45,4 @@ struct exynos_cpuclk_cfg_data {
        unsigned long   div1;
 };
 
-/**
- * struct exynos_cpuclk - information about clock supplied to a CPU core
- * @hw:                handle between CCF and CPU clock
- * @alt_parent:        alternate parent clock to use when switching the speed
- *             of the primary parent clock
- * @ctrl_base: base address of the clock controller
- * @lock:      cpu clock domain register access lock
- * @cfg:       cpu clock rate configuration data
- * @num_cfgs:  number of array elements in @cfg array
- * @clk_nb:    clock notifier registered for changes in clock speed of the
- *             primary parent clock
- * @flags:     configuration flags for the CPU clock
- *
- * This structure holds information required for programming the CPU clock for
- * various clock speeds.
- */
-struct exynos_cpuclk {
-       struct clk_hw                           hw;
-       const struct clk_hw                     *alt_parent;
-       void __iomem                            *ctrl_base;
-       spinlock_t                              *lock;
-       const struct exynos_cpuclk_cfg_data     *cfg;
-       const unsigned long                     num_cfgs;
-       struct notifier_block                   clk_nb;
-       unsigned long                           flags;
-
-/* The CPU clock registers have DIV1 configuration register */
-#define CLK_CPU_HAS_DIV1               (1 << 0)
-/* When ALT parent is active, debug clocks need safe divider values */
-#define CLK_CPU_NEEDS_DEBUG_ALT_DIV    (1 << 1)
-/* The CPU clock registers have Exynos5433-compatible layout */
-#define CLK_CPU_HAS_E5433_REGS_LAYOUT  (1 << 2)
-};
-
 #endif /* __SAMSUNG_CLK_CPU_H */
index a024616676640dc1c9033df641381afb14a3f7ff..cd4fec323a4274147a0f8d4ded57b8c45a0e7d32 100644 (file)
@@ -775,7 +775,7 @@ static const struct exynos_cpuclk_cfg_data e3250_armclk_d[] __initconst = {
 
 static const struct samsung_cpu_clock exynos3250_cpu_clks[] __initconst = {
        CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_MOUT_MPLL_USER_C,
-                       CLK_CPU_HAS_DIV1, 0x14200, e3250_armclk_d),
+               CLK_CPU_HAS_DIV1, 0x14000, CPUCLK_LAYOUT_E4210, e3250_armclk_d),
 };
 
 static void __init exynos3_core_down_clock(void __iomem *reg_base)
index 4ec41221e68f4229db3ea08df1c6c101df4d1b45..a026ccca7315f1c403daa9e1201c8536e8a83ae7 100644 (file)
@@ -1252,17 +1252,20 @@ static const struct exynos_cpuclk_cfg_data e4412_armclk_d[] __initconst = {
 
 static const struct samsung_cpu_clock exynos4210_cpu_clks[] __initconst = {
        CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_SCLK_MPLL,
-                       CLK_CPU_NEEDS_DEBUG_ALT_DIV | CLK_CPU_HAS_DIV1, 0x14200, e4210_armclk_d),
+               CLK_CPU_NEEDS_DEBUG_ALT_DIV | CLK_CPU_HAS_DIV1, 0x14000,
+               CPUCLK_LAYOUT_E4210, e4210_armclk_d),
 };
 
 static const struct samsung_cpu_clock exynos4212_cpu_clks[] __initconst = {
        CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_MOUT_MPLL_USER_C,
-               CLK_CPU_NEEDS_DEBUG_ALT_DIV | CLK_CPU_HAS_DIV1, 0x14200, e4212_armclk_d),
+               CLK_CPU_NEEDS_DEBUG_ALT_DIV | CLK_CPU_HAS_DIV1, 0x14000,
+               CPUCLK_LAYOUT_E4210, e4212_armclk_d),
 };
 
 static const struct samsung_cpu_clock exynos4412_cpu_clks[] __initconst = {
        CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_MOUT_MPLL_USER_C,
-                       CLK_CPU_NEEDS_DEBUG_ALT_DIV | CLK_CPU_HAS_DIV1, 0x14200, e4412_armclk_d),
+               CLK_CPU_NEEDS_DEBUG_ALT_DIV | CLK_CPU_HAS_DIV1, 0x14000,
+               CPUCLK_LAYOUT_E4210, e4412_armclk_d),
 };
 
 /* register exynos4 clocks */
index 8ebe6155d8b7064cbeb8b7a8c5a0128a790f26ee..e02e7c013f3d2adfe6ca6b15fa572c4e11e9f8e4 100644 (file)
@@ -776,8 +776,9 @@ static const struct exynos_cpuclk_cfg_data exynos5250_armclk_d[] __initconst = {
 };
 
 static const struct samsung_cpu_clock exynos5250_cpu_clks[] __initconst = {
-       CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_MOUT_MPLL, CLK_CPU_HAS_DIV1, 0x200,
-                       exynos5250_armclk_d),
+       CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_MOUT_MPLL,
+               CLK_CPU_HAS_DIV1, 0x0, CPUCLK_LAYOUT_E4210,
+               exynos5250_armclk_d),
 };
 
 static const struct of_device_id ext_clk_match[] __initconst = {
index 199843f12ae5688522abc4c1190453ee82ff4669..c630135c686bba0b7aef3a9caa53ac0a75ac2bfe 100644 (file)
@@ -1555,17 +1555,17 @@ static const struct exynos_cpuclk_cfg_data exynos5420_kfcclk_d[] __initconst = {
 };
 
 static const struct samsung_cpu_clock exynos5420_cpu_clks[] __initconst = {
-       CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_MOUT_MSPLL_CPU, 0, 0x200,
-                       exynos5420_eglclk_d),
-       CPU_CLK(CLK_KFC_CLK, "kfcclk", CLK_MOUT_KPLL, CLK_MOUT_MSPLL_KFC, 0, 0x28200,
-                       exynos5420_kfcclk_d),
+       CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_MOUT_MSPLL_CPU, 0,
+               0x0, CPUCLK_LAYOUT_E4210, exynos5420_eglclk_d),
+       CPU_CLK(CLK_KFC_CLK, "kfcclk", CLK_MOUT_KPLL, CLK_MOUT_MSPLL_KFC, 0,
+               0x28000, CPUCLK_LAYOUT_E4210, exynos5420_kfcclk_d),
 };
 
 static const struct samsung_cpu_clock exynos5800_cpu_clks[] __initconst = {
-       CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_MOUT_MSPLL_CPU, 0, 0x200,
-                       exynos5800_eglclk_d),
-       CPU_CLK(CLK_KFC_CLK, "kfcclk", CLK_MOUT_KPLL, CLK_MOUT_MSPLL_KFC, 0, 0x28200,
-                       exynos5420_kfcclk_d),
+       CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_MOUT_MSPLL_CPU, 0,
+               0x0, CPUCLK_LAYOUT_E4210, exynos5800_eglclk_d),
+       CPU_CLK(CLK_KFC_CLK, "kfcclk", CLK_MOUT_KPLL, CLK_MOUT_MSPLL_KFC, 0,
+               0x28000, CPUCLK_LAYOUT_E4210, exynos5420_kfcclk_d),
 };
 
 static const struct of_device_id ext_clk_match[] __initconst = {
index 6bfc5d0cd92478fd0a4982c40a34fe70b59ed35e..609d31a7aa5247e6318884609d1e651001616b73 100644 (file)
@@ -3700,9 +3700,8 @@ static const struct exynos_cpuclk_cfg_data exynos5433_apolloclk_d[] __initconst
 
 static const struct samsung_cpu_clock apollo_cpu_clks[] __initconst = {
        CPU_CLK(CLK_SCLK_APOLLO, "apolloclk", CLK_MOUT_APOLLO_PLL,
-                       CLK_MOUT_BUS_PLL_APOLLO_USER,
-                       CLK_CPU_HAS_E5433_REGS_LAYOUT, 0x200,
-                       exynos5433_apolloclk_d),
+               CLK_MOUT_BUS_PLL_APOLLO_USER, 0, 0x0,
+               CPUCLK_LAYOUT_E5433, exynos5433_apolloclk_d),
 };
 
 static const struct samsung_cmu_info apollo_cmu_info __initconst = {
@@ -3945,9 +3944,8 @@ static const struct exynos_cpuclk_cfg_data exynos5433_atlasclk_d[] __initconst =
 
 static const struct samsung_cpu_clock atlas_cpu_clks[] __initconst = {
        CPU_CLK(CLK_SCLK_ATLAS, "atlasclk", CLK_MOUT_ATLAS_PLL,
-                       CLK_MOUT_BUS_PLL_ATLAS_USER,
-                       CLK_CPU_HAS_E5433_REGS_LAYOUT, 0x200,
-                       exynos5433_atlasclk_d),
+               CLK_MOUT_BUS_PLL_ATLAS_USER, 0, 0x0,
+               CPUCLK_LAYOUT_E5433, exynos5433_atlasclk_d),
 };
 
 static const struct samsung_cmu_info atlas_cmu_info __initconst = {
index bdc1eef7d6e548d81d15f7b007f0c1e36a45071e..82cfa22c07888253c8633052e585f1fe0bdc8e5e 100644 (file)
@@ -26,7 +26,7 @@
 #define CLKS_NR_IS                     (CLK_GOUT_IS_SYSREG_PCLK + 1)
 #define CLKS_NR_MFCMSCL                        (CLK_GOUT_MFCMSCL_SYSREG_PCLK + 1)
 #define CLKS_NR_PERI                   (CLK_GOUT_WDT1_PCLK + 1)
-#define CLKS_NR_CORE                   (CLK_GOUT_SYSREG_CORE_PCLK + 1)
+#define CLKS_NR_CORE                   (CLK_GOUT_SPDMA_CORE_ACLK + 1)
 #define CLKS_NR_DPU                    (CLK_GOUT_DPU_SYSREG_PCLK + 1)
 
 /* ---- CMU_TOP ------------------------------------------------------------- */
@@ -605,7 +605,7 @@ static const struct samsung_div_clock apm_div_clks[] __initconst = {
 
 static const struct samsung_gate_clock apm_gate_clks[] __initconst = {
        GATE(CLK_GOUT_CLKCMU_CMGP_BUS, "gout_clkcmu_cmgp_bus", "dout_apm_bus",
-            CLK_CON_GAT_CLKCMU_CMGP_BUS, 21, 0, 0),
+            CLK_CON_GAT_CLKCMU_CMGP_BUS, 21, CLK_SET_RATE_PARENT, 0),
        GATE(CLK_GOUT_CLKCMU_CHUB_BUS, "gout_clkcmu_chub_bus",
             "mout_clkcmu_chub_bus",
             CLK_CON_GAT_GATE_CLKCMU_CHUB_BUS, 21, 0, 0),
@@ -974,19 +974,19 @@ static const struct samsung_fixed_rate_clock cmgp_fixed_clks[] __initconst = {
 static const struct samsung_mux_clock cmgp_mux_clks[] __initconst = {
        MUX(CLK_MOUT_CMGP_ADC, "mout_cmgp_adc", mout_cmgp_adc_p,
            CLK_CON_MUX_CLK_CMGP_ADC, 0, 1),
-       MUX(CLK_MOUT_CMGP_USI0, "mout_cmgp_usi0", mout_cmgp_usi0_p,
-           CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP0, 0, 1),
-       MUX(CLK_MOUT_CMGP_USI1, "mout_cmgp_usi1", mout_cmgp_usi1_p,
-           CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP1, 0, 1),
+       MUX_F(CLK_MOUT_CMGP_USI0, "mout_cmgp_usi0", mout_cmgp_usi0_p,
+             CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP0, 0, 1, CLK_SET_RATE_PARENT, 0),
+       MUX_F(CLK_MOUT_CMGP_USI1, "mout_cmgp_usi1", mout_cmgp_usi1_p,
+             CLK_CON_MUX_MUX_CLK_CMGP_USI_CMGP1, 0, 1, CLK_SET_RATE_PARENT, 0),
 };
 
 static const struct samsung_div_clock cmgp_div_clks[] __initconst = {
        DIV(CLK_DOUT_CMGP_ADC, "dout_cmgp_adc", "gout_clkcmu_cmgp_bus",
            CLK_CON_DIV_DIV_CLK_CMGP_ADC, 0, 4),
-       DIV(CLK_DOUT_CMGP_USI0, "dout_cmgp_usi0", "mout_cmgp_usi0",
-           CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP0, 0, 5),
-       DIV(CLK_DOUT_CMGP_USI1, "dout_cmgp_usi1", "mout_cmgp_usi1",
-           CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP1, 0, 5),
+       DIV_F(CLK_DOUT_CMGP_USI0, "dout_cmgp_usi0", "mout_cmgp_usi0",
+             CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP0, 0, 5, CLK_SET_RATE_PARENT, 0),
+       DIV_F(CLK_DOUT_CMGP_USI1, "dout_cmgp_usi1", "mout_cmgp_usi1",
+             CLK_CON_DIV_DIV_CLK_CMGP_USI_CMGP1, 0, 5, CLK_SET_RATE_PARENT, 0),
 };
 
 static const struct samsung_gate_clock cmgp_gate_clks[] __initconst = {
@@ -1001,12 +1001,12 @@ static const struct samsung_gate_clock cmgp_gate_clks[] __initconst = {
             "gout_clkcmu_cmgp_bus",
             CLK_CON_GAT_GOUT_CMGP_GPIO_PCLK, 21, CLK_IGNORE_UNUSED, 0),
        GATE(CLK_GOUT_CMGP_USI0_IPCLK, "gout_cmgp_usi0_ipclk", "dout_cmgp_usi0",
-            CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_IPCLK, 21, 0, 0),
+            CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_IPCLK, 21, CLK_SET_RATE_PARENT, 0),
        GATE(CLK_GOUT_CMGP_USI0_PCLK, "gout_cmgp_usi0_pclk",
             "gout_clkcmu_cmgp_bus",
             CLK_CON_GAT_GOUT_CMGP_USI_CMGP0_PCLK, 21, 0, 0),
        GATE(CLK_GOUT_CMGP_USI1_IPCLK, "gout_cmgp_usi1_ipclk", "dout_cmgp_usi1",
-            CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_IPCLK, 21, 0, 0),
+            CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_IPCLK, 21, CLK_SET_RATE_PARENT, 0),
        GATE(CLK_GOUT_CMGP_USI1_PCLK, "gout_cmgp_usi1_pclk",
             "gout_clkcmu_cmgp_bus",
             CLK_CON_GAT_GOUT_CMGP_USI_CMGP1_PCLK, 21, 0, 0),
@@ -1557,8 +1557,9 @@ static const struct samsung_mux_clock peri_mux_clks[] __initconst = {
            mout_peri_uart_user_p, PLL_CON0_MUX_CLKCMU_PERI_UART_USER, 4, 1),
        MUX(CLK_MOUT_PERI_HSI2C_USER, "mout_peri_hsi2c_user",
            mout_peri_hsi2c_user_p, PLL_CON0_MUX_CLKCMU_PERI_HSI2C_USER, 4, 1),
-       MUX(CLK_MOUT_PERI_SPI_USER, "mout_peri_spi_user", mout_peri_spi_user_p,
-           PLL_CON0_MUX_CLKCMU_PERI_SPI_USER, 4, 1),
+       MUX_F(CLK_MOUT_PERI_SPI_USER, "mout_peri_spi_user",
+             mout_peri_spi_user_p, PLL_CON0_MUX_CLKCMU_PERI_SPI_USER, 4, 1,
+             CLK_SET_RATE_PARENT, 0),
 };
 
 static const struct samsung_div_clock peri_div_clks[] __initconst = {
@@ -1568,8 +1569,8 @@ static const struct samsung_div_clock peri_div_clks[] __initconst = {
            CLK_CON_DIV_DIV_CLK_PERI_HSI2C_1, 0, 5),
        DIV(CLK_DOUT_PERI_HSI2C2, "dout_peri_hsi2c2", "gout_peri_hsi2c2",
            CLK_CON_DIV_DIV_CLK_PERI_HSI2C_2, 0, 5),
-       DIV(CLK_DOUT_PERI_SPI0, "dout_peri_spi0", "mout_peri_spi_user",
-           CLK_CON_DIV_DIV_CLK_PERI_SPI_0, 0, 5),
+       DIV_F(CLK_DOUT_PERI_SPI0, "dout_peri_spi0", "mout_peri_spi_user",
+             CLK_CON_DIV_DIV_CLK_PERI_SPI_0, 0, 5, CLK_SET_RATE_PARENT, 0),
 };
 
 static const struct samsung_gate_clock peri_gate_clks[] __initconst = {
@@ -1611,7 +1612,7 @@ static const struct samsung_gate_clock peri_gate_clks[] __initconst = {
             "mout_peri_bus_user",
             CLK_CON_GAT_GOUT_PERI_PWM_MOTOR_PCLK, 21, 0, 0),
        GATE(CLK_GOUT_SPI0_IPCLK, "gout_spi0_ipclk", "dout_peri_spi0",
-            CLK_CON_GAT_GOUT_PERI_SPI_0_IPCLK, 21, 0, 0),
+            CLK_CON_GAT_GOUT_PERI_SPI_0_IPCLK, 21, CLK_SET_RATE_PARENT, 0),
        GATE(CLK_GOUT_SPI0_PCLK, "gout_spi0_pclk", "mout_peri_bus_user",
             CLK_CON_GAT_GOUT_PERI_SPI_0_PCLK, 21, 0, 0),
        GATE(CLK_GOUT_SYSREG_PERI_PCLK, "gout_sysreg_peri_pclk",
@@ -1667,6 +1668,8 @@ CLK_OF_DECLARE(exynos850_cmu_peri, "samsung,exynos850-cmu-peri",
 #define CLK_CON_GAT_GOUT_CORE_GPIO_CORE_PCLK   0x2044
 #define CLK_CON_GAT_GOUT_CORE_MMC_EMBD_I_ACLK  0x20e8
 #define CLK_CON_GAT_GOUT_CORE_MMC_EMBD_SDCLKIN 0x20ec
+#define CLK_CON_GAT_GOUT_CORE_PDMA_ACLK                0x20f0
+#define CLK_CON_GAT_GOUT_CORE_SPDMA_ACLK       0x2124
 #define CLK_CON_GAT_GOUT_CORE_SSS_I_ACLK       0x2128
 #define CLK_CON_GAT_GOUT_CORE_SSS_I_PCLK       0x212c
 #define CLK_CON_GAT_GOUT_CORE_SYSREG_CORE_PCLK 0x2130
@@ -1683,6 +1686,8 @@ static const unsigned long core_clk_regs[] __initconst = {
        CLK_CON_GAT_GOUT_CORE_GPIO_CORE_PCLK,
        CLK_CON_GAT_GOUT_CORE_MMC_EMBD_I_ACLK,
        CLK_CON_GAT_GOUT_CORE_MMC_EMBD_SDCLKIN,
+       CLK_CON_GAT_GOUT_CORE_PDMA_ACLK,
+       CLK_CON_GAT_GOUT_CORE_SPDMA_ACLK,
        CLK_CON_GAT_GOUT_CORE_SSS_I_ACLK,
        CLK_CON_GAT_GOUT_CORE_SSS_I_PCLK,
        CLK_CON_GAT_GOUT_CORE_SYSREG_CORE_PCLK,
@@ -1726,6 +1731,10 @@ static const struct samsung_gate_clock core_gate_clks[] __initconst = {
        GATE(CLK_GOUT_MMC_EMBD_SDCLKIN, "gout_mmc_embd_sdclkin",
             "mout_core_mmc_embd_user", CLK_CON_GAT_GOUT_CORE_MMC_EMBD_SDCLKIN,
             21, CLK_SET_RATE_PARENT, 0),
+       GATE(CLK_GOUT_PDMA_CORE_ACLK, "gout_pdma_core_aclk",
+            "mout_core_bus_user", CLK_CON_GAT_GOUT_CORE_PDMA_ACLK, 21, 0, 0),
+       GATE(CLK_GOUT_SPDMA_CORE_ACLK, "gout_spdma_core_aclk",
+            "mout_core_bus_user", CLK_CON_GAT_GOUT_CORE_SPDMA_ACLK, 21, 0, 0),
        GATE(CLK_GOUT_SSS_ACLK, "gout_sss_aclk", "mout_core_sss_user",
             CLK_CON_GAT_GOUT_CORE_SSS_I_ACLK, 21, 0, 0),
        GATE(CLK_GOUT_SSS_PCLK, "gout_sss_pclk", "dout_core_busp",
index 782993951fff8f7cc209329fc84af7f825fee143..d065e343a85ddff4a5bfbde64d7042fdd2e685e2 100644 (file)
 #define CLKS_NR_TOP    (CLK_GOUT_CMU_TPU_UART + 1)
 #define CLKS_NR_APM    (CLK_APM_PLL_DIV16_APM + 1)
 #define CLKS_NR_MISC   (CLK_GOUT_MISC_XIU_D_MISC_ACLK + 1)
+#define CLKS_NR_PERIC0 (CLK_GOUT_PERIC0_SYSREG_PERIC0_PCLK + 1)
+#define CLKS_NR_PERIC1 (CLK_GOUT_PERIC1_SYSREG_PERIC1_PCLK + 1)
 
 /* ---- CMU_TOP ------------------------------------------------------------- */
 
 /* Register Offset definitions for CMU_TOP (0x1e080000) */
-
 #define PLL_LOCKTIME_PLL_SHARED0                       0x0000
 #define PLL_LOCKTIME_PLL_SHARED1                       0x0004
 #define PLL_LOCKTIME_PLL_SHARED2                       0x0008
@@ -2478,6 +2479,936 @@ static const struct samsung_cmu_info misc_cmu_info __initconst = {
        .clk_name               = "bus",
 };
 
+static void __init gs101_cmu_misc_init(struct device_node *np)
+{
+       exynos_arm64_register_cmu(NULL, np, &misc_cmu_info);
+}
+
+/* Register CMU_MISC early, as it's needed for the MCT timer */
+CLK_OF_DECLARE(gs101_cmu_misc, "google,gs101-cmu-misc",
+              gs101_cmu_misc_init);
+
+/* ---- CMU_PERIC0 ---------------------------------------------------------- */
+
+/* Register Offset definitions for CMU_PERIC0 (0x10800000) */
+#define PLL_CON0_MUX_CLKCMU_PERIC0_BUS_USER            0x0600
+#define PLL_CON1_MUX_CLKCMU_PERIC0_BUS_USER            0x0604
+#define PLL_CON0_MUX_CLKCMU_PERIC0_I3C_USER            0x0610
+#define PLL_CON1_MUX_CLKCMU_PERIC0_I3C_USER            0x0614
+#define PLL_CON0_MUX_CLKCMU_PERIC0_USI0_UART_USER      0x0620
+#define PLL_CON1_MUX_CLKCMU_PERIC0_USI0_UART_USER      0x0624
+#define PLL_CON0_MUX_CLKCMU_PERIC0_USI14_USI_USER      0x0640
+#define PLL_CON1_MUX_CLKCMU_PERIC0_USI14_USI_USER      0x0644
+#define PLL_CON0_MUX_CLKCMU_PERIC0_USI1_USI_USER       0x0650
+#define PLL_CON1_MUX_CLKCMU_PERIC0_USI1_USI_USER       0x0654
+#define PLL_CON0_MUX_CLKCMU_PERIC0_USI2_USI_USER       0x0660
+#define PLL_CON1_MUX_CLKCMU_PERIC0_USI2_USI_USER       0x0664
+#define PLL_CON0_MUX_CLKCMU_PERIC0_USI3_USI_USER       0x0670
+#define PLL_CON1_MUX_CLKCMU_PERIC0_USI3_USI_USER       0x0674
+#define PLL_CON0_MUX_CLKCMU_PERIC0_USI4_USI_USER       0x0680
+#define PLL_CON1_MUX_CLKCMU_PERIC0_USI4_USI_USER       0x0684
+#define PLL_CON0_MUX_CLKCMU_PERIC0_USI5_USI_USER       0x0690
+#define PLL_CON1_MUX_CLKCMU_PERIC0_USI5_USI_USER       0x0694
+#define PLL_CON0_MUX_CLKCMU_PERIC0_USI6_USI_USER       0x06a0
+#define PLL_CON1_MUX_CLKCMU_PERIC0_USI6_USI_USER       0x06a4
+#define PLL_CON0_MUX_CLKCMU_PERIC0_USI7_USI_USER       0x06b0
+#define PLL_CON1_MUX_CLKCMU_PERIC0_USI7_USI_USER       0x06b4
+#define PLL_CON0_MUX_CLKCMU_PERIC0_USI8_USI_USER       0x06c0
+#define PLL_CON1_MUX_CLKCMU_PERIC0_USI8_USI_USER       0x06c4
+#define PERIC0_CMU_PERIC0_CONTROLLER_OPTION            0x0800
+#define CLKOUT_CON_BLK_PERIC0_CMU_PERIC0_CLKOUT0       0x0810
+#define CLK_CON_DIV_DIV_CLK_PERIC0_I3C                 0x1800
+#define CLK_CON_DIV_DIV_CLK_PERIC0_USI0_UART           0x1804
+#define CLK_CON_DIV_DIV_CLK_PERIC0_USI14_USI           0x180c
+#define CLK_CON_DIV_DIV_CLK_PERIC0_USI1_USI            0x1810
+#define CLK_CON_DIV_DIV_CLK_PERIC0_USI2_USI            0x1814
+#define CLK_CON_DIV_DIV_CLK_PERIC0_USI3_USI            0x1820
+#define CLK_CON_DIV_DIV_CLK_PERIC0_USI4_USI            0x1824
+#define CLK_CON_DIV_DIV_CLK_PERIC0_USI5_USI            0x1828
+#define CLK_CON_DIV_DIV_CLK_PERIC0_USI6_USI            0x182c
+#define CLK_CON_DIV_DIV_CLK_PERIC0_USI7_USI            0x1830
+#define CLK_CON_DIV_DIV_CLK_PERIC0_USI8_USI            0x1834
+#define CLK_CON_BUF_CLKBUF_PERIC0_IP                   0x2000
+#define CLK_CON_GAT_CLK_BLK_PERIC0_UID_PERIC0_CMU_PERIC0_IPCLKPORT_PCLK                        0x2004
+#define CLK_CON_GAT_CLK_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_OSCCLK_IPCLKPORT_CLK                0x2008
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_D_TZPC_PERIC0_IPCLKPORT_PCLK                   0x200c
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_GPC_PERIC0_IPCLKPORT_PCLK                      0x2010
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_GPIO_PERIC0_IPCLKPORT_PCLK                     0x2014
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_LHM_AXI_P_PERIC0_IPCLKPORT_I_CLK               0x2018
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_0                  0x201c
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_1                  0x2020
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_10                 0x2024
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_11                 0x2028
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_12                 0x202c
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_13                 0x2030
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_14                 0x2034
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_15                 0x2038
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_2                  0x203c
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_3                  0x2040
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_4                  0x2044
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_5                  0x2048
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_6                  0x204c
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_7                  0x2050
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_8                  0x2054
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_9                  0x2058
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_0                   0x205c
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_1                   0x2060
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_10                  0x2064
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_11                  0x2068
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_12                  0x206c
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_13                  0x2070
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_14                  0x2074
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_15                  0x2078
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_2                   0x207c
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_3                   0x2080
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_4                   0x2084
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_5                   0x2088
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_6                   0x208c
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_7                   0x2090
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_8                   0x2094
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_9                   0x2098
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_IPCLK_0                  0x209c
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_IPCLK_2                  0x20a4
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_PCLK_0                   0x20a8
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_PCLK_2                   0x20b0
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_BUSP_IPCLKPORT_CLK         0x20b4
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_I3C_IPCLKPORT_CLK          0x20b8
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI0_UART_IPCLKPORT_CLK    0x20bc
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI14_USI_IPCLKPORT_CLK    0x20c4
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI1_USI_IPCLKPORT_CLK     0x20c8
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI2_USI_IPCLKPORT_CLK     0x20cc
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI3_USI_IPCLKPORT_CLK     0x20d0
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI4_USI_IPCLKPORT_CLK     0x20d4
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI5_USI_IPCLKPORT_CLK     0x20d8
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI6_USI_IPCLKPORT_CLK     0x20dc
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI7_USI_IPCLKPORT_CLK     0x20e0
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI8_USI_IPCLKPORT_CLK     0x20e4
+#define CLK_CON_GAT_GOUT_BLK_PERIC0_UID_SYSREG_PERIC0_IPCLKPORT_PCLK                   0x20e8
+#define DMYQCH_CON_PERIC0_TOP0_QCH_S1                  0x3000
+#define DMYQCH_CON_PERIC0_TOP0_QCH_S2                  0x3004
+#define DMYQCH_CON_PERIC0_TOP0_QCH_S3                  0x3008
+#define DMYQCH_CON_PERIC0_TOP0_QCH_S4                  0x300c
+#define DMYQCH_CON_PERIC0_TOP0_QCH_S5                  0x3010
+#define DMYQCH_CON_PERIC0_TOP0_QCH_S6                  0x3014
+#define DMYQCH_CON_PERIC0_TOP0_QCH_S7                  0x3018
+#define DMYQCH_CON_PERIC0_TOP0_QCH_S8                  0x301c
+#define PCH_CON_LHM_AXI_P_PERIC0_PCH                   0x3020
+#define QCH_CON_D_TZPC_PERIC0_QCH                      0x3024
+#define QCH_CON_GPC_PERIC0_QCH                         0x3028
+#define QCH_CON_GPIO_PERIC0_QCH                                0x302c
+#define QCH_CON_LHM_AXI_P_PERIC0_QCH                   0x3030
+#define QCH_CON_PERIC0_CMU_PERIC0_QCH                  0x3034
+#define QCH_CON_PERIC0_TOP0_QCH_I3C1                   0x3038
+#define QCH_CON_PERIC0_TOP0_QCH_I3C2                   0x303c
+#define QCH_CON_PERIC0_TOP0_QCH_I3C3                   0x3040
+#define QCH_CON_PERIC0_TOP0_QCH_I3C4                   0x3044
+#define QCH_CON_PERIC0_TOP0_QCH_I3C5                   0x3048
+#define QCH_CON_PERIC0_TOP0_QCH_I3C6                   0x304c
+#define QCH_CON_PERIC0_TOP0_QCH_I3C7                   0x3050
+#define QCH_CON_PERIC0_TOP0_QCH_I3C8                   0x3054
+#define QCH_CON_PERIC0_TOP0_QCH_USI1_USI               0x3058
+#define QCH_CON_PERIC0_TOP0_QCH_USI2_USI               0x305c
+#define QCH_CON_PERIC0_TOP0_QCH_USI3_USI               0x3060
+#define QCH_CON_PERIC0_TOP0_QCH_USI4_USI               0x3064
+#define QCH_CON_PERIC0_TOP0_QCH_USI5_USI               0x3068
+#define QCH_CON_PERIC0_TOP0_QCH_USI6_USI               0x306c
+#define QCH_CON_PERIC0_TOP0_QCH_USI7_USI               0x3070
+#define QCH_CON_PERIC0_TOP0_QCH_USI8_USI               0x3074
+#define QCH_CON_PERIC0_TOP1_QCH_USI0_UART              0x3078
+#define QCH_CON_PERIC0_TOP1_QCH_USI14_UART             0x307c
+#define QCH_CON_SYSREG_PERIC0_QCH                      0x3080
+#define QUEUE_CTRL_REG_BLK_PERIC0_CMU_PERIC0           0x3c00
+
+static const unsigned long peric0_clk_regs[] __initconst = {
+       PLL_CON0_MUX_CLKCMU_PERIC0_BUS_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_BUS_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_I3C_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_I3C_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_USI0_UART_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_USI0_UART_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_USI14_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_USI14_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_USI1_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_USI1_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_USI2_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_USI2_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_USI3_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_USI3_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_USI4_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_USI4_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_USI5_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_USI5_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_USI6_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_USI6_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_USI7_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_USI7_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC0_USI8_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC0_USI8_USI_USER,
+       PERIC0_CMU_PERIC0_CONTROLLER_OPTION,
+       CLKOUT_CON_BLK_PERIC0_CMU_PERIC0_CLKOUT0,
+       CLK_CON_DIV_DIV_CLK_PERIC0_I3C,
+       CLK_CON_DIV_DIV_CLK_PERIC0_USI0_UART,
+       CLK_CON_DIV_DIV_CLK_PERIC0_USI14_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC0_USI1_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC0_USI2_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC0_USI3_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC0_USI4_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC0_USI5_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC0_USI6_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC0_USI7_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC0_USI8_USI,
+       CLK_CON_BUF_CLKBUF_PERIC0_IP,
+       CLK_CON_GAT_CLK_BLK_PERIC0_UID_PERIC0_CMU_PERIC0_IPCLKPORT_PCLK,
+       CLK_CON_GAT_CLK_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_OSCCLK_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_D_TZPC_PERIC0_IPCLKPORT_PCLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_GPC_PERIC0_IPCLKPORT_PCLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_GPIO_PERIC0_IPCLKPORT_PCLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_LHM_AXI_P_PERIC0_IPCLKPORT_I_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_0,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_1,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_10,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_11,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_12,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_13,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_14,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_15,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_2,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_3,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_4,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_5,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_6,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_7,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_8,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_9,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_0,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_1,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_10,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_11,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_12,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_13,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_14,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_15,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_2,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_3,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_4,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_5,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_6,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_7,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_8,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_9,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_IPCLK_0,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_IPCLK_2,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_PCLK_0,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_PCLK_2,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_BUSP_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_I3C_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI0_UART_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI14_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI1_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI2_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI3_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI4_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI5_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI6_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI7_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI8_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC0_UID_SYSREG_PERIC0_IPCLKPORT_PCLK,
+       DMYQCH_CON_PERIC0_TOP0_QCH_S1,
+       DMYQCH_CON_PERIC0_TOP0_QCH_S2,
+       DMYQCH_CON_PERIC0_TOP0_QCH_S3,
+       DMYQCH_CON_PERIC0_TOP0_QCH_S4,
+       DMYQCH_CON_PERIC0_TOP0_QCH_S5,
+       DMYQCH_CON_PERIC0_TOP0_QCH_S6,
+       DMYQCH_CON_PERIC0_TOP0_QCH_S7,
+       DMYQCH_CON_PERIC0_TOP0_QCH_S8,
+       PCH_CON_LHM_AXI_P_PERIC0_PCH,
+       QCH_CON_D_TZPC_PERIC0_QCH,
+       QCH_CON_GPC_PERIC0_QCH,
+       QCH_CON_GPIO_PERIC0_QCH,
+       QCH_CON_LHM_AXI_P_PERIC0_QCH,
+       QCH_CON_PERIC0_CMU_PERIC0_QCH,
+       QCH_CON_PERIC0_TOP0_QCH_I3C1,
+       QCH_CON_PERIC0_TOP0_QCH_I3C2,
+       QCH_CON_PERIC0_TOP0_QCH_I3C3,
+       QCH_CON_PERIC0_TOP0_QCH_I3C4,
+       QCH_CON_PERIC0_TOP0_QCH_I3C5,
+       QCH_CON_PERIC0_TOP0_QCH_I3C6,
+       QCH_CON_PERIC0_TOP0_QCH_I3C7,
+       QCH_CON_PERIC0_TOP0_QCH_I3C8,
+       QCH_CON_PERIC0_TOP0_QCH_USI1_USI,
+       QCH_CON_PERIC0_TOP0_QCH_USI2_USI,
+       QCH_CON_PERIC0_TOP0_QCH_USI3_USI,
+       QCH_CON_PERIC0_TOP0_QCH_USI4_USI,
+       QCH_CON_PERIC0_TOP0_QCH_USI5_USI,
+       QCH_CON_PERIC0_TOP0_QCH_USI6_USI,
+       QCH_CON_PERIC0_TOP0_QCH_USI7_USI,
+       QCH_CON_PERIC0_TOP0_QCH_USI8_USI,
+       QCH_CON_PERIC0_TOP1_QCH_USI0_UART,
+       QCH_CON_PERIC0_TOP1_QCH_USI14_UART,
+       QCH_CON_SYSREG_PERIC0_QCH,
+       QUEUE_CTRL_REG_BLK_PERIC0_CMU_PERIC0,
+};
+
+/* List of parent clocks for Muxes in CMU_PERIC0 */
+PNAME(mout_peric0_bus_user_p)          = { "oscclk", "dout_cmu_peric0_bus" };
+PNAME(mout_peric0_i3c_user_p)          = { "oscclk", "dout_cmu_peric0_ip" };
+PNAME(mout_peric0_usi0_uart_user_p)    = { "oscclk", "dout_cmu_peric0_ip" };
+PNAME(mout_peric0_usi_usi_user_p)      = { "oscclk", "dout_cmu_peric0_ip" };
+
+static const struct samsung_mux_clock peric0_mux_clks[] __initconst = {
+       MUX(CLK_MOUT_PERIC0_BUS_USER, "mout_peric0_bus_user",
+           mout_peric0_bus_user_p, PLL_CON0_MUX_CLKCMU_PERIC0_BUS_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_I3C_USER, "mout_peric0_i3c_user",
+           mout_peric0_i3c_user_p, PLL_CON0_MUX_CLKCMU_PERIC0_I3C_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_USI0_UART_USER,
+           "mout_peric0_usi0_uart_user", mout_peric0_usi0_uart_user_p,
+           PLL_CON0_MUX_CLKCMU_PERIC0_USI0_UART_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_USI14_USI_USER,
+           "mout_peric0_usi14_usi_user", mout_peric0_usi_usi_user_p,
+           PLL_CON0_MUX_CLKCMU_PERIC0_USI14_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_USI1_USI_USER,
+           "mout_peric0_usi1_usi_user", mout_peric0_usi_usi_user_p,
+           PLL_CON0_MUX_CLKCMU_PERIC0_USI1_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_USI2_USI_USER,
+           "mout_peric0_usi2_usi_user", mout_peric0_usi_usi_user_p,
+           PLL_CON0_MUX_CLKCMU_PERIC0_USI2_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_USI3_USI_USER,
+           "mout_peric0_usi3_usi_user", mout_peric0_usi_usi_user_p,
+           PLL_CON0_MUX_CLKCMU_PERIC0_USI3_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_USI4_USI_USER,
+           "mout_peric0_usi4_usi_user", mout_peric0_usi_usi_user_p,
+           PLL_CON0_MUX_CLKCMU_PERIC0_USI4_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_USI5_USI_USER,
+           "mout_peric0_usi5_usi_user", mout_peric0_usi_usi_user_p,
+           PLL_CON0_MUX_CLKCMU_PERIC0_USI5_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_USI6_USI_USER,
+           "mout_peric0_usi6_usi_user", mout_peric0_usi_usi_user_p,
+           PLL_CON0_MUX_CLKCMU_PERIC0_USI6_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_USI7_USI_USER,
+           "mout_peric0_usi7_usi_user", mout_peric0_usi_usi_user_p,
+           PLL_CON0_MUX_CLKCMU_PERIC0_USI7_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC0_USI8_USI_USER,
+           "mout_peric0_usi8_usi_user", mout_peric0_usi_usi_user_p,
+           PLL_CON0_MUX_CLKCMU_PERIC0_USI8_USI_USER, 4, 1),
+};
+
+static const struct samsung_div_clock peric0_div_clks[] __initconst = {
+       DIV(CLK_DOUT_PERIC0_I3C, "dout_peric0_i3c", "mout_peric0_i3c_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_I3C, 0, 4),
+       DIV(CLK_DOUT_PERIC0_USI0_UART,
+           "dout_peric0_usi0_uart", "mout_peric0_usi0_uart_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_USI0_UART, 0, 4),
+       DIV(CLK_DOUT_PERIC0_USI14_USI,
+           "dout_peric0_usi14_usi", "mout_peric0_usi14_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_USI14_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC0_USI1_USI,
+           "dout_peric0_usi1_usi", "mout_peric0_usi1_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_USI1_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC0_USI2_USI,
+           "dout_peric0_usi2_usi", "mout_peric0_usi2_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_USI2_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC0_USI3_USI,
+           "dout_peric0_usi3_usi", "mout_peric0_usi3_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_USI3_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC0_USI4_USI,
+           "dout_peric0_usi4_usi", "mout_peric0_usi4_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_USI4_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC0_USI5_USI,
+           "dout_peric0_usi5_usi", "mout_peric0_usi5_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_USI5_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC0_USI6_USI,
+           "dout_peric0_usi6_usi", "mout_peric0_usi6_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_USI6_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC0_USI7_USI,
+           "dout_peric0_usi7_usi", "mout_peric0_usi7_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_USI7_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC0_USI8_USI,
+           "dout_peric0_usi8_usi", "mout_peric0_usi8_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC0_USI8_USI, 0, 4),
+};
+
+static const struct samsung_gate_clock peric0_gate_clks[] __initconst = {
+       /* Disabling this clock makes the system hang. Mark the clock as critical. */
+       GATE(CLK_GOUT_PERIC0_PERIC0_CMU_PERIC0_PCLK,
+            "gout_peric0_peric0_cmu_peric0_pclk", "mout_peric0_bus_user",
+            CLK_CON_GAT_CLK_BLK_PERIC0_UID_PERIC0_CMU_PERIC0_IPCLKPORT_PCLK,
+            21, CLK_IS_CRITICAL, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_OSCCLK_CLK,
+            "gout_peric0_clk_peric0_oscclk_clk", "oscclk",
+            CLK_CON_GAT_CLK_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_OSCCLK_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_D_TZPC_PERIC0_PCLK,
+            "gout_peric0_d_tzpc_peric0_pclk", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_D_TZPC_PERIC0_IPCLKPORT_PCLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_GPC_PERIC0_PCLK,
+            "gout_peric0_gpc_peric0_pclk", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_GPC_PERIC0_IPCLKPORT_PCLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_GPIO_PERIC0_PCLK,
+            "gout_peric0_gpio_peric0_pclk", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_GPIO_PERIC0_IPCLKPORT_PCLK,
+            21, CLK_IGNORE_UNUSED, 0),
+       /* Disabling this clock makes the system hang. Mark the clock as critical. */
+       GATE(CLK_GOUT_PERIC0_LHM_AXI_P_PERIC0_I_CLK,
+            "gout_peric0_lhm_axi_p_peric0_i_clk", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_LHM_AXI_P_PERIC0_IPCLKPORT_I_CLK,
+            21, CLK_IS_CRITICAL, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_0,
+            "gout_peric0_peric0_top0_ipclk_0", "dout_peric0_usi1_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_0,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_1,
+            "gout_peric0_peric0_top0_ipclk_1", "dout_peric0_usi2_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_1,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_10,
+            "gout_peric0_peric0_top0_ipclk_10", "dout_peric0_i3c",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_10,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_11,
+            "gout_peric0_peric0_top0_ipclk_11", "dout_peric0_i3c",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_11,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_12,
+            "gout_peric0_peric0_top0_ipclk_12", "dout_peric0_i3c",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_12,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_13,
+            "gout_peric0_peric0_top0_ipclk_13", "dout_peric0_i3c",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_13,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_14,
+            "gout_peric0_peric0_top0_ipclk_14", "dout_peric0_i3c",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_14,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_15,
+            "gout_peric0_peric0_top0_ipclk_15", "dout_peric0_i3c",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_15,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_2,
+            "gout_peric0_peric0_top0_ipclk_2", "dout_peric0_usi3_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_2,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_3,
+            "gout_peric0_peric0_top0_ipclk_3", "dout_peric0_usi4_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_3,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_4,
+            "gout_peric0_peric0_top0_ipclk_4", "dout_peric0_usi5_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_4,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_5,
+            "gout_peric0_peric0_top0_ipclk_5", "dout_peric0_usi6_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_5,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_6,
+            "gout_peric0_peric0_top0_ipclk_6", "dout_peric0_usi7_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_6,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_7,
+            "gout_peric0_peric0_top0_ipclk_7", "dout_peric0_usi8_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_7,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_8,
+            "gout_peric0_peric0_top0_ipclk_8", "dout_peric0_i3c",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_8,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_IPCLK_9,
+            "gout_peric0_peric0_top0_ipclk_9", "dout_peric0_i3c",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_IPCLK_9,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_0,
+            "gout_peric0_peric0_top0_pclk_0", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_0,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_1,
+            "gout_peric0_peric0_top0_pclk_1", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_1,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_10,
+            "gout_peric0_peric0_top0_pclk_10", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_10,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_11,
+            "gout_peric0_peric0_top0_pclk_11", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_11,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_12,
+            "gout_peric0_peric0_top0_pclk_12", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_12,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_13,
+            "gout_peric0_peric0_top0_pclk_13", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_13,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_14,
+            "gout_peric0_peric0_top0_pclk_14", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_14,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_15,
+            "gout_peric0_peric0_top0_pclk_15", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_15,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_2,
+            "gout_peric0_peric0_top0_pclk_2", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_2,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_3,
+            "gout_peric0_peric0_top0_pclk_3", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_3,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_4,
+            "gout_peric0_peric0_top0_pclk_4", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_4,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_5,
+            "gout_peric0_peric0_top0_pclk_5", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_5,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_6,
+            "gout_peric0_peric0_top0_pclk_6", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_6,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_7,
+            "gout_peric0_peric0_top0_pclk_7", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_7,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_8,
+            "gout_peric0_peric0_top0_pclk_8", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_8,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP0_PCLK_9,
+            "gout_peric0_peric0_top0_pclk_9", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP0_IPCLKPORT_PCLK_9,
+            21, 0, 0),
+       /* Disabling this clock makes the system hang. Mark the clock as critical. */
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP1_IPCLK_0,
+            "gout_peric0_peric0_top1_ipclk_0", "dout_peric0_usi0_uart",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_IPCLK_0,
+            21, CLK_IS_CRITICAL, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP1_IPCLK_2,
+            "gout_peric0_peric0_top1_ipclk_2", "dout_peric0_usi14_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_IPCLK_2,
+            21, 0, 0),
+       /* Disabling this clock makes the system hang. Mark the clock as critical. */
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP1_PCLK_0,
+            "gout_peric0_peric0_top1_pclk_0", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_PCLK_0,
+            21, CLK_IS_CRITICAL, 0),
+       GATE(CLK_GOUT_PERIC0_PERIC0_TOP1_PCLK_2,
+            "gout_peric0_peric0_top1_pclk_2", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_PERIC0_TOP1_IPCLKPORT_PCLK_2,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_BUSP_CLK,
+            "gout_peric0_clk_peric0_busp_clk", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_BUSP_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_I3C_CLK,
+            "gout_peric0_clk_peric0_i3c_clk", "dout_peric0_i3c",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_I3C_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_USI0_UART_CLK,
+            "gout_peric0_clk_peric0_usi0_uart_clk", "dout_peric0_usi0_uart",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI0_UART_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_USI14_USI_CLK,
+            "gout_peric0_clk_peric0_usi14_usi_clk", "dout_peric0_usi14_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI14_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_USI1_USI_CLK,
+            "gout_peric0_clk_peric0_usi1_usi_clk", "dout_peric0_usi1_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI1_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_USI2_USI_CLK,
+            "gout_peric0_clk_peric0_usi2_usi_clk", "dout_peric0_usi2_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI2_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_USI3_USI_CLK,
+            "gout_peric0_clk_peric0_usi3_usi_clk", "dout_peric0_usi3_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI3_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_USI4_USI_CLK,
+            "gout_peric0_clk_peric0_usi4_usi_clk", "dout_peric0_usi4_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI4_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_USI5_USI_CLK,
+            "gout_peric0_clk_peric0_usi5_usi_clk", "dout_peric0_usi5_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI5_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_USI6_USI_CLK,
+            "gout_peric0_clk_peric0_usi6_usi_clk", "dout_peric0_usi6_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI6_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_USI7_USI_CLK,
+            "gout_peric0_clk_peric0_usi7_usi_clk", "dout_peric0_usi7_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI7_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_CLK_PERIC0_USI8_USI_CLK,
+            "gout_peric0_clk_peric0_usi8_usi_clk", "dout_peric0_usi8_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_RSTNSYNC_CLK_PERIC0_USI8_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC0_SYSREG_PERIC0_PCLK,
+            "gout_peric0_sysreg_peric0_pclk", "mout_peric0_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC0_UID_SYSREG_PERIC0_IPCLKPORT_PCLK,
+            21, 0, 0),
+};
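
The GATE() entries above likewise share a single shape. An annotated view (macro from drivers/clk/samsung/clk.h; bit 21 is the enable bit in every CLK_CON_GAT register of this block):

/*
 * GATE(id,          clock ID from the dt-bindings header
 *      name,        globally unique clock name
 *      parent_name, name of the parent clock
 *      reg_offset,  CLK_CON_GAT_* register controlling the gate
 *      bit,         enable bit index (21 throughout this block)
 *      flags,       common clk flags, e.g. CLK_IS_CRITICAL
 *      gate_flags)  gate-specific flags
 */
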
+
+static const struct samsung_cmu_info peric0_cmu_info __initconst = {
+       .mux_clks               = peric0_mux_clks,
+       .nr_mux_clks            = ARRAY_SIZE(peric0_mux_clks),
+       .div_clks               = peric0_div_clks,
+       .nr_div_clks            = ARRAY_SIZE(peric0_div_clks),
+       .gate_clks              = peric0_gate_clks,
+       .nr_gate_clks           = ARRAY_SIZE(peric0_gate_clks),
+       .nr_clk_ids             = CLKS_NR_PERIC0,
+       .clk_regs               = peric0_clk_regs,
+       .nr_clk_regs            = ARRAY_SIZE(peric0_clk_regs),
+       .clk_name               = "bus",
+};
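
The samsung_cmu_info block only aggregates the tables; generic code walks it at registration time. A minimal sketch of that walk, using the real helper names from drivers/clk/samsung/clk.c but heavily simplified (the actual helper also handles PLLs, fixed clocks and the suspend register list):

static void __init example_register_cmu(struct samsung_clk_provider *ctx,
					const struct samsung_cmu_info *cmu)
{
	/* Hand each table to the matching generic registrar. */
	samsung_clk_register_mux(ctx, cmu->mux_clks, cmu->nr_mux_clks);
	samsung_clk_register_div(ctx, cmu->div_clks, cmu->nr_div_clks);
	samsung_clk_register_gate(ctx, cmu->gate_clks, cmu->nr_gate_clks);
}
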
+
+/* ---- CMU_PERIC1 ---------------------------------------------------------- */
+
+/* Register Offset definitions for CMU_PERIC1 (0x10c00000) */
+#define PLL_CON0_MUX_CLKCMU_PERIC1_BUS_USER                                            0x0600
+#define PLL_CON1_MUX_CLKCMU_PERIC1_BUS_USER                                            0x0604
+#define PLL_CON0_MUX_CLKCMU_PERIC1_I3C_USER                                            0x0610
+#define PLL_CON1_MUX_CLKCMU_PERIC1_I3C_USER                                            0x0614
+#define PLL_CON0_MUX_CLKCMU_PERIC1_USI0_USI_USER                                       0x0620
+#define PLL_CON1_MUX_CLKCMU_PERIC1_USI0_USI_USER                                       0x0624
+#define PLL_CON0_MUX_CLKCMU_PERIC1_USI10_USI_USER                                      0x0630
+#define PLL_CON1_MUX_CLKCMU_PERIC1_USI10_USI_USER                                      0x0634
+#define PLL_CON0_MUX_CLKCMU_PERIC1_USI11_USI_USER                                      0x0640
+#define PLL_CON1_MUX_CLKCMU_PERIC1_USI11_USI_USER                                      0x0644
+#define PLL_CON0_MUX_CLKCMU_PERIC1_USI12_USI_USER                                      0x0650
+#define PLL_CON1_MUX_CLKCMU_PERIC1_USI12_USI_USER                                      0x0654
+#define PLL_CON0_MUX_CLKCMU_PERIC1_USI13_USI_USER                                      0x0660
+#define PLL_CON1_MUX_CLKCMU_PERIC1_USI13_USI_USER                                      0x0664
+#define PLL_CON0_MUX_CLKCMU_PERIC1_USI9_USI_USER                                       0x0670
+#define PLL_CON1_MUX_CLKCMU_PERIC1_USI9_USI_USER                                       0x0674
+#define PERIC1_CMU_PERIC1_CONTROLLER_OPTION                                            0x0800
+#define CLKOUT_CON_BLK_PERIC1_CMU_PERIC1_CLKOUT0                                       0x0810
+#define CLK_CON_DIV_DIV_CLK_PERIC1_I3C                                                 0x1800
+#define CLK_CON_DIV_DIV_CLK_PERIC1_USI0_USI                                            0x1804
+#define CLK_CON_DIV_DIV_CLK_PERIC1_USI10_USI                                           0x1808
+#define CLK_CON_DIV_DIV_CLK_PERIC1_USI11_USI                                           0x180c
+#define CLK_CON_DIV_DIV_CLK_PERIC1_USI12_USI                                           0x1810
+#define CLK_CON_DIV_DIV_CLK_PERIC1_USI13_USI                                           0x1814
+#define CLK_CON_DIV_DIV_CLK_PERIC1_USI9_USI                                            0x1818
+#define CLK_CON_BUF_CLKBUF_PERIC1_IP                                                   0x2000
+#define CLK_CON_GAT_CLK_BLK_PERIC1_UID_PERIC1_CMU_PERIC1_IPCLKPORT_PCLK                        0x2004
+#define CLK_CON_GAT_CLK_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_I3C_IPCLKPORT_CLK           0x2008
+#define CLK_CON_GAT_CLK_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_OSCCLK_IPCLKPORT_CLK                0x200c
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_D_TZPC_PERIC1_IPCLKPORT_PCLK                   0x2010
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_GPC_PERIC1_IPCLKPORT_PCLK                      0x2014
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_GPIO_PERIC1_IPCLKPORT_PCLK                     0x2018
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_LHM_AXI_P_PERIC1_IPCLKPORT_I_CLK               0x201c
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_1                  0x2020
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_2                  0x2024
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_3                  0x2028
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_4                  0x202c
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_5                  0x2030
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_6                  0x2034
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_8                  0x2038
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_1                   0x203c
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_15                  0x2040
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_2                   0x2044
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_3                   0x2048
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_4                   0x204c
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_5                   0x2050
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_6                   0x2054
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_8                   0x2058
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_BUSP_IPCLKPORT_CLK         0x205c
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI0_USI_IPCLKPORT_CLK     0x2060
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI10_USI_IPCLKPORT_CLK    0x2064
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI11_USI_IPCLKPORT_CLK    0x2068
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI12_USI_IPCLKPORT_CLK    0x206c
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI13_USI_IPCLKPORT_CLK    0x2070
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI9_USI_IPCLKPORT_CLK     0x2074
+#define CLK_CON_GAT_GOUT_BLK_PERIC1_UID_SYSREG_PERIC1_IPCLKPORT_PCLK                   0x2078
+#define DMYQCH_CON_PERIC1_TOP0_QCH_S                                                   0x3000
+#define PCH_CON_LHM_AXI_P_PERIC1_PCH                                                   0x3004
+#define QCH_CON_D_TZPC_PERIC1_QCH                                                      0x3008
+#define QCH_CON_GPC_PERIC1_QCH                                                         0x300c
+#define QCH_CON_GPIO_PERIC1_QCH                                                                0x3010
+#define QCH_CON_LHM_AXI_P_PERIC1_QCH                                                   0x3014
+#define QCH_CON_PERIC1_CMU_PERIC1_QCH                                                  0x3018
+#define QCH_CON_PERIC1_TOP0_QCH_I3C0                                                   0x301c
+#define QCH_CON_PERIC1_TOP0_QCH_PWM                                                    0x3020
+#define QCH_CON_PERIC1_TOP0_QCH_USI0_USI                                               0x3024
+#define QCH_CON_PERIC1_TOP0_QCH_USI10_USI                                              0x3028
+#define QCH_CON_PERIC1_TOP0_QCH_USI11_USI                                              0x302c
+#define QCH_CON_PERIC1_TOP0_QCH_USI12_USI                                              0x3030
+#define QCH_CON_PERIC1_TOP0_QCH_USI13_USI                                              0x3034
+#define QCH_CON_PERIC1_TOP0_QCH_USI9_USI                                               0x3038
+#define QCH_CON_SYSREG_PERIC1_QCH                                                      0x303c
+#define QUEUE_CTRL_REG_BLK_PERIC1_CMU_PERIC1                                           0x3c00
+
+static const unsigned long peric1_clk_regs[] __initconst = {
+       PLL_CON0_MUX_CLKCMU_PERIC1_BUS_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC1_BUS_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC1_I3C_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC1_I3C_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC1_USI0_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC1_USI0_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC1_USI10_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC1_USI10_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC1_USI11_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC1_USI11_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC1_USI12_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC1_USI12_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC1_USI13_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC1_USI13_USI_USER,
+       PLL_CON0_MUX_CLKCMU_PERIC1_USI9_USI_USER,
+       PLL_CON1_MUX_CLKCMU_PERIC1_USI9_USI_USER,
+       PERIC1_CMU_PERIC1_CONTROLLER_OPTION,
+       CLKOUT_CON_BLK_PERIC1_CMU_PERIC1_CLKOUT0,
+       CLK_CON_DIV_DIV_CLK_PERIC1_I3C,
+       CLK_CON_DIV_DIV_CLK_PERIC1_USI0_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC1_USI10_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC1_USI11_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC1_USI12_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC1_USI13_USI,
+       CLK_CON_DIV_DIV_CLK_PERIC1_USI9_USI,
+       CLK_CON_BUF_CLKBUF_PERIC1_IP,
+       CLK_CON_GAT_CLK_BLK_PERIC1_UID_PERIC1_CMU_PERIC1_IPCLKPORT_PCLK,
+       CLK_CON_GAT_CLK_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_I3C_IPCLKPORT_CLK,
+       CLK_CON_GAT_CLK_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_OSCCLK_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_D_TZPC_PERIC1_IPCLKPORT_PCLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_GPC_PERIC1_IPCLKPORT_PCLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_GPIO_PERIC1_IPCLKPORT_PCLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_LHM_AXI_P_PERIC1_IPCLKPORT_I_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_1,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_2,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_3,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_4,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_5,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_6,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_8,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_1,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_15,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_2,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_3,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_4,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_5,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_6,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_8,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_BUSP_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI0_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI10_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI11_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI12_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI13_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI9_USI_IPCLKPORT_CLK,
+       CLK_CON_GAT_GOUT_BLK_PERIC1_UID_SYSREG_PERIC1_IPCLKPORT_PCLK,
+       DMYQCH_CON_PERIC1_TOP0_QCH_S,
+       PCH_CON_LHM_AXI_P_PERIC1_PCH,
+       QCH_CON_D_TZPC_PERIC1_QCH,
+       QCH_CON_GPC_PERIC1_QCH,
+       QCH_CON_GPIO_PERIC1_QCH,
+       QCH_CON_LHM_AXI_P_PERIC1_QCH,
+       QCH_CON_PERIC1_CMU_PERIC1_QCH,
+       QCH_CON_PERIC1_TOP0_QCH_I3C0,
+       QCH_CON_PERIC1_TOP0_QCH_PWM,
+       QCH_CON_PERIC1_TOP0_QCH_USI0_USI,
+       QCH_CON_PERIC1_TOP0_QCH_USI10_USI,
+       QCH_CON_PERIC1_TOP0_QCH_USI11_USI,
+       QCH_CON_PERIC1_TOP0_QCH_USI12_USI,
+       QCH_CON_PERIC1_TOP0_QCH_USI13_USI,
+       QCH_CON_PERIC1_TOP0_QCH_USI9_USI,
+       QCH_CON_SYSREG_PERIC1_QCH,
+       QUEUE_CTRL_REG_BLK_PERIC1_CMU_PERIC1,
+};
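
peric1_clk_regs, like the peric0 list earlier, enumerates every register whose contents must survive suspend. A sketch of the usual save/restore wiring, assuming the helpers in drivers/clk/samsung/clk.c; the function name is hypothetical and reg_base stands in for the mapped CMU base:

static int example_cmu_suspend(void __iomem *reg_base)
{
	struct samsung_clk_reg_dump *dump;

	dump = samsung_clk_alloc_reg_dump(peric1_clk_regs,
					  ARRAY_SIZE(peric1_clk_regs));
	if (!dump)
		return -ENOMEM;

	/* Snapshot every listed register; the resume path would call
	 * samsung_clk_restore() with the same arguments.
	 */
	samsung_clk_save(reg_base, dump, ARRAY_SIZE(peric1_clk_regs));

	return 0;
}
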
+
+/* List of parent clocks for Muxes in CMU_PERIC1 */
+PNAME(mout_peric1_bus_user_p)          = { "oscclk", "dout_cmu_peric1_bus" };
+PNAME(mout_peric1_nonbususer_p)                = { "oscclk", "dout_cmu_peric1_ip" };
+
+static const struct samsung_mux_clock peric1_mux_clks[] __initconst = {
+       MUX(CLK_MOUT_PERIC1_BUS_USER, "mout_peric1_bus_user",
+           mout_peric1_bus_user_p, PLL_CON0_MUX_CLKCMU_PERIC1_BUS_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC1_I3C_USER,
+           "mout_peric1_i3c_user", mout_peric1_nonbususer_p,
+           PLL_CON0_MUX_CLKCMU_PERIC1_I3C_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC1_USI0_USI_USER,
+           "mout_peric1_usi0_usi_user", mout_peric1_nonbususer_p,
+           PLL_CON0_MUX_CLKCMU_PERIC1_USI0_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC1_USI10_USI_USER,
+           "mout_peric1_usi10_usi_user", mout_peric1_nonbususer_p,
+           PLL_CON0_MUX_CLKCMU_PERIC1_USI10_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC1_USI11_USI_USER,
+           "mout_peric1_usi11_usi_user", mout_peric1_nonbususer_p,
+           PLL_CON0_MUX_CLKCMU_PERIC1_USI11_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC1_USI12_USI_USER,
+           "mout_peric1_usi12_usi_user", mout_peric1_nonbususer_p,
+           PLL_CON0_MUX_CLKCMU_PERIC1_USI12_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC1_USI13_USI_USER,
+           "mout_peric1_usi13_usi_user", mout_peric1_nonbususer_p,
+           PLL_CON0_MUX_CLKCMU_PERIC1_USI13_USI_USER, 4, 1),
+       MUX(CLK_MOUT_PERIC1_USI9_USI_USER,
+           "mout_peric1_usi9_usi_user", mout_peric1_nonbususer_p,
+           PLL_CON0_MUX_CLKCMU_PERIC1_USI9_USI_USER, 4, 1),
+};
+
+static const struct samsung_div_clock peric1_div_clks[] __initconst = {
+       DIV(CLK_DOUT_PERIC1_I3C, "dout_peric1_i3c", "mout_peric1_i3c_user",
+           CLK_CON_DIV_DIV_CLK_PERIC1_I3C, 0, 4),
+       DIV(CLK_DOUT_PERIC1_USI0_USI,
+           "dout_peric1_usi0_usi", "mout_peric1_usi0_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC1_USI0_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC1_USI10_USI,
+           "dout_peric1_usi10_usi", "mout_peric1_usi10_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC1_USI10_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC1_USI11_USI,
+           "dout_peric1_usi11_usi", "mout_peric1_usi11_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC1_USI11_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC1_USI12_USI,
+           "dout_peric1_usi12_usi", "mout_peric1_usi12_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC1_USI12_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC1_USI13_USI,
+           "dout_peric1_usi13_usi", "mout_peric1_usi13_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC1_USI13_USI, 0, 4),
+       DIV(CLK_DOUT_PERIC1_USI9_USI,
+           "dout_peric1_usi9_usi", "mout_peric1_usi9_usi_user",
+           CLK_CON_DIV_DIV_CLK_PERIC1_USI9_USI, 0, 4),
+};
+
+static const struct samsung_gate_clock peric1_gate_clks[] __initconst = {
+       GATE(CLK_GOUT_PERIC1_PCLK,
+            "gout_peric1_peric1_pclk", "mout_peric1_bus_user",
+            CLK_CON_GAT_CLK_BLK_PERIC1_UID_PERIC1_CMU_PERIC1_IPCLKPORT_PCLK,
+            21, CLK_IS_CRITICAL, 0),
+       GATE(CLK_GOUT_PERIC1_CLK_PERIC1_I3C_CLK,
+            "gout_peric1_clk_peric1_i3c_clk", "dout_peric1_i3c",
+            CLK_CON_GAT_CLK_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_I3C_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_CLK_PERIC1_OSCCLK_CLK,
+            "gout_peric1_clk_peric1_oscclk_clk", "oscclk",
+            CLK_CON_GAT_CLK_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_OSCCLK_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_D_TZPC_PERIC1_PCLK,
+            "gout_peric1_d_tzpc_peric1_pclk", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_D_TZPC_PERIC1_IPCLKPORT_PCLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_GPC_PERIC1_PCLK,
+            "gout_peric1_gpc_peric1_pclk", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_GPC_PERIC1_IPCLKPORT_PCLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_GPIO_PERIC1_PCLK,
+            "gout_peric1_gpio_peric1_pclk", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_GPIO_PERIC1_IPCLKPORT_PCLK,
+            21, CLK_IGNORE_UNUSED, 0),
+       GATE(CLK_GOUT_PERIC1_LHM_AXI_P_PERIC1_I_CLK,
+            "gout_peric1_lhm_axi_p_peric1_i_clk", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_LHM_AXI_P_PERIC1_IPCLKPORT_I_CLK,
+            21, CLK_IS_CRITICAL, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_1,
+            "gout_peric1_peric1_top0_ipclk_1", "dout_peric1_usi0_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_1,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_2,
+            "gout_peric1_peric1_top0_ipclk_2", "dout_peric1_usi9_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_2,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_3,
+            "gout_peric1_peric1_top0_ipclk_3", "dout_peric1_usi10_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_3,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_4,
+            "gout_peric1_peric1_top0_ipclk_4", "dout_peric1_usi11_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_4,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_5,
+            "gout_peric1_peric1_top0_ipclk_5", "dout_peric1_usi12_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_5,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_6,
+            "gout_peric1_peric1_top0_ipclk_6", "dout_peric1_usi13_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_6,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_IPCLK_8,
+            "gout_peric1_peric1_top0_ipclk_8", "dout_peric1_i3c",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_IPCLK_8,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_1,
+            "gout_peric1_peric1_top0_pclk_1", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_1,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_15,
+            "gout_peric1_peric1_top0_pclk_15", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_15,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_2,
+            "gout_peric1_peric1_top0_pclk_2", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_2,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_3,
+            "gout_peric1_peric1_top0_pclk_3", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_3,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_4,
+            "gout_peric1_peric1_top0_pclk_4", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_4,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_5,
+            "gout_peric1_peric1_top0_pclk_5", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_5,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_6,
+            "gout_peric1_peric1_top0_pclk_6", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_6,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_PERIC1_TOP0_PCLK_8,
+            "gout_peric1_peric1_top0_pclk_8", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_PERIC1_TOP0_IPCLKPORT_PCLK_8,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_CLK_PERIC1_BUSP_CLK,
+            "gout_peric1_clk_peric1_busp_clk", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_BUSP_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_CLK_PERIC1_USI0_USI_CLK,
+            "gout_peric1_clk_peric1_usi0_usi_clk", "dout_peric1_usi0_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI0_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_CLK_PERIC1_USI10_USI_CLK,
+            "gout_peric1_clk_peric1_usi10_usi_clk", "dout_peric1_usi10_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI10_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_CLK_PERIC1_USI11_USI_CLK,
+            "gout_peric1_clk_peric1_usi11_usi_clk", "dout_peric1_usi11_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI11_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_CLK_PERIC1_USI12_USI_CLK,
+            "gout_peric1_clk_peric1_usi12_usi_clk", "dout_peric1_usi12_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI12_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_CLK_PERIC1_USI13_USI_CLK,
+            "gout_peric1_clk_peric1_usi13_usi_clk", "dout_peric1_usi13_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI13_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_CLK_PERIC1_USI9_USI_CLK,
+            "gout_peric1_clk_peric1_usi9_usi_clk", "dout_peric1_usi9_usi",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_RSTNSYNC_CLK_PERIC1_USI9_USI_IPCLKPORT_CLK,
+            21, 0, 0),
+       GATE(CLK_GOUT_PERIC1_SYSREG_PERIC1_PCLK,
+            "gout_peric1_sysreg_peric1_pclk", "mout_peric1_bus_user",
+            CLK_CON_GAT_GOUT_BLK_PERIC1_UID_SYSREG_PERIC1_IPCLKPORT_PCLK,
+            21, 0, 0),
+};
+
+static const struct samsung_cmu_info peric1_cmu_info __initconst = {
+       .mux_clks               = peric1_mux_clks,
+       .nr_mux_clks            = ARRAY_SIZE(peric1_mux_clks),
+       .div_clks               = peric1_div_clks,
+       .nr_div_clks            = ARRAY_SIZE(peric1_div_clks),
+       .gate_clks              = peric1_gate_clks,
+       .nr_gate_clks           = ARRAY_SIZE(peric1_gate_clks),
+       .nr_clk_ids             = CLKS_NR_PERIC1,
+       .clk_regs               = peric1_clk_regs,
+       .nr_clk_regs            = ARRAY_SIZE(peric1_clk_regs),
+       .clk_name               = "bus",
+};
+
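
Before the driver glue below: each compatible's .data points at one of the samsung_cmu_info blocks above, and the probe retrieves it through the match table. A hedged sketch of that flow; example_cmu_probe is a hypothetical name, while exynos_arm64_register_cmu() is the helper the ARM64 Samsung clock drivers use:

static int __init example_cmu_probe(struct platform_device *pdev)
{
	const struct samsung_cmu_info *info;
	struct device *dev = &pdev->dev;

	/* Resolves to e.g. &peric0_cmu_info via the of_match table. */
	info = of_device_get_match_data(dev);
	exynos_arm64_register_cmu(dev, dev->of_node, info);

	return 0;
}
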
 /* ---- platform_driver ----------------------------------------------------- */
 
 static int __init gs101_cmu_probe(struct platform_device *pdev)
@@ -2496,8 +3427,14 @@ static const struct of_device_id gs101_cmu_of_match[] = {
                .compatible = "google,gs101-cmu-apm",
                .data = &apm_cmu_info,
        }, {
                .compatible = "google,gs101-cmu-misc",
                .data = &misc_cmu_info,
+       }, {
+               .compatible = "google,gs101-cmu-peric0",
+               .data = &peric0_cmu_info,
+       }, {
+               .compatible = "google,gs101-cmu-peric1",
+               .data = &peric1_cmu_info,
        }, {
        },
 };
index 516b716407e550f74aabdc862d1cdd6f7977cd98..a763309e6f129f546e0bb2b4f4848274f2aa2c9e 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/clk-provider.h>
 #include "clk-pll.h"
+#include "clk-cpu.h"
 
 /**
  * struct samsung_clk_provider - information about clock provider
@@ -282,10 +283,11 @@ struct samsung_cpu_clock {
        unsigned int    alt_parent_id;
        unsigned long   flags;
        int             offset;
+       enum exynos_cpuclk_layout reg_layout;
        const struct exynos_cpuclk_cfg_data *cfg;
 };
 
-#define CPU_CLK(_id, _name, _pid, _apid, _flags, _offset, _cfg) \
+#define CPU_CLK(_id, _name, _pid, _apid, _flags, _offset, _layout, _cfg) \
        {                                                       \
                .id               = _id,                        \
                .name             = _name,                      \
@@ -293,6 +295,7 @@ struct samsung_cpu_clock {
                .alt_parent_id    = _apid,                      \
                .flags            = _flags,                     \
                .offset           = _offset,                    \
+               .reg_layout       = _layout,                    \
                .cfg              = _cfg,                       \
        }
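
The change above threads a register-layout selector through CPU_CLK() into struct samsung_cpu_clock, so one macro can serve SoCs whose CPU clock registers are laid out differently. An illustrative invocation; the clock IDs and cfg table are placeholders, and the layout enumerator is assumed to come from the enum exynos_cpuclk_layout now pulled in via clk-cpu.h:

static const struct samsung_cpu_clock example_cpu_clks[] __initconst = {
	/* (id, name, parent, alt parent, flags, offset, layout, cfg) */
	CPU_CLK(CLK_ARM_CLK, "armclk", CLK_MOUT_APLL, CLK_MOUT_MPLL,
		CLK_CPU_NEEDS_DEBUG_ALT_DIV, 0x0, CPUCLK_LAYOUT_E4210,
		example_armclk_d),
};
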
 
index 929b8788279ea18d9c5cc19b64d01c0983534b15..d3c85421f948fcea23b630c2184b830c6fd329d8 100644 (file)
@@ -202,12 +202,10 @@ err_exit:
        return ret;
 }
 
-static int jh7110_ispcrg_remove(struct platform_device *pdev)
+static void jh7110_ispcrg_remove(struct platform_device *pdev)
 {
        pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
-
-       return 0;
 }
 
 static const struct of_device_id jh7110_ispcrg_match[] = {
@@ -218,7 +216,7 @@ MODULE_DEVICE_TABLE(of, jh7110_ispcrg_match);
 
 static struct platform_driver jh7110_ispcrg_driver = {
        .probe = jh7110_ispcrg_probe,
-       .remove = jh7110_ispcrg_remove,
+       .remove_new = jh7110_ispcrg_remove,
        .driver = {
                .name = "clk-starfive-jh7110-isp",
                .of_match_table = jh7110_ispcrg_match,
index 10cc1ec4392517aa2632da033462d59611c4f9e3..53f7af234cc23e79c8253a68109c40e6f99c1e7b 100644 (file)
@@ -209,12 +209,10 @@ err_exit:
        return ret;
 }
 
-static int jh7110_voutcrg_remove(struct platform_device *pdev)
+static void jh7110_voutcrg_remove(struct platform_device *pdev)
 {
        pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
-
-       return 0;
 }
 
 static const struct of_device_id jh7110_voutcrg_match[] = {
@@ -225,7 +223,7 @@ MODULE_DEVICE_TABLE(of, jh7110_voutcrg_match);
 
 static struct platform_driver jh7110_voutcrg_driver = {
        .probe = jh7110_voutcrg_probe,
-       .remove = jh7110_voutcrg_remove,
+       .remove_new = jh7110_voutcrg_remove,
        .driver = {
                .name = "clk-starfive-jh7110-vout",
                .of_match_table = jh7110_voutcrg_match,
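
Both JH7110 hunks are instances of the tree-wide conversion to the void-returning platform remove callback: a remove that cannot fail has no error to report, so .remove_new drops the int. The resulting pattern, with placeholder names:

static void example_remove(struct platform_device *pdev)
{
	/* Undo probe; nothing can be reported back to the driver core. */
	pm_runtime_disable(&pdev->dev);
}

static struct platform_driver example_driver = {
	.probe      = example_probe,
	.remove_new = example_remove,
	.driver     = { .name = "example" },
};
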
index 0b09230a0d4edb33dd9f7cf463b93d4370c9557b..43080c7d045b35f57bf475ce82c82ae792efae48 100644 (file)
 
 static DEFINE_SPINLOCK(gmac_lock);
 
+
+#define SUN7I_A20_GMAC_GPIT    2
+#define SUN7I_A20_GMAC_MASK    0x3
+#define SUN7I_A20_GMAC_PARENTS 2
+
+static u32 sun7i_a20_gmac_mux_table[SUN7I_A20_GMAC_PARENTS] = {
+       0x00, /* Select mii_phy_tx_clk */
+       0x02, /* Select gmac_int_tx_clk */
+};
+
 /**
  * sun7i_a20_gmac_clk_setup - Setup function for A20/A31 GMAC clock module
+ * @node: &struct device_node for the clock
  *
  * This clock looks something like this
  *                               ________________________
@@ -39,16 +50,6 @@ static DEFINE_SPINLOCK(gmac_lock);
  * enable/disable this clock to configure the required state. The clock
  * driver then responds by auto-reparenting the clock.
  */
-
-#define SUN7I_A20_GMAC_GPIT    2
-#define SUN7I_A20_GMAC_MASK    0x3
-#define SUN7I_A20_GMAC_PARENTS 2
-
-static u32 sun7i_a20_gmac_mux_table[SUN7I_A20_GMAC_PARENTS] = {
-       0x00, /* Select mii_phy_tx_clk */
-       0x02, /* Select gmac_int_tx_clk */
-};
-
 static void __init sun7i_a20_gmac_clk_setup(struct device_node *node)
 {
        struct clk *clk;
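
The moved table maps parent index to register value: index 0 selects mii_phy_tx_clk via 0x0, index 1 selects gmac_int_tx_clk via 0x2. A sketch of how such a table is consumed, assuming the standard clk_register_mux_table() API; the function name and register argument are illustrative:

static void __init example_gmac_mux_setup(struct device_node *node,
					  void __iomem *reg)
{
	const char *parents[SUN7I_A20_GMAC_PARENTS];
	struct clk *clk;

	of_clk_parent_fill(node, parents, SUN7I_A20_GMAC_PARENTS);

	/* table[i] holds the register value that selects parents[i]. */
	clk = clk_register_mux_table(NULL, node->name, parents,
				     SUN7I_A20_GMAC_PARENTS,
				     CLK_SET_RATE_NO_REPARENT, reg, 0,
				     SUN7I_A20_GMAC_MASK, 0,
				     sun7i_a20_gmac_mux_table, &gmac_lock);
	if (!IS_ERR(clk))
		of_clk_add_provider(node, of_clk_src_simple_get, clk);
}
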
index 01255d827fc979d28faab5471266a3009c296a89..48bf899bb2bcd3b8bd796d7f044ec0a697cbb477 100644 (file)
@@ -18,9 +18,6 @@
 
 static DEFINE_SPINLOCK(sun9i_a80_cpus_lock);
 
-/**
- * sun9i_a80_cpus_clk_setup() - Setup function for a80 cpus composite clk
- */
 
 #define SUN9I_CPUS_MAX_PARENTS         4
 #define SUN9I_CPUS_MUX_PARENT_PLL4     3
@@ -180,6 +177,10 @@ static const struct clk_ops sun9i_a80_cpus_clk_ops = {
        .set_rate       = sun9i_a80_cpus_clk_set_rate,
 };
 
+/**
+ * sun9i_a80_cpus_setup() - Setup function for a80 cpus composite clk
+ * @node: &struct device_node for the clock
+ */
 static void sun9i_a80_cpus_setup(struct device_node *node)
 {
        const char *clk_name = node->name;
index 5460218f3467ab334be8bee9320cd7ad024973fe..3c53f65002a285ccf90a434e190bf385ac7875db 100644 (file)
@@ -73,9 +73,6 @@ static const struct reset_control_ops sunxi_usb_reset_ops = {
        .deassert       = sunxi_usb_reset_deassert,
 };
 
-/**
- * sunxi_usb_clk_setup() - Setup function for usb gate clocks
- */
 
 #define SUNXI_USB_MAX_SIZE 32
 
@@ -85,6 +82,12 @@ struct usb_clk_data {
        bool reset_needs_clk;
 };
 
+/**
+ * sunxi_usb_clk_setup() - Setup function for usb gate clocks
+ * @node: &struct device_node for the clock
+ * @data: &struct usb_clk_data for the clock
+ * @lock: spinlock for the clock
+ */
 static void __init sunxi_usb_clk_setup(struct device_node *node,
                                       const struct usb_clk_data *data,
                                       spinlock_t *lock)
index e32b3515f9e76b67e0f2cf6c13e61a360c8fd751..00680486b1bd06ac653dda0cce186acf68736755 100644 (file)
@@ -928,7 +928,7 @@ void omap3_core_dpll_restore_context(struct clk_hw *hw)
 }
 
 /**
- * omap3_non_core_dpll_save_context - Save the m and n values of the divider
+ * omap3_noncore_dpll_save_context - Save the m and n values of the divider
  * @hw: pointer to struct clk_hw
  *
  * Before the dpll registers are lost, save the last rounded rate m and n
@@ -957,7 +957,7 @@ int omap3_noncore_dpll_save_context(struct clk_hw *hw)
 }
 
 /**
- * omap3_core_dpll_restore_context - restore the m and n values of the divider
+ * omap3_noncore_dpll_restore_context - restore the m and n values of the divider
  * @hw: pointer to struct clk_hw
  *
  * Restore the last rounded rate m and n
index 6a6e5d9292e87a544e064d1746a73df06d437d39..19eb3fb7ae319efbcd3cc782938adbafb31237e4 100644 (file)
@@ -498,7 +498,7 @@ static int clk_wzrd_dynamic_all_nolock(struct clk_hw *hw, unsigned long rate,
 {
        struct clk_wzrd_divider *divider = to_clk_wzrd_divider(hw);
        unsigned long vco_freq, rate_div, clockout0_div;
-       void __iomem *div_addr = divider->base;
+       void __iomem *div_addr;
        u32 reg, pre, f;
        int err;
 
index 7bdeaff2bfd68b00a25e6cd9ef4f0623a8b2cc9a..c28d3dacf0fb22c6e2586caf2201564afb56cd7b 100644 (file)
@@ -42,6 +42,7 @@ static void __iomem *zynq_clkc_base;
 #define SLCR_SWDT_CLK_SEL              (zynq_clkc_base + 0x204)
 
 #define NUM_MIO_PINS   54
+#define CLK_NAME_LEN   16
 
 #define DBG_CLK_CTRL_CLKACT_TRC                BIT(0)
 #define DBG_CLK_CTRL_CPU_1XCLKACT      BIT(1)
@@ -215,7 +216,7 @@ static void __init zynq_clk_setup(struct device_node *np)
        int i;
        u32 tmp;
        int ret;
-       char *clk_name;
+       char clk_name[CLK_NAME_LEN];
        unsigned int fclk_enable = 0;
        const char *clk_output_name[clk_max];
        const char *cpu_parents[4];
@@ -426,12 +427,10 @@ static void __init zynq_clk_setup(struct device_node *np)
                        "gem1_emio_mux", CLK_SET_RATE_PARENT,
                        SLCR_GEM1_CLK_CTRL, 0, 0, &gem1clk_lock);
 
-       tmp = strlen("mio_clk_00x");
-       clk_name = kmalloc(tmp, GFP_KERNEL);
        for (i = 0; i < NUM_MIO_PINS; i++) {
                int idx;
 
-               snprintf(clk_name, tmp, "mio_clk_%2.2d", i);
+               snprintf(clk_name, CLK_NAME_LEN, "mio_clk_%2.2d", i);
                idx = of_property_match_string(np, "clock-names", clk_name);
                if (idx >= 0)
                        can_mio_mux_parents[i] = of_clk_get_parent_name(np,
@@ -439,7 +438,6 @@ static void __init zynq_clk_setup(struct device_node *np)
                else
                        can_mio_mux_parents[i] = dummy_nm;
        }
-       kfree(clk_name);
        clk_register_mux(NULL, "can_mux", periph_parents, 4,
                        CLK_SET_RATE_NO_REPARENT, SLCR_CAN_CLK_CTRL, 4, 2, 0,
                        &canclk_lock);
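
Worth noting about the zynq hunk: the old heap buffer was sized with strlen("mio_clk_00x") (11 bytes, exactly "mio_clk_NN" plus the terminator) and the kmalloc() result was never checked before use. The 16-byte stack buffer removes both the allocation-failure hazard and the tight sizing. In isolation:

	char clk_name[CLK_NAME_LEN];	/* 16 bytes */

	/* Largest index is NUM_MIO_PINS - 1 = 53, so the longest name is
	 * "mio_clk_53": 10 characters plus the terminator. The buffer
	 * always suffices, and snprintf() would merely truncate if the
	 * format ever grew.
	 */
	snprintf(clk_name, CLK_NAME_LEN, "mio_clk_%2.2d", 53);
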
index 737a026ef58a3835d07de4d89062e75e37d9cb86..02e40fd7d948c94e8ff014836ec8b8abefd824e1 100644 (file)
@@ -237,7 +237,7 @@ noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
        }
 
        if (target_state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
-               leave_mm(dev->cpu);
+               leave_mm();
 
        /* Take note of the planned idle state. */
        sched_idle_set_state(target_state);
index 0991f026cb0703543b76340723dec9d026e5e61e..3d02702456a507606572e9fd84ce226d87e3ad9b 100644 (file)
@@ -611,13 +611,6 @@ config CRYPTO_DEV_QCOM_RNG
          To compile this driver as a module, choose M here. The
          module will be called qcom-rng. If unsure, say N.
 
-config CRYPTO_DEV_VMX
-       bool "Support for VMX cryptographic acceleration instructions"
-       depends on PPC64 && VSX
-       help
-         Support for VMX cryptographic acceleration instructions.
-
-source "drivers/crypto/vmx/Kconfig"
 
 config CRYPTO_DEV_IMGTEC_HASH
        tristate "Imagination Technologies hardware hash accelerator"
index d859d6a5f3a45439c6e14bb19d6240e121c9ac62..95331bc6456b7b838e9509f3b72ee0bb366bda9b 100644 (file)
@@ -42,7 +42,6 @@ obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/
 obj-y += stm32/
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
-obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
 obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
 obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
 obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/
index d358334e598115649b6cc77b807dedb201eddb1c..ee2a28c906edee3d30b4f828ebe83b4a8e915867 100644 (file)
@@ -362,7 +362,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
                digestsize = SHA512_DIGEST_SIZE;
 
        /* the padding could be up to two blocks. */
-       buf = kzalloc(bs * 2, GFP_KERNEL | GFP_DMA);
+       buf = kcalloc(2, bs, GFP_KERNEL | GFP_DMA);
        if (!buf) {
                err = -ENOMEM;
                goto theend;
index 94367bc49e35b869dc5093a918c21f325459fce6..1b8ed33897332efe3ff260607d477defedc3a4dc 100644 (file)
@@ -118,9 +118,16 @@ int psp_send_platform_access_msg(enum psp_platform_access_msg msg,
                goto unlock;
        }
 
-       /* Store the status in request header for caller to investigate */
+       /*
+        * Read status from PSP. If status is non-zero, it indicates an error
+        * occurred during "processing" of the command.
+        * If status is zero, it indicates the command was "processed"
+        * successfully, but the result of the command is in the payload.
+        * Return both cases to the caller as -EIO to investigate.
+        */
        cmd_reg = ioread32(cmd);
-       req->header.status = FIELD_GET(PSP_CMDRESP_STS, cmd_reg);
+       if (FIELD_GET(PSP_CMDRESP_STS, cmd_reg))
+               req->header.status = FIELD_GET(PSP_CMDRESP_STS, cmd_reg);
        if (req->header.status) {
                ret = -EIO;
                goto unlock;
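
The effect of the ccp hunk: the PSP can also deposit a status in the request header it writes back, so the hardware status register is copied over it only when non-zero, and either source of a non-zero status then surfaces as -EIO. A hypothetical caller (the function itself is real, the surrounding names are placeholders):

	ret = psp_send_platform_access_msg(msg, req);
	if (ret == -EIO)
		dev_err(dev, "platform access failed, PSP status 0x%x\n",
			req->header.status);
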
index 124a2e0c89993786843b88daa609b5dfd86917ef..56bf832c2947775efaffc81eac68baa68aee9144 100644 (file)
@@ -156,11 +156,14 @@ static unsigned int psp_get_capability(struct psp_device *psp)
        }
        psp->capability = val;
 
-       /* Detect if TSME and SME are both enabled */
+       /* Detect TSME and/or SME status */
        if (PSP_CAPABILITY(psp, PSP_SECURITY_REPORTING) &&
-           psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET) &&
-           cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
-               dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n");
+           psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET)) {
+               if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+                       dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n");
+               else
+                       dev_notice(psp->dev, "psp: TSME enabled\n");
+       }
 
        return 0;
 }
index 80ed4b2d209cac85a4ad435bcf882c8bd5ba14d0..cd67fa348ca72d6b4501b219b4f5ba8929608370 100644 (file)
@@ -24,6 +24,8 @@
 #define QM_DFX_QN_SHIFT                        16
 #define QM_DFX_CNT_CLR_CE              0x100118
 #define QM_DBG_WRITE_LEN               1024
+#define QM_IN_IDLE_ST_REG              0x1040e4
+#define QM_IN_IDLE_STATE               0x1
 
 static const char * const qm_debug_file_name[] = {
        [CURRENT_QM]   = "current_qm",
@@ -81,6 +83,30 @@ static const struct debugfs_reg32 qm_dfx_regs[] = {
        {"QM_DFX_FF_ST5                 ",  0x1040dc},
        {"QM_DFX_FF_ST6                 ",  0x1040e0},
        {"QM_IN_IDLE_ST                 ",  0x1040e4},
+       {"QM_CACHE_CTL                  ",  0x100050},
+       {"QM_TIMEOUT_CFG                ",  0x100070},
+       {"QM_DB_TIMEOUT_CFG             ",  0x100074},
+       {"QM_FLR_PENDING_TIME_CFG       ",  0x100078},
+       {"QM_ARUSR_MCFG1                ",  0x100088},
+       {"QM_AWUSR_MCFG1                ",  0x100098},
+       {"QM_AXI_M_CFG_ENABLE           ",  0x1000B0},
+       {"QM_RAS_CE_THRESHOLD           ",  0x1000F8},
+       {"QM_AXI_TIMEOUT_CTRL           ",  0x100120},
+       {"QM_AXI_TIMEOUT_STATUS         ",  0x100124},
+       {"QM_CQE_AGGR_TIMEOUT_CTRL      ",  0x100144},
+       {"ACC_RAS_MSI_INT_SEL           ",  0x1040fc},
+       {"QM_CQE_OUT                    ",  0x104100},
+       {"QM_EQE_OUT                    ",  0x104104},
+       {"QM_AEQE_OUT                   ",  0x104108},
+       {"QM_DB_INFO0                   ",  0x104180},
+       {"QM_DB_INFO1                   ",  0x104184},
+       {"QM_AM_CTRL_GLOBAL             ",  0x300000},
+       {"QM_AM_CURR_PORT_STS           ",  0x300100},
+       {"QM_AM_CURR_TRANS_RETURN       ",  0x300150},
+       {"QM_AM_CURR_RD_MAX_TXID        ",  0x300154},
+       {"QM_AM_CURR_WR_MAX_TXID        ",  0x300158},
+       {"QM_AM_ALARM_RRESP             ",  0x300180},
+       {"QM_AM_ALARM_BRESP             ",  0x300184},
 };
 
 static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
@@ -1001,6 +1027,30 @@ static int qm_diff_regs_show(struct seq_file *s, void *unused)
 }
 DEFINE_SHOW_ATTRIBUTE(qm_diff_regs);
 
+static int qm_state_show(struct seq_file *s, void *unused)
+{
+       struct hisi_qm *qm = s->private;
+       u32 val;
+       int ret;
+
+       /* If the device is suspended, directly return the idle state. */
+       ret = hisi_qm_get_dfx_access(qm);
+       if (!ret) {
+               val = readl(qm->io_base + QM_IN_IDLE_ST_REG);
+               hisi_qm_put_dfx_access(qm);
+       } else if (ret == -EAGAIN) {
+               val = QM_IN_IDLE_STATE;
+       } else {
+               return ret;
+       }
+
+       seq_printf(s, "%u\n", val);
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(qm_state);
+
 static ssize_t qm_status_read(struct file *filp, char __user *buffer,
                              size_t count, loff_t *pos)
 {
@@ -1062,6 +1112,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get,
 void hisi_qm_debug_init(struct hisi_qm *qm)
 {
        struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs;
+       struct qm_dev_dfx *dev_dfx = &qm->debug.dev_dfx;
        struct qm_dfx *dfx = &qm->debug.dfx;
        struct dentry *qm_d;
        void *data;
@@ -1072,6 +1123,9 @@ void hisi_qm_debug_init(struct hisi_qm *qm)
 
        /* only show this in PF */
        if (qm->fun_type == QM_HW_PF) {
+               debugfs_create_file("qm_state", 0444, qm->debug.qm_d,
+                                       qm, &qm_state_fops);
+
                qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
                for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
                        qm_create_debugfs_file(qm, qm->debug.qm_d, i);
@@ -1087,6 +1141,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm)
 
        debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
                        &qm_status_fops);
+
+       debugfs_create_u32("dev_state", 0444, qm->debug.qm_d, &dev_dfx->dev_state);
+       debugfs_create_u32("dev_timeout", 0644, qm->debug.qm_d, &dev_dfx->dev_timeout);
+
        for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
                data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
                debugfs_create_file(qm_dfx_files[i].name,
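
qm_state_show() only prints a single value, so it relies on the stock DEFINE_SHOW_ATTRIBUTE() helper from <linux/seq_file.h>, which expands to roughly the following (paraphrased from the generic macro, not copied from this driver):

static int qm_state_open(struct inode *inode, struct file *file)
{
	return single_open(file, qm_state_show, inode->i_private);
}

static const struct file_operations qm_state_fops = {
	.owner   = THIS_MODULE,
	.open    = qm_state_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};
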
index 3255b2a070c78592518e26a0a9c3c14711e3ee16..d93aa6630a578323b8a180a0c09c9413ceb63c06 100644 (file)
@@ -440,7 +440,7 @@ MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");
 
 struct hisi_qp *hpre_create_qp(u8 type)
 {
-       int node = cpu_to_node(smp_processor_id());
+       int node = cpu_to_node(raw_smp_processor_id());
        struct hisi_qp *qp = NULL;
        int ret;
 
index 4b20b94e6371ad05476603d07f808cbd13fb6202..92f0a1d9b4a6b8b618cd2aa0d7528fb39caaad3c 100644 (file)
 
 #define QM_DEV_ALG_MAX_LEN             256
 
+/* Abnormal status values for stopping a queue */
+#define QM_STOP_QUEUE_FAIL             1
+#define QM_DUMP_SQC_FAIL               3
+#define QM_DUMP_CQC_FAIL               4
+#define QM_FINISH_WAIT                 5
+
 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
        (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \
        ((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \
@@ -312,6 +318,7 @@ static const struct hisi_qm_cap_info qm_cap_info_comm[] = {
        {QM_SUPPORT_DB_ISOLATION, 0x30,   0, BIT(0),  0x0, 0x0, 0x0},
        {QM_SUPPORT_FUNC_QOS,     0x3100, 0, BIT(8),  0x0, 0x0, 0x1},
        {QM_SUPPORT_STOP_QP,      0x3100, 0, BIT(9),  0x0, 0x0, 0x1},
+       {QM_SUPPORT_STOP_FUNC,     0x3100, 0, BIT(10), 0x0, 0x0, 0x1},
        {QM_SUPPORT_MB_COMMAND,   0x3100, 0, BIT(11), 0x0, 0x0, 0x1},
        {QM_SUPPORT_SVA_PREFETCH, 0x3100, 0, BIT(14), 0x0, 0x0, 0x1},
 };
@@ -1674,6 +1681,11 @@ unlock:
        return ret;
 }
 
+static int qm_drain_qm(struct hisi_qm *qm)
+{
+       return hisi_qm_mb(qm, QM_MB_CMD_FLUSH_QM, 0, 0, 0);
+}
+
 static int qm_stop_qp(struct hisi_qp *qp)
 {
        return hisi_qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
@@ -2031,43 +2043,25 @@ static void qp_stop_fail_cb(struct hisi_qp *qp)
        }
 }
 
-/**
- * qm_drain_qp() - Drain a qp.
- * @qp: The qp we want to drain.
- *
- * Determine whether the queue is cleared by judging the tail pointers of
- * sq and cq.
- */
-static int qm_drain_qp(struct hisi_qp *qp)
+static int qm_wait_qp_empty(struct hisi_qm *qm, u32 *state, u32 qp_id)
 {
-       struct hisi_qm *qm = qp->qm;
        struct device *dev = &qm->pdev->dev;
        struct qm_sqc sqc;
        struct qm_cqc cqc;
        int ret, i = 0;
 
-       /* No need to judge if master OOO is blocked. */
-       if (qm_check_dev_error(qm))
-               return 0;
-
-       /* Kunpeng930 supports drain qp by device */
-       if (test_bit(QM_SUPPORT_STOP_QP, &qm->caps)) {
-               ret = qm_stop_qp(qp);
-               if (ret)
-                       dev_err(dev, "Failed to stop qp(%u)!\n", qp->qp_id);
-               return ret;
-       }
-
        while (++i) {
-               ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp->qp_id, 1);
+               ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1);
                if (ret) {
                        dev_err_ratelimited(dev, "Failed to dump sqc!\n");
+                       *state = QM_DUMP_SQC_FAIL;
                        return ret;
                }
 
-               ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp->qp_id, 1);
+               ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1);
                if (ret) {
                        dev_err_ratelimited(dev, "Failed to dump cqc!\n");
+                       *state = QM_DUMP_CQC_FAIL;
                        return ret;
                }
 
@@ -2076,8 +2070,9 @@ static int qm_drain_qp(struct hisi_qp *qp)
                        break;
 
                if (i == MAX_WAIT_COUNTS) {
-                       dev_err(dev, "Fail to empty queue %u!\n", qp->qp_id);
-                       return -EBUSY;
+                       dev_err(dev, "Fail to empty queue %u!\n", qp_id);
+                       *state = QM_STOP_QUEUE_FAIL;
+                       return -ETIMEDOUT;
                }
 
                usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX);
@@ -2086,9 +2081,53 @@ static int qm_drain_qp(struct hisi_qp *qp)
        return 0;
 }
 
-static int qm_stop_qp_nolock(struct hisi_qp *qp)
+/**
+ * qm_drain_qp() - Drain a qp.
+ * @qp: The qp we want to drain.
+ *
+ * If the device does not support stopping the queue via a mailbox
+ * command, determine whether the queue has been drained by comparing
+ * the tail pointers of the sq and cq.
+ */
+static int qm_drain_qp(struct hisi_qp *qp)
+{
+       struct hisi_qm *qm = qp->qm;
+       struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev));
+       u32 state = 0;
+       int ret;
+
+       /* No need to judge if master OOO is blocked. */
+       if (qm_check_dev_error(pf_qm))
+               return 0;
+
+       /* HW V3 supports draining the qp by the device */
+       if (test_bit(QM_SUPPORT_STOP_QP, &qm->caps)) {
+               ret = qm_stop_qp(qp);
+               if (ret) {
+                       dev_err(&qm->pdev->dev, "Failed to stop qp!\n");
+                       state = QM_STOP_QUEUE_FAIL;
+                       goto set_dev_state;
+               }
+               return ret;
+       }
+
+       ret = qm_wait_qp_empty(qm, &state, qp->qp_id);
+       if (ret)
+               goto set_dev_state;
+
+       return 0;
+
+set_dev_state:
+       if (qm->debug.dev_dfx.dev_timeout)
+               qm->debug.dev_dfx.dev_state = state;
+
+       return ret;
+}
+
+static void qm_stop_qp_nolock(struct hisi_qp *qp)
 {
-       struct device *dev = &qp->qm->pdev->dev;
+       struct hisi_qm *qm = qp->qm;
+       struct device *dev = &qm->pdev->dev;
        int ret;
 
        /*
@@ -2099,39 +2138,36 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp)
         */
        if (atomic_read(&qp->qp_status.flags) != QP_START) {
                qp->is_resetting = false;
-               return 0;
+               return;
        }
 
        atomic_set(&qp->qp_status.flags, QP_STOP);
 
-       ret = qm_drain_qp(qp);
-       if (ret)
-               dev_err(dev, "Failed to drain out data for stopping!\n");
+       /* V3 supports direct stop function when FLR prepare */
+       if (qm->ver < QM_HW_V3 || qm->status.stop_reason == QM_NORMAL) {
+               ret = qm_drain_qp(qp);
+               if (ret)
+                       dev_err(dev, "Failed to drain out data for stopping qp(%u)!\n", qp->qp_id);
+       }
 
-       flush_workqueue(qp->qm->wq);
+       flush_workqueue(qm->wq);
        if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used)))
                qp_stop_fail_cb(qp);
 
        dev_dbg(dev, "stop queue %u!", qp->qp_id);
-
-       return 0;
 }
 
 /**
  * hisi_qm_stop_qp() - Stop a qp in qm.
  * @qp: The qp we want to stop.
  *
- * This function is reverse of hisi_qm_start_qp. Return 0 if successful.
+ * This function is the reverse of hisi_qm_start_qp.
  */
-int hisi_qm_stop_qp(struct hisi_qp *qp)
+void hisi_qm_stop_qp(struct hisi_qp *qp)
 {
-       int ret;
-
        down_write(&qp->qm->qps_lock);
-       ret = qm_stop_qp_nolock(qp);
+       qm_stop_qp_nolock(qp);
        up_write(&qp->qm->qps_lock);
-
-       return ret;
 }
 EXPORT_SYMBOL_GPL(hisi_qm_stop_qp);
 
@@ -2309,7 +2345,31 @@ static int hisi_qm_uacce_start_queue(struct uacce_queue *q)
 
 static void hisi_qm_uacce_stop_queue(struct uacce_queue *q)
 {
-       hisi_qm_stop_qp(q->priv);
+       struct hisi_qp *qp = q->priv;
+       struct hisi_qm *qm = qp->qm;
+       struct qm_dev_dfx *dev_dfx = &qm->debug.dev_dfx;
+       u32 i = 0;
+
+       hisi_qm_stop_qp(qp);
+
+       if (!dev_dfx->dev_timeout || !dev_dfx->dev_state)
+               return;
+
+       /*
+        * If the queue failed to stop, wait for a period of time
+        * before releasing it.
+        */
+       while (++i) {
+               msleep(WAIT_PERIOD);
+
+               /* Since dev_timeout may be modified at runtime, check i >= dev_timeout */
+               if (i >= dev_dfx->dev_timeout) {
+                       dev_err(&qm->pdev->dev, "Stop q %u timeout, state %u\n",
+                              qp->qp_id, dev_dfx->dev_state);
+                       dev_dfx->dev_state = QM_FINISH_WAIT;
+                       break;
+               }
+       }
 }
 
 static int hisi_qm_is_q_updated(struct uacce_queue *q)
@@ -3054,25 +3114,18 @@ static int qm_restart(struct hisi_qm *qm)
 }
 
 /* Stop started qps in reset flow */
-static int qm_stop_started_qp(struct hisi_qm *qm)
+static void qm_stop_started_qp(struct hisi_qm *qm)
 {
-       struct device *dev = &qm->pdev->dev;
        struct hisi_qp *qp;
-       int i, ret;
+       int i;
 
        for (i = 0; i < qm->qp_num; i++) {
                qp = &qm->qp_array[i];
-               if (qp && atomic_read(&qp->qp_status.flags) == QP_START) {
+               if (atomic_read(&qp->qp_status.flags) == QP_START) {
                        qp->is_resetting = true;
-                       ret = qm_stop_qp_nolock(qp);
-                       if (ret < 0) {
-                               dev_err(dev, "Failed to stop qp%d!\n", i);
-                               return ret;
-                       }
+                       qm_stop_qp_nolock(qp);
                }
        }
-
-       return 0;
 }
 
 /**
@@ -3112,21 +3165,31 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r)
 
        down_write(&qm->qps_lock);
 
-       qm->status.stop_reason = r;
        if (atomic_read(&qm->status.flags) == QM_STOP)
                goto err_unlock;
 
        /* Stop all the request sending at first. */
        atomic_set(&qm->status.flags, QM_STOP);
+       qm->status.stop_reason = r;
 
-       if (qm->status.stop_reason == QM_SOFT_RESET ||
-           qm->status.stop_reason == QM_DOWN) {
+       if (qm->status.stop_reason != QM_NORMAL) {
                hisi_qm_set_hw_reset(qm, QM_RESET_STOP_TX_OFFSET);
-               ret = qm_stop_started_qp(qm);
-               if (ret < 0) {
-                       dev_err(dev, "Failed to stop started qp!\n");
-                       goto err_unlock;
+               /*
+                * During a soft reset, the hardware no longer processes
+                * tasks, and the tasks in the device are flushed out
+                * directly since the master OOO is disabled.
+                */
+               if (test_bit(QM_SUPPORT_STOP_FUNC, &qm->caps) &&
+                   r != QM_SOFT_RESET) {
+                       ret = qm_drain_qm(qm);
+                       if (ret) {
+                               dev_err(dev, "failed to drain qm!\n");
+                               goto err_unlock;
+                       }
                }
+
+               qm_stop_started_qp(qm);
+
                hisi_qm_set_hw_reset(qm, QM_RESET_STOP_RX_OFFSET);
        }
 
@@ -3141,6 +3204,7 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r)
        }
 
        qm_clear_queues(qm);
+       qm->status.stop_reason = QM_NORMAL;
 
 err_unlock:
        up_write(&qm->qps_lock);
index f028dcfd0ead7e08d8e213037adb32257e1856c1..93a972fcbf6386e5d27a33940650e3b9816e3c52 100644 (file)
@@ -118,7 +118,7 @@ struct sec_aead {
 };
 
 /* Get an en/de-cipher queue cyclically to balance load over queues of TFM */
-static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
+static inline u32 sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
 {
        if (req->c_req.encrypt)
                return (u32)atomic_inc_return(&ctx->enc_qcyclic) %
@@ -485,8 +485,7 @@ static void sec_alg_resource_free(struct sec_ctx *ctx,
                sec_free_mac_resource(dev, qp_ctx->res);
 }
 
-static int sec_alloc_qp_ctx_resource(struct hisi_qm *qm, struct sec_ctx *ctx,
-                                    struct sec_qp_ctx *qp_ctx)
+static int sec_alloc_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx)
 {
        u16 q_depth = qp_ctx->qp->sq_depth;
        struct device *dev = ctx->dev;
@@ -541,8 +540,7 @@ static void sec_free_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_
        kfree(qp_ctx->req_list);
 }
 
-static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
-                            int qp_ctx_id, int alg_type)
+static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id)
 {
        struct sec_qp_ctx *qp_ctx;
        struct hisi_qp *qp;
@@ -561,7 +559,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
        idr_init(&qp_ctx->req_idr);
        INIT_LIST_HEAD(&qp_ctx->backlog);
 
-       ret = sec_alloc_qp_ctx_resource(qm, ctx, qp_ctx);
+       ret = sec_alloc_qp_ctx_resource(ctx, qp_ctx);
        if (ret)
                goto err_destroy_idr;
 
@@ -614,7 +612,7 @@ static int sec_ctx_base_init(struct sec_ctx *ctx)
        }
 
        for (i = 0; i < sec->ctx_q_num; i++) {
-               ret = sec_create_qp_ctx(&sec->qm, ctx, i, 0);
+               ret = sec_create_qp_ctx(ctx, i);
                if (ret)
                        goto err_sec_release_qp_ctx;
        }
@@ -750,9 +748,7 @@ static void sec_skcipher_uninit(struct crypto_skcipher *tfm)
        sec_ctx_base_uninit(ctx);
 }
 
-static int sec_skcipher_3des_setkey(struct crypto_skcipher *tfm, const u8 *key,
-                                   const u32 keylen,
-                                   const enum sec_cmode c_mode)
+static int sec_skcipher_3des_setkey(struct crypto_skcipher *tfm, const u8 *key, const u32 keylen)
 {
        struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
@@ -843,7 +839,7 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 
        switch (c_alg) {
        case SEC_CALG_3DES:
-               ret = sec_skcipher_3des_setkey(tfm, key, keylen, c_mode);
+               ret = sec_skcipher_3des_setkey(tfm, key, keylen);
                break;
        case SEC_CALG_AES:
        case SEC_CALG_SM4:
@@ -1371,7 +1367,7 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
        sec_sqe3->bd_param = cpu_to_le32(bd_param);
 
        sec_sqe3->c_len_ivin |= cpu_to_le32(c_req->c_len);
-       sec_sqe3->tag = cpu_to_le64(req);
+       sec_sqe3->tag = cpu_to_le64((unsigned long)req);
 
        return 0;
 }
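
The added (unsigned long) cast avoids a pointer-to-integer conversion
warning when stashing the request pointer in the little-endian descriptor
field. The completion side performs the inverse double cast; a sketch with
an assumed shape:

    /* store: pointer -> unsigned long -> __le64 */
    sec_sqe3->tag = cpu_to_le64((unsigned long)req);

    /* fetch in the completion handler (sketch, names assumed) */
    struct sec_req *req =
            (struct sec_req *)(unsigned long)le64_to_cpu(sec_sqe3->tag);
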
@@ -2145,8 +2141,8 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
        return sec_skcipher_crypto(sk_req, false);
 }
 
-#define SEC_SKCIPHER_GEN_ALG(sec_cra_name, sec_set_key, sec_min_key_size, \
-       sec_max_key_size, ctx_init, ctx_exit, blk_size, iv_size)\
+#define SEC_SKCIPHER_ALG(sec_cra_name, sec_set_key, \
+       sec_min_key_size, sec_max_key_size, blk_size, iv_size)\
 {\
        .base = {\
                .cra_name = sec_cra_name,\
@@ -2158,8 +2154,8 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
                .cra_ctxsize = sizeof(struct sec_ctx),\
                .cra_module = THIS_MODULE,\
        },\
-       .init = ctx_init,\
-       .exit = ctx_exit,\
+       .init = sec_skcipher_ctx_init,\
+       .exit = sec_skcipher_ctx_exit,\
        .setkey = sec_set_key,\
        .decrypt = sec_skcipher_decrypt,\
        .encrypt = sec_skcipher_encrypt,\
@@ -2168,11 +2164,6 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
        .ivsize = iv_size,\
 }
 
-#define SEC_SKCIPHER_ALG(name, key_func, min_key_size, \
-       max_key_size, blk_size, iv_size) \
-       SEC_SKCIPHER_GEN_ALG(name, key_func, min_key_size, max_key_size, \
-       sec_skcipher_ctx_init, sec_skcipher_ctx_exit, blk_size, iv_size)
-
 static struct sec_skcipher sec_skciphers[] = {
        {
                .alg_msk = BIT(0),
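
With ctx_init/ctx_exit folded in, SEC_SKCIPHER_ALG now hard-wires
sec_skcipher_ctx_init/sec_skcipher_ctx_exit, so each table entry only
supplies the name, setkey callback, key bounds, block size and IV size. An
illustrative invocation (the specific algorithm and callback are examples,
not quoted from the file):

    .alg = SEC_SKCIPHER_ALG("cbc(aes)", sec_setkey_aes_cbc,
                            AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE,
                            AES_BLOCK_SIZE, AES_BLOCK_SIZE),
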
index 7bb99381bbdfbba9325435af689b54f738df5d01..c290d8937b19cee6edfe3ff30163f118bda7628c 100644 (file)
@@ -282,6 +282,11 @@ static const struct debugfs_reg32 sec_dfx_regs[] = {
        {"SEC_BD_SAA6                   ",  0x301C38},
        {"SEC_BD_SAA7                   ",  0x301C3C},
        {"SEC_BD_SAA8                   ",  0x301C40},
+       {"SEC_RAS_CE_ENABLE             ",  0x301050},
+       {"SEC_RAS_FE_ENABLE             ",  0x301054},
+       {"SEC_RAS_NFE_ENABLE            ",  0x301058},
+       {"SEC_REQ_TRNG_TIME_TH          ",  0x30112C},
+       {"SEC_CHANNEL_RNG_REQ_THLD      ",  0x302110},
 };
 
 /* define the SEC's dfx regs region and region length */
@@ -374,7 +379,7 @@ void sec_destroy_qps(struct hisi_qp **qps, int qp_num)
 
 struct hisi_qp **sec_create_qps(void)
 {
-       int node = cpu_to_node(smp_processor_id());
+       int node = cpu_to_node(raw_smp_processor_id());
        u32 ctx_num = ctx_q_num;
        struct hisi_qp **qps;
        int ret;
index c650c741a18d8ab7ec5ae9ccd547d516132fba17..94e2d66b04b65d75005cc4cbb4784c6584aee134 100644 (file)
@@ -591,6 +591,7 @@ static struct acomp_alg hisi_zip_acomp_deflate = {
        .base                   = {
                .cra_name               = "deflate",
                .cra_driver_name        = "hisi-deflate-acomp",
+               .cra_flags              = CRYPTO_ALG_ASYNC,
                .cra_module             = THIS_MODULE,
                .cra_priority           = HZIP_ALG_PRIORITY,
                .cra_ctxsize            = sizeof(struct hisi_zip_ctx),
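
The new cra_flags line advertises that this acomp implementation may
complete requests asynchronously. A caller that cannot handle async
completions excludes such drivers by passing the flag in the allocation
mask; a brief sketch of both allocation styles:

    /* accept any "deflate" acomp, asynchronous implementations included */
    struct crypto_acomp *tfm = crypto_alloc_acomp("deflate", 0, 0);

    /* require a synchronous implementation: mask out async algorithms */
    struct crypto_acomp *sync_tfm =
            crypto_alloc_acomp("deflate", 0, CRYPTO_ALG_ASYNC);

The same flag is added to the "deflate-iaa" algorithm later in this series.
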
index 479ba8a1d6b5d956594106a333eeb37544b8cd0c..c065fd867161dcd04f0d90d21adcd1f346cc33fe 100644 (file)
@@ -454,7 +454,7 @@ MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids);
 int zip_create_qps(struct hisi_qp **qps, int qp_num, int node)
 {
        if (node == NUMA_NO_NODE)
-               node = cpu_to_node(smp_processor_id());
+               node = cpu_to_node(raw_smp_processor_id());
 
        return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps);
 }
index 014420f7beb0325ef8efdb0a64f08984d3ab1c3f..2524091a5f70abea2c2048d677f6f4626a75e605 100644 (file)
@@ -59,10 +59,8 @@ struct iaa_device_compression_mode {
        const char                      *name;
 
        struct aecs_comp_table_record   *aecs_comp_table;
-       struct aecs_decomp_table_record *aecs_decomp_table;
 
        dma_addr_t                      aecs_comp_table_dma_addr;
-       dma_addr_t                      aecs_decomp_table_dma_addr;
 };
 
 /* Representation of IAA device with wqs, populated by probe */
@@ -107,23 +105,6 @@ struct aecs_comp_table_record {
        u32 reserved_padding[2];
 } __packed;
 
-/* AECS for decompress */
-struct aecs_decomp_table_record {
-       u32 crc;
-       u32 xor_checksum;
-       u32 low_filter_param;
-       u32 high_filter_param;
-       u32 output_mod_idx;
-       u32 drop_init_decomp_out_bytes;
-       u32 reserved[36];
-       u32 output_accum_data[2];
-       u32 out_bits_valid;
-       u32 bit_off_indexing;
-       u32 input_accum_data[64];
-       u8  size_qw[32];
-       u32 decomp_state[1220];
-} __packed;
-
 int iaa_aecs_init_fixed(void);
 void iaa_aecs_cleanup_fixed(void);
 
@@ -136,9 +117,6 @@ struct iaa_compression_mode {
        int                     ll_table_size;
        u32                     *d_table;
        int                     d_table_size;
-       u32                     *header_table;
-       int                     header_table_size;
-       u16                     gen_decomp_table_flags;
        iaa_dev_comp_init_fn_t  init;
        iaa_dev_comp_free_fn_t  free;
 };
@@ -148,9 +126,6 @@ int add_iaa_compression_mode(const char *name,
                             int ll_table_size,
                             const u32 *d_table,
                             int d_table_size,
-                            const u8 *header_table,
-                            int header_table_size,
-                            u16 gen_decomp_table_flags,
                             iaa_dev_comp_init_fn_t init,
                             iaa_dev_comp_free_fn_t free);
 
index 45cf5d74f0fb9411b350d1e7e91340fa250a2e81..19d9a333ac49c90eccbe8714dd6ed90c46d68f15 100644 (file)
@@ -78,7 +78,6 @@ int iaa_aecs_init_fixed(void)
                                       sizeof(fixed_ll_sym),
                                       fixed_d_sym,
                                       sizeof(fixed_d_sym),
-                                      NULL, 0, 0,
                                       init_fixed_mode, NULL);
        if (!ret)
                pr_debug("IAA fixed compression mode initialized\n");
index dfd3baf0a8d873747b3103785f2fde6236cf96bb..1cd304de53881562cdbf7b116a0e54c9c66082a0 100644 (file)
@@ -258,16 +258,14 @@ static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
        kfree(mode->name);
        kfree(mode->ll_table);
        kfree(mode->d_table);
-       kfree(mode->header_table);
 
        kfree(mode);
 }
 
 /*
- * IAA Compression modes are defined by an ll_table, a d_table, and an
- * optional header_table.  These tables are typically generated and
- * captured using statistics collected from running actual
- * compress/decompress workloads.
+ * IAA Compression modes are defined by an ll_table and a d_table.
+ * These tables are typically generated and captured using statistics
+ * collected from running actual compress/decompress workloads.
  *
  * A module or other kernel code can add and remove compression modes
  * with a given name using the exported @add_iaa_compression_mode()
@@ -315,9 +313,6 @@ EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
  * @ll_table_size: The ll table size in bytes
  * @d_table: The d table
  * @d_table_size: The d table size in bytes
- * @header_table: Optional header table
- * @header_table_size: Optional header table size in bytes
- * @gen_decomp_table_flags: Otional flags used to generate the decomp table
  * @init: Optional callback function to init the compression mode data
  * @free: Optional callback function to free the compression mode data
  *
@@ -330,9 +325,6 @@ int add_iaa_compression_mode(const char *name,
                             int ll_table_size,
                             const u32 *d_table,
                             int d_table_size,
-                            const u8 *header_table,
-                            int header_table_size,
-                            u16 gen_decomp_table_flags,
                             iaa_dev_comp_init_fn_t init,
                             iaa_dev_comp_free_fn_t free)
 {
@@ -370,16 +362,6 @@ int add_iaa_compression_mode(const char *name,
                mode->d_table_size = d_table_size;
        }
 
-       if (header_table) {
-               mode->header_table = kzalloc(header_table_size, GFP_KERNEL);
-               if (!mode->header_table)
-                       goto free;
-               memcpy(mode->header_table, header_table, header_table_size);
-               mode->header_table_size = header_table_size;
-       }
-
-       mode->gen_decomp_table_flags = gen_decomp_table_flags;
-
        mode->init = init;
        mode->free = free;
 
@@ -420,10 +402,6 @@ static void free_device_compression_mode(struct iaa_device *iaa_device,
        if (device_mode->aecs_comp_table)
                dma_free_coherent(dev, size, device_mode->aecs_comp_table,
                                  device_mode->aecs_comp_table_dma_addr);
-       if (device_mode->aecs_decomp_table)
-               dma_free_coherent(dev, size, device_mode->aecs_decomp_table,
-                                 device_mode->aecs_decomp_table_dma_addr);
-
        kfree(device_mode);
 }
 
@@ -440,73 +418,6 @@ static int check_completion(struct device *dev,
                            bool compress,
                            bool only_once);
 
-static int decompress_header(struct iaa_device_compression_mode *device_mode,
-                            struct iaa_compression_mode *mode,
-                            struct idxd_wq *wq)
-{
-       dma_addr_t src_addr, src2_addr;
-       struct idxd_desc *idxd_desc;
-       struct iax_hw_desc *desc;
-       struct device *dev;
-       int ret = 0;
-
-       idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
-       if (IS_ERR(idxd_desc))
-               return PTR_ERR(idxd_desc);
-
-       desc = idxd_desc->iax_hw;
-
-       dev = &wq->idxd->pdev->dev;
-
-       src_addr = dma_map_single(dev, (void *)mode->header_table,
-                                 mode->header_table_size, DMA_TO_DEVICE);
-       dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n",
-               __func__, mode->name, src_addr, dev,
-               mode->header_table, mode->header_table_size);
-       if (unlikely(dma_mapping_error(dev, src_addr))) {
-               dev_dbg(dev, "dma_map_single err, exiting\n");
-               ret = -ENOMEM;
-               return ret;
-       }
-
-       desc->flags = IAX_AECS_GEN_FLAG;
-       desc->opcode = IAX_OPCODE_DECOMPRESS;
-
-       desc->src1_addr = (u64)src_addr;
-       desc->src1_size = mode->header_table_size;
-
-       src2_addr = device_mode->aecs_decomp_table_dma_addr;
-       desc->src2_addr = (u64)src2_addr;
-       desc->src2_size = 1088;
-       dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n",
-               __func__, mode->name, desc->src2_addr, dev, desc->src2_size);
-       desc->max_dst_size = 0; // suppressed output
-
-       desc->decompr_flags = mode->gen_decomp_table_flags;
-
-       desc->priv = 0;
-
-       desc->completion_addr = idxd_desc->compl_dma;
-
-       ret = idxd_submit_desc(wq, idxd_desc);
-       if (ret) {
-               pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret);
-               goto out;
-       }
-
-       ret = check_completion(dev, idxd_desc->iax_completion, false, false);
-       if (ret)
-               dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n",
-                       __func__, mode->name, ret);
-       else
-               dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__,
-                       mode->name);
-out:
-       dma_unmap_single(dev, src_addr, 1088, DMA_TO_DEVICE);
-
-       return ret;
-}
-
 static int init_device_compression_mode(struct iaa_device *iaa_device,
                                        struct iaa_compression_mode *mode,
                                        int idx, struct idxd_wq *wq)
@@ -529,24 +440,11 @@ static int init_device_compression_mode(struct iaa_device *iaa_device,
        if (!device_mode->aecs_comp_table)
                goto free;
 
-       device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size,
-                                                           &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL);
-       if (!device_mode->aecs_decomp_table)
-               goto free;
-
        /* Add Huffman table to aecs */
        memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
        memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
        memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
 
-       if (mode->header_table) {
-               ret = decompress_header(device_mode, mode, wq);
-               if (ret) {
-                       pr_debug("iaa header decompression failed: ret=%d\n", ret);
-                       goto free;
-               }
-       }
-
        if (mode->init) {
                ret = mode->init(device_mode);
                if (ret)
@@ -1324,7 +1222,7 @@ static int iaa_compress(struct crypto_tfm *tfm,   struct acomp_req *req,
 
        *compression_crc = idxd_desc->iax_completion->crc;
 
-       if (!ctx->async_mode)
+       if (!ctx->async_mode || disable_async)
                idxd_free_desc(wq, idxd_desc);
 out:
        return ret;
@@ -1570,7 +1468,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
 
        *dlen = req->dlen;
 
-       if (!ctx->async_mode)
+       if (!ctx->async_mode || disable_async)
                idxd_free_desc(wq, idxd_desc);
 
        /* Update stats */
@@ -1596,6 +1494,7 @@ static int iaa_comp_acompress(struct acomp_req *req)
        u32 compression_crc;
        struct idxd_wq *wq;
        struct device *dev;
+       u64 start_time_ns;
        int order = -1;
 
        compression_ctx = crypto_tfm_ctx(tfm);
@@ -1669,8 +1568,10 @@ static int iaa_comp_acompress(struct acomp_req *req)
                " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
                req->dst, req->dlen, sg_dma_len(req->dst));
 
+       start_time_ns = iaa_get_ts();
        ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr,
                           &req->dlen, &compression_crc, disable_async);
+       update_max_comp_delay_ns(start_time_ns);
        if (ret == -EINPROGRESS)
                return ret;
 
@@ -1717,6 +1618,7 @@ static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req)
        struct iaa_wq *iaa_wq;
        struct device *dev;
        struct idxd_wq *wq;
+       u64 start_time_ns;
        int order = -1;
 
        cpu = get_cpu();
@@ -1773,8 +1675,10 @@ alloc_dest:
        dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p,"
                " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
                req->dst, req->dlen, sg_dma_len(req->dst));
+       start_time_ns = iaa_get_ts();
        ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
                             dst_addr, &req->dlen, true);
+       update_max_decomp_delay_ns(start_time_ns);
        if (ret == -EOVERFLOW) {
                dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE);
                req->dlen *= 2;
@@ -1805,6 +1709,7 @@ static int iaa_comp_adecompress(struct acomp_req *req)
        int nr_sgs, cpu, ret = 0;
        struct iaa_wq *iaa_wq;
        struct device *dev;
+       u64 start_time_ns;
        struct idxd_wq *wq;
 
        if (!iaa_crypto_enabled) {
@@ -1864,8 +1769,10 @@ static int iaa_comp_adecompress(struct acomp_req *req)
                " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs,
                req->dst, req->dlen, sg_dma_len(req->dst));
 
+       start_time_ns = iaa_get_ts();
        ret = iaa_decompress(tfm, req, wq, src_addr, req->slen,
                             dst_addr, &req->dlen, false);
+       update_max_decomp_delay_ns(start_time_ns);
        if (ret == -EINPROGRESS)
                return ret;
 
@@ -1916,6 +1823,7 @@ static struct acomp_alg iaa_acomp_fixed_deflate = {
        .base                   = {
                .cra_name               = "deflate",
                .cra_driver_name        = "deflate-iaa",
+               .cra_flags              = CRYPTO_ALG_ASYNC,
                .cra_ctxsize            = sizeof(struct iaa_compression_ctx),
                .cra_module             = THIS_MODULE,
                .cra_priority           = IAA_ALG_PRIORITY,
index 2e3b7b73af20444a2480078b51eee8a55c9ba268..c9f83af4b307593ff0420b5bb1e5f1a37d258ff3 100644 (file)
@@ -22,8 +22,6 @@ static u64 total_decomp_calls;
 static u64 total_sw_decomp_calls;
 static u64 max_comp_delay_ns;
 static u64 max_decomp_delay_ns;
-static u64 max_acomp_delay_ns;
-static u64 max_adecomp_delay_ns;
 static u64 total_comp_bytes_out;
 static u64 total_decomp_bytes_in;
 static u64 total_completion_einval_errors;
@@ -92,26 +90,6 @@ void update_max_decomp_delay_ns(u64 start_time_ns)
                max_decomp_delay_ns = time_diff;
 }
 
-void update_max_acomp_delay_ns(u64 start_time_ns)
-{
-       u64 time_diff;
-
-       time_diff = ktime_get_ns() - start_time_ns;
-
-       if (time_diff > max_acomp_delay_ns)
-               max_acomp_delay_ns = time_diff;
-}
-
-void update_max_adecomp_delay_ns(u64 start_time_ns)
-{
-       u64 time_diff;
-
-       time_diff = ktime_get_ns() - start_time_ns;
-
-       if (time_diff > max_adecomp_delay_ns)
-               max_adecomp_delay_ns = time_diff;
-}
-
 void update_wq_comp_calls(struct idxd_wq *idxd_wq)
 {
        struct iaa_wq *wq = idxd_wq_get_private(idxd_wq);
@@ -151,8 +129,6 @@ static void reset_iaa_crypto_stats(void)
        total_sw_decomp_calls = 0;
        max_comp_delay_ns = 0;
        max_decomp_delay_ns = 0;
-       max_acomp_delay_ns = 0;
-       max_adecomp_delay_ns = 0;
        total_comp_bytes_out = 0;
        total_decomp_bytes_in = 0;
        total_completion_einval_errors = 0;
@@ -275,17 +251,11 @@ int __init iaa_crypto_debugfs_init(void)
                return -ENODEV;
 
        iaa_crypto_debugfs_root = debugfs_create_dir("iaa_crypto", NULL);
-       if (!iaa_crypto_debugfs_root)
-               return -ENOMEM;
 
        debugfs_create_u64("max_comp_delay_ns", 0644,
                           iaa_crypto_debugfs_root, &max_comp_delay_ns);
        debugfs_create_u64("max_decomp_delay_ns", 0644,
                           iaa_crypto_debugfs_root, &max_decomp_delay_ns);
-       debugfs_create_u64("max_acomp_delay_ns", 0644,
-                          iaa_crypto_debugfs_root, &max_comp_delay_ns);
-       debugfs_create_u64("max_adecomp_delay_ns", 0644,
-                          iaa_crypto_debugfs_root, &max_decomp_delay_ns);
        debugfs_create_u64("total_comp_calls", 0644,
                           iaa_crypto_debugfs_root, &total_comp_calls);
        debugfs_create_u64("total_decomp_calls", 0644,
index c10b87b86fa4567131a2b90c99285c746c9efc06..c916ca83f07022fc15a128b48f0b58a520b4b816 100644 (file)
@@ -15,8 +15,6 @@ void  update_total_sw_decomp_calls(void);
 void   update_total_decomp_bytes_in(int n);
 void   update_max_comp_delay_ns(u64 start_time_ns);
 void   update_max_decomp_delay_ns(u64 start_time_ns);
-void   update_max_acomp_delay_ns(u64 start_time_ns);
-void   update_max_adecomp_delay_ns(u64 start_time_ns);
 void   update_completion_einval_errs(void);
 void   update_completion_timeout_errs(void);
 void   update_completion_comp_buf_overflow_errs(void);
@@ -26,6 +24,8 @@ void  update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n);
 void   update_wq_decomp_calls(struct idxd_wq *idxd_wq);
 void   update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n);
 
+static inline u64      iaa_get_ts(void) { return ktime_get_ns(); }
+
 #else
 static inline int      iaa_crypto_debugfs_init(void) { return 0; }
 static inline void     iaa_crypto_debugfs_cleanup(void) {}
@@ -37,8 +37,6 @@ static inline void    update_total_sw_decomp_calls(void) {}
 static inline void     update_total_decomp_bytes_in(int n) {}
 static inline void     update_max_comp_delay_ns(u64 start_time_ns) {}
 static inline void     update_max_decomp_delay_ns(u64 start_time_ns) {}
-static inline void     update_max_acomp_delay_ns(u64 start_time_ns) {}
-static inline void     update_max_adecomp_delay_ns(u64 start_time_ns) {}
 static inline void     update_completion_einval_errs(void) {}
 static inline void     update_completion_timeout_errs(void) {}
 static inline void     update_completion_comp_buf_overflow_errs(void) {}
@@ -48,6 +46,8 @@ static inline void    update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) {}
 static inline void     update_wq_decomp_calls(struct idxd_wq *idxd_wq) {}
 static inline void     update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) {}
 
+static inline u64      iaa_get_ts(void) { return 0; }
+
 #endif // CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS
 
 #endif
index c120f6715a09a6a0393b66feea068e1caa89d766..02fb8abe4e6ed3eae100f9be715ea276452a0653 100644 (file)
@@ -106,3 +106,17 @@ config CRYPTO_DEV_QAT_C62XVF
 
          To compile this as a module, choose M here: the module
          will be called qat_c62xvf.
+
+config CRYPTO_DEV_QAT_ERROR_INJECTION
+       bool "Support for Intel(R) QAT Devices Heartbeat Error Injection"
+       depends on CRYPTO_DEV_QAT
+       depends on DEBUG_FS
+       help
+         Enables a mechanism that allows injection of a heartbeat error on
+         Intel(R) QuickAssist devices for testing purposes. The error is
+         triggered via a debugfs entry of the Intel(R) QuickAssist device.
+
+         This is intended for developer use only.
+
+         If unsure, say N.
index a87d29ae724fe4f5f85a0160571bbd09bdf9be75..1102c47f8293d572bf0a65836e72665b818e86e2 100644 (file)
@@ -361,53 +361,6 @@ static u32 get_ena_thd_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
        }
 }
 
-static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
-{
-       enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { };
-       const struct adf_fw_config *fw_config;
-       u16 ring_to_svc_map;
-       int i, j;
-
-       fw_config = get_fw_config(accel_dev);
-       if (!fw_config)
-               return 0;
-
-       for (i = 0; i < RP_GROUP_COUNT; i++) {
-               switch (fw_config[i].ae_mask) {
-               case ADF_AE_GROUP_0:
-                       j = RP_GROUP_0;
-                       break;
-               case ADF_AE_GROUP_1:
-                       j = RP_GROUP_1;
-                       break;
-               default:
-                       return 0;
-               }
-
-               switch (fw_config[i].obj) {
-               case ADF_FW_SYM_OBJ:
-                       rps[j] = SYM;
-                       break;
-               case ADF_FW_ASYM_OBJ:
-                       rps[j] = ASYM;
-                       break;
-               case ADF_FW_DC_OBJ:
-                       rps[j] = COMP;
-                       break;
-               default:
-                       rps[j] = 0;
-                       break;
-               }
-       }
-
-       ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT |
-                         rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT |
-                         rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT |
-                         rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT;
-
-       return ring_to_svc_map;
-}
-
 static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num,
                                const char * const fw_objs[], int num_objs)
 {
@@ -433,6 +386,20 @@ static const char *uof_get_name_420xx(struct adf_accel_dev *accel_dev, u32 obj_n
        return uof_get_name(accel_dev, obj_num, adf_420xx_fw_objs, num_fw_objs);
 }
 
+static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num)
+{
+       const struct adf_fw_config *fw_config;
+
+       if (obj_num >= uof_get_num_objs(accel_dev))
+               return -EINVAL;
+
+       fw_config = get_fw_config(accel_dev);
+       if (!fw_config)
+               return -EINVAL;
+
+       return fw_config[obj_num].obj;
+}
+
 static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
 {
        const struct adf_fw_config *fw_config;
@@ -496,12 +463,13 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id)
        hw_data->fw_mmp_name = ADF_420XX_MMP;
        hw_data->uof_get_name = uof_get_name_420xx;
        hw_data->uof_get_num_objs = uof_get_num_objs;
+       hw_data->uof_get_obj_type = uof_get_obj_type;
        hw_data->uof_get_ae_mask = uof_get_ae_mask;
        hw_data->get_rp_group = get_rp_group;
        hw_data->get_ena_thd_mask = get_ena_thd_mask;
        hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable;
        hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
-       hw_data->get_ring_to_svc_map = get_ring_to_svc_map;
+       hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map;
        hw_data->disable_iov = adf_disable_sriov;
        hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
        hw_data->enable_pm = adf_gen4_enable_pm;
index 94a0ebb03d8c96804b455f73a8d8b3155baab866..927506cf271d00d37b619d6594e8d6f5aeef5689 100644 (file)
@@ -320,53 +320,6 @@ static u32 get_ena_thd_mask_401xx(struct adf_accel_dev *accel_dev, u32 obj_num)
        }
 }
 
-static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
-{
-       enum adf_cfg_service_type rps[RP_GROUP_COUNT];
-       const struct adf_fw_config *fw_config;
-       u16 ring_to_svc_map;
-       int i, j;
-
-       fw_config = get_fw_config(accel_dev);
-       if (!fw_config)
-               return 0;
-
-       for (i = 0; i < RP_GROUP_COUNT; i++) {
-               switch (fw_config[i].ae_mask) {
-               case ADF_AE_GROUP_0:
-                       j = RP_GROUP_0;
-                       break;
-               case ADF_AE_GROUP_1:
-                       j = RP_GROUP_1;
-                       break;
-               default:
-                       return 0;
-               }
-
-               switch (fw_config[i].obj) {
-               case ADF_FW_SYM_OBJ:
-                       rps[j] = SYM;
-                       break;
-               case ADF_FW_ASYM_OBJ:
-                       rps[j] = ASYM;
-                       break;
-               case ADF_FW_DC_OBJ:
-                       rps[j] = COMP;
-                       break;
-               default:
-                       rps[j] = 0;
-                       break;
-               }
-       }
-
-       ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT |
-                         rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT |
-                         rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT |
-                         rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT;
-
-       return ring_to_svc_map;
-}
-
 static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num,
                                const char * const fw_objs[], int num_objs)
 {
@@ -399,6 +352,20 @@ static const char *uof_get_name_402xx(struct adf_accel_dev *accel_dev, u32 obj_n
        return uof_get_name(accel_dev, obj_num, adf_402xx_fw_objs, num_fw_objs);
 }
 
+static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num)
+{
+       const struct adf_fw_config *fw_config;
+
+       if (obj_num >= uof_get_num_objs(accel_dev))
+               return -EINVAL;
+
+       fw_config = get_fw_config(accel_dev);
+       if (!fw_config)
+               return -EINVAL;
+
+       return fw_config[obj_num].obj;
+}
+
 static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
 {
        const struct adf_fw_config *fw_config;
@@ -479,11 +446,12 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
                break;
        }
        hw_data->uof_get_num_objs = uof_get_num_objs;
+       hw_data->uof_get_obj_type = uof_get_obj_type;
        hw_data->uof_get_ae_mask = uof_get_ae_mask;
        hw_data->get_rp_group = get_rp_group;
        hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable;
        hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
-       hw_data->get_ring_to_svc_map = get_ring_to_svc_map;
+       hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map;
        hw_data->disable_iov = adf_disable_sriov;
        hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
        hw_data->enable_pm = adf_gen4_enable_pm;
index 6908727bff3b85debba7bd887153025d8efe1b20..5915cde8a7aa4d72f45e5b233dd65dc588a912ac 100644 (file)
@@ -53,3 +53,5 @@ intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \
                               adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \
                               adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \
                               adf_gen2_pfvf.o adf_gen4_pfvf.o
+
+intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o
index a16c7e6edc65d448c6fdca62665213e41ceaa90c..08658c3a01e9bcde41ecef655fbdab0e8291ac99 100644 (file)
@@ -248,6 +248,7 @@ struct adf_hw_device_data {
        void (*set_msix_rttable)(struct adf_accel_dev *accel_dev);
        const char *(*uof_get_name)(struct adf_accel_dev *accel_dev, u32 obj_num);
        u32 (*uof_get_num_objs)(struct adf_accel_dev *accel_dev);
+       int (*uof_get_obj_type)(struct adf_accel_dev *accel_dev, u32 obj_num);
        u32 (*uof_get_ae_mask)(struct adf_accel_dev *accel_dev, u32 obj_num);
        int (*get_rp_group)(struct adf_accel_dev *accel_dev, u32 ae_mask);
        u32 (*get_ena_thd_mask)(struct adf_accel_dev *accel_dev, u32 obj_num);
@@ -332,6 +333,7 @@ struct adf_accel_vf_info {
        struct ratelimit_state vf2pf_ratelimit;
        u32 vf_nr;
        bool init;
+       bool restarting;
        u8 vf_compat_ver;
 };
 
@@ -401,6 +403,7 @@ struct adf_accel_dev {
        struct adf_error_counters ras_errors;
        struct mutex state_lock; /* protect state of the device */
        bool is_vf;
+       bool autoreset_on_error;
        u32 accel_id;
 };
 #endif
index a39e70bd4b21bbc4ecd9180e194f8cf335ab167b..9da2278bd5b7dc594076478abf5387ed7e7ddbe0 100644 (file)
@@ -7,8 +7,15 @@
 #include <linux/delay.h>
 #include "adf_accel_devices.h"
 #include "adf_common_drv.h"
+#include "adf_pfvf_pf_msg.h"
+
+struct adf_fatal_error_data {
+       struct adf_accel_dev *accel_dev;
+       struct work_struct work;
+};
 
 static struct workqueue_struct *device_reset_wq;
+static struct workqueue_struct *device_sriov_wq;
 
 static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
                                           pci_channel_state_t state)
@@ -26,6 +33,19 @@ static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
                return PCI_ERS_RESULT_DISCONNECT;
        }
 
+       set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
+       if (accel_dev->hw_device->exit_arb) {
+               dev_dbg(&pdev->dev, "Disabling arbitration\n");
+               accel_dev->hw_device->exit_arb(accel_dev);
+       }
+       adf_error_notifier(accel_dev);
+       adf_pf2vf_notify_fatal_error(accel_dev);
+       adf_dev_restarting_notify(accel_dev);
+       adf_pf2vf_notify_restarting(accel_dev);
+       adf_pf2vf_wait_for_restarting_complete(accel_dev);
+       pci_clear_master(pdev);
+       adf_dev_down(accel_dev, false);
+
        return PCI_ERS_RESULT_NEED_RESET;
 }
 
@@ -37,6 +57,13 @@ struct adf_reset_dev_data {
        struct work_struct reset_work;
 };
 
+/* sriov dev data */
+struct adf_sriov_dev_data {
+       struct adf_accel_dev *accel_dev;
+       struct completion compl;
+       struct work_struct sriov_work;
+};
+
 void adf_reset_sbr(struct adf_accel_dev *accel_dev)
 {
        struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
@@ -82,29 +109,57 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev)
        }
 }
 
+static void adf_device_sriov_worker(struct work_struct *work)
+{
+       struct adf_sriov_dev_data *sriov_data =
+               container_of(work, struct adf_sriov_dev_data, sriov_work);
+
+       adf_reenable_sriov(sriov_data->accel_dev);
+       complete(&sriov_data->compl);
+}
+
 static void adf_device_reset_worker(struct work_struct *work)
 {
        struct adf_reset_dev_data *reset_data =
                  container_of(work, struct adf_reset_dev_data, reset_work);
        struct adf_accel_dev *accel_dev = reset_data->accel_dev;
+       unsigned long wait_jiffies = msecs_to_jiffies(10000);
+       struct adf_sriov_dev_data sriov_data;
 
        adf_dev_restarting_notify(accel_dev);
        if (adf_dev_restart(accel_dev)) {
                /* The device hanged and we can't restart it so stop here */
                dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
-               if (reset_data->mode == ADF_DEV_RESET_ASYNC)
+               if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
+                   completion_done(&reset_data->compl))
                        kfree(reset_data);
                WARN(1, "QAT: device restart failed. Device is unusable\n");
                return;
        }
+
+       sriov_data.accel_dev = accel_dev;
+       init_completion(&sriov_data.compl);
+       INIT_WORK(&sriov_data.sriov_work, adf_device_sriov_worker);
+       queue_work(device_sriov_wq, &sriov_data.sriov_work);
+       if (wait_for_completion_timeout(&sriov_data.compl, wait_jiffies))
+               adf_pf2vf_notify_restarted(accel_dev);
+
        adf_dev_restarted_notify(accel_dev);
        clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
 
-       /* The dev is back alive. Notify the caller if in sync mode */
-       if (reset_data->mode == ADF_DEV_RESET_SYNC)
-               complete(&reset_data->compl);
-       else
+       /*
+        * The dev is back alive. Notify the caller if in sync mode.
+        *
+        * If the device restart takes more time than expected, the
+        * schedule_reset() function can time out and exit. This can be
+        * detected by calling the completion_done() function. In this case
+        * the reset_data structure needs to be freed here.
+        */
+       if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
+           completion_done(&reset_data->compl))
                kfree(reset_data);
+       else
+               complete(&reset_data->compl);
 }
 
 static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
@@ -137,8 +192,9 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
                        dev_err(&GET_DEV(accel_dev),
                                "Reset device timeout expired\n");
                        ret = -EFAULT;
+               } else {
+                       kfree(reset_data);
                }
-               kfree(reset_data);
                return ret;
        }
        return 0;
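
Together with the completion_done() checks in adf_device_reset_worker()
above, this makes reset_data freed on exactly one side: the waiter no
longer frees it on timeout, so a later complete() from the worker cannot
touch freed memory. A condensed restatement of the handoff as implemented
by this patch:

    /* waiter (adf_dev_aer_schedule_reset), sync mode */
    if (!wait_for_completion_timeout(&reset_data->compl, wait_jiffies))
            ret = -EFAULT;          /* timed out: leave reset_data alone */
    else
            kfree(reset_data);      /* worker already completed: free here */

    /* worker (adf_device_reset_worker) */
    if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
        completion_done(&reset_data->compl))
            kfree(reset_data);      /* async reset, or the sync waiter is gone */
    else
            complete(&reset_data->compl);
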
@@ -147,14 +203,25 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
 static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
 {
        struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
+       int res = 0;
 
        if (!accel_dev) {
                pr_err("QAT: Can't find acceleration device\n");
                return PCI_ERS_RESULT_DISCONNECT;
        }
-       if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC))
+
+       if (!pdev->is_busmaster)
+               pci_set_master(pdev);
+       pci_restore_state(pdev);
+       pci_save_state(pdev);
+       res = adf_dev_up(accel_dev, false);
+       if (res && res != -EALREADY)
                return PCI_ERS_RESULT_DISCONNECT;
 
+       adf_reenable_sriov(accel_dev);
+       adf_pf2vf_notify_restarted(accel_dev);
+       adf_dev_restarted_notify(accel_dev);
+       clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
        return PCI_ERS_RESULT_RECOVERED;
 }
 
@@ -171,11 +238,62 @@ const struct pci_error_handlers adf_err_handler = {
 };
 EXPORT_SYMBOL_GPL(adf_err_handler);
 
+int adf_dev_autoreset(struct adf_accel_dev *accel_dev)
+{
+       if (accel_dev->autoreset_on_error)
+               return adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_ASYNC);
+
+       return 0;
+}
+
+static void adf_notify_fatal_error_worker(struct work_struct *work)
+{
+       struct adf_fatal_error_data *wq_data =
+                       container_of(work, struct adf_fatal_error_data, work);
+       struct adf_accel_dev *accel_dev = wq_data->accel_dev;
+       struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+
+       adf_error_notifier(accel_dev);
+
+       if (!accel_dev->is_vf) {
+               /* Disable arbitration to stop processing of new requests */
+               if (accel_dev->autoreset_on_error && hw_device->exit_arb)
+                       hw_device->exit_arb(accel_dev);
+               if (accel_dev->pf.vf_info)
+                       adf_pf2vf_notify_fatal_error(accel_dev);
+               adf_dev_autoreset(accel_dev);
+       }
+
+       kfree(wq_data);
+}
+
+int adf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+       struct adf_fatal_error_data *wq_data;
+
+       wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC);
+       if (!wq_data)
+               return -ENOMEM;
+
+       wq_data->accel_dev = accel_dev;
+       INIT_WORK(&wq_data->work, adf_notify_fatal_error_worker);
+       adf_misc_wq_queue_work(&wq_data->work);
+
+       return 0;
+}
+
 int adf_init_aer(void)
 {
        device_reset_wq = alloc_workqueue("qat_device_reset_wq",
                                          WQ_MEM_RECLAIM, 0);
-       return !device_reset_wq ? -EFAULT : 0;
+       if (!device_reset_wq)
+               return -EFAULT;
+
+       device_sriov_wq = alloc_workqueue("qat_device_sriov_wq", 0, 0);
+       if (!device_sriov_wq)
+               return -EFAULT;
+
+       return 0;
 }
 
 void adf_exit_aer(void)
@@ -183,4 +301,8 @@ void adf_exit_aer(void)
        if (device_reset_wq)
                destroy_workqueue(device_reset_wq);
        device_reset_wq = NULL;
+
+       if (device_sriov_wq)
+               destroy_workqueue(device_sriov_wq);
+       device_sriov_wq = NULL;
 }
index 322b76903a737d4e0fce0371a355f6a1f17fb0b0..e015ad6cace2b22afae87fdeda773fa260dcd6ba 100644 (file)
@@ -49,5 +49,6 @@
        ADF_ETRMGR_BANK "%d" ADF_ETRMGR_CORE_AFFINITY
 #define ADF_ACCEL_STR "Accelerator%d"
 #define ADF_HEARTBEAT_TIMER  "HeartbeatTimer"
+#define ADF_SRIOV_ENABLED "SriovEnabled"
 
 #endif
index 01e0a389e462b027cefb1d5d658029297b5104b4..cf89f57de2a7021494faefc006ca4c4eda038e2b 100644 (file)
@@ -83,6 +83,9 @@ static int measure_clock(struct adf_accel_dev *accel_dev, u32 *frequency)
        }
 
        delta_us = timespec_to_us(&ts3) - timespec_to_us(&ts1);
+       if (!delta_us)
+               return -EINVAL;
+
        temp = (timestamp2 - timestamp1) * ME_CLK_DIVIDER * 10;
        temp = DIV_ROUND_CLOSEST_ULL(temp, delta_us);
        /*
index 07119c487da0195cbb268a82e9242c1d2ff1519d..627953a72d4784c82b53c9ddc7d8e64002a9f524 100644 (file)
@@ -16,7 +16,6 @@
 
 #define CNV_ERR_INFO_MASK              GENMASK(11, 0)
 #define CNV_ERR_TYPE_MASK              GENMASK(15, 12)
-#define CNV_SLICE_ERR_MASK             GENMASK(7, 0)
 #define CNV_SLICE_ERR_SIGN_BIT_INDEX   7
 #define CNV_DELTA_ERR_SIGN_BIT_INDEX   11
 
index f06188033a93fb2d5beef42af7c34345029fe72d..57328249c89e7a6f4ae7c7754bf2bbb218c6f651 100644 (file)
@@ -40,6 +40,7 @@ enum adf_event {
        ADF_EVENT_SHUTDOWN,
        ADF_EVENT_RESTARTING,
        ADF_EVENT_RESTARTED,
+       ADF_EVENT_FATAL_ERROR,
 };
 
 struct service_hndl {
@@ -60,6 +61,8 @@ int adf_dev_restart(struct adf_accel_dev *accel_dev);
 
 void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data);
 void adf_clean_vf_map(bool);
+int adf_notify_fatal_error(struct adf_accel_dev *accel_dev);
+void adf_error_notifier(struct adf_accel_dev *accel_dev);
 int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev,
                       struct adf_accel_dev *pf);
 void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev,
@@ -84,12 +87,14 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev);
 extern const struct pci_error_handlers adf_err_handler;
 void adf_reset_sbr(struct adf_accel_dev *accel_dev);
 void adf_reset_flr(struct adf_accel_dev *accel_dev);
+int adf_dev_autoreset(struct adf_accel_dev *accel_dev);
 void adf_dev_restore(struct adf_accel_dev *accel_dev);
 int adf_init_aer(void);
 void adf_exit_aer(void);
 int adf_init_arb(struct adf_accel_dev *accel_dev);
 void adf_exit_arb(struct adf_accel_dev *accel_dev);
 void adf_update_ring_arb(struct adf_etr_ring_data *ring);
+int adf_disable_arb_thd(struct adf_accel_dev *accel_dev, u32 ae, u32 thr);
 
 int adf_dev_get(struct adf_accel_dev *accel_dev);
 void adf_dev_put(struct adf_accel_dev *accel_dev);
@@ -188,6 +193,7 @@ bool adf_misc_wq_queue_delayed_work(struct delayed_work *work,
 #if defined(CONFIG_PCI_IOV)
 int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
 void adf_disable_sriov(struct adf_accel_dev *accel_dev);
+void adf_reenable_sriov(struct adf_accel_dev *accel_dev);
 void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask);
 void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev);
 bool adf_recv_and_handle_pf2vf_msg(struct adf_accel_dev *accel_dev);
@@ -208,6 +214,10 @@ static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev)
 {
 }
 
+static inline void adf_reenable_sriov(struct adf_accel_dev *accel_dev)
+{
+}
+
 static inline int adf_init_pf_wq(void)
 {
        return 0;
index 86ee36feefad34692fa5e3d1c3a935c510a6364b..f07b748795f7b79af65f2979eb6bfd2c4c39546b 100644 (file)
@@ -60,10 +60,10 @@ static int adf_get_vf_real_id(u32 fake)
 
 /**
  * adf_clean_vf_map() - Cleans VF id mappings
- *
- * Function cleans internal ids for virtual functions.
  * @vf: flag indicating whether mappings are cleaned
  *     for vfs only or for vfs and pfs
+ *
+ * Function cleans internal ids for virtual functions.
  */
 void adf_clean_vf_map(bool vf)
 {
index 9985683056d5ffe423f020d88eb62567cc3e2ad1..d28e1921940a799b4bd145d82745ac957564cea3 100644 (file)
@@ -4,6 +4,7 @@
 #include "adf_accel_devices.h"
 #include "adf_cfg_services.h"
 #include "adf_common_drv.h"
+#include "adf_fw_config.h"
 #include "adf_gen4_hw_data.h"
 #include "adf_gen4_pm.h"
 
@@ -398,6 +399,9 @@ int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev)
                         ADF_GEN4_ADMIN_ACCELENGINES;
 
        if (srv_id == SVC_DCC) {
+               if (ae_cnt > ICP_QAT_HW_AE_DELIMITER)
+                       return -EINVAL;
+
                memcpy(thd2arb_map, thrd_to_arb_map_dcc,
                       array_size(sizeof(*thd2arb_map), ae_cnt));
                return 0;
@@ -430,3 +434,58 @@ int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev)
        return 0;
 }
 EXPORT_SYMBOL_GPL(adf_gen4_init_thd2arb_map);
+
+u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
+{
+       struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+       enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { };
+       unsigned int ae_mask, start_id, worker_obj_cnt, i;
+       u16 ring_to_svc_map;
+       int rp_group;
+
+       if (!hw_data->get_rp_group || !hw_data->uof_get_ae_mask ||
+           !hw_data->uof_get_obj_type || !hw_data->uof_get_num_objs)
+               return 0;
+
+       /* If dcc, all rings handle compression requests */
+       if (adf_get_service_enabled(accel_dev) == SVC_DCC) {
+               for (i = 0; i < RP_GROUP_COUNT; i++)
+                       rps[i] = COMP;
+               goto set_mask;
+       }
+
+       worker_obj_cnt = hw_data->uof_get_num_objs(accel_dev) -
+                        ADF_GEN4_ADMIN_ACCELENGINES;
+       start_id = worker_obj_cnt - RP_GROUP_COUNT;
+
+       for (i = start_id; i < worker_obj_cnt; i++) {
+               ae_mask = hw_data->uof_get_ae_mask(accel_dev, i);
+               rp_group = hw_data->get_rp_group(accel_dev, ae_mask);
+               if (rp_group >= RP_GROUP_COUNT || rp_group < RP_GROUP_0)
+                       return 0;
+
+               switch (hw_data->uof_get_obj_type(accel_dev, i)) {
+               case ADF_FW_SYM_OBJ:
+                       rps[rp_group] = SYM;
+                       break;
+               case ADF_FW_ASYM_OBJ:
+                       rps[rp_group] = ASYM;
+                       break;
+               case ADF_FW_DC_OBJ:
+                       rps[rp_group] = COMP;
+                       break;
+               default:
+                       rps[rp_group] = 0;
+                       break;
+               }
+       }
+
+set_mask:
+       ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT |
+                         rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT |
+                         rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT |
+                         rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT;
+
+       return ring_to_svc_map;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_get_ring_to_svc_map);
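
Each ring pair receives one service type in the returned map, with groups 0
and 1 alternating across the four ring pairs. A worked example, assuming
the ADF_CFG_SERV_RING_PAIR_n_SHIFT values are spaced 3 bits apart (0, 3, 6,
9) and a firmware split of SYM in group 0 and ASYM in group 1:

    /* rps[RP_GROUP_0] == SYM, rps[RP_GROUP_1] == ASYM */
    u16 map = SYM  << 0 |   /* ring pair 0 */
              ASYM << 3 |   /* ring pair 1 */
              SYM  << 6 |   /* ring pair 2 */
              ASYM << 9;    /* ring pair 3 */
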
index 7d8a774cadc882eab4555d261a05cae96f58d4ac..c6e80df5a85a337c6fb6a3fa383fec3ac849c748 100644 (file)
@@ -235,5 +235,6 @@ int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number);
 void adf_gen4_set_msix_default_rttable(struct adf_accel_dev *accel_dev);
 void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);
 int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev);
+u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev);
 
 #endif
index 048c24607939018c5aff904d706ab29129c50a7f..2dd3772bf58a6ce673587bdae15c0f751e0329d6 100644 (file)
@@ -1007,8 +1007,7 @@ static bool adf_handle_spppar_err(struct adf_accel_dev *accel_dev,
 static bool adf_handle_ssmcpppar_err(struct adf_accel_dev *accel_dev,
                                     void __iomem *csr, u32 iastatssm)
 {
-       u32 reg = ADF_CSR_RD(csr, ADF_GEN4_SSMCPPERR);
-       u32 bits_num = BITS_PER_REG(reg);
+       u32 reg, bits_num = BITS_PER_REG(reg);
        bool reset_required = false;
        unsigned long errs_bits;
        u32 bit_iterator;
@@ -1106,8 +1105,7 @@ static bool adf_handle_rf_parr_err(struct adf_accel_dev *accel_dev,
 static bool adf_handle_ser_err_ssmsh(struct adf_accel_dev *accel_dev,
                                     void __iomem *csr, u32 iastatssm)
 {
-       u32 reg = ADF_CSR_RD(csr, ADF_GEN4_SER_ERR_SSMSH);
-       u32 bits_num = BITS_PER_REG(reg);
+       u32 reg, bits_num = BITS_PER_REG(reg);
        bool reset_required = false;
        unsigned long errs_bits;
        u32 bit_iterator;
index 13f48d2f6da88e09034c0a33f4bb76f2fd29ed22..b19aa1ef8eeed9f55a89426cddebe165c4717cb5 100644 (file)
 
 #define ADF_HB_EMPTY_SIG 0xA5A5A5A5
 
-/* Heartbeat counter pair */
-struct hb_cnt_pair {
-       __u16 resp_heartbeat_cnt;
-       __u16 req_heartbeat_cnt;
-};
-
 static int adf_hb_check_polling_freq(struct adf_accel_dev *accel_dev)
 {
        u64 curr_time = adf_clock_get_current_time();
@@ -211,6 +205,19 @@ static int adf_hb_get_status(struct adf_accel_dev *accel_dev)
        return ret;
 }
 
+static void adf_heartbeat_reset(struct adf_accel_dev *accel_dev)
+{
+       u64 curr_time = adf_clock_get_current_time();
+       u64 time_since_reset = curr_time - accel_dev->heartbeat->last_hb_reset_time;
+
+       if (time_since_reset < ADF_CFG_HB_RESET_MS)
+               return;
+
+       accel_dev->heartbeat->last_hb_reset_time = curr_time;
+       if (adf_notify_fatal_error(accel_dev))
+               dev_err(&GET_DEV(accel_dev), "Failed to notify fatal error\n");
+}
+
 void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
                          enum adf_device_heartbeat_status *hb_status)
 {
@@ -235,6 +242,7 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
                        "Heartbeat ERROR: QAT is not responding.\n");
                *hb_status = HB_DEV_UNRESPONSIVE;
                hb->hb_failed_counter++;
+               adf_heartbeat_reset(accel_dev);
                return;
        }
 
index b22e3cb29798ec57200d82ea110a087105c53273..16fdfb48b196acd33f020f9cd580a5d026b5a2f7 100644 (file)
@@ -13,17 +13,26 @@ struct dentry;
 #define ADF_CFG_HB_TIMER_DEFAULT_MS 500
 #define ADF_CFG_HB_COUNT_THRESHOLD 3
 
+#define ADF_CFG_HB_RESET_MS 5000
+
 enum adf_device_heartbeat_status {
        HB_DEV_UNRESPONSIVE = 0,
        HB_DEV_ALIVE,
        HB_DEV_UNSUPPORTED,
 };
 
+/* Heartbeat counter pair */
+struct hb_cnt_pair {
+       __u16 resp_heartbeat_cnt;
+       __u16 req_heartbeat_cnt;
+};
+
 struct adf_heartbeat {
        unsigned int hb_sent_counter;
        unsigned int hb_failed_counter;
        unsigned int hb_timer;
        u64 last_hb_check_time;
+       u64 last_hb_reset_time;
        bool ctrs_cnt_checked;
        struct hb_dma_addr {
                dma_addr_t phy_addr;
@@ -35,6 +44,9 @@ struct adf_heartbeat {
                struct dentry *cfg;
                struct dentry *sent;
                struct dentry *failed;
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+               struct dentry *inject_error;
+#endif
        } dbgfs;
 };
 
@@ -51,6 +63,15 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
                          enum adf_device_heartbeat_status *hb_status);
 void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev);
 
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev);
+#else
+static inline int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev)
+{
+       return -EPERM;
+}
+#endif
+
 #else
 static inline int adf_heartbeat_init(struct adf_accel_dev *accel_dev)
 {
index 2661af6a2ef697c7e7d4fe52fe7f96dd54195d21..cccdff24b48d61baf2c70165c0f0d6be254b18ad 100644 (file)
@@ -155,6 +155,44 @@ static const struct file_operations adf_hb_cfg_fops = {
        .write = adf_hb_cfg_write,
 };
 
+static ssize_t adf_hb_error_inject_write(struct file *file,
+                                        const char __user *user_buf,
+                                        size_t count, loff_t *ppos)
+{
+       struct adf_accel_dev *accel_dev = file->private_data;
+       char buf[3];
+       int ret;
+
+       /* the last byte is reserved for string termination */
+       if (*ppos != 0 || count != 2)
+               return -EINVAL;
+
+       if (copy_from_user(buf, user_buf, count))
+               return -EFAULT;
+       buf[count] = '\0';
+
+       if (buf[0] != '1')
+               return -EINVAL;
+
+       ret = adf_heartbeat_inject_error(accel_dev);
+       if (ret) {
+               dev_err(&GET_DEV(accel_dev),
+                       "Heartbeat error injection failed with status %d\n",
+                       ret);
+               return ret;
+       }
+
+       dev_info(&GET_DEV(accel_dev), "Heartbeat error injection enabled\n");
+
+       return count;
+}
+
+static const struct file_operations adf_hb_error_inject_fops = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .write = adf_hb_error_inject_write,
+};
+
 void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev)
 {
        struct adf_heartbeat *hb = accel_dev->heartbeat;
@@ -171,6 +209,17 @@ void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev)
                                               &hb->hb_failed_counter, &adf_hb_stats_fops);
        hb->dbgfs.cfg = debugfs_create_file("config", 0600, hb->dbgfs.base_dir,
                                            accel_dev, &adf_hb_cfg_fops);
+
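+       /* inject_error only exists when error injection support is compiled in */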
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+       hb->dbgfs.inject_error = debugfs_create_file("inject_error", 0200,
+                                                    hb->dbgfs.base_dir, accel_dev,
+                                                    &adf_hb_error_inject_fops);
+#endif
 }
 EXPORT_SYMBOL_GPL(adf_heartbeat_dbgfs_add);
 
@@ -189,6 +238,10 @@ void adf_heartbeat_dbgfs_rm(struct adf_accel_dev *accel_dev)
        hb->dbgfs.failed = NULL;
        debugfs_remove(hb->dbgfs.cfg);
        hb->dbgfs.cfg = NULL;
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+       debugfs_remove(hb->dbgfs.inject_error);
+       hb->dbgfs.inject_error = NULL;
+#endif
        debugfs_remove(hb->dbgfs.base_dir);
        hb->dbgfs.base_dir = NULL;
 }
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c
new file mode 100644 (file)
index 0000000..a3b474b
--- /dev/null
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Intel Corporation */
+#include <linux/random.h>
+
+#include "adf_admin.h"
+#include "adf_common_drv.h"
+#include "adf_heartbeat.h"
+
+#define MAX_HB_TICKS 0xFFFFFFFF
+
+static int adf_hb_set_timer_to_max(struct adf_accel_dev *accel_dev)
+{
+       struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+
+       accel_dev->heartbeat->hb_timer = 0;
+
+       if (hw_data->stop_timer)
+               hw_data->stop_timer(accel_dev);
+
+       return adf_send_admin_hb_timer(accel_dev, MAX_HB_TICKS);
+}
+
+static void adf_set_hb_counters_fail(struct adf_accel_dev *accel_dev, u32 ae,
+                                    u32 thr)
+{
+       struct hb_cnt_pair *stats = accel_dev->heartbeat->dma.virt_addr;
+       struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+       const size_t max_aes = hw_device->get_num_aes(hw_device);
+       const size_t hb_ctrs = hw_device->num_hb_ctrs;
+       size_t thr_id = ae * hb_ctrs + thr;
+       u16 num_rsp = stats[thr_id].resp_heartbeat_cnt;
+
+       /*
+        * Inject live.req != live.rsp and live.rsp == last.rsp
+        * to trigger the heartbeat error detection
+        */
+       stats[thr_id].req_heartbeat_cnt++;
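+       /* Move from the live counters to the 'last' copy stored after them */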
+       stats += (max_aes * hb_ctrs);
+       stats[thr_id].resp_heartbeat_cnt = num_rsp;
+}
+
+int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev)
+{
+       struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+       const size_t max_aes = hw_device->get_num_aes(hw_device);
+       const size_t hb_ctrs = hw_device->num_hb_ctrs;
+       u32 rand, rand_ae, rand_thr;
+       unsigned long ae_mask;
+       int ret;
+
+       ae_mask = hw_device->ae_mask;
+
+       do {
+               /* Ensure we have a valid ae */
+               get_random_bytes(&rand, sizeof(rand));
+               rand_ae = rand % max_aes;
+       } while (!test_bit(rand_ae, &ae_mask));
+
+       get_random_bytes(&rand, sizeof(rand));
+       rand_thr = rand % hb_ctrs;
+
+       /* Increase the heartbeat timer to prevent the FW from updating the HB counters */
+       ret = adf_hb_set_timer_to_max(accel_dev);
+       if (ret)
+               return ret;
+
+       /* Stop the selected worker thread from processing any packets */
+       ret = adf_disable_arb_thd(accel_dev, rand_ae, rand_thr);
+       if (ret)
+               return ret;
+
+       /* Change HB counters memory to simulate a hang */
+       adf_set_hb_counters_fail(accel_dev, rand_ae, rand_thr);
+
+       return 0;
+}
index da6956699246749a13b5f57079ffb74c44ffae38..65bd26b25abce9c8eccef5f87d27cbaf22614179 100644 (file)
@@ -103,3 +103,28 @@ void adf_exit_arb(struct adf_accel_dev *accel_dev)
                csr_ops->write_csr_ring_srv_arb_en(csr, i, 0);
 }
 EXPORT_SYMBOL_GPL(adf_exit_arb);
+
+int adf_disable_arb_thd(struct adf_accel_dev *accel_dev, u32 ae, u32 thr)
+{
+       void __iomem *csr = accel_dev->transport->banks[0].csr_addr;
+       struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+       const u32 *thd_2_arb_cfg;
+       struct arb_info info;
+       u32 ae_thr_map;
+
+       if (thr == ADF_AE_STRAND0_THREAD || thr == ADF_AE_STRAND1_THREAD)
+               thr = ADF_AE_ADMIN_THREAD;
+
+       hw_data->get_arb_info(&info);
+       thd_2_arb_cfg = hw_data->get_arb_mapping(accel_dev);
+       if (!thd_2_arb_cfg)
+               return -EFAULT;
+
+       /* Disable scheduling for this particular AE and thread */
+       ae_thr_map = *(thd_2_arb_cfg + ae);
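+       /* Clear the thread's 4-bit arbiter field; BIT(2) == 4 is the per-thread stride */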
+       ae_thr_map &= ~(GENMASK(3, 0) << (thr * BIT(2)));
+
+       WRITE_CSR_ARB_WT2SAM(csr, info.arb_offset, info.wt2sam_offset, ae,
+                            ae_thr_map);
+       return 0;
+}
index f43ae9111553f3f2d4e6321375e654a1aec05017..74f0818c07034873871056269fc0db040cc284be 100644 (file)
@@ -433,6 +433,18 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev)
        return 0;
 }
 
+void adf_error_notifier(struct adf_accel_dev *accel_dev)
+{
+       struct service_hndl *service;
+
+       list_for_each_entry(service, &service_table, list) {
+               if (service->event_hld(accel_dev, ADF_EVENT_FATAL_ERROR))
+                       dev_err(&GET_DEV(accel_dev),
+                               "Failed to send error event to %s.\n",
+                               service->name);
+       }
+}
+
 static int adf_dev_shutdown_cache_cfg(struct adf_accel_dev *accel_dev)
 {
        char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
index 3557a0d6dea289fa6a2ef1a2b1369c48dbcdb599..cae1aee5479aff04e7c123674b9f5e0cb33f67d9 100644 (file)
@@ -139,8 +139,13 @@ static bool adf_handle_ras_int(struct adf_accel_dev *accel_dev)
 
        if (ras_ops->handle_interrupt &&
            ras_ops->handle_interrupt(accel_dev, &reset_required)) {
-               if (reset_required)
+               if (reset_required) {
                        dev_err(&GET_DEV(accel_dev), "Fatal error, reset required\n");
+                       if (adf_notify_fatal_error(accel_dev))
+                               dev_err(&GET_DEV(accel_dev),
+                                       "Failed to notify fatal error\n");
+               }
+
                return true;
        }
 
@@ -272,7 +277,7 @@ static int adf_isr_alloc_msix_vectors_data(struct adf_accel_dev *accel_dev)
        if (!accel_dev->pf.vf_info)
                msix_num_entries += hw_data->num_banks;
 
-       irqs = kzalloc_node(msix_num_entries * sizeof(*irqs),
+       irqs = kcalloc_node(msix_num_entries, sizeof(*irqs),
                            GFP_KERNEL, dev_to_node(&GET_DEV(accel_dev)));
        if (!irqs)
                return -ENOMEM;
@@ -375,8 +380,6 @@ EXPORT_SYMBOL_GPL(adf_isr_resource_alloc);
 /**
  * adf_init_misc_wq() - Init misc workqueue
  *
- * Function init workqueue 'qat_misc_wq' for general purpose.
- *
  * Return: 0 on success, error code otherwise.
  */
 int __init adf_init_misc_wq(void)
index 204a42438992645960e99b234b77983cc3e54864..d1b3ef9cadacc02574ccf9c56515cc1c2cabee36 100644 (file)
@@ -99,6 +99,8 @@ enum pf2vf_msgtype {
        ADF_PF2VF_MSGTYPE_RESTARTING            = 0x01,
        ADF_PF2VF_MSGTYPE_VERSION_RESP          = 0x02,
        ADF_PF2VF_MSGTYPE_BLKMSG_RESP           = 0x03,
+       ADF_PF2VF_MSGTYPE_FATAL_ERROR           = 0x04,
+       ADF_PF2VF_MSGTYPE_RESTARTED             = 0x05,
 /* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */
        ADF_PF2VF_MSGTYPE_RP_RESET_RESP         = 0x10,
 };
@@ -112,6 +114,7 @@ enum vf2pf_msgtype {
        ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ       = 0x07,
        ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ      = 0x08,
        ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ       = 0x09,
+       ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE   = 0x0a,
 /* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */
        ADF_VF2PF_MSGTYPE_RP_RESET              = 0x10,
 };
@@ -124,8 +127,10 @@ enum pfvf_compatibility_version {
        ADF_PFVF_COMPAT_FAST_ACK                = 0x03,
        /* Ring to service mapping support for non-standard mappings */
        ADF_PFVF_COMPAT_RING_TO_SVC_MAP         = 0x04,
+       /* Fallback support: restarted/restarting-complete and fatal error messages */
+       ADF_PFVF_COMPAT_FALLBACK                = 0x05,
        /* Reference to the latest version */
-       ADF_PFVF_COMPAT_THIS_VERSION            = 0x04,
+       ADF_PFVF_COMPAT_THIS_VERSION            = 0x05,
 };
 
 /* PF->VF Version Response */
index 14c069f0d71a5b81ce246366f83e492924d663cd..0e31f4b41844e0a8d53de4000c4d574afab989f4 100644 (file)
@@ -1,21 +1,83 @@
 // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
 /* Copyright(c) 2015 - 2021 Intel Corporation */
+#include <linux/delay.h>
 #include <linux/pci.h>
 #include "adf_accel_devices.h"
 #include "adf_pfvf_msg.h"
 #include "adf_pfvf_pf_msg.h"
 #include "adf_pfvf_pf_proto.h"
 
+#define ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY  100
+#define ADF_VF_SHUTDOWN_RETRY                  100
+
 void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev)
 {
        struct adf_accel_vf_info *vf;
        struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTING };
        int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
 
+       dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarting\n");
        for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
-               if (vf->init && adf_send_pf2vf_msg(accel_dev, i, msg))
+               vf->restarting = false;
+               if (!vf->init)
+                       continue;
+               if (adf_send_pf2vf_msg(accel_dev, i, msg))
                        dev_err(&GET_DEV(accel_dev),
                                "Failed to send restarting msg to VF%d\n", i);
+               else if (vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK)
+                       vf->restarting = true;
+       }
+}
+
+void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev)
+{
+       int num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+       int i, retries = ADF_VF_SHUTDOWN_RETRY;
+       struct adf_accel_vf_info *vf;
+       bool vf_running;
+
+       dev_dbg(&GET_DEV(accel_dev), "pf2vf wait for restarting complete\n");
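+       /* Poll every 100 ms, up to ADF_VF_SHUTDOWN_RETRY attempts (~10 s total) */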
+       do {
+               vf_running = false;
+               for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++)
+                       if (vf->restarting)
+                               vf_running = true;
+               if (!vf_running)
+                       break;
+               msleep(ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY);
+       } while (--retries);
+
+       if (vf_running)
+               dev_warn(&GET_DEV(accel_dev), "Some VFs are still running\n");
+}
+
+void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev)
+{
+       struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTED };
+       int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+       struct adf_accel_vf_info *vf;
+
+       dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarted\n");
+       for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
+               if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK &&
+                   adf_send_pf2vf_msg(accel_dev, i, msg))
+                       dev_err(&GET_DEV(accel_dev),
+                               "Failed to send restarted msg to VF%d\n", i);
+       }
+}
+
+void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+       struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_FATAL_ERROR };
+       int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+       struct adf_accel_vf_info *vf;
+
+       dev_dbg(&GET_DEV(accel_dev), "pf2vf notify fatal error\n");
+       for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
+               if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK &&
+                   adf_send_pf2vf_msg(accel_dev, i, msg))
+                       dev_err(&GET_DEV(accel_dev),
+                               "Failed to send fatal error msg to VF%d\n", i);
        }
 }
 
index e8982d1ac8962b3c4ebf3ab0d34f7d3eb65d8df3..f203d88c919c2f06bbbe1d82cf9f5966f0d5706f 100644 (file)
@@ -5,7 +5,28 @@
 
 #include "adf_accel_devices.h"
 
+#if defined(CONFIG_PCI_IOV)
 void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev);
+#else
+static inline void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+}
+#endif
 
 typedef int (*adf_pf2vf_blkmsg_provider)(struct adf_accel_dev *accel_dev,
                                         u8 *buffer, u8 compat);
index 388e58bcbcaf2683228ae30e8aef99f35f3a513b..9ab93fbfefde9408826c44d59eae5c01411179f6 100644 (file)
@@ -291,6 +291,14 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr,
                vf_info->init = false;
                }
                break;
+       case ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE:
+               {
+               dev_dbg(&GET_DEV(accel_dev),
+                       "Restarting Complete received from VF%d\n", vf_nr);
+               vf_info->restarting = false;
+               vf_info->init = false;
+               }
+               break;
        case ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ:
        case ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ:
        case ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ:
index 1015155b637493fb81c9720b72b1324873020457..dc284a089c88954c100bf1a64348c456d6162353 100644 (file)
@@ -308,6 +308,12 @@ static bool adf_handle_pf2vf_msg(struct adf_accel_dev *accel_dev,
 
                adf_pf2vf_handle_pf_restarting(accel_dev);
                return false;
+       case ADF_PF2VF_MSGTYPE_RESTARTED:
+               dev_dbg(&GET_DEV(accel_dev), "Restarted message received from PF\n");
+               return true;
+       case ADF_PF2VF_MSGTYPE_FATAL_ERROR:
+               dev_err(&GET_DEV(accel_dev), "Fatal error received from PF\n");
+               return true;
        case ADF_PF2VF_MSGTYPE_VERSION_RESP:
        case ADF_PF2VF_MSGTYPE_BLKMSG_RESP:
        case ADF_PF2VF_MSGTYPE_RP_RESET_RESP:
index de1b214dba1f98b74593f6c7cac1350ca5782471..d4f2db3c53d8c0c09f636b66281ab4d28f87549f 100644 (file)
@@ -788,6 +788,24 @@ static void clear_sla(struct adf_rl *rl_data, struct rl_sla *sla)
        sla_type_arr[node_id] = NULL;
 }
 
+static void free_all_sla(struct adf_accel_dev *accel_dev)
+{
+       struct adf_rl *rl_data = accel_dev->rate_limiting;
+       int sla_id;
+
+       mutex_lock(&rl_data->rl_lock);
+
+       for (sla_id = 0; sla_id < RL_NODES_CNT_MAX; sla_id++) {
+               if (!rl_data->sla[sla_id])
+                       continue;
+
+               kfree(rl_data->sla[sla_id]);
+               rl_data->sla[sla_id] = NULL;
+       }
+
+       mutex_unlock(&rl_data->rl_lock);
+}
+
 /**
  * add_update_sla() - handles the creation and the update of an SLA
  * @accel_dev: pointer to acceleration device structure
@@ -1155,7 +1173,7 @@ void adf_rl_stop(struct adf_accel_dev *accel_dev)
                return;
 
        adf_sysfs_rl_rm(accel_dev);
-       adf_rl_remove_sla_all(accel_dev, true);
+       free_all_sla(accel_dev);
 }
 
 void adf_rl_exit(struct adf_accel_dev *accel_dev)
index f44025bb6f995d9bdf58bf9d6290fd5566c10192..87a70c00c41ee61c4e3dde0708fe4b3bcb4c8e16 100644 (file)
@@ -60,7 +60,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
                /* This ptr will be populated when VFs will be created */
                vf_info->accel_dev = accel_dev;
                vf_info->vf_nr = i;
-               vf_info->vf_compat_ver = 0;
 
                mutex_init(&vf_info->pf2vf_lock);
                ratelimit_state_init(&vf_info->vf2pf_ratelimit,
@@ -84,6 +83,32 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
        return pci_enable_sriov(pdev, totalvfs);
 }
 
+void adf_reenable_sriov(struct adf_accel_dev *accel_dev)
+{
+       struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
+       char cfg[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
+       unsigned long val = 0;
+
+       if (adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
+                                   ADF_SRIOV_ENABLED, cfg))
+               return;
+
+       if (!accel_dev->pf.vf_info)
+               return;
+
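+       /* Zero the PF cy/dc instance counts: with SRIOV on, all rings go to the VFs */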
+       if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY,
+                                       &val, ADF_DEC))
+               return;
+
+       if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC,
+                                       &val, ADF_DEC))
+               return;
+
+       set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
+       dev_dbg(&pdev->dev, "Re-enabling SRIOV\n");
+       adf_enable_sriov(accel_dev);
+}
+
 /**
  * adf_disable_sriov() - Disable SRIOV for the device
  * @accel_dev:  Pointer to accel device.
@@ -103,6 +128,7 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
                return;
 
        adf_pf2vf_notify_restarting(accel_dev);
+       adf_pf2vf_wait_for_restarting_complete(accel_dev);
        pci_disable_sriov(accel_to_pci_dev(accel_dev));
 
        /* Disable VF to PF interrupts */
@@ -115,8 +141,10 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
        for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++)
                mutex_destroy(&vf->pf2vf_lock);
 
-       kfree(accel_dev->pf.vf_info);
-       accel_dev->pf.vf_info = NULL;
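+       /* Keep vf_info across a restart so the VFs can be re-enabled afterwards */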
+       if (!test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) {
+               kfree(accel_dev->pf.vf_info);
+               accel_dev->pf.vf_info = NULL;
+       }
 }
 EXPORT_SYMBOL_GPL(adf_disable_sriov);
 
@@ -194,6 +222,10 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
        if (ret)
                return ret;
 
+       val = 1;
+       adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC, ADF_SRIOV_ENABLED,
+                                   &val, ADF_DEC);
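+       /* adf_reenable_sriov() keys off ADF_SRIOV_ENABLED to restore VFs after reset */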
+
        return numvfs;
 }
 EXPORT_SYMBOL_GPL(adf_sriov_configure);
index d450dad32c9e4e9be3ad635855ee2c58ab4819f6..4e7f70d4049d354bd1776611dbae079fe61f3511 100644 (file)
@@ -204,6 +204,42 @@ static ssize_t pm_idle_enabled_store(struct device *dev, struct device_attribute
 }
 static DEVICE_ATTR_RW(pm_idle_enabled);
 
+static ssize_t auto_reset_show(struct device *dev, struct device_attribute *attr,
+                              char *buf)
+{
+       char *auto_reset;
+       struct adf_accel_dev *accel_dev;
+
+       accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
+       if (!accel_dev)
+               return -EINVAL;
+
+       auto_reset = accel_dev->autoreset_on_error ? "on" : "off";
+
+       return sysfs_emit(buf, "%s\n", auto_reset);
+}
+
+static ssize_t auto_reset_store(struct device *dev, struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       struct adf_accel_dev *accel_dev;
+       bool enabled = false;
+       int ret;
+
+       ret = kstrtobool(buf, &enabled);
+       if (ret)
+               return ret;
+
+       accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
+       if (!accel_dev)
+               return -EINVAL;
+
+       accel_dev->autoreset_on_error = enabled;
+
+       return count;
+}
+static DEVICE_ATTR_RW(auto_reset);
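+/*
+ * Usage sketch (path per the QAT sysfs ABI; <BDF> is the device's PCI address):
+ *   echo on > /sys/bus/pci/devices/<BDF>/qat/auto_reset
+ */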
+
 static DEVICE_ATTR_RW(state);
 static DEVICE_ATTR_RW(cfg_services);
 
@@ -291,6 +327,7 @@ static struct attribute *qat_attrs[] = {
        &dev_attr_pm_idle_enabled.attr,
        &dev_attr_rp2srv.attr,
        &dev_attr_num_rps.attr,
+       &dev_attr_auto_reset.attr,
        NULL,
 };
 
index b05c3957a16019a63d3c6568c9573cef22d838b5..cdbb2d687b1b0dfc65226c7e058bcb24b2d430ec 100644 (file)
@@ -293,8 +293,6 @@ EXPORT_SYMBOL_GPL(adf_flush_vf_wq);
 /**
  * adf_init_vf_wq() - Init workqueue for VF
  *
- * Function init workqueue 'adf_vf_stop_wq' for VF.
- *
  * Return: 0 on success, error code otherwise.
  */
 int __init adf_init_vf_wq(void)
index bf8c0ee629175ec55c0aa064d91ca5bd7ce85d1d..2ba4aa22e09279bbbd5cdb213fb3991b0cca297a 100644 (file)
 #include "qat_compression.h"
 #include "qat_algs_send.h"
 
-#define QAT_RFC_1950_HDR_SIZE 2
-#define QAT_RFC_1950_FOOTER_SIZE 4
-#define QAT_RFC_1950_CM_DEFLATE 8
-#define QAT_RFC_1950_CM_DEFLATE_CINFO_32K 7
-#define QAT_RFC_1950_CM_MASK 0x0f
-#define QAT_RFC_1950_CM_OFFSET 4
-#define QAT_RFC_1950_DICT_MASK 0x20
-#define QAT_RFC_1950_COMP_HDR 0x785e
-
 static DEFINE_MUTEX(algs_lock);
 static unsigned int active_devs;
 
index 40c8e74d1cf9ed0d1dfaf98d0375a34c002ee980..101c6ea4167389a27814f4ac6cd7924570d9aaf6 100644 (file)
@@ -105,8 +105,8 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node)
 }
 
 /**
- * qat_crypto_vf_dev_config()
- *     create dev config required to create crypto inst.
+ * qat_crypto_vf_dev_config() - create dev config required to create
+ * crypto inst.
  *
  * @accel_dev: Pointer to acceleration device.
  *
index 7a3083debc2bb64248bfdc2aa6b9dbc3d7f79feb..59d472cb11e7501b294b95503a0e8c49f2bfcd13 100644 (file)
@@ -41,7 +41,7 @@
 static const char version[] =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
 
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("Niagara2 Crypto driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
index 70edf40bc523c0932bf8abb0b36682b0549d2ed3..f74b3c81ba6df73397a7a677a393a07b06aaf3cc 100644 (file)
@@ -371,6 +371,11 @@ static int rk_crypto_probe(struct platform_device *pdev)
        }
 
        crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true);
+       if (!crypto_info->engine) {
+               err = -ENOMEM;
+               goto err_crypto;
+       }
+
        crypto_engine_start(crypto_info->engine);
        init_completion(&crypto_info->complete);
 
index de53eddf6796b6c6ac6eafdeaee9a7ee03c979d3..cb92b7fa99c6f848ffee008c8cdaf0ead064682b 100644 (file)
@@ -225,11 +225,11 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
        struct virtio_crypto *vcrypto = ctx->vcrypto;
        struct virtio_crypto_op_data_req *req_data = vc_req->req_data;
        struct scatterlist *sgs[4], outhdr_sg, inhdr_sg, srcdata_sg, dstdata_sg;
-       void *src_buf = NULL, *dst_buf = NULL;
+       void *src_buf, *dst_buf = NULL;
        unsigned int num_out = 0, num_in = 0;
        int node = dev_to_node(&vcrypto->vdev->dev);
        unsigned long flags;
-       int ret = -ENOMEM;
+       int ret;
        bool verify = vc_akcipher_req->opcode == VIRTIO_CRYPTO_AKCIPHER_VERIFY;
        unsigned int src_len = verify ? req->src_len + req->dst_len : req->src_len;
 
@@ -240,7 +240,7 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
        /* src data */
        src_buf = kcalloc_node(src_len, 1, GFP_KERNEL, node);
        if (!src_buf)
-               goto err;
+               return -ENOMEM;
 
        if (verify) {
                /* for verify operation, both src and dst data work as OUT direction */
@@ -255,7 +255,7 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
                /* dst data */
                dst_buf = kcalloc_node(req->dst_len, 1, GFP_KERNEL, node);
                if (!dst_buf)
-                       goto err;
+                       goto free_src;
 
                sg_init_one(&dstdata_sg, dst_buf, req->dst_len);
                sgs[num_out + num_in++] = &dstdata_sg;
@@ -278,9 +278,9 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
        return 0;
 
 err:
-       kfree(src_buf);
        kfree(dst_buf);
-
+free_src:
+       kfree(src_buf);
        return -ENOMEM;
 }
 
index b909c6a2bf1c348a653a093335f1df4d2f6dc4ab..6a67d70e7f1c01c7ea1d06ac79363cd07e3da19f 100644 (file)
@@ -42,8 +42,6 @@ static void virtcrypto_ctrlq_callback(struct virtqueue *vq)
                        virtio_crypto_ctrlq_callback(vc_ctrl_req);
                        spin_lock_irqsave(&vcrypto->ctrl_lock, flags);
                }
-               if (unlikely(virtqueue_is_broken(vq)))
-                       break;
        } while (!virtqueue_enable_cb(vq));
        spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags);
 }
diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig
deleted file mode 100644 (file)
index b2c28b8..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config CRYPTO_DEV_VMX_ENCRYPT
-       tristate "Encryption acceleration support on P8 CPU"
-       depends on CRYPTO_DEV_VMX
-       select CRYPTO_AES
-       select CRYPTO_CBC
-       select CRYPTO_CTR
-       select CRYPTO_GHASH
-       select CRYPTO_XTS
-       default m
-       help
-         Support for VMX cryptographic acceleration instructions on Power8 CPU.
-         This module supports acceleration for AES and GHASH in hardware. If you
-         choose 'M' here, this module will be called vmx-crypto.
diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
deleted file mode 100644 (file)
index 7257b8c..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
-vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
-
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override flavour := linux-ppc64le
-else
-ifdef CONFIG_PPC64_ELF_ABI_V2
-override flavour := linux-ppc64-elfv2
-else
-override flavour := linux-ppc64
-endif
-endif
-
-quiet_cmd_perl = PERL    $@
-      cmd_perl = $(PERL) $< $(flavour) > $@
-
-targets += aesp8-ppc.S ghashp8-ppc.S
-
-$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
-       $(call if_changed,perl)
-
-OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
deleted file mode 100644 (file)
index b583898..0000000
+++ /dev/null
@@ -1,231 +0,0 @@
-#!/usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-
-# PowerPC assembler distiller by <appro>.
-
-my $flavour = shift;
-my $output = shift;
-open STDOUT,">$output" || die "can't open $output: $!";
-
-my %GLOBALS;
-my $dotinlocallabels=($flavour=~/linux/)?1:0;
-my $elfv2abi=(($flavour =~ /linux-ppc64le/) or ($flavour =~ /linux-ppc64-elfv2/))?1:0;
-my $dotfunctions=($elfv2abi=~1)?0:1;
-
-################################################################
-# directives which need special treatment on different platforms
-################################################################
-my $globl = sub {
-    my $junk = shift;
-    my $name = shift;
-    my $global = \$GLOBALS{$name};
-    my $ret;
-
-    $name =~ s|^[\.\_]||;
-    SWITCH: for ($flavour) {
-       /aix/           && do { $name = ".$name";
-                               last;
-                             };
-       /osx/           && do { $name = "_$name";
-                               last;
-                             };
-       /linux/
-                       && do { $ret = "_GLOBAL($name)";
-                               last;
-                             };
-    }
-
-    $ret = ".globl     $name\nalign 5\n$name:" if (!$ret);
-    $$global = $name;
-    $ret;
-};
-my $text = sub {
-    my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
-    $ret = ".abiversion        2\n".$ret       if ($elfv2abi);
-    $ret;
-};
-my $machine = sub {
-    my $junk = shift;
-    my $arch = shift;
-    if ($flavour =~ /osx/)
-    {  $arch =~ s/\"//g;
-       $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
-    }
-    ".machine  $arch";
-};
-my $size = sub {
-    if ($flavour =~ /linux/)
-    {  shift;
-       my $name = shift; $name =~ s|^[\.\_]||;
-       my $ret  = ".size       $name,.-".($dotfunctions?".":"").$name;
-       $ret .= "\n.size        .$name,.-.$name" if ($dotfunctions);
-       $ret;
-    }
-    else
-    {  "";     }
-};
-my $asciz = sub {
-    shift;
-    my $line = join(",",@_);
-    if ($line =~ /^"(.*)"$/)
-    {  ".byte  " . join(",",unpack("C*",$1),0) . "\n.align     2";     }
-    else
-    {  "";     }
-};
-my $quad = sub {
-    shift;
-    my @ret;
-    my ($hi,$lo);
-    for (@_) {
-       if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
-       {  $hi=$1?"0x$1":"0"; $lo="0x$2";  }
-       elsif (/^([0-9]+)$/o)
-       {  $hi=$1>>32; $lo=$1&0xffffffff;  } # error-prone with 32-bit perl
-       else
-       {  $hi=undef; $lo=$_; }
-
-       if (defined($hi))
-       {  push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo");  }
-       else
-       {  push(@ret,".quad     $lo");  }
-    }
-    join("\n",@ret);
-};
-
-################################################################
-# simplified mnemonics not handled by at least one assembler
-################################################################
-my $cmplw = sub {
-    my $f = shift;
-    my $cr = 0; $cr = shift if ($#_>1);
-    # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
-    ($flavour =~ /linux.*32/) ?
-       "       .long   ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
-       "       cmplw   ".join(',',$cr,@_);
-};
-my $bdnz = sub {
-    my $f = shift;
-    my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
-    "  bc      $bo,0,".shift;
-} if ($flavour!~/linux/);
-my $bltlr = sub {
-    my $f = shift;
-    my $bo = $f=~/\-/ ? 12+2 : 12;     # optional "not to be taken" hint
-    ($flavour =~ /linux/) ?            # GNU as doesn't allow most recent hints
-       "       .long   ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
-       "       bclr    $bo,0";
-};
-my $bnelr = sub {
-    my $f = shift;
-    my $bo = $f=~/\-/ ? 4+2 : 4;       # optional "not to be taken" hint
-    ($flavour =~ /linux/) ?            # GNU as doesn't allow most recent hints
-       "       .long   ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
-       "       bclr    $bo,2";
-};
-my $beqlr = sub {
-    my $f = shift;
-    my $bo = $f=~/-/ ? 12+2 : 12;      # optional "not to be taken" hint
-    ($flavour =~ /linux/) ?            # GNU as doesn't allow most recent hints
-       "       .long   ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
-       "       bclr    $bo,2";
-};
-# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
-# arguments is 64, with "operand out of range" error.
-my $extrdi = sub {
-    my ($f,$ra,$rs,$n,$b) = @_;
-    $b = ($b+$n)&63; $n = 64-$n;
-    "  rldicl  $ra,$rs,$b,$n";
-};
-my $vmr = sub {
-    my ($f,$vx,$vy) = @_;
-    "  vor     $vx,$vy,$vy";
-};
-
-# Some ABIs specify vrsave, special-purpose register #256, as reserved
-# for system use.
-my $no_vrsave = ($elfv2abi);
-my $mtspr = sub {
-    my ($f,$idx,$ra) = @_;
-    if ($idx == 256 && $no_vrsave) {
-       "       or      $ra,$ra,$ra";
-    } else {
-       "       mtspr   $idx,$ra";
-    }
-};
-my $mfspr = sub {
-    my ($f,$rd,$idx) = @_;
-    if ($idx == 256 && $no_vrsave) {
-       "       li      $rd,-1";
-    } else {
-       "       mfspr   $rd,$idx";
-    }
-};
-
-# PowerISA 2.06 stuff
-sub vsxmem_op {
-    my ($f, $vrt, $ra, $rb, $op) = @_;
-    "  .long   ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
-}
-# made-up unaligned memory reference AltiVec/VMX instructions
-my $lvx_u      = sub { vsxmem_op(@_, 844); };  # lxvd2x
-my $stvx_u     = sub { vsxmem_op(@_, 972); };  # stxvd2x
-my $lvdx_u     = sub { vsxmem_op(@_, 588); };  # lxsdx
-my $stvdx_u    = sub { vsxmem_op(@_, 716); };  # stxsdx
-my $lvx_4w     = sub { vsxmem_op(@_, 780); };  # lxvw4x
-my $stvx_4w    = sub { vsxmem_op(@_, 908); };  # stxvw4x
-
-# PowerISA 2.07 stuff
-sub vcrypto_op {
-    my ($f, $vrt, $vra, $vrb, $op) = @_;
-    "  .long   ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
-}
-my $vcipher    = sub { vcrypto_op(@_, 1288); };
-my $vcipherlast        = sub { vcrypto_op(@_, 1289); };
-my $vncipher   = sub { vcrypto_op(@_, 1352); };
-my $vncipherlast= sub { vcrypto_op(@_, 1353); };
-my $vsbox      = sub { vcrypto_op(@_, 0, 1480); };
-my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
-my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
-my $vpmsumb    = sub { vcrypto_op(@_, 1032); };
-my $vpmsumd    = sub { vcrypto_op(@_, 1224); };
-my $vpmsubh    = sub { vcrypto_op(@_, 1096); };
-my $vpmsumw    = sub { vcrypto_op(@_, 1160); };
-my $vaddudm    = sub { vcrypto_op(@_, 192);  };
-my $vadduqm    = sub { vcrypto_op(@_, 256);  };
-
-my $mtsle      = sub {
-    my ($f, $arg) = @_;
-    "  .long   ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
-};
-
-print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
-
-while($line=<>) {
-
-    $line =~ s|[#!;].*$||;     # get rid of asm-style comments...
-    $line =~ s|/\*.*\*/||;     # ... and C-style comments...
-    $line =~ s|^\s+||;         # ... and skip white spaces in beginning...
-    $line =~ s|\s+$||;         # ... and at the end
-
-    {
-       $line =~ s|\b\.L(\w+)|L$1|g;    # common denominator for Locallabel
-       $line =~ s|\bL(\w+)|\.L$1|g     if ($dotinlocallabels);
-    }
-
-    {
-       $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
-       my $c = $1; $c = "\t" if ($c eq "");
-       my $mnemonic = $2;
-       my $f = $3;
-       my $opcode = eval("\$$mnemonic");
-       $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
-       if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
-       elsif ($mnemonic)           { $line = $c.$mnemonic.$f."\t".$line; }
-    }
-
-    print $line if ($line);
-    print "\n";
-}
-
-close STDOUT;
index 3c205324b22b676be150d44957c52d299fdd3a5d..e6140571884091ed992b13c3410f13b763ff9ae6 100644 (file)
@@ -231,7 +231,10 @@ static int zynqmp_handle_aes_req(struct crypto_engine *engine,
                err = zynqmp_aes_aead_cipher(areq);
        }
 
+       local_bh_disable();
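+       /* crypto completion callbacks expect to run with BHs disabled */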
        crypto_finalize_aead_request(engine, areq, err);
+       local_bh_enable();
+
        return 0;
 }
 
index 1a3e6aafbdcc33dd2aae8731be8a5ad52cc0891e..af5cb818f84d6bf566e6c0a84763d8239d64700f 100644 (file)
@@ -530,13 +530,15 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
        if (kstrtou32(acpi_device_uid(hb), 0, &uid))
                return -EINVAL;
 
-       rc = acpi_get_genport_coordinates(uid, &dport->hb_coord);
+       rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
        if (rc < 0)
                return rc;
 
        /* Adjust back to picoseconds from nanoseconds */
-       dport->hb_coord.read_latency *= 1000;
-       dport->hb_coord.write_latency *= 1000;
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               dport->hb_coord[i].read_latency *= 1000;
+               dport->hb_coord[i].write_latency *= 1000;
+       }
 
        return 0;
 }
index 08fd0baea7a0eb0f1c1442e9f454e3c32736d19c..eddbbe21450ca9dca5e71bf6ec14866cde0935d3 100644 (file)
@@ -9,6 +9,7 @@
 #include "cxlmem.h"
 #include "core.h"
 #include "cxl.h"
+#include "core.h"
 
 struct dsmas_entry {
        struct range dpa_range;
@@ -149,28 +150,35 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port,
        int rc;
 
        rc = cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, cdat_dsmas_handler,
-                             dsmas_xa, port->cdat.table);
+                             dsmas_xa, port->cdat.table, port->cdat.length);
        rc = cdat_table_parse_output(rc);
        if (rc)
                return rc;
 
        rc = cdat_table_parse(ACPI_CDAT_TYPE_DSLBIS, cdat_dslbis_handler,
-                             dsmas_xa, port->cdat.table);
+                             dsmas_xa, port->cdat.table, port->cdat.length);
        return cdat_table_parse_output(rc);
 }
 
 static int cxl_port_perf_data_calculate(struct cxl_port *port,
                                        struct xarray *dsmas_xa)
 {
-       struct access_coordinate c;
+       struct access_coordinate ep_c;
+       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
        struct dsmas_entry *dent;
        int valid_entries = 0;
        unsigned long index;
        int rc;
 
-       rc = cxl_endpoint_get_perf_coordinates(port, &c);
+       rc = cxl_endpoint_get_perf_coordinates(port, &ep_c);
        if (rc) {
-               dev_dbg(&port->dev, "Failed to retrieve perf coordinates.\n");
+               dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
+               return rc;
+       }
+
+       rc = cxl_hb_get_perf_coordinates(port, coord);
+       if (rc) {
+               dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
                return rc;
        }
 
@@ -185,18 +193,19 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
        xa_for_each(dsmas_xa, index, dent) {
                int qos_class;
 
-               dent->coord.read_latency = dent->coord.read_latency +
-                                          c.read_latency;
-               dent->coord.write_latency = dent->coord.write_latency +
-                                           c.write_latency;
-               dent->coord.read_bandwidth = min_t(int, c.read_bandwidth,
-                                                  dent->coord.read_bandwidth);
-               dent->coord.write_bandwidth = min_t(int, c.write_bandwidth,
-                                                   dent->coord.write_bandwidth);
-
+               cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c);
+               /*
+                * Keep the host bridge coordinates separate from the DSMAS
+                * coordinates so that access classes 0 and 1 can be
+                * calculated for the region later.
+                */
+               cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU],
+                                       &coord[ACCESS_COORDINATE_CPU],
+                                       &dent->coord);
                dent->entries = 1;
-               rc = cxl_root->ops->qos_class(cxl_root, &dent->coord, 1,
-                                             &qos_class);
+               rc = cxl_root->ops->qos_class(cxl_root,
+                                             &coord[ACCESS_COORDINATE_CPU],
+                                             1, &qos_class);
                if (rc != 1)
                        continue;
 
@@ -389,36 +398,38 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_parse_cdat, CXL);
 static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
                               const unsigned long end)
 {
+       struct acpi_cdat_sslbis_table {
+               struct acpi_cdat_header header;
+               struct acpi_cdat_sslbis sslbis_header;
+               struct acpi_cdat_sslbe entries[];
+       } *tbl = (struct acpi_cdat_sslbis_table *)header;
+       int size = sizeof(header->cdat) + sizeof(tbl->sslbis_header);
        struct acpi_cdat_sslbis *sslbis;
-       int size = sizeof(header->cdat) + sizeof(*sslbis);
        struct cxl_port *port = arg;
        struct device *dev = &port->dev;
-       struct acpi_cdat_sslbe *entry;
        int remain, entries, i;
        u16 len;
 
        len = le16_to_cpu((__force __le16)header->cdat.length);
        remain = len - size;
-       if (!remain || remain % sizeof(*entry) ||
+       if (!remain || remain % sizeof(tbl->entries[0]) ||
            (unsigned long)header + len > end) {
                dev_warn(dev, "Malformed SSLBIS table length: (%u)\n", len);
                return -EINVAL;
        }
 
-       /* Skip common header */
-       sslbis = (struct acpi_cdat_sslbis *)((unsigned long)header +
-                                            sizeof(header->cdat));
-
+       sslbis = &tbl->sslbis_header;
        /* Unrecognized data type, we can skip */
        if (sslbis->data_type > ACPI_HMAT_WRITE_BANDWIDTH)
                return 0;
 
-       entries = remain / sizeof(*entry);
-       entry = (struct acpi_cdat_sslbe *)((unsigned long)header + sizeof(*sslbis));
+       entries = remain / sizeof(tbl->entries[0]);
+       if (struct_size(tbl, entries, entries) != len)
+               return -EINVAL;
 
        for (i = 0; i < entries; i++) {
-               u16 x = le16_to_cpu((__force __le16)entry->portx_id);
-               u16 y = le16_to_cpu((__force __le16)entry->porty_id);
+               u16 x = le16_to_cpu((__force __le16)tbl->entries[i].portx_id);
+               u16 y = le16_to_cpu((__force __le16)tbl->entries[i].porty_id);
                __le64 le_base;
                __le16 le_val;
                struct cxl_dport *dport;
@@ -448,8 +459,8 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
                        break;
                }
 
-               le_base = (__force __le64)sslbis->entry_base_unit;
-               le_val = (__force __le16)entry->latency_or_bandwidth;
+               le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
+               le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
 
                if (check_mul_overflow(le64_to_cpu(le_base),
                                       le16_to_cpu(le_val), &val))
@@ -462,8 +473,6 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
                                                          sslbis->data_type,
                                                          val);
                }
-
-               entry++;
        }
 
        return 0;
@@ -477,11 +486,108 @@ void cxl_switch_parse_cdat(struct cxl_port *port)
                return;
 
        rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler,
-                             port, port->cdat.table);
+                             port, port->cdat.table, port->cdat.length);
        rc = cdat_table_parse_output(rc);
        if (rc)
                dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);
 
+/**
+ * cxl_coordinates_combine - Combine the two input coordinates
+ *
+ * @out: Output coordinate of c1 and c2 combined
+ * @c1: first set of input coordinates
+ * @c2: second set of input coordinates
+ */
+void cxl_coordinates_combine(struct access_coordinate *out,
+                            struct access_coordinate *c1,
+                            struct access_coordinate *c2)
+{
+       if (c1->write_bandwidth && c2->write_bandwidth)
+               out->write_bandwidth = min(c1->write_bandwidth,
+                                          c2->write_bandwidth);
+       out->write_latency = c1->write_latency + c2->write_latency;
+
+       if (c1->read_bandwidth && c2->read_bandwidth)
+               out->read_bandwidth = min(c1->read_bandwidth,
+                                         c2->read_bandwidth);
+       out->read_latency = c1->read_latency + c2->read_latency;
+}
+
 MODULE_IMPORT_NS(CXL);
+
+void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
+                                   struct cxl_endpoint_decoder *cxled)
+{
+       struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+       struct cxl_port *port = cxlmd->endpoint;
+       struct cxl_dev_state *cxlds = cxlmd->cxlds;
+       struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+       struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
+       struct access_coordinate coord;
+       struct range dpa = {
+                       .start = cxled->dpa_res->start,
+                       .end = cxled->dpa_res->end,
+       };
+       struct cxl_dpa_perf *perf;
+       int rc;
+
+       switch (cxlr->mode) {
+       case CXL_DECODER_RAM:
+               perf = &mds->ram_perf;
+               break;
+       case CXL_DECODER_PMEM:
+               perf = &mds->pmem_perf;
+               break;
+       default:
+               return;
+       }
+
+       lockdep_assert_held(&cxl_dpa_rwsem);
+
+       if (!range_contains(&perf->dpa_range, &dpa))
+               return;
+
+       rc = cxl_hb_get_perf_coordinates(port, hb_coord);
+       if (rc) {
+               dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
+               return;
+       }
+
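+       /* Fold the host bridge coordinates into the region totals for each access class */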
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               /* Pick up the host bridge coordinates */
+               cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord);
+
+               /* Get total bandwidth and the worst latency for the cxl region */
+               cxlr->coord[i].read_latency = max_t(unsigned int,
+                                                   cxlr->coord[i].read_latency,
+                                                   coord.read_latency);
+               cxlr->coord[i].write_latency = max_t(unsigned int,
+                                                    cxlr->coord[i].write_latency,
+                                                    coord.write_latency);
+               cxlr->coord[i].read_bandwidth += coord.read_bandwidth;
+               cxlr->coord[i].write_bandwidth += coord.write_bandwidth;
+
+               /*
+                * Convert latency from picoseconds to nanoseconds to be
+                * consistent with the latency coordinates computed by the
+                * HMAT_REPORTING code.
+                */
+               cxlr->coord[i].read_latency =
+                       DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000);
+               cxlr->coord[i].write_latency =
+                       DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000);
+       }
+}
+
+int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
+                                      enum access_coordinate_class access)
+{
+       return hmat_update_target_coordinates(nid, &cxlr->coord[access], access);
+}
+
+bool cxl_need_node_perf_attrs_update(int nid)
+{
+       return !acpi_node_backed_by_real_pxm(nid);
+}
index 3b64fb1b9ed058055fa80220fc2b83b109cc6e17..bc5a95665aa0af5490118e77e912dd2f6872fcde 100644 (file)
@@ -90,4 +90,8 @@ enum cxl_poison_trace_type {
 
 long cxl_pci_get_latency(struct pci_dev *pdev);
 
+int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
+                                      enum access_coordinate_class access);
+bool cxl_need_node_perf_attrs_update(int nid);
+
 #endif /* __CXL_CORE_H__ */
index e9e6c81ce034a8ffaba105132d5b9ecc59d51880..0df09bd79408842441d4f865c9be052b7564a3c9 100644 (file)
@@ -518,14 +518,14 @@ EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
         FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
 
 static int cxl_cdat_get_length(struct device *dev,
-                              struct pci_doe_mb *cdat_doe,
+                              struct pci_doe_mb *doe_mb,
                               size_t *length)
 {
        __le32 request = CDAT_DOE_REQ(0);
        __le32 response[2];
        int rc;
 
-       rc = pci_doe(cdat_doe, PCI_DVSEC_VENDOR_ID_CXL,
+       rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL,
                     CXL_DOE_PROTOCOL_TABLE_ACCESS,
                     &request, sizeof(request),
                     &response, sizeof(response));
@@ -543,56 +543,58 @@ static int cxl_cdat_get_length(struct device *dev,
 }
 
 static int cxl_cdat_read_table(struct device *dev,
-                              struct pci_doe_mb *cdat_doe,
-                              void *cdat_table, size_t *cdat_length)
+                              struct pci_doe_mb *doe_mb,
+                              struct cdat_doe_rsp *rsp, size_t *length)
 {
-       size_t length = *cdat_length + sizeof(__le32);
-       __le32 *data = cdat_table;
-       int entry_handle = 0;
+       size_t received, remaining = *length;
+       unsigned int entry_handle = 0;
+       union cdat_data *data;
        __le32 saved_dw = 0;
 
        do {
                __le32 request = CDAT_DOE_REQ(entry_handle);
-               struct cdat_entry_header *entry;
-               size_t entry_dw;
                int rc;
 
-               rc = pci_doe(cdat_doe, PCI_DVSEC_VENDOR_ID_CXL,
+               rc = pci_doe(doe_mb, PCI_DVSEC_VENDOR_ID_CXL,
                             CXL_DOE_PROTOCOL_TABLE_ACCESS,
                             &request, sizeof(request),
-                            data, length);
+                            rsp, sizeof(*rsp) + remaining);
                if (rc < 0) {
                        dev_err(dev, "DOE failed: %d", rc);
                        return rc;
                }
 
-               /* 1 DW Table Access Response Header + CDAT entry */
-               entry = (struct cdat_entry_header *)(data + 1);
-               if ((entry_handle == 0 &&
-                    rc != sizeof(__le32) + sizeof(struct cdat_header)) ||
-                   (entry_handle > 0 &&
-                    (rc < sizeof(__le32) + sizeof(*entry) ||
-                     rc != sizeof(__le32) + le16_to_cpu(entry->length))))
+               if (rc < sizeof(*rsp))
                        return -EIO;
 
+               data = (union cdat_data *)rsp->data;
+               received = rc - sizeof(*rsp);
+
+               if (entry_handle == 0) {
+                       if (received != sizeof(data->header))
+                               return -EIO;
+               } else {
+                       if (received < sizeof(data->entry) ||
+                           received != le16_to_cpu(data->entry.length))
+                               return -EIO;
+               }
+
                /* Get the CXL table access header entry handle */
                entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
-                                        le32_to_cpu(data[0]));
-               entry_dw = rc / sizeof(__le32);
-               /* Skip Header */
-               entry_dw -= 1;
+                                        le32_to_cpu(rsp->doe_header));
+
                /*
                 * Table Access Response Header overwrote the last DW of
                 * previous entry, so restore that DW
                 */
-               *data = saved_dw;
-               length -= entry_dw * sizeof(__le32);
-               data += entry_dw;
-               saved_dw = *data;
+               rsp->doe_header = saved_dw;
+               remaining -= received;
+               rsp = (void *)rsp + received;
+               saved_dw = rsp->doe_header;
        } while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
 
        /* Length in CDAT header may exceed concatenation of CDAT entries */
-       *cdat_length -= length - sizeof(__le32);
+       *length -= remaining;
 
        return 0;
 }
@@ -617,11 +619,11 @@ void read_cdat_data(struct cxl_port *port)
 {
        struct device *uport = port->uport_dev;
        struct device *dev = &port->dev;
-       struct pci_doe_mb *cdat_doe;
+       struct pci_doe_mb *doe_mb;
        struct pci_dev *pdev = NULL;
        struct cxl_memdev *cxlmd;
-       size_t cdat_length;
-       void *cdat_table, *cdat_buf;
+       struct cdat_doe_rsp *buf;
+       size_t table_length, length;
        int rc;
 
        if (is_cxl_memdev(uport)) {
@@ -638,39 +640,48 @@ void read_cdat_data(struct cxl_port *port)
        if (!pdev)
                return;
 
-       cdat_doe = pci_find_doe_mailbox(pdev, PCI_DVSEC_VENDOR_ID_CXL,
-                                       CXL_DOE_PROTOCOL_TABLE_ACCESS);
-       if (!cdat_doe) {
+       doe_mb = pci_find_doe_mailbox(pdev, PCI_DVSEC_VENDOR_ID_CXL,
+                                     CXL_DOE_PROTOCOL_TABLE_ACCESS);
+       if (!doe_mb) {
                dev_dbg(dev, "No CDAT mailbox\n");
                return;
        }
 
        port->cdat_available = true;
 
-       if (cxl_cdat_get_length(dev, cdat_doe, &cdat_length)) {
+       if (cxl_cdat_get_length(dev, doe_mb, &length)) {
                dev_dbg(dev, "No CDAT length\n");
                return;
        }
 
-       cdat_buf = devm_kzalloc(dev, cdat_length + sizeof(__le32), GFP_KERNEL);
-       if (!cdat_buf)
-               return;
+       /*
+        * The beginning of the CDAT buffer needs space for an additional
+        * 4 bytes for the DOE header. Table data starts afterwards.
+        */
+       buf = devm_kzalloc(dev, sizeof(*buf) + length, GFP_KERNEL);
+       if (!buf)
+               goto err;
+
+       table_length = length;
 
-       rc = cxl_cdat_read_table(dev, cdat_doe, cdat_buf, &cdat_length);
+       rc = cxl_cdat_read_table(dev, doe_mb, buf, &length);
        if (rc)
                goto err;
 
-       cdat_table = cdat_buf + sizeof(__le32);
-       if (cdat_checksum(cdat_table, cdat_length))
+       if (table_length != length)
+               dev_warn(dev, "Malformed CDAT table length (%zu:%zu), discarding trailing data\n",
+                       table_length, length);
+
+       if (cdat_checksum(buf->data, length))
                goto err;
 
-       port->cdat.table = cdat_table;
-       port->cdat.length = cdat_length;
-       return;
+       port->cdat.table = buf->data;
+       port->cdat.length = length;
 
+       return;
 err:
        /* Don't leave table data allocated on error */
-       devm_kfree(dev, cdat_buf);
+       devm_kfree(dev, buf);
        dev_err(dev, "Failed to read/validate CDAT.\n");
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
index e59d9d37aa65009c97326fa93ec869dee709804d..2b0cab556072f560420f7f7bf4d0bcddd0a01b4a 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/platform_device.h>
 #include <linux/memregion.h>
 #include <linux/workqueue.h>
+#include <linux/einj-cxl.h>
 #include <linux/debugfs.h>
 #include <linux/device.h>
 #include <linux/module.h>
@@ -793,6 +794,40 @@ static int cxl_dport_setup_regs(struct device *host, struct cxl_dport *dport,
        return rc;
 }
 
+DEFINE_SHOW_ATTRIBUTE(einj_cxl_available_error_type);
+
+static int cxl_einj_inject(void *data, u64 type)
+{
+       struct cxl_dport *dport = data;
+
+       if (dport->rch)
+               return einj_cxl_inject_rch_error(dport->rcrb.base, type);
+
+       return einj_cxl_inject_error(to_pci_dev(dport->dport_dev), type);
+}
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject,
+                        "0x%llx\n");
+
+static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport)
+{
+       struct dentry *dir;
+
+       if (!einj_cxl_is_initialized())
+               return;
+
+       /*
+        * dport_dev needs to be a PCIe port for CXL 2.0+ ports because
+        * EINJ expects a dport SBDF to be specified for 2.0 error injection.
+        */
+       if (!dport->rch && !dev_is_pci(dport->dport_dev))
+               return;
+
+       dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev));
+
+       debugfs_create_file("einj_inject", 0200, dir, dport,
+                           &cxl_einj_inject_fops);
+}
+
 static struct cxl_port *__devm_cxl_add_port(struct device *host,
                                            struct device *uport_dev,
                                            resource_size_t component_reg_phys,
@@ -822,6 +857,7 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host,
                 */
                port->reg_map = cxlds->reg_map;
                port->reg_map.host = &port->dev;
+               cxlmd->endpoint = port;
        } else if (parent_dport) {
                rc = dev_set_name(dev, "port%d", port->id);
                if (rc)
@@ -1149,6 +1185,8 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
        if (dev_is_pci(dport_dev))
                dport->link_latency = cxl_pci_get_latency(to_pci_dev(dport_dev));
 
+       cxl_debugfs_create_dport_dir(dport);
+
        return dport;
 }
 
@@ -1374,7 +1412,6 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
 
        get_device(host);
        get_device(&endpoint->dev);
-       cxlmd->endpoint = endpoint;
        cxlmd->depth = endpoint->depth;
        return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
 }
@@ -2096,18 +2133,36 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
 }
 EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
 
-static void combine_coordinates(struct access_coordinate *c1,
-                               struct access_coordinate *c2)
+/**
+ * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator
+ *                              and host bridge
+ *
+ * @port: endpoint cxl_port
+ * @coord: output access coordinates
+ *
+ * Return: errno on failure, 0 on success.
+ */
+int cxl_hb_get_perf_coordinates(struct cxl_port *port,
+                               struct access_coordinate *coord)
 {
-               if (c2->write_bandwidth)
-                       c1->write_bandwidth = min(c1->write_bandwidth,
-                                                 c2->write_bandwidth);
-               c1->write_latency += c2->write_latency;
+       struct cxl_port *iter = port;
+       struct cxl_dport *dport;
+
+       if (!is_cxl_endpoint(port))
+               return -EINVAL;
 
-               if (c2->read_bandwidth)
-                       c1->read_bandwidth = min(c1->read_bandwidth,
-                                                c2->read_bandwidth);
-               c1->read_latency += c2->read_latency;
+       dport = iter->parent_dport;
+       while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
+               iter = to_cxl_port(iter->dev.parent);
+               dport = iter->parent_dport;
+       }
+
+       coord[ACCESS_COORDINATE_LOCAL] =
+               dport->hb_coord[ACCESS_COORDINATE_LOCAL];
+       coord[ACCESS_COORDINATE_CPU] =
+               dport->hb_coord[ACCESS_COORDINATE_CPU];
+
+       return 0;
 }
 
 /**
@@ -2143,7 +2198,7 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
         * nothing to gather.
         */
        while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
-               combine_coordinates(&c, &dport->sw_coord);
+               cxl_coordinates_combine(&c, &c, &dport->sw_coord);
                c.write_latency += dport->link_latency;
                c.read_latency += dport->link_latency;
 
@@ -2151,9 +2206,6 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                dport = iter->parent_dport;
        }
 
-       /* Augment with the generic port (host bridge) perf data */
-       combine_coordinates(&c, &dport->hb_coord);
-
        /* Get the calculated PCI paths bandwidth */
        pdev = to_pci_dev(port->uport_dev->parent);
        bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL);
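
cxl_coordinates_combine() itself is introduced elsewhere in this series; judging from the combine_coordinates() body removed above, the three-argument form reduces bandwidth to the minimum along the path and accumulates latency, roughly:

	void cxl_coordinates_combine(struct access_coordinate *out,
				     struct access_coordinate *c1,
				     struct access_coordinate *c2)
	{
		if (c1->write_bandwidth && c2->write_bandwidth)
			out->write_bandwidth = min(c1->write_bandwidth,
						   c2->write_bandwidth);
		out->write_latency = c1->write_latency + c2->write_latency;

		if (c1->read_bandwidth && c2->read_bandwidth)
			out->read_bandwidth = min(c1->read_bandwidth,
						  c2->read_bandwidth);
		out->read_latency = c1->read_latency + c2->read_latency;
	}
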
@@ -2221,6 +2273,10 @@ static __init int cxl_core_init(void)
 
        cxl_debugfs = debugfs_create_dir("cxl", NULL);
 
+       if (einj_cxl_is_initialized())
+               debugfs_create_file("einj_types", 0400, cxl_debugfs, NULL,
+                                   &einj_cxl_available_error_type_fops);
+
        cxl_mbox_init();
 
        rc = cxl_memdev_init();
index 4c7fd2d5cccb2965eb528cbc26bb261ef01dcdce..5c186e0a39b965835bdd775aa3c841873b172633 100644
@@ -4,6 +4,7 @@
 #include <linux/genalloc.h>
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/memory.h>
 #include <linux/slab.h>
 #include <linux/uuid.h>
 #include <linux/sort.h>
 
 static struct cxl_region *to_cxl_region(struct device *dev);
 
+#define __ACCESS_ATTR_RO(_level, _name) {                              \
+       .attr   = { .name = __stringify(_name), .mode = 0444 },         \
+       .show   = _name##_access##_level##_show,                        \
+}
+
+#define ACCESS_DEVICE_ATTR_RO(level, name)     \
+       struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name)
+
+#define ACCESS_ATTR_RO(level, attrib)                                        \
+static ssize_t attrib##_access##level##_show(struct device *dev,             \
+                                         struct device_attribute *attr,      \
+                                         char *buf)                          \
+{                                                                            \
+       struct cxl_region *cxlr = to_cxl_region(dev);                         \
+                                                                             \
+       if (cxlr->coord[level].attrib == 0)                                   \
+               return -ENOENT;                                               \
+                                                                             \
+       return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib);            \
+}                                                                            \
+static ACCESS_DEVICE_ATTR_RO(level, attrib)
+
+ACCESS_ATTR_RO(0, read_bandwidth);
+ACCESS_ATTR_RO(0, read_latency);
+ACCESS_ATTR_RO(0, write_bandwidth);
+ACCESS_ATTR_RO(0, write_latency);
+
+#define ACCESS_ATTR_DECLARE(level, attrib)     \
+       (&dev_attr_access##level##_##attrib.attr)
+
+static struct attribute *access0_coordinate_attrs[] = {
+       ACCESS_ATTR_DECLARE(0, read_bandwidth),
+       ACCESS_ATTR_DECLARE(0, write_bandwidth),
+       ACCESS_ATTR_DECLARE(0, read_latency),
+       ACCESS_ATTR_DECLARE(0, write_latency),
+       NULL
+};
+
+ACCESS_ATTR_RO(1, read_bandwidth);
+ACCESS_ATTR_RO(1, read_latency);
+ACCESS_ATTR_RO(1, write_bandwidth);
+ACCESS_ATTR_RO(1, write_latency);
+
+static struct attribute *access1_coordinate_attrs[] = {
+       ACCESS_ATTR_DECLARE(1, read_bandwidth),
+       ACCESS_ATTR_DECLARE(1, write_bandwidth),
+       ACCESS_ATTR_DECLARE(1, read_latency),
+       ACCESS_ATTR_DECLARE(1, write_latency),
+       NULL
+};
+
+#define ACCESS_VISIBLE(level)                                          \
+static umode_t cxl_region_access##level##_coordinate_visible(          \
+               struct kobject *kobj, struct attribute *a, int n)       \
+{                                                                      \
+       struct device *dev = kobj_to_dev(kobj);                         \
+       struct cxl_region *cxlr = to_cxl_region(dev);                   \
+                                                                       \
+       if (a == &dev_attr_access##level##_read_latency.attr &&         \
+           cxlr->coord[level].read_latency == 0)                       \
+               return 0;                                               \
+                                                                       \
+       if (a == &dev_attr_access##level##_write_latency.attr &&        \
+           cxlr->coord[level].write_latency == 0)                      \
+               return 0;                                               \
+                                                                       \
+       if (a == &dev_attr_access##level##_read_bandwidth.attr &&       \
+           cxlr->coord[level].read_bandwidth == 0)                     \
+               return 0;                                               \
+                                                                       \
+       if (a == &dev_attr_access##level##_write_bandwidth.attr &&      \
+           cxlr->coord[level].write_bandwidth == 0)                    \
+               return 0;                                               \
+                                                                       \
+       return a->mode;                                                 \
+}
+
+ACCESS_VISIBLE(0);
+ACCESS_VISIBLE(1);
+
+static const struct attribute_group cxl_region_access0_coordinate_group = {
+       .name = "access0",
+       .attrs = access0_coordinate_attrs,
+       .is_visible = cxl_region_access0_coordinate_visible,
+};
+
+static const struct attribute_group *get_cxl_region_access0_group(void)
+{
+       return &cxl_region_access0_coordinate_group;
+}
+
+static const struct attribute_group cxl_region_access1_coordinate_group = {
+       .name = "access1",
+       .attrs = access1_coordinate_attrs,
+       .is_visible = cxl_region_access1_coordinate_visible,
+};
+
+static const struct attribute_group *get_cxl_region_access1_group(void)
+{
+       return &cxl_region_access1_coordinate_group;
+}
+
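
The token pasting above is dense, so here is one instantiation expanded by hand; ACCESS_ATTR_RO(0, read_bandwidth) should come out to roughly the following, surfacing as access0/read_bandwidth under the region device:

	static ssize_t read_bandwidth_access0_show(struct device *dev,
						   struct device_attribute *attr,
						   char *buf)
	{
		struct cxl_region *cxlr = to_cxl_region(dev);

		if (cxlr->coord[0].read_bandwidth == 0)
			return -ENOENT;

		return sysfs_emit(buf, "%u\n", cxlr->coord[0].read_bandwidth);
	}
	static struct device_attribute dev_attr_access0_read_bandwidth = {
		.attr = { .name = "read_bandwidth", .mode = 0444 },
		.show = read_bandwidth_access0_show,
	};
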
 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
                         char *buf)
 {
@@ -1752,6 +1855,8 @@ static int cxl_region_attach(struct cxl_region *cxlr,
                return -EINVAL;
        }
 
+       cxl_region_perf_data_calculate(cxlr, cxled);
+
        if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
                int i;
 
@@ -2067,6 +2172,8 @@ static const struct attribute_group *region_groups[] = {
        &cxl_base_attribute_group,
        &cxl_region_group,
        &cxl_region_target_group,
+       &cxl_region_access0_coordinate_group,
+       &cxl_region_access1_coordinate_group,
        NULL,
 };
 
@@ -2120,6 +2227,7 @@ static void unregister_region(void *_cxlr)
        struct cxl_region_params *p = &cxlr->params;
        int i;
 
+       unregister_memory_notifier(&cxlr->memory_notifier);
        device_del(&cxlr->dev);
 
        /*
@@ -2164,6 +2272,63 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
        return cxlr;
 }
 
+static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
+{
+       int cset = 0;
+       int rc;
+
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               if (cxlr->coord[i].read_bandwidth) {
+                       rc = 0;
+                       if (cxl_need_node_perf_attrs_update(nid))
+                               node_set_perf_attrs(nid, &cxlr->coord[i], i);
+                       else
+                               rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
+
+                       if (rc == 0)
+                               cset++;
+               }
+       }
+
+       if (!cset)
+               return false;
+
+       rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group());
+       if (rc)
+               dev_dbg(&cxlr->dev, "Failed to update access0 group\n");
+
+       rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group());
+       if (rc)
+               dev_dbg(&cxlr->dev, "Failed to update access1 group\n");
+
+       return true;
+}
+
+static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
+                                         unsigned long action, void *arg)
+{
+       struct cxl_region *cxlr = container_of(nb, struct cxl_region,
+                                              memory_notifier);
+       struct cxl_region_params *p = &cxlr->params;
+       struct cxl_endpoint_decoder *cxled = p->targets[0];
+       struct cxl_decoder *cxld = &cxled->cxld;
+       struct memory_notify *mnb = arg;
+       int nid = mnb->status_change_nid;
+       int region_nid;
+
+       if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
+               return NOTIFY_DONE;
+
+       region_nid = phys_to_target_node(cxld->hpa_range.start);
+       if (nid != region_nid)
+               return NOTIFY_DONE;
+
+       if (!cxl_region_update_coordinates(cxlr, nid))
+               return NOTIFY_DONE;
+
+       return NOTIFY_OK;
+}
+
 /**
  * devm_cxl_add_region - Adds a region to a decoder
  * @cxlrd: root decoder
@@ -2211,6 +2376,10 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
        if (rc)
                goto err;
 
+       cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
+       cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
+       register_memory_notifier(&cxlr->memory_notifier);
+
        rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
        if (rc)
                return ERR_PTR(rc);
index 003feebab79b5f8e7563ba2e32665b4377871a55..534e25e2f0a48197a0588abd8a46d996bb333ed8 100644
@@ -6,6 +6,7 @@
 
 #include <linux/libnvdimm.h>
 #include <linux/bitfield.h>
+#include <linux/notifier.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
 #include <linux/node.h>
@@ -517,6 +518,8 @@ struct cxl_region_params {
  * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge
  * @flags: Region state flags
  * @params: active + config params for the region
+ * @coord: QoS access coordinates for the region
+ * @memory_notifier: notifier for setting the access coordinates on the node
  */
 struct cxl_region {
        struct device dev;
@@ -527,6 +530,8 @@ struct cxl_region {
        struct cxl_pmem_region *cxlr_pmem;
        unsigned long flags;
        struct cxl_region_params params;
+       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+       struct notifier_block memory_notifier;
 };
 
 struct cxl_nvdimm_bridge {
@@ -671,7 +676,7 @@ struct cxl_dport {
        struct cxl_port *port;
        struct cxl_regs regs;
        struct access_coordinate sw_coord;
-       struct access_coordinate hb_coord;
+       struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
        long link_latency;
 };
 
@@ -879,9 +884,17 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
 
 int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                                      struct access_coordinate *coord);
+int cxl_hb_get_perf_coordinates(struct cxl_port *port,
+                               struct access_coordinate *coord);
+void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
+                                   struct cxl_endpoint_decoder *cxled);
 
 void cxl_memdev_update_perf(struct cxl_memdev *cxlmd);
 
+void cxl_coordinates_combine(struct access_coordinate *out,
+                            struct access_coordinate *c1,
+                            struct access_coordinate *c2);
+
 /*
  * Unit test builds overrides this to __weak, find the 'strong' version
  * of these symbols in tools/testing/cxl/.
index 711b05d9a370e91b49beffbc5494542d8861d31e..93992a1c8eecf683edb72a5035e4a25872e64668 100644
@@ -71,6 +71,15 @@ enum cxl_regloc_type {
        CXL_REGLOC_RBI_TYPES
 };
 
+/*
+ * Table Access DOE, CDAT Read Entry Response
+ *
+ * Spec refs:
+ *
+ * CXL 3.1 8.1.11, Table 8-14: Read Entry Response
+ * CDAT Specification 1.03: 2 CDAT Data Structures
+ */
+
 struct cdat_header {
        __le32 length;
        u8 revision;
@@ -85,6 +94,21 @@ struct cdat_entry_header {
        __le16 length;
 } __packed;
 
+/*
+ * The DOE CDAT read response contains a CDAT read entry (either the
+ * CDAT header or a structure).
+ */
+union cdat_data {
+       struct cdat_header header;
+       struct cdat_entry_header entry;
+} __packed;
+
+/* There is an additional CDAT response header of 4 bytes. */
+struct cdat_doe_rsp {
+       __le32 doe_header;
+       u8 data[];
+} __packed;
+
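
A sketch of how one response maps onto these types; the helper name here is illustrative, the length fields come from the CDAT structures themselves per the spec references above:

	/*
	 * Length of the table data carried by one read entry response:
	 * entry 0 is the CDAT header, whose length field covers the whole
	 * table; later entries carry per-structure lengths.
	 */
	static size_t cdat_rsp_data_length(struct cdat_doe_rsp *rsp, bool first)
	{
		union cdat_data *data = (union cdat_data *)rsp->data;

		return first ? le32_to_cpu(data->header.length)
			     : le16_to_cpu(data->entry.length);
	}
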
 /*
  * CXL v3.0 6.2.3 Table 6-4
  * The table indicates that if PCIe Flit Mode is set, then CXL is in 256B flits
index 1ff1ab5fa105a60b2917ec1417156ff44e909074..797e1ebff2997d88c70ab7951b723e203a8184a1 100644
 
 static DEFINE_MUTEX(dax_bus_lock);
 
+/*
+ * All changes to the dax region configuration occur with this lock held
+ * for write.
+ */
+DECLARE_RWSEM(dax_region_rwsem);
+
+/*
+ * All changes to the dax device configuration occur with this lock held
+ * for write.
+ */
+DECLARE_RWSEM(dax_dev_rwsem);
+
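
One observation on the conversion that follows (mine, not wording from the patch): where both locks are needed, dax_region_rwsem is taken first and dax_dev_rwsem second, released in reverse, as delete_store() and size_store() below do:

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc) {
		up_write(&dax_region_rwsem);
		return rc;
	}
	/* ... modify device and region state ... */
	up_write(&dax_dev_rwsem);
	up_write(&dax_region_rwsem);
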
 #define DAX_NAME_LEN 30
 struct dax_id {
        struct list_head list;
@@ -180,7 +192,7 @@ static u64 dev_dax_size(struct dev_dax *dev_dax)
        u64 size = 0;
        int i;
 
-       device_lock_assert(&dev_dax->dev);
+       WARN_ON_ONCE(!rwsem_is_locked(&dax_dev_rwsem));
 
        for (i = 0; i < dev_dax->nr_range; i++)
                size += range_len(&dev_dax->ranges[i].range);
@@ -194,8 +206,15 @@ static int dax_bus_probe(struct device *dev)
        struct dev_dax *dev_dax = to_dev_dax(dev);
        struct dax_region *dax_region = dev_dax->region;
        int rc;
+       u64 size;
+
+       rc = down_read_interruptible(&dax_dev_rwsem);
+       if (rc)
+               return rc;
+       size = dev_dax_size(dev_dax);
+       up_read(&dax_dev_rwsem);
 
-       if (dev_dax_size(dev_dax) == 0 || dev_dax->id < 0)
+       if (size == 0 || dev_dax->id < 0)
                return -ENXIO;
 
        rc = dax_drv->probe(dev_dax);
@@ -222,7 +241,7 @@ static void dax_bus_remove(struct device *dev)
                dax_drv->remove(dev_dax);
 }
 
-static struct bus_type dax_bus_type = {
+static const struct bus_type dax_bus_type = {
        .name = "dax",
        .uevent = dax_bus_uevent,
        .match = dax_bus_match,
@@ -250,7 +269,7 @@ static ssize_t id_show(struct device *dev,
 {
        struct dax_region *dax_region = dev_get_drvdata(dev);
 
-       return sprintf(buf, "%d\n", dax_region->id);
+       return sysfs_emit(buf, "%d\n", dax_region->id);
 }
 static DEVICE_ATTR_RO(id);
 
@@ -259,8 +278,8 @@ static ssize_t region_size_show(struct device *dev,
 {
        struct dax_region *dax_region = dev_get_drvdata(dev);
 
-       return sprintf(buf, "%llu\n", (unsigned long long)
-                       resource_size(&dax_region->res));
+       return sysfs_emit(buf, "%llu\n",
+                         (unsigned long long)resource_size(&dax_region->res));
 }
 static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
                region_size_show, NULL);
@@ -270,7 +289,7 @@ static ssize_t region_align_show(struct device *dev,
 {
        struct dax_region *dax_region = dev_get_drvdata(dev);
 
-       return sprintf(buf, "%u\n", dax_region->align);
+       return sysfs_emit(buf, "%u\n", dax_region->align);
 }
 static struct device_attribute dev_attr_region_align =
                __ATTR(align, 0400, region_align_show, NULL);
@@ -283,7 +302,7 @@ static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
        resource_size_t size = resource_size(&dax_region->res);
        struct resource *res;
 
-       device_lock_assert(dax_region->dev);
+       WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
 
        for_each_dax_region_resource(dax_region, res)
                size -= resource_size(res);
@@ -295,12 +314,15 @@ static ssize_t available_size_show(struct device *dev,
 {
        struct dax_region *dax_region = dev_get_drvdata(dev);
        unsigned long long size;
+       int rc;
 
-       device_lock(dev);
+       rc = down_read_interruptible(&dax_region_rwsem);
+       if (rc)
+               return rc;
        size = dax_region_avail_size(dax_region);
-       device_unlock(dev);
+       up_read(&dax_region_rwsem);
 
-       return sprintf(buf, "%llu\n", size);
+       return sysfs_emit(buf, "%llu\n", size);
 }
 static DEVICE_ATTR_RO(available_size);
 
@@ -314,10 +336,12 @@ static ssize_t seed_show(struct device *dev,
        if (is_static(dax_region))
                return -EINVAL;
 
-       device_lock(dev);
+       rc = down_read_interruptible(&dax_region_rwsem);
+       if (rc)
+               return rc;
        seed = dax_region->seed;
-       rc = sprintf(buf, "%s\n", seed ? dev_name(seed) : "");
-       device_unlock(dev);
+       rc = sysfs_emit(buf, "%s\n", seed ? dev_name(seed) : "");
+       up_read(&dax_region_rwsem);
 
        return rc;
 }
@@ -333,14 +357,18 @@ static ssize_t create_show(struct device *dev,
        if (is_static(dax_region))
                return -EINVAL;
 
-       device_lock(dev);
+       rc = down_read_interruptible(&dax_region_rwsem);
+       if (rc)
+               return rc;
        youngest = dax_region->youngest;
-       rc = sprintf(buf, "%s\n", youngest ? dev_name(youngest) : "");
-       device_unlock(dev);
+       rc = sysfs_emit(buf, "%s\n", youngest ? dev_name(youngest) : "");
+       up_read(&dax_region_rwsem);
 
        return rc;
 }
 
+static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data);
+
 static ssize_t create_store(struct device *dev, struct device_attribute *attr,
                const char *buf, size_t len)
 {
@@ -358,7 +386,9 @@ static ssize_t create_store(struct device *dev, struct device_attribute *attr,
        if (val != 1)
                return -EINVAL;
 
-       device_lock(dev);
+       rc = down_write_killable(&dax_region_rwsem);
+       if (rc)
+               return rc;
        avail = dax_region_avail_size(dax_region);
        if (avail == 0)
                rc = -ENOSPC;
@@ -369,7 +399,7 @@ static ssize_t create_store(struct device *dev, struct device_attribute *attr,
                        .id = -1,
                        .memmap_on_memory = false,
                };
-               struct dev_dax *dev_dax = devm_create_dev_dax(&data);
+               struct dev_dax *dev_dax = __devm_create_dev_dax(&data);
 
                if (IS_ERR(dev_dax))
                        rc = PTR_ERR(dev_dax);
@@ -387,7 +417,7 @@ static ssize_t create_store(struct device *dev, struct device_attribute *attr,
                        rc = len;
                }
        }
-       device_unlock(dev);
+       up_write(&dax_region_rwsem);
 
        return rc;
 }
@@ -417,7 +447,7 @@ static void trim_dev_dax_range(struct dev_dax *dev_dax)
        struct range *range = &dev_dax->ranges[i].range;
        struct dax_region *dax_region = dev_dax->region;
 
-       device_lock_assert(dax_region->dev);
+       WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
        dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
                (unsigned long long)range->start,
                (unsigned long long)range->end);
@@ -435,7 +465,7 @@ static void free_dev_dax_ranges(struct dev_dax *dev_dax)
                trim_dev_dax_range(dev_dax);
 }
 
-static void unregister_dev_dax(void *dev)
+static void __unregister_dev_dax(void *dev)
 {
        struct dev_dax *dev_dax = to_dev_dax(dev);
 
@@ -447,6 +477,17 @@ static void unregister_dev_dax(void *dev)
        put_device(dev);
 }
 
+static void unregister_dev_dax(void *dev)
+{
+       if (rwsem_is_locked(&dax_region_rwsem))
+               return __unregister_dev_dax(dev);
+
+       if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
+               return;
+       __unregister_dev_dax(dev);
+       up_write(&dax_region_rwsem);
+}
+
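
The rwsem_is_locked() check above exists because the release action has two callers in different lock states; condensed from the code in this patch:

	/* Path 1: delete_store() runs the action synchronously while it
	 * already holds dax_region_rwsem for write: */
	down_write(&dax_region_rwsem);
	devm_release_action(dev, unregister_dev_dax, victim);
	up_write(&dax_region_rwsem);

	/* Path 2: devres teardown at unbind runs with the lock not held,
	 * so unregister_dev_dax() takes and drops it locally. */
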
 static void dax_region_free(struct kref *kref)
 {
        struct dax_region *dax_region;
@@ -463,11 +504,10 @@ static void dax_region_put(struct dax_region *dax_region)
 /* a return value >= 0 indicates this invocation invalidated the id */
 static int __free_dev_dax_id(struct dev_dax *dev_dax)
 {
-       struct device *dev = &dev_dax->dev;
        struct dax_region *dax_region;
        int rc = dev_dax->id;
 
-       device_lock_assert(dev);
+       WARN_ON_ONCE(!rwsem_is_locked(&dax_dev_rwsem));
 
        if (!dev_dax->dyn_id || dev_dax->id < 0)
                return -1;
@@ -480,12 +520,13 @@ static int __free_dev_dax_id(struct dev_dax *dev_dax)
 
 static int free_dev_dax_id(struct dev_dax *dev_dax)
 {
-       struct device *dev = &dev_dax->dev;
        int rc;
 
-       device_lock(dev);
+       rc = down_write_killable(&dax_dev_rwsem);
+       if (rc)
+               return rc;
        rc = __free_dev_dax_id(dev_dax);
-       device_unlock(dev);
+       up_write(&dax_dev_rwsem);
        return rc;
 }
 
@@ -519,8 +560,14 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
        if (!victim)
                return -ENXIO;
 
-       device_lock(dev);
-       device_lock(victim);
+       rc = down_write_killable(&dax_region_rwsem);
+       if (rc)
+               return rc;
+       rc = down_write_killable(&dax_dev_rwsem);
+       if (rc) {
+               up_write(&dax_region_rwsem);
+               return rc;
+       }
        dev_dax = to_dev_dax(victim);
        if (victim->driver || dev_dax_size(dev_dax))
                rc = -EBUSY;
@@ -541,12 +588,12 @@ static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
                } else
                        rc = -EBUSY;
        }
-       device_unlock(victim);
+       up_write(&dax_dev_rwsem);
 
        /* won the race to invalidate the device, clean it up */
        if (do_del)
                devm_release_action(dev, unregister_dev_dax, victim);
-       device_unlock(dev);
+       up_write(&dax_region_rwsem);
        put_device(victim);
 
        return rc;
@@ -658,16 +705,15 @@ static void dax_mapping_release(struct device *dev)
        put_device(parent);
 }
 
-static void unregister_dax_mapping(void *data)
+static void __unregister_dax_mapping(void *data)
 {
        struct device *dev = data;
        struct dax_mapping *mapping = to_dax_mapping(dev);
        struct dev_dax *dev_dax = to_dev_dax(dev->parent);
-       struct dax_region *dax_region = dev_dax->region;
 
        dev_dbg(dev, "%s\n", __func__);
 
-       device_lock_assert(dax_region->dev);
+       WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
 
        dev_dax->ranges[mapping->range_id].mapping = NULL;
        mapping->range_id = -1;
@@ -675,28 +721,37 @@ static void unregister_dax_mapping(void *data)
        device_unregister(dev);
 }
 
+static void unregister_dax_mapping(void *data)
+{
+       if (rwsem_is_locked(&dax_region_rwsem))
+               return __unregister_dax_mapping(data);
+
+       if (WARN_ON_ONCE(down_write_killable(&dax_region_rwsem) != 0))
+               return;
+       __unregister_dax_mapping(data);
+       up_write(&dax_region_rwsem);
+}
+
 static struct dev_dax_range *get_dax_range(struct device *dev)
 {
        struct dax_mapping *mapping = to_dax_mapping(dev);
        struct dev_dax *dev_dax = to_dev_dax(dev->parent);
-       struct dax_region *dax_region = dev_dax->region;
+       int rc;
 
-       device_lock(dax_region->dev);
+       rc = down_write_killable(&dax_region_rwsem);
+       if (rc)
+               return NULL;
        if (mapping->range_id < 0) {
-               device_unlock(dax_region->dev);
+               up_write(&dax_region_rwsem);
                return NULL;
        }
 
        return &dev_dax->ranges[mapping->range_id];
 }
 
-static void put_dax_range(struct dev_dax_range *dax_range)
+static void put_dax_range(void)
 {
-       struct dax_mapping *mapping = dax_range->mapping;
-       struct dev_dax *dev_dax = to_dev_dax(mapping->dev.parent);
-       struct dax_region *dax_region = dev_dax->region;
-
-       device_unlock(dax_region->dev);
+       up_write(&dax_region_rwsem);
 }
 
 static ssize_t start_show(struct device *dev,
@@ -708,8 +763,8 @@ static ssize_t start_show(struct device *dev,
        dax_range = get_dax_range(dev);
        if (!dax_range)
                return -ENXIO;
-       rc = sprintf(buf, "%#llx\n", dax_range->range.start);
-       put_dax_range(dax_range);
+       rc = sysfs_emit(buf, "%#llx\n", dax_range->range.start);
+       put_dax_range();
 
        return rc;
 }
@@ -724,8 +779,8 @@ static ssize_t end_show(struct device *dev,
        dax_range = get_dax_range(dev);
        if (!dax_range)
                return -ENXIO;
-       rc = sprintf(buf, "%#llx\n", dax_range->range.end);
-       put_dax_range(dax_range);
+       rc = sysfs_emit(buf, "%#llx\n", dax_range->range.end);
+       put_dax_range();
 
        return rc;
 }
@@ -740,8 +795,8 @@ static ssize_t pgoff_show(struct device *dev,
        dax_range = get_dax_range(dev);
        if (!dax_range)
                return -ENXIO;
-       rc = sprintf(buf, "%#lx\n", dax_range->pgoff);
-       put_dax_range(dax_range);
+       rc = sysfs_emit(buf, "%#lx\n", dax_range->pgoff);
+       put_dax_range();
 
        return rc;
 }
@@ -775,7 +830,7 @@ static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
        struct device *dev;
        int rc;
 
-       device_lock_assert(dax_region->dev);
+       WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
 
        if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
                                "region disabled\n"))
@@ -821,7 +876,7 @@ static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
        struct resource *alloc;
        int i, rc;
 
-       device_lock_assert(dax_region->dev);
+       WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
 
        /* handle the seed alloc special case */
        if (!size) {
@@ -875,13 +930,12 @@ static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, r
 {
        int last_range = dev_dax->nr_range - 1;
        struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
-       struct dax_region *dax_region = dev_dax->region;
        bool is_shrink = resource_size(res) > size;
        struct range *range = &dax_range->range;
        struct device *dev = &dev_dax->dev;
        int rc;
 
-       device_lock_assert(dax_region->dev);
+       WARN_ON_ONCE(!rwsem_is_locked(&dax_region_rwsem));
 
        if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
                return -EINVAL;
@@ -907,12 +961,15 @@ static ssize_t size_show(struct device *dev,
 {
        struct dev_dax *dev_dax = to_dev_dax(dev);
        unsigned long long size;
+       int rc;
 
-       device_lock(dev);
+       rc = down_write_killable(&dax_dev_rwsem);
+       if (rc)
+               return rc;
        size = dev_dax_size(dev_dax);
-       device_unlock(dev);
+       up_write(&dax_dev_rwsem);
 
-       return sprintf(buf, "%llu\n", size);
+       return sysfs_emit(buf, "%llu\n", size);
 }
 
 static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
@@ -1080,17 +1137,27 @@ static ssize_t size_store(struct device *dev, struct device_attribute *attr,
                return -EINVAL;
        }
 
-       device_lock(dax_region->dev);
+       rc = down_write_killable(&dax_region_rwsem);
+       if (rc)
+               return rc;
        if (!dax_region->dev->driver) {
-               device_unlock(dax_region->dev);
-               return -ENXIO;
+               rc = -ENXIO;
+               goto err_region;
        }
-       device_lock(dev);
+       rc = down_write_killable(&dax_dev_rwsem);
+       if (rc)
+               goto err_dev;
+
        rc = dev_dax_resize(dax_region, dev_dax, val);
-       device_unlock(dev);
-       device_unlock(dax_region->dev);
 
-       return rc == 0 ? len : rc;
+err_dev:
+       up_write(&dax_dev_rwsem);
+err_region:
+       up_write(&dax_region_rwsem);
+
+       if (rc == 0)
+               return len;
+       return rc;
 }
 static DEVICE_ATTR_RW(size);
 
@@ -1138,18 +1205,24 @@ static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
                return rc;
 
        rc = -ENXIO;
-       device_lock(dax_region->dev);
+       rc = down_write_killable(&dax_region_rwsem);
+       if (rc)
+               return rc;
        if (!dax_region->dev->driver) {
-               device_unlock(dax_region->dev);
+               up_write(&dax_region_rwsem);
+               return -ENXIO;
+       }
+       rc = down_write_killable(&dax_dev_rwsem);
+       if (rc) {
+               up_write(&dax_region_rwsem);
                return rc;
        }
-       device_lock(dev);
 
        to_alloc = range_len(&r);
        if (alloc_is_aligned(dev_dax, to_alloc))
                rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
-       device_unlock(dev);
-       device_unlock(dax_region->dev);
+       up_write(&dax_dev_rwsem);
+       up_write(&dax_region_rwsem);
 
        return rc == 0 ? len : rc;
 }
@@ -1160,7 +1233,7 @@ static ssize_t align_show(struct device *dev,
 {
        struct dev_dax *dev_dax = to_dev_dax(dev);
 
-       return sprintf(buf, "%d\n", dev_dax->align);
+       return sysfs_emit(buf, "%d\n", dev_dax->align);
 }
 
 static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
@@ -1196,13 +1269,19 @@ static ssize_t align_store(struct device *dev, struct device_attribute *attr,
        if (!dax_align_valid(val))
                return -EINVAL;
 
-       device_lock(dax_region->dev);
+       rc = down_write_killable(&dax_region_rwsem);
+       if (rc)
+               return rc;
        if (!dax_region->dev->driver) {
-               device_unlock(dax_region->dev);
+               up_write(&dax_region_rwsem);
                return -ENXIO;
        }
 
-       device_lock(dev);
+       rc = down_write_killable(&dax_dev_rwsem);
+       if (rc) {
+               up_write(&dax_region_rwsem);
+               return rc;
+       }
        if (dev->driver) {
                rc = -EBUSY;
                goto out_unlock;
@@ -1214,8 +1293,8 @@ static ssize_t align_store(struct device *dev, struct device_attribute *attr,
        if (rc)
                dev_dax->align = align_save;
 out_unlock:
-       device_unlock(dev);
-       device_unlock(dax_region->dev);
+       up_write(&dax_dev_rwsem);
+       up_write(&dax_region_rwsem);
        return rc == 0 ? len : rc;
 }
 static DEVICE_ATTR_RW(align);
@@ -1232,7 +1311,7 @@ static ssize_t target_node_show(struct device *dev,
 {
        struct dev_dax *dev_dax = to_dev_dax(dev);
 
-       return sprintf(buf, "%d\n", dev_dax_target_node(dev_dax));
+       return sysfs_emit(buf, "%d\n", dev_dax_target_node(dev_dax));
 }
 static DEVICE_ATTR_RO(target_node);
 
@@ -1248,7 +1327,7 @@ static ssize_t resource_show(struct device *dev,
        else
                start = dev_dax->ranges[0].range.start;
 
-       return sprintf(buf, "%#llx\n", start);
+       return sysfs_emit(buf, "%#llx\n", start);
 }
 static DEVICE_ATTR(resource, 0400, resource_show, NULL);
 
@@ -1259,17 +1338,59 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
         * We only ever expect to handle device-dax instances, i.e. the
         * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
         */
-       return sprintf(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
+       return sysfs_emit(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
 }
 static DEVICE_ATTR_RO(modalias);
 
 static ssize_t numa_node_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
-       return sprintf(buf, "%d\n", dev_to_node(dev));
+       return sysfs_emit(buf, "%d\n", dev_to_node(dev));
 }
 static DEVICE_ATTR_RO(numa_node);
 
+static ssize_t memmap_on_memory_show(struct device *dev,
+                                    struct device_attribute *attr, char *buf)
+{
+       struct dev_dax *dev_dax = to_dev_dax(dev);
+
+       return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory);
+}
+
+static ssize_t memmap_on_memory_store(struct device *dev,
+                                     struct device_attribute *attr,
+                                     const char *buf, size_t len)
+{
+       struct dev_dax *dev_dax = to_dev_dax(dev);
+       bool val;
+       int rc;
+
+       rc = kstrtobool(buf, &val);
+       if (rc)
+               return rc;
+
+       if (val && !mhp_supports_memmap_on_memory()) {
+               dev_dbg(dev, "memmap_on_memory is not available\n");
+               return -EOPNOTSUPP;
+       }
+
+       rc = down_write_killable(&dax_dev_rwsem);
+       if (rc)
+               return rc;
+
+       if (dev_dax->memmap_on_memory != val && dev->driver &&
+           to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) {
+               up_write(&dax_dev_rwsem);
+               return -EBUSY;
+       }
+
+       dev_dax->memmap_on_memory = val;
+       up_write(&dax_dev_rwsem);
+
+       return len;
+}
+static DEVICE_ATTR_RW(memmap_on_memory);
+
 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
 {
        struct device *dev = container_of(kobj, struct device, kobj);
@@ -1296,6 +1417,7 @@ static struct attribute *dev_dax_attributes[] = {
        &dev_attr_align.attr,
        &dev_attr_resource.attr,
        &dev_attr_numa_node.attr,
+       &dev_attr_memmap_on_memory.attr,
        NULL,
 };
 
@@ -1325,7 +1447,7 @@ static const struct device_type dev_dax_type = {
        .groups = dax_attribute_groups,
 };
 
-struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
+static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
 {
        struct dax_region *dax_region = data->dax_region;
        struct device *parent = dax_region->dev;
@@ -1440,6 +1562,21 @@ err_id:
 
        return ERR_PTR(rc);
 }
+
+struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
+{
+       struct dev_dax *dev_dax;
+       int rc;
+
+       rc = down_write_killable(&dax_region_rwsem);
+       if (rc)
+               return ERR_PTR(rc);
+
+       dev_dax = __devm_create_dev_dax(data);
+       up_write(&dax_region_rwsem);
+
+       return dev_dax;
+}
 EXPORT_SYMBOL_GPL(devm_create_dev_dax);
 
 int __dax_driver_register(struct dax_device_driver *dax_drv,
index a0244f6bb44bdc3e9135aa36b0e3f86e8d20a241..aca71d7fccc1f2fa741980e108e2a086f540a5f3 100644
@@ -13,6 +13,7 @@
 #include <linux/uio.h>
 #include <linux/dax.h>
 #include <linux/fs.h>
+#include <linux/cacheinfo.h>
 #include "dax-private.h"
 
 /**
@@ -319,6 +320,11 @@ EXPORT_SYMBOL_GPL(dax_alive);
  * that any fault handlers or operations that might have seen
  * dax_alive(), have completed.  Any operations that start after
  * synchronize_srcu() has run will abort upon seeing !dax_alive().
+ *
+ * Note, because alloc_dax() returns an ERR_PTR() on error, callers
+ * typically store its result into a local variable in order to check
+ * the result. Therefore, care must be taken to populate the struct
+ * device dax_dev field and to make sure the dax_dev is not leaked.
  */
 void kill_dax(struct dax_device *dax_dev)
 {
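
A sketch of the caller pattern that note is warning about, with foo standing in for a driver-private structure:

	struct dax_device *dax_dev;

	dax_dev = alloc_dax(foo, &foo_dax_ops);
	if (IS_ERR(dax_dev))
		return PTR_ERR(dax_dev);

	/* assign only after the error check so that teardown paths calling
	 * kill_dax()/put_dax() never see an ERR_PTR() value */
	foo->dax_dev = dax_dev;
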
@@ -446,6 +452,14 @@ struct dax_device *alloc_dax(void *private, const struct dax_operations *ops)
        dev_t devt;
        int minor;
 
+       /*
+        * Unavailable on architectures with virtually aliased data caches,
+        * except for device-dax (NULL operations pointer), which does
+        * not use aliased mappings from the kernel.
+        */
+       if (ops && cpu_dcache_is_aliasing())
+               return ERR_PTR(-EOPNOTSUPP);
+
        if (WARN_ON_ONCE(ops && !ops->zero_page_range))
                return ERR_PTR(-EINVAL);
 
index e928f2ca0f1e9adc58594d6b4552d64dae29df34..002a5ec806207ca2614615b37eb1e38303292728 100644
@@ -643,16 +643,16 @@ config TEGRA20_APB_DMA
 
 config TEGRA210_ADMA
        tristate "NVIDIA Tegra210 ADMA support"
-       depends on (ARCH_TEGRA_210_SOC || COMPILE_TEST)
+       depends on (ARCH_TEGRA || COMPILE_TEST)
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
-         Support for the NVIDIA Tegra210 ADMA controller driver. The
-         DMA controller has multiple DMA channels and is used to service
-         various audio clients in the Tegra210 audio processing engine
-         (APE). This DMA controller transfers data from memory to
-         peripheral and vice versa. It does not support memory to
-         memory data transfer.
+         Support for the NVIDIA Tegra210/Tegra186/Tegra194/Tegra234 ADMA
+         controller driver. The DMA controller has multiple DMA channels
+         and is used to service various audio clients in the Tegra210
+         audio processing engine (APE). This DMA controller transfers
+         data from memory to peripheral and vice versa. It does not
+         support memory to memory data transfer.
 
 config TIMB_DMA
        tristate "Timberdale FPGA DMA support"
index eea8bd33b4b7385d0fd21dad5583588c7de3556f..fbf048f432bf8f16d5b97b88b4c02851367d9924 100644
@@ -2239,7 +2239,7 @@ static int pl08x_resume(struct dma_chan *chan)
 bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
 {
        struct pl08x_dma_chan *plchan;
-       char *name = chan_id;
+       const char *name = chan_id;
 
        /* Reject channels for devices not bound to this driver */
        if (chan->device->dev->driver != &pl08x_amba_driver.drv)
index 0553956f745691b78d328b79d59fbbb8bdd0dd97..ad74d57cc3abe86eaa1d39d985ad0da47b2449ef 100644
@@ -90,13 +90,8 @@ int bcom_sram_init(struct device_node *sram_node, char *owner)
        bcom_sram->rh = rh_create(4);
 
        /* Attach the free zones */
-#if 0
-       /* Currently disabled ... for future use only */
-       reg_addr_p = of_get_property(sram_node, "available", &psize);
-#else
        regaddr_p = NULL;
        psize = 0;
-#endif
 
        if (!regaddr_p || !psize) {
                /* Attach the whole zone */
index 793f1a7ad5e343bbfe403c9e0ad28e891bd0d556..b18faa7cfedb9b60361aaebba82585767a270935 100644
@@ -97,8 +97,8 @@ static void fsl_edma3_enable_request(struct fsl_edma_chan *fsl_chan)
                 * ch_mux: With the exception of 0, attempts to write a value
                 * already in use will be forced to 0.
                 */
-               if (!edma_readl_chreg(fsl_chan, ch_mux))
-                       edma_writel_chreg(fsl_chan, fsl_chan->srcid, ch_mux);
+               if (!edma_readl(fsl_chan->edma, fsl_chan->mux_addr))
+                       edma_writel(fsl_chan->edma, fsl_chan->srcid, fsl_chan->mux_addr);
        }
 
        val = edma_readl_chreg(fsl_chan, ch_csr);
@@ -134,7 +134,7 @@ static void fsl_edma3_disable_request(struct fsl_edma_chan *fsl_chan)
        flags = fsl_edma_drvflags(fsl_chan);
 
        if (flags & FSL_EDMA_DRV_HAS_CHMUX)
-               edma_writel_chreg(fsl_chan, 0, ch_mux);
+               edma_writel(fsl_chan->edma, 0, fsl_chan->mux_addr);
 
        val &= ~EDMA_V3_CH_CSR_ERQ;
        edma_writel_chreg(fsl_chan, val, ch_csr);
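
fsl_chan->mux_addr is presumably computed once at probe time from the mux_off/mux_skip fields added to struct fsl_edma_drvdata later in this diff; something along these lines, with the probe-side names assumed rather than shown in this hunk:

	fsl_chan->mux_addr = fsl_edma->membase + drvdata->mux_off +
			     i * drvdata->mux_skip;
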
@@ -351,39 +351,45 @@ static size_t fsl_edma_desc_residue(struct fsl_edma_chan *fsl_chan,
 {
        struct fsl_edma_desc *edesc = fsl_chan->edesc;
        enum dma_transfer_direction dir = edesc->dirn;
-       dma_addr_t cur_addr, dma_addr;
+       dma_addr_t cur_addr, dma_addr, old_addr;
        size_t len, size;
        u32 nbytes = 0;
        int i;
 
        /* calculate the total size in this desc */
        for (len = i = 0; i < fsl_chan->edesc->n_tcds; i++) {
-               nbytes = le32_to_cpu(edesc->tcd[i].vtcd->nbytes);
+               nbytes = fsl_edma_get_tcd_to_cpu(fsl_chan, edesc->tcd[i].vtcd, nbytes);
                if (nbytes & (EDMA_V3_TCD_NBYTES_DMLOE | EDMA_V3_TCD_NBYTES_SMLOE))
                        nbytes = EDMA_V3_TCD_NBYTES_MLOFF_NBYTES(nbytes);
-               len += nbytes * le16_to_cpu(edesc->tcd[i].vtcd->biter);
+               len += nbytes * fsl_edma_get_tcd_to_cpu(fsl_chan, edesc->tcd[i].vtcd, biter);
        }
 
        if (!in_progress)
                return len;
 
-       if (dir == DMA_MEM_TO_DEV)
-               cur_addr = edma_read_tcdreg(fsl_chan, saddr);
-       else
-               cur_addr = edma_read_tcdreg(fsl_chan, daddr);
+       /* A 64-bit read is not atomic; retry when the high 32 bits change */
+       do {
+               if (dir == DMA_MEM_TO_DEV) {
+                       old_addr = edma_read_tcdreg(fsl_chan, saddr);
+                       cur_addr = edma_read_tcdreg(fsl_chan, saddr);
+               } else {
+                       old_addr = edma_read_tcdreg(fsl_chan, daddr);
+                       cur_addr = edma_read_tcdreg(fsl_chan, daddr);
+               }
+       } while (upper_32_bits(cur_addr) != upper_32_bits(old_addr));
 
        /* figure out the finished and calculate the residue */
        for (i = 0; i < fsl_chan->edesc->n_tcds; i++) {
-               nbytes = le32_to_cpu(edesc->tcd[i].vtcd->nbytes);
+               nbytes = fsl_edma_get_tcd_to_cpu(fsl_chan, edesc->tcd[i].vtcd, nbytes);
                if (nbytes & (EDMA_V3_TCD_NBYTES_DMLOE | EDMA_V3_TCD_NBYTES_SMLOE))
                        nbytes = EDMA_V3_TCD_NBYTES_MLOFF_NBYTES(nbytes);
 
-               size = nbytes * le16_to_cpu(edesc->tcd[i].vtcd->biter);
+               size = nbytes * fsl_edma_get_tcd_to_cpu(fsl_chan, edesc->tcd[i].vtcd, biter);
 
                if (dir == DMA_MEM_TO_DEV)
-                       dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->saddr);
+                       dma_addr = fsl_edma_get_tcd_to_cpu(fsl_chan, edesc->tcd[i].vtcd, saddr);
                else
-                       dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->daddr);
+                       dma_addr = fsl_edma_get_tcd_to_cpu(fsl_chan, edesc->tcd[i].vtcd, daddr);
 
                len -= size;
                if (cur_addr >= dma_addr && cur_addr < dma_addr + size) {
@@ -426,8 +432,7 @@ enum dma_status fsl_edma_tx_status(struct dma_chan *chan,
        return fsl_chan->status;
 }
 
-static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan,
-                                 struct fsl_edma_hw_tcd *tcd)
+static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan, void *tcd)
 {
        u16 csr = 0;
 
@@ -439,26 +444,26 @@ static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan,
         */
        edma_write_tcdreg(fsl_chan, 0, csr);
 
-       edma_write_tcdreg(fsl_chan, tcd->saddr, saddr);
-       edma_write_tcdreg(fsl_chan, tcd->daddr, daddr);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, saddr);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, daddr);
 
-       edma_write_tcdreg(fsl_chan, tcd->attr, attr);
-       edma_write_tcdreg(fsl_chan, tcd->soff, soff);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, attr);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, soff);
 
-       edma_write_tcdreg(fsl_chan, tcd->nbytes, nbytes);
-       edma_write_tcdreg(fsl_chan, tcd->slast, slast);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, nbytes);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, slast);
 
-       edma_write_tcdreg(fsl_chan, tcd->citer, citer);
-       edma_write_tcdreg(fsl_chan, tcd->biter, biter);
-       edma_write_tcdreg(fsl_chan, tcd->doff, doff);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, citer);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, biter);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, doff);
 
-       edma_write_tcdreg(fsl_chan, tcd->dlast_sga, dlast_sga);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, dlast_sga);
 
-       csr = le16_to_cpu(tcd->csr);
+       csr = fsl_edma_get_tcd_to_cpu(fsl_chan, tcd, csr);
 
        if (fsl_chan->is_sw) {
                csr |= EDMA_TCD_CSR_START;
-               tcd->csr = cpu_to_le16(csr);
+               fsl_edma_set_tcd_to_le(fsl_chan, tcd, csr, csr);
        }
 
        /*
@@ -473,14 +478,14 @@ static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan,
                edma_writel_chreg(fsl_chan, edma_readl_chreg(fsl_chan, ch_csr), ch_csr);
 
 
-       edma_write_tcdreg(fsl_chan, tcd->csr, csr);
+       edma_cp_tcd_to_reg(fsl_chan, tcd, csr);
 }
 
 static inline
 void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan,
-                      struct fsl_edma_hw_tcd *tcd, u32 src, u32 dst,
-                      u16 attr, u16 soff, u32 nbytes, u32 slast, u16 citer,
-                      u16 biter, u16 doff, u32 dlast_sga, bool major_int,
+                      struct fsl_edma_hw_tcd *tcd, dma_addr_t src, dma_addr_t dst,
+                      u16 attr, u16 soff, u32 nbytes, dma_addr_t slast, u16 citer,
+                      u16 biter, u16 doff, dma_addr_t dlast_sga, bool major_int,
                       bool disable_req, bool enable_sg)
 {
        struct dma_slave_config *cfg = &fsl_chan->cfg;
@@ -493,12 +498,12 @@ void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan,
         * So we put the value in little endian in memory, waiting
         * for fsl_edma_set_tcd_regs to do the swap.
         */
-       tcd->saddr = cpu_to_le32(src);
-       tcd->daddr = cpu_to_le32(dst);
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, src, saddr);
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, dst, daddr);
 
-       tcd->attr = cpu_to_le16(attr);
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, attr, attr);
 
-       tcd->soff = cpu_to_le16(soff);
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, soff, soff);
 
        if (fsl_chan->is_multi_fifo) {
                /* set mloff to support multiple fifo */
@@ -515,15 +520,16 @@ void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan,
                }
        }
 
-       tcd->nbytes = cpu_to_le32(nbytes);
-       tcd->slast = cpu_to_le32(slast);
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, nbytes, nbytes);
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, slast, slast);
+
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, EDMA_TCD_CITER_CITER(citer), citer);
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, doff, doff);
 
-       tcd->citer = cpu_to_le16(EDMA_TCD_CITER_CITER(citer));
-       tcd->doff = cpu_to_le16(doff);
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, dlast_sga, dlast_sga);
 
-       tcd->dlast_sga = cpu_to_le32(dlast_sga);
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, EDMA_TCD_BITER_BITER(biter), biter);
 
-       tcd->biter = cpu_to_le16(EDMA_TCD_BITER_BITER(biter));
        if (major_int)
                csr |= EDMA_TCD_CSR_INT_MAJOR;
 
@@ -539,7 +545,7 @@ void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan,
        if (fsl_chan->is_sw)
                csr |= EDMA_TCD_CSR_START;
 
-       tcd->csr = cpu_to_le16(csr);
+       fsl_edma_set_tcd_to_le(fsl_chan, tcd, csr, csr);
 }
 
 static struct fsl_edma_desc *fsl_edma_alloc_desc(struct fsl_edma_chan *fsl_chan,
@@ -580,8 +586,9 @@ struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic(
        dma_addr_t dma_buf_next;
        bool major_int = true;
        int sg_len, i;
-       u32 src_addr, dst_addr, last_sg, nbytes;
+       dma_addr_t src_addr, dst_addr, last_sg;
        u16 soff, doff, iter;
+       u32 nbytes;
 
        if (!is_slave_direction(direction))
                return NULL;
@@ -653,8 +660,9 @@ struct dma_async_tx_descriptor *fsl_edma_prep_slave_sg(
        struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
        struct fsl_edma_desc *fsl_desc;
        struct scatterlist *sg;
-       u32 src_addr, dst_addr, last_sg, nbytes;
+       dma_addr_t src_addr, dst_addr, last_sg;
        u16 soff, doff, iter;
+       u32 nbytes;
        int i;
 
        if (!is_slave_direction(direction))
@@ -803,7 +811,8 @@ int fsl_edma_alloc_chan_resources(struct dma_chan *chan)
        struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
 
        fsl_chan->tcd_pool = dma_pool_create("tcd_pool", chan->device->dev,
-                               sizeof(struct fsl_edma_hw_tcd),
+                               fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_TCD64 ?
+                               sizeof(struct fsl_edma_hw_tcd64) : sizeof(struct fsl_edma_hw_tcd),
                                32, 0);
        return 0;
 }
index f5e216b157c75ff2215d7c74cd1d9febad47031c..7bf0aba471a8c719fc3a2d43d2fd3b5473def08a 100644
@@ -88,6 +88,20 @@ struct fsl_edma_hw_tcd {
        __le16  biter;
 };
 
+struct fsl_edma_hw_tcd64 {
+       __le64  saddr;
+       __le16  soff;
+       __le16  attr;
+       __le32  nbytes;
+       __le64  slast;
+       __le64  daddr;
+       __le64  dlast_sga;
+       __le16  doff;
+       __le16  citer;
+       __le16  csr;
+       __le16  biter;
+} __packed;
+
 struct fsl_edma3_ch_reg {
        __le32  ch_csr;
        __le32  ch_es;
@@ -97,7 +111,10 @@ struct fsl_edma3_ch_reg {
        __le32  ch_mux;
        __le32  ch_mattr; /* edma4, reserved for edma3 */
        __le32  ch_reserved;
-       struct fsl_edma_hw_tcd tcd;
+       union {
+               struct fsl_edma_hw_tcd tcd;
+               struct fsl_edma_hw_tcd64 tcd64;
+       };
 } __packed;
 
 /*
@@ -126,7 +143,7 @@ struct edma_regs {
 
 struct fsl_edma_sw_tcd {
        dma_addr_t                      ptcd;
-       struct fsl_edma_hw_tcd          *vtcd;
+       void                            *vtcd;
 };
 
 struct fsl_edma_chan {
@@ -145,7 +162,8 @@ struct fsl_edma_chan {
        u32                             dma_dev_size;
        enum dma_data_direction         dma_dir;
        char                            chan_name[32];
-       struct fsl_edma_hw_tcd __iomem *tcd;
+       void __iomem                    *tcd;
+       void __iomem                    *mux_addr;
        u32                             real_count;
        struct work_struct              issue_worker;
        struct platform_device          *pdev;
@@ -188,6 +206,7 @@ struct fsl_edma_desc {
 #define FSL_EDMA_DRV_CLEAR_DONE_E_SG   BIT(13)
 /* Need clean CHn_CSR DONE before enable TCD's MAJORELINK */
 #define FSL_EDMA_DRV_CLEAR_DONE_E_LINK BIT(14)
+#define FSL_EDMA_DRV_TCD64             BIT(15)
 
 #define FSL_EDMA_DRV_EDMA3     (FSL_EDMA_DRV_SPLIT_REG |       \
                                 FSL_EDMA_DRV_BUS_8BYTE |       \
@@ -207,6 +226,8 @@ struct fsl_edma_drvdata {
        u32                     chreg_off;
        u32                     chreg_space_sz;
        u32                     flags;
+       u32                     mux_off;        /* channel mux register offset */
+       u32                     mux_skip;       /* how much skip for each channel */
        int                     (*setup_irq)(struct platform_device *pdev,
                                             struct fsl_edma_engine *fsl_edma);
 };
@@ -229,23 +250,108 @@ struct fsl_edma_engine {
        struct fsl_edma_chan    chans[] __counted_by(n_chans);
 };
 
-#define edma_read_tcdreg(chan, __name)                         \
-(sizeof(chan->tcd->__name) == sizeof(u32) ?                    \
-       edma_readl(chan->edma, &chan->tcd->__name) :            \
-       edma_readw(chan->edma, &chan->tcd->__name))
-
-#define edma_write_tcdreg(chan, val, __name)                   \
-(sizeof(chan->tcd->__name) == sizeof(u32) ?                    \
-       edma_writel(chan->edma, (u32 __force)val, &chan->tcd->__name) : \
-       edma_writew(chan->edma, (u16 __force)val, &chan->tcd->__name))
+#define edma_read_tcdreg_c(chan, _tcd,  __name)                                \
+(sizeof((_tcd)->__name) == sizeof(u64) ?                               \
+       edma_readq(chan->edma, &(_tcd)->__name) :                       \
+               ((sizeof((_tcd)->__name) == sizeof(u32)) ?              \
+                       edma_readl(chan->edma, &(_tcd)->__name) :       \
+                       edma_readw(chan->edma, &(_tcd)->__name)         \
+               ))
+
+#define edma_read_tcdreg(chan, __name)                                                         \
+((fsl_edma_drvflags(chan) & FSL_EDMA_DRV_TCD64) ?                                              \
+       edma_read_tcdreg_c(chan, ((struct fsl_edma_hw_tcd64 __iomem *)chan->tcd), __name) :     \
+       edma_read_tcdreg_c(chan, ((struct fsl_edma_hw_tcd __iomem *)chan->tcd), __name)         \
+)
+
+#define edma_write_tcdreg_c(chan, _tcd, _val, __name)                          \
+do {                                                                           \
+       switch (sizeof(_tcd->__name)) {                                         \
+       case sizeof(u64):                                                       \
+               edma_writeq(chan->edma, (u64 __force)_val, &_tcd->__name);      \
+               break;                                                          \
+       case sizeof(u32):                                                       \
+               edma_writel(chan->edma, (u32 __force)_val, &_tcd->__name);      \
+               break;                                                          \
+       case sizeof(u16):                                                       \
+               edma_writew(chan->edma, (u16 __force)_val, &_tcd->__name);      \
+               break;                                                          \
+       case sizeof(u8):                                                        \
+               edma_writeb(chan->edma, (u8 __force)_val, &_tcd->__name);       \
+               break;                                                          \
+       }                                                                       \
+} while (0)
+
+#define edma_write_tcdreg(chan, val, __name)                                                      \
+do {                                                                                              \
+       struct fsl_edma_hw_tcd64 __iomem *tcd64_r = (struct fsl_edma_hw_tcd64 __iomem *)chan->tcd; \
+       struct fsl_edma_hw_tcd __iomem *tcd_r = (struct fsl_edma_hw_tcd __iomem *)chan->tcd;       \
+                                                                                                  \
+       if (fsl_edma_drvflags(chan) & FSL_EDMA_DRV_TCD64)                                          \
+               edma_write_tcdreg_c(chan, tcd64_r, val, __name);                                   \
+       else                                                                                       \
+               edma_write_tcdreg_c(chan, tcd_r, val, __name);                                     \
+} while (0)
+
+#define edma_cp_tcd_to_reg(chan, __tcd, __name)                                                           \
+do {   \
+       struct fsl_edma_hw_tcd64 __iomem *tcd64_r = (struct fsl_edma_hw_tcd64 __iomem *)chan->tcd; \
+       struct fsl_edma_hw_tcd __iomem *tcd_r = (struct fsl_edma_hw_tcd __iomem *)chan->tcd;       \
+       struct fsl_edma_hw_tcd64 *tcd64_m = (struct fsl_edma_hw_tcd64 *)__tcd;                     \
+       struct fsl_edma_hw_tcd *tcd_m = (struct fsl_edma_hw_tcd *)__tcd;                           \
+                                                                                                  \
+       if (fsl_edma_drvflags(chan) & FSL_EDMA_DRV_TCD64)                                          \
+               edma_write_tcdreg_c(chan, tcd64_r,  tcd64_m->__name, __name);                      \
+       else                                                                                       \
+               edma_write_tcdreg_c(chan, tcd_r, tcd_m->__name, __name);                           \
+} while (0)
 
 #define edma_readl_chreg(chan, __name)                         \
        edma_readl(chan->edma,                                  \
-                  (void __iomem *)&(container_of(chan->tcd, struct fsl_edma3_ch_reg, tcd)->__name))
+                  (void __iomem *)&(container_of(((__force void *)chan->tcd),\
+                                                 struct fsl_edma3_ch_reg, tcd)->__name))
 
 #define edma_writel_chreg(chan, val,  __name)                  \
        edma_writel(chan->edma, val,                            \
-                  (void __iomem *)&(container_of(chan->tcd, struct fsl_edma3_ch_reg, tcd)->__name))
+                  (void __iomem *)&(container_of(((__force void *)chan->tcd),\
+                                                 struct fsl_edma3_ch_reg, tcd)->__name))
+
+#define fsl_edma_get_tcd(_chan, _tcd, _field)                  \
+(fsl_edma_drvflags(_chan) & FSL_EDMA_DRV_TCD64 ? (((struct fsl_edma_hw_tcd64 *)_tcd)->_field) : \
+                                                (((struct fsl_edma_hw_tcd *)_tcd)->_field))
+
+#define fsl_edma_le_to_cpu(x)                                          \
+(sizeof(x) == sizeof(u64) ? le64_to_cpu((__force __le64)(x)) :         \
+       (sizeof(x) == sizeof(u32) ? le32_to_cpu((__force __le32)(x)) :  \
+                                   le16_to_cpu((__force __le16)(x))))
+
+#define fsl_edma_get_tcd_to_cpu(_chan, _tcd, _field)                           \
+(fsl_edma_drvflags(_chan) & FSL_EDMA_DRV_TCD64 ?                               \
+       fsl_edma_le_to_cpu(((struct fsl_edma_hw_tcd64 *)_tcd)->_field) :        \
+       fsl_edma_le_to_cpu(((struct fsl_edma_hw_tcd *)_tcd)->_field))
+
+#define fsl_edma_set_tcd_to_le_c(_tcd, _val, _field)                                   \
+do {                                                                                   \
+       switch (sizeof((_tcd)->_field)) {                                               \
+       case sizeof(u64):                                                               \
+               *(__force __le64 *)(&((_tcd)->_field)) = cpu_to_le64(_val);             \
+               break;                                                                  \
+       case sizeof(u32):                                                               \
+               *(__force __le32 *)(&((_tcd)->_field)) = cpu_to_le32(_val);             \
+               break;                                                                  \
+       case sizeof(u16):                                                               \
+               *(__force __le16 *)(&((_tcd)->_field)) = cpu_to_le16(_val);             \
+               break;                                                                  \
+       }                                                                               \
+} while (0)
+
+#define fsl_edma_set_tcd_to_le(_chan, _tcd, _val, _field)      \
+do {                                                           \
+       if (fsl_edma_drvflags(_chan) & FSL_EDMA_DRV_TCD64)      \
+               fsl_edma_set_tcd_to_le_c((struct fsl_edma_hw_tcd64 *)_tcd, _val, _field);       \
+       else                                                                                    \
+               fsl_edma_set_tcd_to_le_c((struct fsl_edma_hw_tcd *)_tcd, _val, _field);         \
+} while (0)
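
The TCD accessor macros above dispatch on sizeof() of the target register field. Because sizeof is a compile-time constant, the compiler resolves each access to a single fixed-width helper with no runtime branching. A minimal standalone sketch of the same pattern (illustrative only; the struct, field, and macro names here are invented):

#include <stdio.h>
#include <stdint.h>

struct hw_regs {
	uint16_t attr;   /* 16-bit register */
	uint32_t saddr;  /* 32-bit register */
	uint64_t daddr;  /* 64-bit register */
};

/* Dispatch on field width at compile time, as the TCD macros do. */
#define reg_width(_regs, _field)                                   \
	(sizeof((_regs)->_field) == sizeof(uint64_t) ? 64 :        \
	 sizeof((_regs)->_field) == sizeof(uint32_t) ? 32 : 16)

int main(void)
{
	struct hw_regs r;

	/* Each expression folds to a constant; no branch survives. */
	printf("attr=%d saddr=%d daddr=%d\n",
	       reg_width(&r, attr), reg_width(&r, saddr), reg_width(&r, daddr));
	return 0;
}
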
 
 /*
  * R/W functions for big- or little-endian registers:
@@ -253,6 +359,21 @@ struct fsl_edma_engine {
  * For the big-endian IP module, the offset for 8-bit or 16-bit registers
  * should also be swapped opposite to that in little-endian IP.
  */
+static inline u64 edma_readq(struct fsl_edma_engine *edma, void __iomem *addr)
+{
+       u64 l, h;
+
+       if (edma->big_endian) {
+               l = ioread32be(addr);
+               h = ioread32be(addr + 4);
+       } else {
+               l = ioread32(addr);
+               h = ioread32(addr + 4);
+       }
+
+       return (h << 32) | l;
+}
+
 static inline u32 edma_readl(struct fsl_edma_engine *edma, void __iomem *addr)
 {
        if (edma->big_endian)
@@ -298,6 +419,18 @@ static inline void edma_writel(struct fsl_edma_engine *edma,
                iowrite32(val, addr);
 }
 
+static inline void edma_writeq(struct fsl_edma_engine *edma,
+                              u64 val, void __iomem *addr)
+{
+       if (edma->big_endian) {
+               iowrite32be(val & 0xFFFFFFFF, addr);
+               iowrite32be(val >> 32, addr + 4);
+       } else {
+               iowrite32(val & 0xFFFFFFFF, addr);
+               iowrite32(val >> 32, addr + 4);
+       }
+}
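
edma_readq() and edma_writeq() emulate a 64-bit register access with two 32-bit operations, low word at offset 0 and high word at offset 4. A hedged standalone sketch of the split-and-recombine arithmetic, using a plain array in place of MMIO:

#include <stdio.h>
#include <stdint.h>

static uint32_t mmio[2]; /* stand-in for a 64-bit little-endian register pair */

static void writeq_split(uint64_t val)
{
	mmio[0] = (uint32_t)(val & 0xFFFFFFFF); /* low word at offset 0 */
	mmio[1] = (uint32_t)(val >> 32);        /* high word at offset 4 */
}

static uint64_t readq_split(void)
{
	uint64_t l = mmio[0], h = mmio[1];

	return (h << 32) | l;                   /* same recombination as edma_readq() */
}

int main(void)
{
	writeq_split(0x1122334455667788ULL);
	printf("0x%llx\n", (unsigned long long)readq_split());
	return 0;
}
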
+
 static inline struct fsl_edma_chan *to_fsl_edma_chan(struct dma_chan *chan)
 {
        return container_of(chan, struct fsl_edma_chan, vchan.chan);
index d36e28b9c767ae7ebb44bc9e87de7bbc0363f926..402f0058a180c8042ff35eea5f556c9e5d648df6 100644 (file)
@@ -360,6 +360,18 @@ static struct fsl_edma_drvdata imx93_data4 = {
        .flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA4,
        .chreg_space_sz = 0x8000,
        .chreg_off = 0x10000,
+       .mux_off = 0x10000 + offsetof(struct fsl_edma3_ch_reg, ch_mux),
+       .mux_skip = 0x8000,
+       .setup_irq = fsl_edma3_irq_init,
+};
+
+static struct fsl_edma_drvdata imx95_data5 = {
+       .flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK | FSL_EDMA_DRV_EDMA4 |
+                FSL_EDMA_DRV_TCD64,
+       .chreg_space_sz = 0x8000,
+       .chreg_off = 0x10000,
+       .mux_off = 0x200,
+       .mux_skip = sizeof(u32),
        .setup_irq = fsl_edma3_irq_init,
 };
 
@@ -371,6 +383,7 @@ static const struct of_device_id fsl_edma_dt_ids[] = {
        { .compatible = "fsl,imx8qm-adma", .data = &imx8qm_audio_data},
        { .compatible = "fsl,imx93-edma3", .data = &imx93_data3},
        { .compatible = "fsl,imx93-edma4", .data = &imx93_data4},
+       { .compatible = "fsl,imx95-edma5", .data = &imx95_data5},
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, fsl_edma_dt_ids);
@@ -511,6 +524,9 @@ static int fsl_edma_probe(struct platform_device *pdev)
                        return ret;
        }
 
+       if (drvdata->flags & FSL_EDMA_DRV_TCD64)
+               dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+
        INIT_LIST_HEAD(&fsl_edma->dma_dev.channels);
        for (i = 0; i < fsl_edma->n_chans; i++) {
                struct fsl_edma_chan *fsl_chan = &fsl_edma->chans[i];
@@ -533,11 +549,12 @@ static int fsl_edma_probe(struct platform_device *pdev)
                                offsetof(struct fsl_edma3_ch_reg, tcd) : 0;
                fsl_chan->tcd = fsl_edma->membase
                                + i * drvdata->chreg_space_sz + drvdata->chreg_off + len;
+               fsl_chan->mux_addr = fsl_edma->membase + drvdata->mux_off + i * drvdata->mux_skip;
 
                fsl_chan->pdev = pdev;
                vchan_init(&fsl_chan->vchan, &fsl_edma->dma_dev);
 
-               edma_write_tcdreg(fsl_chan, 0, csr);
+               edma_write_tcdreg(fsl_chan, cpu_to_le32(0), csr);
                fsl_edma_chan_mux(fsl_chan, 0, false);
        }
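
Each channel's TCD and mux addresses above are derived as base plus a fixed offset plus channel index times a stride. A small sketch of that address arithmetic; the base address is made up, and the offsets merely mirror the imx93 values for illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uintptr_t membase = 0x42000000;               /* hypothetical ioremapped base */
	uintptr_t chreg_off = 0x10000, chreg_space_sz = 0x8000;
	uintptr_t mux_off = 0x10000, mux_skip = 0x8000;

	for (int i = 0; i < 3; i++)
		printf("ch%d tcd=%#lx mux=%#lx\n", i,
		       (unsigned long)(membase + i * chreg_space_sz + chreg_off),
		       (unsigned long)(membase + mux_off + i * mux_skip));
	return 0;
}
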
 
index 0c9e689a2e77dbdfdc3cd807869ec5bef957de5d..b83b27e04f2a410e6ce14db8d398df17c84a1c02 100644 (file)
@@ -72,7 +72,7 @@ static int idxd_bus_uevent(const struct device *dev, struct kobj_uevent_env *env
        return add_uevent_var(env, "MODALIAS=" IDXD_DEVICES_MODALIAS_FMT, 0);
 }
 
-struct bus_type dsa_bus_type = {
+const struct bus_type dsa_bus_type = {
        .name = "dsa",
        .match = idxd_config_bus_match,
        .probe = idxd_config_bus_probe,
index e5a94a93a3cc4e6da66aca64cc2174b20d80a7bb..8078ab9acfbc37da0c5f633b0af9bb8529a559fe 100644 (file)
@@ -152,7 +152,7 @@ static void idxd_file_dev_release(struct device *dev)
        mutex_unlock(&wq->wq_lock);
 }
 
-static struct device_type idxd_cdev_file_type = {
+static const struct device_type idxd_cdev_file_type = {
        .name = "idxd_file",
        .release = idxd_file_dev_release,
        .groups = cdev_file_attribute_groups,
@@ -169,7 +169,7 @@ static void idxd_cdev_dev_release(struct device *dev)
        kfree(idxd_cdev);
 }
 
-static struct device_type idxd_cdev_device_type = {
+static const struct device_type idxd_cdev_device_type = {
        .name = "idxd_cdev",
        .release = idxd_cdev_dev_release,
 };
index d0f5db6cf1eda103db09c31449cf3a58d58b7971..a4099a1e2340fde5271e1dc29e9a5b5c224b8641 100644 (file)
@@ -282,7 +282,7 @@ typedef int (*load_device_defaults_fn_t) (struct idxd_device *idxd);
 struct idxd_driver_data {
        const char *name_prefix;
        enum idxd_type type;
-       struct device_type *dev_type;
+       const struct device_type *dev_type;
        int compl_size;
        int align;
        int evl_cr_off;
@@ -515,15 +515,15 @@ static inline void idxd_set_user_intr(struct idxd_device *idxd, bool enable)
        iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
 }
 
-extern struct bus_type dsa_bus_type;
+extern const struct bus_type dsa_bus_type;
 
 extern bool support_enqcmd;
 extern struct ida idxd_ida;
-extern struct device_type dsa_device_type;
-extern struct device_type iax_device_type;
-extern struct device_type idxd_wq_device_type;
-extern struct device_type idxd_engine_device_type;
-extern struct device_type idxd_group_device_type;
+extern const struct device_type dsa_device_type;
+extern const struct device_type iax_device_type;
+extern const struct device_type idxd_wq_device_type;
+extern const struct device_type idxd_engine_device_type;
+extern const struct device_type idxd_group_device_type;
 
 static inline bool is_dsa_dev(struct idxd_dev *idxd_dev)
 {
index 523ae0dff7d4aaafe80998b65de1eddecc4fcd76..7f28f01be672b66c9d470f64bdc3476c44c11e3b 100644 (file)
@@ -91,7 +91,7 @@ static void idxd_conf_engine_release(struct device *dev)
        kfree(engine);
 }
 
-struct device_type idxd_engine_device_type = {
+const struct device_type idxd_engine_device_type = {
        .name = "engine",
        .release = idxd_conf_engine_release,
        .groups = idxd_engine_attribute_groups,
@@ -577,7 +577,7 @@ static void idxd_conf_group_release(struct device *dev)
        kfree(group);
 }
 
-struct device_type idxd_group_device_type = {
+const struct device_type idxd_group_device_type = {
        .name = "group",
        .release = idxd_conf_group_release,
        .groups = idxd_group_attribute_groups,
@@ -1369,7 +1369,7 @@ static void idxd_conf_wq_release(struct device *dev)
        kfree(wq);
 }
 
-struct device_type idxd_wq_device_type = {
+const struct device_type idxd_wq_device_type = {
        .name = "wq",
        .release = idxd_conf_wq_release,
        .groups = idxd_wq_attribute_groups,
@@ -1798,13 +1798,13 @@ static void idxd_conf_device_release(struct device *dev)
        kfree(idxd);
 }
 
-struct device_type dsa_device_type = {
+const struct device_type dsa_device_type = {
        .name = "dsa",
        .release = idxd_conf_device_release,
        .groups = idxd_attribute_groups,
 };
 
-struct device_type iax_device_type = {
+const struct device_type iax_device_type = {
        .name = "iax",
        .release = idxd_conf_device_release,
        .groups = idxd_attribute_groups,
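
Marking bus_type and device_type instances const, as the idxd changes above do, lets the definitions live in read-only memory and turns accidental writes into compile errors. A minimal sketch of the pattern outside the kernel (struct and field names invented):

#include <stdio.h>

struct device_type_like {
	const char *name;
	void (*release)(void *dev);
};

static void my_release(void *dev) { (void)dev; }

/* const: the definition can be placed in .rodata and never modified. */
static const struct device_type_like my_type = {
	.name = "example",
	.release = my_release,
};

int main(void)
{
	printf("%s\n", my_type.name);
	/* my_type.name = "other";  -- would now be a compile error */
	return 0;
}
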
index ab21455d9c3a48a865c2338e30eeb6e5ec1f7875..dba63178387678f4a27b05f644bb5d4c4ac402a4 100644 (file)
@@ -202,7 +202,7 @@ static int mcf_edma_probe(struct platform_device *pdev)
                vchan_init(&mcf_chan->vchan, &mcf_edma->dma_dev);
                mcf_chan->tcd = mcf_edma->membase + EDMA_TCD
                                + i * sizeof(struct fsl_edma_hw_tcd);
-               iowrite32(0x0, &mcf_chan->tcd->csr);
+               edma_write_tcdreg(mcf_chan, cpu_to_le32(0), csr);
        }
 
        iowrite32(~0, regs->inth);
index 775a7f408b9a0a292fc2006cef4a80ef575bdd6a..e588fff9f21d274a2bdc6ea24985c27ab8bdde71 100644 (file)
@@ -29,7 +29,7 @@ static DEFINE_MUTEX(of_dma_lock);
  * to the DMA data stored is returned. A NULL pointer is returned if no match is
  * found.
  */
-static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec)
+static struct of_dma *of_dma_find_controller(const struct of_phandle_args *dma_spec)
 {
        struct of_dma *ofdma;
 
index c29744bfdf2c2afc4ae6a7b6fdcd963a19db2977..5f6d7f1e095f906ec22b4a4b6f67cc7309242994 100644 (file)
@@ -2588,6 +2588,7 @@ static struct dma_pl330_desc *pluck_desc(struct list_head *pool,
 
                desc->status = PREP;
                desc->txd.callback = NULL;
+               desc->txd.callback_result = NULL;
        }
 
        spin_unlock_irqrestore(lock, flags);
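
The pl330 change above clears callback_result when a descriptor is plucked from the free pool; without it, a recycled descriptor could carry a stale callback from its previous user. A hedged sketch of the hazard with a generic pooled object:

#include <stdio.h>
#include <stddef.h>

struct desc {
	void (*callback)(void *);
	void (*callback_result)(void *);
};

static void stale_cb(void *p) { (void)p; }

/* Reset every user-settable field when recycling, mirroring pluck_desc(). */
static void recycle(struct desc *d)
{
	d->callback = NULL;
	d->callback_result = NULL;
}

int main(void)
{
	struct desc d = { .callback_result = stale_cb }; /* left over from last user */

	recycle(&d);
	printf("%s\n", d.callback_result ? "stale callback!" : "clean");
	return 0;
}
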
index 1d5430fc5724d26640e56b9fe51ee44134349521..ba08bdcdcd2b64cdb6d38cbfa2f64a0b5d8c9e61 100644 (file)
                },                                      \
        }
 
+#define PSIL_CSI2RX(x)                                 \
+       {                                               \
+               .thread_id = x,                         \
+               .ep_config = {                          \
+                       .ep_type = PSIL_EP_NATIVE,      \
+               },                                      \
+       }
+
 /* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
 static struct psil_ep j721s2_src_ep_map[] = {
        /* PDMA_MCASP - McASP0-4 */
@@ -114,6 +122,71 @@ static struct psil_ep j721s2_src_ep_map[] = {
        PSIL_PDMA_XY_PKT(0x4707),
        PSIL_PDMA_XY_PKT(0x4708),
        PSIL_PDMA_XY_PKT(0x4709),
+       /* CSI2RX */
+       PSIL_CSI2RX(0x4940),
+       PSIL_CSI2RX(0x4941),
+       PSIL_CSI2RX(0x4942),
+       PSIL_CSI2RX(0x4943),
+       PSIL_CSI2RX(0x4944),
+       PSIL_CSI2RX(0x4945),
+       PSIL_CSI2RX(0x4946),
+       PSIL_CSI2RX(0x4947),
+       PSIL_CSI2RX(0x4948),
+       PSIL_CSI2RX(0x4949),
+       PSIL_CSI2RX(0x494a),
+       PSIL_CSI2RX(0x494b),
+       PSIL_CSI2RX(0x494c),
+       PSIL_CSI2RX(0x494d),
+       PSIL_CSI2RX(0x494e),
+       PSIL_CSI2RX(0x494f),
+       PSIL_CSI2RX(0x4950),
+       PSIL_CSI2RX(0x4951),
+       PSIL_CSI2RX(0x4952),
+       PSIL_CSI2RX(0x4953),
+       PSIL_CSI2RX(0x4954),
+       PSIL_CSI2RX(0x4955),
+       PSIL_CSI2RX(0x4956),
+       PSIL_CSI2RX(0x4957),
+       PSIL_CSI2RX(0x4958),
+       PSIL_CSI2RX(0x4959),
+       PSIL_CSI2RX(0x495a),
+       PSIL_CSI2RX(0x495b),
+       PSIL_CSI2RX(0x495c),
+       PSIL_CSI2RX(0x495d),
+       PSIL_CSI2RX(0x495e),
+       PSIL_CSI2RX(0x495f),
+       PSIL_CSI2RX(0x4960),
+       PSIL_CSI2RX(0x4961),
+       PSIL_CSI2RX(0x4962),
+       PSIL_CSI2RX(0x4963),
+       PSIL_CSI2RX(0x4964),
+       PSIL_CSI2RX(0x4965),
+       PSIL_CSI2RX(0x4966),
+       PSIL_CSI2RX(0x4967),
+       PSIL_CSI2RX(0x4968),
+       PSIL_CSI2RX(0x4969),
+       PSIL_CSI2RX(0x496a),
+       PSIL_CSI2RX(0x496b),
+       PSIL_CSI2RX(0x496c),
+       PSIL_CSI2RX(0x496d),
+       PSIL_CSI2RX(0x496e),
+       PSIL_CSI2RX(0x496f),
+       PSIL_CSI2RX(0x4970),
+       PSIL_CSI2RX(0x4971),
+       PSIL_CSI2RX(0x4972),
+       PSIL_CSI2RX(0x4973),
+       PSIL_CSI2RX(0x4974),
+       PSIL_CSI2RX(0x4975),
+       PSIL_CSI2RX(0x4976),
+       PSIL_CSI2RX(0x4977),
+       PSIL_CSI2RX(0x4978),
+       PSIL_CSI2RX(0x4979),
+       PSIL_CSI2RX(0x497a),
+       PSIL_CSI2RX(0x497b),
+       PSIL_CSI2RX(0x497c),
+       PSIL_CSI2RX(0x497d),
+       PSIL_CSI2RX(0x497e),
+       PSIL_CSI2RX(0x497f),
        /* MAIN SA2UL */
        PSIL_SA2UL(0x4a40, 0),
        PSIL_SA2UL(0x4a41, 0),
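
Each PSIL_CSI2RX(x) entry above expands to a native PSI-L endpoint keyed by its thread ID. A standalone sketch of the same table-building idiom (all names here are stand-ins, not the kernel's):

#include <stdio.h>

enum ep_type_like { EP_NATIVE_LIKE }; /* stand-in for PSIL_EP_NATIVE */

struct psil_ep_like {
	unsigned int thread_id;
	enum ep_type_like ep_type;
};

#define CSI2RX_EP(x) { .thread_id = (x), .ep_type = EP_NATIVE_LIKE }

static const struct psil_ep_like eps[] = {
	CSI2RX_EP(0x4940),
	CSI2RX_EP(0x4941),
};

int main(void)
{
	for (unsigned int i = 0; i < sizeof(eps) / sizeof(eps[0]); i++)
		printf("thread 0x%04x native\n", eps[i].thread_id);
	return 0;
}
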
index c278d5facf7d8bc6696e32d7f7ebc38c58b67afc..c9b93055dc9d3d7a003474b815317a9587401d47 100644 (file)
@@ -111,6 +111,35 @@ static int of_k3_udma_glue_parse(struct device_node *udmax_np,
        return 0;
 }
 
+static int of_k3_udma_glue_parse_chn_common(struct k3_udma_glue_common *common, u32 thread_id,
+                                           bool tx_chn)
+{
+       if (tx_chn && !(thread_id & K3_PSIL_DST_THREAD_ID_OFFSET))
+               return -EINVAL;
+
+       if (!tx_chn && (thread_id & K3_PSIL_DST_THREAD_ID_OFFSET))
+               return -EINVAL;
+
+       /* get psil endpoint config */
+       common->ep_config = psil_get_ep_config(thread_id);
+       if (IS_ERR(common->ep_config)) {
+               dev_err(common->dev,
+                       "No configuration for psi-l thread 0x%04x\n",
+                       thread_id);
+               return PTR_ERR(common->ep_config);
+       }
+
+       common->epib = common->ep_config->needs_epib;
+       common->psdata_size = common->ep_config->psd_size;
+
+       if (tx_chn)
+               common->dst_thread = thread_id;
+       else
+               common->src_thread = thread_id;
+
+       return 0;
+}
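
The common parser above validates channel direction against the destination bit in the PSI-L thread ID: a TX channel must address a destination thread and an RX channel must not. A standalone sketch of that check; the 0x8000 offset value is an assumption for illustration, not taken from this diff:

#include <stdio.h>
#include <stdbool.h>

#define DST_THREAD_ID_OFFSET 0x8000 /* assumed value of K3_PSIL_DST_THREAD_ID_OFFSET */

static int check_direction(unsigned int thread_id, bool tx_chn)
{
	if (tx_chn && !(thread_id & DST_THREAD_ID_OFFSET))
		return -1; /* TX must target a destination thread */
	if (!tx_chn && (thread_id & DST_THREAD_ID_OFFSET))
		return -1; /* RX must be a source thread */
	return 0;
}

int main(void)
{
	printf("tx 0xc940: %d\n", check_direction(0xc940, true));  /* ok */
	printf("rx 0x4940: %d\n", check_direction(0x4940, false)); /* ok */
	printf("tx 0x4940: %d\n", check_direction(0x4940, true));  /* rejected */
	return 0;
}
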
+
 static int of_k3_udma_glue_parse_chn(struct device_node *chn_np,
                const char *name, struct k3_udma_glue_common *common,
                bool tx_chn)
@@ -153,38 +182,32 @@ static int of_k3_udma_glue_parse_chn(struct device_node *chn_np,
                common->atype_asel = dma_spec.args[1];
        }
 
-       if (tx_chn && !(thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) {
-               ret = -EINVAL;
-               goto out_put_spec;
-       }
+       ret = of_k3_udma_glue_parse_chn_common(common, thread_id, tx_chn);
 
-       if (!tx_chn && (thread_id & K3_PSIL_DST_THREAD_ID_OFFSET)) {
-               ret = -EINVAL;
-               goto out_put_spec;
-       }
+out_put_spec:
+       of_node_put(dma_spec.np);
+       return ret;
+}
 
-       /* get psil endpoint config */
-       common->ep_config = psil_get_ep_config(thread_id);
-       if (IS_ERR(common->ep_config)) {
-               dev_err(common->dev,
-                       "No configuration for psi-l thread 0x%04x\n",
-                       thread_id);
-               ret = PTR_ERR(common->ep_config);
-               goto out_put_spec;
-       }
+static int
+of_k3_udma_glue_parse_chn_by_id(struct device_node *udmax_np, struct k3_udma_glue_common *common,
+                               bool tx_chn, u32 thread_id)
+{
+       int ret = 0;
 
-       common->epib = common->ep_config->needs_epib;
-       common->psdata_size = common->ep_config->psd_size;
+       if (unlikely(!udmax_np))
+               return -EINVAL;
 
-       if (tx_chn)
-               common->dst_thread = thread_id;
-       else
-               common->src_thread = thread_id;
+       ret = of_k3_udma_glue_parse(udmax_np, common);
+       if (ret)
+               goto out_put_spec;
+
+       ret = of_k3_udma_glue_parse_chn_common(common, thread_id, tx_chn);
 
 out_put_spec:
-       of_node_put(dma_spec.np);
+       of_node_put(udmax_np);
        return ret;
-};
+}
 
 static void k3_udma_glue_dump_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
 {
@@ -251,29 +274,13 @@ static int k3_udma_glue_cfg_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
        return tisci_rm->tisci_udmap_ops->tx_ch_cfg(tisci_rm->tisci, &req);
 }
 
-struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
-               const char *name, struct k3_udma_glue_tx_channel_cfg *cfg)
+static int
+k3_udma_glue_request_tx_chn_common(struct device *dev,
+                                  struct k3_udma_glue_tx_channel *tx_chn,
+                                  struct k3_udma_glue_tx_channel_cfg *cfg)
 {
-       struct k3_udma_glue_tx_channel *tx_chn;
        int ret;
 
-       tx_chn = devm_kzalloc(dev, sizeof(*tx_chn), GFP_KERNEL);
-       if (!tx_chn)
-               return ERR_PTR(-ENOMEM);
-
-       tx_chn->common.dev = dev;
-       tx_chn->common.swdata_size = cfg->swdata_size;
-       tx_chn->tx_pause_on_err = cfg->tx_pause_on_err;
-       tx_chn->tx_filt_einfo = cfg->tx_filt_einfo;
-       tx_chn->tx_filt_pswords = cfg->tx_filt_pswords;
-       tx_chn->tx_supr_tdpkt = cfg->tx_supr_tdpkt;
-
-       /* parse of udmap channel */
-       ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
-                                       &tx_chn->common, true);
-       if (ret)
-               goto err;
-
        tx_chn->common.hdesc_size = cppi5_hdesc_calc_size(tx_chn->common.epib,
                                                tx_chn->common.psdata_size,
                                                tx_chn->common.swdata_size);
@@ -289,7 +296,7 @@ struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
        if (IS_ERR(tx_chn->udma_tchanx)) {
                ret = PTR_ERR(tx_chn->udma_tchanx);
                dev_err(dev, "UDMAX tchanx get err %d\n", ret);
-               goto err;
+               return ret;
        }
        tx_chn->udma_tchan_id = xudma_tchan_get_id(tx_chn->udma_tchanx);
 
@@ -302,7 +309,7 @@ struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
                dev_err(dev, "Channel Device registration failed %d\n", ret);
                put_device(&tx_chn->common.chan_dev);
                tx_chn->common.chan_dev.parent = NULL;
-               goto err;
+               return ret;
        }
 
        if (xudma_is_pktdma(tx_chn->common.udmax)) {
@@ -326,7 +333,7 @@ struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
                                             &tx_chn->ringtxcq);
        if (ret) {
                dev_err(dev, "Failed to get TX/TXCQ rings %d\n", ret);
-               goto err;
+               return ret;
        }
 
        /* Set the dma_dev for the rings to be configured */
@@ -342,13 +349,13 @@ struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
        ret = k3_ringacc_ring_cfg(tx_chn->ringtx, &cfg->tx_cfg);
        if (ret) {
                dev_err(dev, "Failed to cfg ringtx %d\n", ret);
-               goto err;
+               return ret;
        }
 
        ret = k3_ringacc_ring_cfg(tx_chn->ringtxcq, &cfg->txcq_cfg);
        if (ret) {
                dev_err(dev, "Failed to cfg ringtx %d\n", ret);
-               goto err;
+               return ret;
        }
 
        /* request and cfg psi-l */
@@ -359,11 +366,42 @@ struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
        ret = k3_udma_glue_cfg_tx_chn(tx_chn);
        if (ret) {
                dev_err(dev, "Failed to cfg tchan %d\n", ret);
-               goto err;
+               return ret;
        }
 
        k3_udma_glue_dump_tx_chn(tx_chn);
 
+       return 0;
+}
+
+struct k3_udma_glue_tx_channel *
+k3_udma_glue_request_tx_chn(struct device *dev, const char *name,
+                           struct k3_udma_glue_tx_channel_cfg *cfg)
+{
+       struct k3_udma_glue_tx_channel *tx_chn;
+       int ret;
+
+       tx_chn = devm_kzalloc(dev, sizeof(*tx_chn), GFP_KERNEL);
+       if (!tx_chn)
+               return ERR_PTR(-ENOMEM);
+
+       tx_chn->common.dev = dev;
+       tx_chn->common.swdata_size = cfg->swdata_size;
+       tx_chn->tx_pause_on_err = cfg->tx_pause_on_err;
+       tx_chn->tx_filt_einfo = cfg->tx_filt_einfo;
+       tx_chn->tx_filt_pswords = cfg->tx_filt_pswords;
+       tx_chn->tx_supr_tdpkt = cfg->tx_supr_tdpkt;
+
+       /* parse of udmap channel */
+       ret = of_k3_udma_glue_parse_chn(dev->of_node, name,
+                                       &tx_chn->common, true);
+       if (ret)
+               goto err;
+
+       ret = k3_udma_glue_request_tx_chn_common(dev, tx_chn, cfg);
+       if (ret)
+               goto err;
+
        return tx_chn;
 
 err:
@@ -372,6 +410,41 @@ err:
 }
 EXPORT_SYMBOL_GPL(k3_udma_glue_request_tx_chn);
 
+struct k3_udma_glue_tx_channel *
+k3_udma_glue_request_tx_chn_for_thread_id(struct device *dev,
+                                         struct k3_udma_glue_tx_channel_cfg *cfg,
+                                         struct device_node *udmax_np, u32 thread_id)
+{
+       struct k3_udma_glue_tx_channel *tx_chn;
+       int ret;
+
+       tx_chn = devm_kzalloc(dev, sizeof(*tx_chn), GFP_KERNEL);
+       if (!tx_chn)
+               return ERR_PTR(-ENOMEM);
+
+       tx_chn->common.dev = dev;
+       tx_chn->common.swdata_size = cfg->swdata_size;
+       tx_chn->tx_pause_on_err = cfg->tx_pause_on_err;
+       tx_chn->tx_filt_einfo = cfg->tx_filt_einfo;
+       tx_chn->tx_filt_pswords = cfg->tx_filt_pswords;
+       tx_chn->tx_supr_tdpkt = cfg->tx_supr_tdpkt;
+
+       ret = of_k3_udma_glue_parse_chn_by_id(udmax_np, &tx_chn->common, true, thread_id);
+       if (ret)
+               goto err;
+
+       ret = k3_udma_glue_request_tx_chn_common(dev, tx_chn, cfg);
+       if (ret)
+               goto err;
+
+       return tx_chn;
+
+err:
+       k3_udma_glue_release_tx_chn(tx_chn);
+       return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(k3_udma_glue_request_tx_chn_for_thread_id);
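
A hedged usage sketch for the new k3_udma_glue_request_tx_chn_for_thread_id() export follows. The device, node, thread ID, and swdata size are placeholders, and a real caller would also fill the ring configuration fields of the cfg structure:

#include <linux/err.h>
#include <linux/of.h>
#include <linux/dma/k3-udma-glue.h>

/* Illustrative only: a hypothetical consumer requesting a TX channel
 * directly by PSI-L thread ID instead of by a "dmas" property name.
 */
static int example_request_tx(struct device *dev, struct device_node *udma_np)
{
	struct k3_udma_glue_tx_channel_cfg cfg = {
		.swdata_size = 16,	/* hypothetical */
	};
	struct k3_udma_glue_tx_channel *tx;

	tx = k3_udma_glue_request_tx_chn_for_thread_id(dev, &cfg, udma_np,
						       0xc940 /* hypothetical */);
	if (IS_ERR(tx))
		return PTR_ERR(tx);

	return 0;
}
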
+
 void k3_udma_glue_release_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
 {
        if (tx_chn->psil_paired) {
@@ -1000,12 +1073,59 @@ err:
        return ERR_PTR(ret);
 }
 
+static int
+k3_udma_glue_request_remote_rx_chn_common(struct k3_udma_glue_rx_channel *rx_chn,
+                                         struct k3_udma_glue_rx_channel_cfg *cfg,
+                                         struct device *dev)
+{
+       int ret, i;
+
+       rx_chn->common.hdesc_size = cppi5_hdesc_calc_size(rx_chn->common.epib,
+                                               rx_chn->common.psdata_size,
+                                               rx_chn->common.swdata_size);
+
+       rx_chn->flows = devm_kcalloc(dev, rx_chn->flow_num,
+                                    sizeof(*rx_chn->flows), GFP_KERNEL);
+       if (!rx_chn->flows)
+               return -ENOMEM;
+
+       rx_chn->common.chan_dev.class = &k3_udma_glue_devclass;
+       rx_chn->common.chan_dev.parent = xudma_get_device(rx_chn->common.udmax);
+       dev_set_name(&rx_chn->common.chan_dev, "rchan_remote-0x%04x-0x%02x",
+                    rx_chn->common.src_thread, rx_chn->flow_id_base);
+       ret = device_register(&rx_chn->common.chan_dev);
+       if (ret) {
+               dev_err(dev, "Channel Device registration failed %d\n", ret);
+               put_device(&rx_chn->common.chan_dev);
+               rx_chn->common.chan_dev.parent = NULL;
+               return ret;
+       }
+
+       if (xudma_is_pktdma(rx_chn->common.udmax)) {
+               /* prepare the channel device as coherent */
+               rx_chn->common.chan_dev.dma_coherent = true;
+               dma_coerce_mask_and_coherent(&rx_chn->common.chan_dev,
+                                            DMA_BIT_MASK(48));
+       }
+
+       ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg);
+       if (ret)
+               return ret;
+
+       for (i = 0; i < rx_chn->flow_num; i++)
+               rx_chn->flows[i].udma_rflow_id = rx_chn->flow_id_base + i;
+
+       k3_udma_glue_dump_rx_chn(rx_chn);
+
+       return 0;
+}
+
 static struct k3_udma_glue_rx_channel *
 k3_udma_glue_request_remote_rx_chn(struct device *dev, const char *name,
                                   struct k3_udma_glue_rx_channel_cfg *cfg)
 {
        struct k3_udma_glue_rx_channel *rx_chn;
-       int ret, i;
+       int ret;
 
        if (cfg->flow_id_num <= 0 ||
            cfg->flow_id_use_rxchan_id ||
@@ -1036,44 +1156,55 @@ k3_udma_glue_request_remote_rx_chn(struct device *dev, const char *name,
        if (ret)
                goto err;
 
-       rx_chn->common.hdesc_size = cppi5_hdesc_calc_size(rx_chn->common.epib,
-                                               rx_chn->common.psdata_size,
-                                               rx_chn->common.swdata_size);
-
-       rx_chn->flows = devm_kcalloc(dev, rx_chn->flow_num,
-                                    sizeof(*rx_chn->flows), GFP_KERNEL);
-       if (!rx_chn->flows) {
-               ret = -ENOMEM;
+       ret = k3_udma_glue_request_remote_rx_chn_common(rx_chn, cfg, dev);
+       if (ret)
                goto err;
-       }
 
-       rx_chn->common.chan_dev.class = &k3_udma_glue_devclass;
-       rx_chn->common.chan_dev.parent = xudma_get_device(rx_chn->common.udmax);
-       dev_set_name(&rx_chn->common.chan_dev, "rchan_remote-0x%04x",
-                    rx_chn->common.src_thread);
-       ret = device_register(&rx_chn->common.chan_dev);
-       if (ret) {
-               dev_err(dev, "Channel Device registration failed %d\n", ret);
-               put_device(&rx_chn->common.chan_dev);
-               rx_chn->common.chan_dev.parent = NULL;
-               goto err;
-       }
+       return rx_chn;
 
-       if (xudma_is_pktdma(rx_chn->common.udmax)) {
-               /* prepare the channel device as coherent */
-               rx_chn->common.chan_dev.dma_coherent = true;
-               dma_coerce_mask_and_coherent(&rx_chn->common.chan_dev,
-                                            DMA_BIT_MASK(48));
-       }
+err:
+       k3_udma_glue_release_rx_chn(rx_chn);
+       return ERR_PTR(ret);
+}
 
-       ret = k3_udma_glue_allocate_rx_flows(rx_chn, cfg);
+struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_remote_rx_chn_for_thread_id(struct device *dev,
+                                                struct k3_udma_glue_rx_channel_cfg *cfg,
+                                                struct device_node *udmax_np, u32 thread_id)
+{
+       struct k3_udma_glue_rx_channel *rx_chn;
+       int ret;
+
+       if (cfg->flow_id_num <= 0 ||
+           cfg->flow_id_use_rxchan_id ||
+           cfg->def_flow_cfg ||
+           cfg->flow_id_base < 0)
+               return ERR_PTR(-EINVAL);
+
+       /*
+        * Remote RX channel is under control of a remote CPU core, so
+        * Linux can only request and manipulate it via dedicated RX flows
+        */
+
+       rx_chn = devm_kzalloc(dev, sizeof(*rx_chn), GFP_KERNEL);
+       if (!rx_chn)
+               return ERR_PTR(-ENOMEM);
+
+       rx_chn->common.dev = dev;
+       rx_chn->common.swdata_size = cfg->swdata_size;
+       rx_chn->remote = true;
+       rx_chn->udma_rchan_id = -1;
+       rx_chn->flow_num = cfg->flow_id_num;
+       rx_chn->flow_id_base = cfg->flow_id_base;
+       rx_chn->psil_paired = false;
+
+       ret = of_k3_udma_glue_parse_chn_by_id(udmax_np, &rx_chn->common, false, thread_id);
        if (ret)
                goto err;
 
-       for (i = 0; i < rx_chn->flow_num; i++)
-               rx_chn->flows[i].udma_rflow_id = rx_chn->flow_id_base + i;
-
-       k3_udma_glue_dump_rx_chn(rx_chn);
+       ret = k3_udma_glue_request_remote_rx_chn_common(rx_chn, cfg, dev);
+       if (ret)
+               goto err;
 
        return rx_chn;
 
@@ -1081,6 +1212,7 @@ err:
        k3_udma_glue_release_rx_chn(rx_chn);
        return ERR_PTR(ret);
 }
+EXPORT_SYMBOL_GPL(k3_udma_glue_request_remote_rx_chn_for_thread_id);
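
And the matching remote-RX sketch, assuming the same includes as the TX example above, again with placeholder flow IDs, swdata size, and thread ID:

/* Illustrative only: requesting the remote RX side by PSI-L thread ID. */
static struct k3_udma_glue_rx_channel *
example_request_rx(struct device *dev, struct device_node *udma_np)
{
	struct k3_udma_glue_rx_channel_cfg cfg = {
		.swdata_size = 16,	/* hypothetical */
		.flow_id_base = 0,	/* hypothetical */
		.flow_id_num = 1,
	};

	return k3_udma_glue_request_remote_rx_chn_for_thread_id(dev, &cfg,
								udma_np, 0x4940);
}
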
 
 struct k3_udma_glue_rx_channel *
 k3_udma_glue_request_rx_chn(struct device *dev, const char *name,
index e40696f6f8647de7f3d869be1684891725f903d7..5eb51ae93e89d0b2aed30dc470b31f198e078912 100644 (file)
 
 /* Register Direct Mode Registers */
 #define XILINX_DMA_REG_VSIZE                   0x0000
+#define XILINX_DMA_VSIZE_MASK                  GENMASK(12, 0)
 #define XILINX_DMA_REG_HSIZE                   0x0004
+#define XILINX_DMA_HSIZE_MASK                  GENMASK(15, 0)
 
 #define XILINX_DMA_REG_FRMDLY_STRIDE           0x0008
 #define XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT  24
@@ -2050,6 +2052,10 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan,
        if (!xt->numf || !xt->sgl[0].size)
                return NULL;
 
+       if (xt->numf & ~XILINX_DMA_VSIZE_MASK ||
+           xt->sgl[0].size & ~XILINX_DMA_HSIZE_MASK)
+               return NULL;
+
        if (xt->frame_size != 1)
                return NULL;
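
The new check above rejects interleaved templates whose vertical or horizontal size has bits outside the 13-bit VSIZE or 16-bit HSIZE register fields. A standalone sketch of the mask test, with GENMASK() reimplemented locally for illustration:

#include <stdio.h>
#include <stdbool.h>

#define GENMASK(h, l) \
	(((~0UL) << (l)) & (~0UL >> (8 * sizeof(unsigned long) - 1 - (h))))

#define VSIZE_MASK GENMASK(12, 0) /* 13-bit field */
#define HSIZE_MASK GENMASK(15, 0) /* 16-bit field */

static bool fits(unsigned long numf, unsigned long size)
{
	/* Any bit outside the field mask means the value cannot be programmed. */
	return !(numf & ~VSIZE_MASK) && !(size & ~HSIZE_MASK);
}

int main(void)
{
	printf("%d\n", fits(4096, 65535)); /* 1: both fit */
	printf("%d\n", fits(8192, 100));   /* 0: 8192 needs 14 bits */
	return 0;
}
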
 
index 7d3346b3a2bf320910c72783ab857415d331ed14..c0976f6268d3292206e7dcba40fd487837e633ba 100644 (file)
@@ -322,7 +322,7 @@ static ssize_t show_immediate(struct device *dev,
        if (value < 0)
                return -ENOENT;
 
-       return snprintf(buf, buf ? PAGE_SIZE : 0, "0x%06x\n", value);
+       return sysfs_emit(buf, "0x%06x\n", value);
 }
 
 #define IMMEDIATE_ATTR(name, key)                              \
@@ -334,8 +334,6 @@ static ssize_t show_text_leaf(struct device *dev,
        struct config_rom_attribute *attr =
                container_of(dattr, struct config_rom_attribute, attr);
        const u32 *directories[] = {NULL, NULL};
-       size_t bufsize;
-       char dummy_buf[2];
        int i, ret = -ENOENT;
 
        down_read(&fw_device_rwsem);
@@ -357,15 +355,9 @@ static ssize_t show_text_leaf(struct device *dev,
                }
        }
 
-       if (buf) {
-               bufsize = PAGE_SIZE - 1;
-       } else {
-               buf = dummy_buf;
-               bufsize = 1;
-       }
-
        for (i = 0; i < ARRAY_SIZE(directories) && !!directories[i]; ++i) {
-               int result = fw_csr_string(directories[i], attr->key, buf, bufsize);
+               int result = fw_csr_string(directories[i], attr->key, buf,
+                                          PAGE_SIZE - 1);
                // Detected.
                if (result >= 0) {
                        ret = result;
@@ -374,7 +366,7 @@ static ssize_t show_text_leaf(struct device *dev,
                        // in the root directory follows to the directory entry for vendor ID
                        // instead of the immediate value for vendor ID.
                        result = fw_csr_string(directories[i], CSR_DIRECTORY | attr->key, buf,
-                                              bufsize);
+                                              PAGE_SIZE - 1);
                        if (result >= 0)
                                ret = result;
                }
@@ -490,7 +482,7 @@ static ssize_t is_local_show(struct device *dev,
 {
        struct fw_device *device = fw_device(dev);
 
-       return sprintf(buf, "%u\n", device->is_local);
+       return sysfs_emit(buf, "%u\n", device->is_local);
 }
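
sysfs_emit() bounds output to one page and validates the buffer itself, which is why the dummy-buffer workaround above can be dropped. A rough userspace approximation of its contract (not the kernel implementation):

#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>

#define PAGE_SIZE 4096

/* Rough model of sysfs_emit(): refuse bad buffers, bound output to a page. */
static int sysfs_emit_like(char *buf, const char *fmt, ...)
{
	va_list args;
	int len;

	if (!buf || ((uintptr_t)buf & (PAGE_SIZE - 1)))
		return 0; /* the kernel version also WARNs here */

	va_start(args, fmt);
	len = vsnprintf(buf, PAGE_SIZE, fmt, args);
	va_end(args);

	return len < PAGE_SIZE ? len : PAGE_SIZE - 1;
}

int main(void)
{
	static char page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));

	printf("%d bytes: %s", sysfs_emit_like(page, "0x%06x\n", 0x123456), page);
	return 0;
}
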
 
 static int units_sprintf(char *buf, const u32 *directory)
index 73f4810f6db38ecc933f9a6ac2bed5ae57709148..31eb1e287ce161a60da72da1b4ca30c683dec051 100644 (file)
@@ -105,7 +105,7 @@ lib-y                               := $(patsubst %.o,%.stub.o,$(lib-y))
 # Even when -mbranch-protection=none is set, Clang will generate a
 # .note.gnu.property for code-less object files (like lib/ctype.c),
 # so work around this by explicitly removing the unwanted section.
-# https://bugs.llvm.org/show_bug.cgi?id=46480
+# https://llvm.org/pr46480
 STUBCOPY_FLAGS-y               += --remove-section=.note.gnu.property
 
 STUBCOPY_RELOC-$(CONFIG_X86_32)        := R_386_32
index 4f448d4df7b822980c7cb221c281e9ed9bf74deb..57888614e90f16b34f1e8b9cedd1341eca0697e5 100644 (file)
@@ -21,6 +21,8 @@
 #include "efistub.h"
 #include "x86-stub.h"
 
+extern char _bss[], _ebss[];
+
 const efi_system_table_t *efi_system_table;
 const efi_dxe_services_table_t *efi_dxe_table;
 static efi_loaded_image_t *image = NULL;
@@ -474,6 +476,8 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
        efi_status_t status;
        char *cmdline_ptr;
 
+       memset(_bss, 0, _ebss - _bss);
+
        efi_system_table = sys_table_arg;
 
        /* Check if we were booted by the EFI firmware */
@@ -970,8 +974,6 @@ fail:
 void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
                        struct boot_params *boot_params)
 {
-       extern char _bss[], _ebss[];
-
        memset(_bss, 0, _ebss - _bss);
        efi_stub_entry(handle, sys_table_arg, boot_params);
 }
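
Zeroing .bss explicitly, as both EFI entry points now do, guards against PE loaders that do not clear it; _bss and _ebss come from the linker script. A runnable toy of the pattern with stand-in symbols:

#include <stdio.h>
#include <string.h>

/* Stand-ins for the linker-script symbols _bss[] and _ebss[]. */
static char fake_bss[64];
static char *bss_start = fake_bss;
static char *bss_end = fake_bss + sizeof(fake_bss);

int main(void)
{
	memset(bss_start, 0x5a, bss_end - bss_start); /* pretend the loader left garbage */
	memset(bss_start, 0, bss_end - bss_start);    /* what efi_pe_entry() now does */
	printf("first byte after clear: %d\n", bss_start[0]);
	return 0;
}
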
index 03da9a4354f8864492e92e5c60a676b3629b9913..5f43dfa22f799c6841e888c292bd387c95dfa4ed 100644 (file)
@@ -37,7 +37,7 @@
 #include <uapi/linux/qemu_fw_cfg.h>
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
 
 MODULE_AUTHOR("Gabriel L. Somlo <somlo@cmu.edu>");
 MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -67,7 +67,7 @@ static void fw_cfg_sel_endianness(u16 key)
                iowrite16(key, fw_cfg_reg_ctrl);
 }
 
-#ifdef CONFIG_CRASH_CORE
+#ifdef CONFIG_VMCORE_INFO
 static inline bool fw_cfg_dma_enabled(void)
 {
        return (fw_cfg_rev & FW_CFG_VERSION_DMA) && fw_cfg_reg_dma;
@@ -156,7 +156,7 @@ static ssize_t fw_cfg_read_blob(u16 key,
        return count;
 }
 
-#ifdef CONFIG_CRASH_CORE
+#ifdef CONFIG_VMCORE_INFO
 /* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
 static ssize_t fw_cfg_write_blob(u16 key,
                                 void *buf, loff_t pos, size_t count)
@@ -195,7 +195,7 @@ end:
 
        return ret;
 }
-#endif /* CONFIG_CRASH_CORE */
+#endif /* CONFIG_VMCORE_INFO */
 
 /* clean up fw_cfg device i/o */
 static void fw_cfg_io_cleanup(void)
@@ -319,7 +319,7 @@ struct fw_cfg_sysfs_entry {
        struct list_head list;
 };
 
-#ifdef CONFIG_CRASH_CORE
+#ifdef CONFIG_VMCORE_INFO
 static ssize_t fw_cfg_write_vmcoreinfo(const struct fw_cfg_file *f)
 {
        static struct fw_cfg_vmcoreinfo *data;
@@ -343,7 +343,7 @@ static ssize_t fw_cfg_write_vmcoreinfo(const struct fw_cfg_file *f)
        kfree(data);
        return ret;
 }
-#endif /* CONFIG_CRASH_CORE */
+#endif /* CONFIG_VMCORE_INFO */
 
 /* get fw_cfg_sysfs_entry from kobject member */
 static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
@@ -583,7 +583,7 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
        int err;
        struct fw_cfg_sysfs_entry *entry;
 
-#ifdef CONFIG_CRASH_CORE
+#ifdef CONFIG_VMCORE_INFO
        if (fw_cfg_dma_enabled() &&
                strcmp(f->name, FW_CFG_VMCOREINFO_FILENAME) == 0 &&
                !is_kdump_kernel()) {
index eaa4f5f499491e0acd8253c3f5e536ffd7e5a708..2d904ee72701af9f90f518bfa732d821a0a3b5ad 100644 (file)
@@ -612,7 +612,7 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
        /* Set ring buffer size in dwords */
        uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
 
-       barrier(); /* work around https://bugs.llvm.org/show_bug.cgi?id=42576 */
+       barrier(); /* work around https://llvm.org/pr42576 */
        rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
 #ifdef __BIG_ENDIAN
        rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
index a8d42c9d5d04c35989abee839211686dd2030a0d..efcf78673e7477ee6200a6bcba4886a1bc1c2a04 100644 (file)
@@ -108,7 +108,7 @@ struct dell_smm_cooling_data {
        struct dell_smm_data *data;
 };
 
-MODULE_AUTHOR("Massimo Dal Zotto (dz@debian.org)");
+MODULE_AUTHOR("Massimo Dal Zotto <dz@debian.org>");
 MODULE_AUTHOR("Pali Rohár <pali@kernel.org>");
 MODULE_DESCRIPTION("Dell laptop SMM BIOS hwmon driver");
 MODULE_LICENSE("GPL");
index 9823afb0675a0607620256ab92dc94e84a24ee06..2765d5f1b7f05c726e7fab75306ddc6ecebc605a 100644 (file)
@@ -18,7 +18,7 @@
 
 #define DRV_MODULE_VERSION     "0.1"
 
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("Ultra45 environmental monitor driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
index 28eb48dd5b3262b0ae05a0e2946b09b294e012a3..97989c914260f2893ee12ba2dca5ac202a1ed475 100644 (file)
@@ -1235,7 +1235,7 @@ config I2C_RCAR
        depends on ARCH_RENESAS || COMPILE_TEST
        select I2C_SLAVE
        select I2C_SMBUS
-       select RESET_CONTROLLER if ARCH_RCAR_GEN3
+       select RESET_CONTROLLER if ARCH_RCAR_GEN3 || ARCH_RCAR_GEN4
        help
          If you say yes to this option, support will be included for the
          R-Car I2C controller.
index de3f58b60dce5d465f75e3bc509ea449f0c927d0..4bb7d6756947cd16a9ab64295dfaa650bd58677f 100644 (file)
@@ -1176,6 +1176,18 @@ static int __maybe_unused cdns_i2c_runtime_suspend(struct device *dev)
        return 0;
 }
 
+static int __maybe_unused cdns_i2c_suspend(struct device *dev)
+{
+       struct cdns_i2c *xi2c = dev_get_drvdata(dev);
+
+       i2c_mark_adapter_suspended(&xi2c->adap);
+
+       if (!pm_runtime_status_suspended(dev))
+               return cdns_i2c_runtime_suspend(dev);
+
+       return 0;
+}
+
 /**
  * cdns_i2c_init -  Controller initialisation
  * @id:                Device private data structure
@@ -1219,7 +1231,28 @@ static int __maybe_unused cdns_i2c_runtime_resume(struct device *dev)
        return 0;
 }
 
+static int __maybe_unused cdns_i2c_resume(struct device *dev)
+{
+       struct cdns_i2c *xi2c = dev_get_drvdata(dev);
+       int err;
+
+       err = cdns_i2c_runtime_resume(dev);
+       if (err)
+               return err;
+
+       if (pm_runtime_status_suspended(dev)) {
+               err = cdns_i2c_runtime_suspend(dev);
+               if (err)
+                       return err;
+       }
+
+       i2c_mark_adapter_resumed(&xi2c->adap);
+
+       return 0;
+}
+
 static const struct dev_pm_ops cdns_i2c_dev_pm_ops = {
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(cdns_i2c_suspend, cdns_i2c_resume)
        SET_RUNTIME_PM_OPS(cdns_i2c_runtime_suspend,
                           cdns_i2c_runtime_resume, NULL)
 };
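
The new cdns_i2c system sleep hooks cooperate with runtime PM: suspend skips the power-down if runtime PM already did it, and resume re-enters runtime suspend afterwards so the PM status stays consistent. A toy model of that balancing logic (illustrative only):

#include <stdio.h>
#include <stdbool.h>

static bool runtime_suspended;

static void system_suspend(void)
{
	if (!runtime_suspended)
		printf("powering controller down\n");    /* cdns_i2c_runtime_suspend() */
	else
		printf("already runtime-suspended\n");   /* nothing to do */
}

static void system_resume(void)
{
	printf("powering controller up\n");              /* cdns_i2c_runtime_resume() */
	if (runtime_suspended)
		printf("re-entering runtime suspend\n"); /* keep PM status consistent */
}

int main(void)
{
	runtime_suspended = true;
	system_suspend();
	system_resume();
	return 0;
}
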
index 35f762872b8a58c2f7e8fd4867bb0e139aea5cf0..e8a688d04aee0fa9aa01dc4f5d1563526f354437 100644 (file)
@@ -648,7 +648,7 @@ void i2c_dw_disable(struct dw_i2c_dev *dev)
        __i2c_dw_disable(dev);
 
        /* Disable all interrupts */
-       regmap_write(dev->map, DW_IC_INTR_MASK, 0);
+       __i2c_dw_write_intr_mask(dev, 0);
        regmap_read(dev->map, DW_IC_CLR_INTR, &dummy);
 
        i2c_dw_release_lock(dev);
index a7f6f3eafad7dd72241f57a2a356219d36bfaf56..e9606c00b8d103c538034ad3038d53daeeb34563 100644 (file)
@@ -212,6 +212,7 @@ struct reset_control;
  * @msg_err: error status of the current transfer
  * @status: i2c master status, one of STATUS_*
  * @abort_source: copy of the TX_ABRT_SOURCE register
+ * @sw_mask: SW mask of DW_IC_INTR_MASK used in polling mode
  * @irq: interrupt number for the i2c master
  * @flags: platform specific flags like type of IO accessors or model
  * @adapter: i2c subsystem adapter node
@@ -270,6 +271,7 @@ struct dw_i2c_dev {
        int                     msg_err;
        unsigned int            status;
        unsigned int            abort_source;
+       unsigned int            sw_mask;
        int                     irq;
        u32                     flags;
        struct i2c_adapter      adapter;
@@ -303,6 +305,7 @@ struct dw_i2c_dev {
 #define ACCESS_INTR_MASK                       BIT(0)
 #define ACCESS_NO_IRQ_SUSPEND                  BIT(1)
 #define ARBITRATION_SEMAPHORE                  BIT(2)
+#define ACCESS_POLLING                         BIT(3)
 
 #define MODEL_MSCC_OCELOT                      BIT(8)
 #define MODEL_BAIKAL_BT1                       BIT(9)
@@ -318,7 +321,7 @@ struct dw_i2c_dev {
 #define AMD_UCSI_INTR_EN                       0xd
 
 #define TXGBE_TX_FIFO_DEPTH                    4
-#define TXGBE_RX_FIFO_DEPTH                    0
+#define TXGBE_RX_FIFO_DEPTH                    1
 
 struct i2c_dw_semaphore_callbacks {
        int     (*probe)(struct dw_i2c_dev *dev);
@@ -351,6 +354,24 @@ static inline void __i2c_dw_disable_nowait(struct dw_i2c_dev *dev)
        dev->status &= ~STATUS_ACTIVE;
 }
 
+static inline void __i2c_dw_write_intr_mask(struct dw_i2c_dev *dev,
+                                           unsigned int intr_mask)
+{
+       unsigned int val = dev->flags & ACCESS_POLLING ? 0 : intr_mask;
+
+       regmap_write(dev->map, DW_IC_INTR_MASK, val);
+       dev->sw_mask = intr_mask;
+}
+
+static inline void __i2c_dw_read_intr_mask(struct dw_i2c_dev *dev,
+                                          unsigned int *intr_mask)
+{
+       if (!(dev->flags & ACCESS_POLLING))
+               regmap_read(dev->map, DW_IC_INTR_MASK, intr_mask);
+       else
+               *intr_mask = dev->sw_mask;
+}
+
 void __i2c_dw_disable(struct dw_i2c_dev *dev);
 
 extern void i2c_dw_configure_master(struct dw_i2c_dev *dev);
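
With ACCESS_POLLING set, the hardware interrupt mask is kept at zero and the intended mask lives in dev->sw_mask, so the status-filtering code is shared between IRQ and polling modes. A standalone sketch of the shadow-mask idea (names and mask value invented):

#include <stdio.h>
#include <stdbool.h>

struct dev_like {
	bool polling;
	unsigned int hw_mask;	/* what would reach DW_IC_INTR_MASK */
	unsigned int sw_mask;	/* software copy used when polling */
};

static void write_intr_mask(struct dev_like *d, unsigned int mask)
{
	d->hw_mask = d->polling ? 0 : mask;	/* keep HW interrupts off when polling */
	d->sw_mask = mask;
}

static unsigned int read_intr_mask(const struct dev_like *d)
{
	return d->polling ? d->sw_mask : d->hw_mask;
}

int main(void)
{
	struct dev_like d = { .polling = true };

	write_intr_mask(&d, 0x244);		/* some interrupt bits; illustrative */
	printf("hw=%#x effective=%#x\n", d.hw_mask, read_intr_mask(&d));
	return 0;
}
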
index 85dbd0eb5392c53017ffec96bf3fca6646fa70c4..c7e56002809ace54d0307b3c350d0256c3bf41f1 100644 (file)
@@ -240,7 +240,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
                     msgs[dev->msg_write_idx].addr | ic_tar);
 
        /* Enforce disabled interrupts (due to HW issues) */
-       regmap_write(dev->map, DW_IC_INTR_MASK, 0);
+       __i2c_dw_write_intr_mask(dev, 0);
 
        /* Enable the adapter */
        __i2c_dw_enable(dev);
@@ -250,7 +250,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 
        /* Clear and enable interrupts */
        regmap_read(dev->map, DW_IC_CLR_INTR, &dummy);
-       regmap_write(dev->map, DW_IC_INTR_MASK, DW_IC_INTR_MASTER_MASK);
+       __i2c_dw_write_intr_mask(dev, DW_IC_INTR_MASTER_MASK);
 }
 
 static int i2c_dw_check_stopbit(struct dw_i2c_dev *dev)
@@ -300,7 +300,6 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs,
        dev->msgs = msgs;
        dev->msgs_num = num_msgs;
        i2c_dw_xfer_init(dev);
-       regmap_write(dev->map, DW_IC_INTR_MASK, 0);
 
        /* Initiate messages read/write transaction */
        for (msg_wrt_idx = 0; msg_wrt_idx < num_msgs; msg_wrt_idx++) {
@@ -355,68 +354,6 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs,
        return 0;
 }
 
-static int i2c_dw_poll_tx_empty(struct dw_i2c_dev *dev)
-{
-       u32 val;
-
-       return regmap_read_poll_timeout(dev->map, DW_IC_RAW_INTR_STAT, val,
-                                       val & DW_IC_INTR_TX_EMPTY,
-                                       100, 1000);
-}
-
-static int i2c_dw_poll_rx_full(struct dw_i2c_dev *dev)
-{
-       u32 val;
-
-       return regmap_read_poll_timeout(dev->map, DW_IC_RAW_INTR_STAT, val,
-                                       val & DW_IC_INTR_RX_FULL,
-                                       100, 1000);
-}
-
-static int txgbe_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs,
-                                  int num_msgs)
-{
-       struct dw_i2c_dev *dev = i2c_get_adapdata(adap);
-       int msg_idx, buf_len, data_idx, ret;
-       unsigned int val, stop = 0;
-       u8 *buf;
-
-       dev->msgs = msgs;
-       dev->msgs_num = num_msgs;
-       i2c_dw_xfer_init(dev);
-       regmap_write(dev->map, DW_IC_INTR_MASK, 0);
-
-       for (msg_idx = 0; msg_idx < num_msgs; msg_idx++) {
-               buf = msgs[msg_idx].buf;
-               buf_len = msgs[msg_idx].len;
-
-               for (data_idx = 0; data_idx < buf_len; data_idx++) {
-                       if (msg_idx == num_msgs - 1 && data_idx == buf_len - 1)
-                               stop |= BIT(9);
-
-                       if (msgs[msg_idx].flags & I2C_M_RD) {
-                               regmap_write(dev->map, DW_IC_DATA_CMD, 0x100 | stop);
-
-                               ret = i2c_dw_poll_rx_full(dev);
-                               if (ret)
-                                       return ret;
-
-                               regmap_read(dev->map, DW_IC_DATA_CMD, &val);
-                               buf[data_idx] = val;
-                       } else {
-                               ret = i2c_dw_poll_tx_empty(dev);
-                               if (ret)
-                                       return ret;
-
-                               regmap_write(dev->map, DW_IC_DATA_CMD,
-                                            buf[data_idx] | stop);
-                       }
-               }
-       }
-
-       return num_msgs;
-}
-
 /*
  * Initiate (and continue) low level master read/write transaction.
  * This function is only called from i2c_dw_isr, and pumping i2c_msg
@@ -546,7 +483,7 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev)
        if (dev->msg_err)
                intr_mask = 0;
 
-       regmap_write(dev->map,  DW_IC_INTR_MASK, intr_mask);
+       __i2c_dw_write_intr_mask(dev, intr_mask);
 }
 
 static u8
@@ -554,6 +491,7 @@ i2c_dw_recv_len(struct dw_i2c_dev *dev, u8 len)
 {
        struct i2c_msg *msgs = dev->msgs;
        u32 flags = msgs[dev->msg_read_idx].flags;
+       unsigned int intr_mask;
 
        /*
         * Adjust the buffer length and mask the flag
@@ -568,8 +506,9 @@ i2c_dw_recv_len(struct dw_i2c_dev *dev, u8 len)
         * Received buffer length, re-enable TX_EMPTY interrupt
         * to resume the SMBUS transaction.
         */
-       regmap_update_bits(dev->map, DW_IC_INTR_MASK, DW_IC_INTR_TX_EMPTY,
-                          DW_IC_INTR_TX_EMPTY);
+       __i2c_dw_read_intr_mask(dev, &intr_mask);
+       intr_mask |= DW_IC_INTR_TX_EMPTY;
+       __i2c_dw_write_intr_mask(dev, intr_mask);
 
        return len;
 }
@@ -633,6 +572,169 @@ i2c_dw_read(struct dw_i2c_dev *dev)
        }
 }
 
+static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev)
+{
+       unsigned int stat, dummy;
+
+       /*
+        * The IC_INTR_STAT register just indicates "enabled" interrupts.
+        * The unmasked raw version of interrupt status bits is available
+        * in the IC_RAW_INTR_STAT register.
+        *
+        * That is,
+        *   stat = readl(IC_INTR_STAT);
+        * equals to,
+        *   stat = readl(IC_RAW_INTR_STAT) & readl(IC_INTR_MASK);
+        *
+        * The raw version might be useful for debugging purposes.
+        */
+       if (!(dev->flags & ACCESS_POLLING)) {
+               regmap_read(dev->map, DW_IC_INTR_STAT, &stat);
+       } else {
+               regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat);
+               stat &= dev->sw_mask;
+       }
+
+       /*
+        * Do not use the IC_CLR_INTR register to clear interrupts, or
+        * you'll miss some interrupts, triggered during the period from
+        * readl(IC_INTR_STAT) to readl(IC_CLR_INTR).
+        *
+        * Instead, use the separately-prepared IC_CLR_* registers.
+        */
+       if (stat & DW_IC_INTR_RX_UNDER)
+               regmap_read(dev->map, DW_IC_CLR_RX_UNDER, &dummy);
+       if (stat & DW_IC_INTR_RX_OVER)
+               regmap_read(dev->map, DW_IC_CLR_RX_OVER, &dummy);
+       if (stat & DW_IC_INTR_TX_OVER)
+               regmap_read(dev->map, DW_IC_CLR_TX_OVER, &dummy);
+       if (stat & DW_IC_INTR_RD_REQ)
+               regmap_read(dev->map, DW_IC_CLR_RD_REQ, &dummy);
+       if (stat & DW_IC_INTR_TX_ABRT) {
+               /*
+                * The IC_TX_ABRT_SOURCE register is cleared whenever
+                * the IC_CLR_TX_ABRT is read.  Preserve it beforehand.
+                */
+               regmap_read(dev->map, DW_IC_TX_ABRT_SOURCE, &dev->abort_source);
+               regmap_read(dev->map, DW_IC_CLR_TX_ABRT, &dummy);
+       }
+       if (stat & DW_IC_INTR_RX_DONE)
+               regmap_read(dev->map, DW_IC_CLR_RX_DONE, &dummy);
+       if (stat & DW_IC_INTR_ACTIVITY)
+               regmap_read(dev->map, DW_IC_CLR_ACTIVITY, &dummy);
+       if ((stat & DW_IC_INTR_STOP_DET) &&
+           ((dev->rx_outstanding == 0) || (stat & DW_IC_INTR_RX_FULL)))
+               regmap_read(dev->map, DW_IC_CLR_STOP_DET, &dummy);
+       if (stat & DW_IC_INTR_START_DET)
+               regmap_read(dev->map, DW_IC_CLR_START_DET, &dummy);
+       if (stat & DW_IC_INTR_GEN_CALL)
+               regmap_read(dev->map, DW_IC_CLR_GEN_CALL, &dummy);
+
+       return stat;
+}
+
+static void i2c_dw_process_transfer(struct dw_i2c_dev *dev, unsigned int stat)
+{
+       if (stat & DW_IC_INTR_TX_ABRT) {
+               dev->cmd_err |= DW_IC_ERR_TX_ABRT;
+               dev->status &= ~STATUS_MASK;
+               dev->rx_outstanding = 0;
+
+               /*
+                * Anytime TX_ABRT is set, the contents of the tx/rx
+                * buffers are flushed. Make sure to skip them.
+                */
+               __i2c_dw_write_intr_mask(dev, 0);
+               goto tx_aborted;
+       }
+
+       if (stat & DW_IC_INTR_RX_FULL)
+               i2c_dw_read(dev);
+
+       if (stat & DW_IC_INTR_TX_EMPTY)
+               i2c_dw_xfer_msg(dev);
+
+       /*
+        * No need to modify or disable the interrupt mask here.
+        * i2c_dw_xfer_msg() will take care of it according to
+        * the current transmit status.
+        */
+
+tx_aborted:
+       if (((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err) &&
+            (dev->rx_outstanding == 0))
+               complete(&dev->cmd_complete);
+       else if (unlikely(dev->flags & ACCESS_INTR_MASK)) {
+               /* Workaround to trigger pending interrupt */
+               __i2c_dw_read_intr_mask(dev, &stat);
+               __i2c_dw_write_intr_mask(dev, 0);
+               __i2c_dw_write_intr_mask(dev, stat);
+       }
+}
+
+/*
+ * Interrupt service routine. This gets called whenever an I2C master interrupt
+ * occurs.
+ */
+static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
+{
+       struct dw_i2c_dev *dev = dev_id;
+       unsigned int stat, enabled;
+
+       regmap_read(dev->map, DW_IC_ENABLE, &enabled);
+       regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat);
+       if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY))
+               return IRQ_NONE;
+       if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0))
+               return IRQ_NONE;
+       dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat);
+
+       stat = i2c_dw_read_clear_intrbits(dev);
+
+       if (!(dev->status & STATUS_ACTIVE)) {
+               /*
+                * Unexpected interrupt from the driver's point of view. State
+                * variables are either unset or stale, so acknowledge and
+                * disable interrupts to suppress further ones in case the
+                * interrupt really came from this HW (e.g. firmware has left
+                * the HW active).
+                */
+               __i2c_dw_write_intr_mask(dev, 0);
+               return IRQ_HANDLED;
+       }
+
+       i2c_dw_process_transfer(dev, stat);
+
+       return IRQ_HANDLED;
+}
+
+static int i2c_dw_wait_transfer(struct dw_i2c_dev *dev)
+{
+       unsigned long timeout = dev->adapter.timeout;
+       unsigned int stat;
+       int ret;
+
+       if (!(dev->flags & ACCESS_POLLING)) {
+               ret = wait_for_completion_timeout(&dev->cmd_complete, timeout);
+       } else {
+               timeout += jiffies;
+               do {
+                       ret = try_wait_for_completion(&dev->cmd_complete);
+                       if (ret)
+                               break;
+
+                       stat = i2c_dw_read_clear_intrbits(dev);
+                       if (stat)
+                               i2c_dw_process_transfer(dev, stat);
+                       else
+                               /* Try to save some power */
+                               usleep_range(3, 25);
+               } while (time_before(jiffies, timeout));
+       }
+
+       return ret ? 0 : -ETIMEDOUT;
+}
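
i2c_dw_wait_transfer() above either sleeps on the completion (IRQ mode) or repeatedly reads and services status until completion or timeout (polling mode). A toy model of the polling branch's structure:

#include <stdio.h>
#include <stdbool.h>

static bool transfer_done;
static int polls;

/* Stand-in for i2c_dw_read_clear_intrbits() + i2c_dw_process_transfer(). */
static bool service_hardware(void)
{
	if (++polls >= 3)
		transfer_done = true;	/* pretend the transfer finished */
	return transfer_done;
}

static int wait_transfer_polling(int budget)
{
	while (budget-- > 0) {
		if (service_hardware())
			return 0;
		/* the real loop sleeps 3-25us here to save power */
	}
	return -1;			/* -ETIMEDOUT in the driver */
}

int main(void)
{
	printf("result=%d after %d polls\n", wait_transfer_polling(10), polls);
	return 0;
}
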
+
 /*
  * Prepare controller for a transaction and call i2c_dw_xfer_msg.
  */
@@ -646,18 +748,10 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 
        pm_runtime_get_sync(dev->dev);
 
-       /*
-        * Initiate I2C message transfer when polling mode is enabled,
-        * As it is polling based transfer mechanism, which does not support
-        * interrupt based functionalities of existing DesignWare driver.
-        */
        switch (dev->flags & MODEL_MASK) {
        case MODEL_AMD_NAVI_GPU:
                ret = amd_i2c_dw_xfer_quirk(adap, msgs, num);
                goto done_nolock;
-       case MODEL_WANGXUN_SP:
-               ret = txgbe_i2c_dw_xfer_quirk(adap, msgs, num);
-               goto done_nolock;
        default:
                break;
        }
@@ -685,12 +779,12 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
        i2c_dw_xfer_init(dev);
 
        /* Wait for tx to complete */
-       if (!wait_for_completion_timeout(&dev->cmd_complete, adap->timeout)) {
+       ret = i2c_dw_wait_transfer(dev);
+       if (ret) {
                dev_err(dev->dev, "controller timed out\n");
-               /* i2c_dw_init implicitly disables the adapter */
+               /* i2c_dw_init_master() implicitly disables the adapter */
                i2c_recover_bus(&dev->adapter);
                i2c_dw_init_master(dev);
-               ret = -ETIMEDOUT;
                goto done;
        }
 
@@ -698,7 +792,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
         * We must disable the adapter before returning and signaling the end
         * of the current transfer. Otherwise the hardware might continue
         * generating interrupts which in turn causes a race condition with
-        * the following transfer.  Needs some more investigation if the
+        * the following transfer. Needs some more investigation if the
         * additional interrupts are a hardware bug or this driver doesn't
         * handle them correctly yet.
         */
@@ -746,132 +840,6 @@ static const struct i2c_adapter_quirks i2c_dw_quirks = {
        .flags = I2C_AQ_NO_ZERO_LEN,
 };
 
-static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev)
-{
-       unsigned int stat, dummy;
-
-       /*
-        * The IC_INTR_STAT register just indicates "enabled" interrupts.
-        * The unmasked raw version of interrupt status bits is available
-        * in the IC_RAW_INTR_STAT register.
-        *
-        * That is,
-        *   stat = readl(IC_INTR_STAT);
-        * equals to,
-        *   stat = readl(IC_RAW_INTR_STAT) & readl(IC_INTR_MASK);
-        *
-        * The raw version might be useful for debugging purposes.
-        */
-       regmap_read(dev->map, DW_IC_INTR_STAT, &stat);
-
-       /*
-        * Do not use the IC_CLR_INTR register to clear interrupts, or
-        * you'll miss some interrupts, triggered during the period from
-        * readl(IC_INTR_STAT) to readl(IC_CLR_INTR).
-        *
-        * Instead, use the separately-prepared IC_CLR_* registers.
-        */
-       if (stat & DW_IC_INTR_RX_UNDER)
-               regmap_read(dev->map, DW_IC_CLR_RX_UNDER, &dummy);
-       if (stat & DW_IC_INTR_RX_OVER)
-               regmap_read(dev->map, DW_IC_CLR_RX_OVER, &dummy);
-       if (stat & DW_IC_INTR_TX_OVER)
-               regmap_read(dev->map, DW_IC_CLR_TX_OVER, &dummy);
-       if (stat & DW_IC_INTR_RD_REQ)
-               regmap_read(dev->map, DW_IC_CLR_RD_REQ, &dummy);
-       if (stat & DW_IC_INTR_TX_ABRT) {
-               /*
-                * The IC_TX_ABRT_SOURCE register is cleared whenever
-                * the IC_CLR_TX_ABRT is read.  Preserve it beforehand.
-                */
-               regmap_read(dev->map, DW_IC_TX_ABRT_SOURCE, &dev->abort_source);
-               regmap_read(dev->map, DW_IC_CLR_TX_ABRT, &dummy);
-       }
-       if (stat & DW_IC_INTR_RX_DONE)
-               regmap_read(dev->map, DW_IC_CLR_RX_DONE, &dummy);
-       if (stat & DW_IC_INTR_ACTIVITY)
-               regmap_read(dev->map, DW_IC_CLR_ACTIVITY, &dummy);
-       if ((stat & DW_IC_INTR_STOP_DET) &&
-           ((dev->rx_outstanding == 0) || (stat & DW_IC_INTR_RX_FULL)))
-               regmap_read(dev->map, DW_IC_CLR_STOP_DET, &dummy);
-       if (stat & DW_IC_INTR_START_DET)
-               regmap_read(dev->map, DW_IC_CLR_START_DET, &dummy);
-       if (stat & DW_IC_INTR_GEN_CALL)
-               regmap_read(dev->map, DW_IC_CLR_GEN_CALL, &dummy);
-
-       return stat;
-}
-
-/*
- * Interrupt service routine. This gets called whenever an I2C master interrupt
- * occurs.
- */
-static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
-{
-       struct dw_i2c_dev *dev = dev_id;
-       unsigned int stat, enabled;
-
-       regmap_read(dev->map, DW_IC_ENABLE, &enabled);
-       regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat);
-       if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY))
-               return IRQ_NONE;
-       if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0))
-               return IRQ_NONE;
-       dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat);
-
-       stat = i2c_dw_read_clear_intrbits(dev);
-
-       if (!(dev->status & STATUS_ACTIVE)) {
-               /*
-                * Unexpected interrupt in driver point of view. State
-                * variables are either unset or stale so acknowledge and
-                * disable interrupts for suppressing further interrupts if
-                * interrupt really came from this HW (E.g. firmware has left
-                * the HW active).
-                */
-               regmap_write(dev->map, DW_IC_INTR_MASK, 0);
-               return IRQ_HANDLED;
-       }
-
-       if (stat & DW_IC_INTR_TX_ABRT) {
-               dev->cmd_err |= DW_IC_ERR_TX_ABRT;
-               dev->status &= ~STATUS_MASK;
-               dev->rx_outstanding = 0;
-
-               /*
-                * Anytime TX_ABRT is set, the contents of the tx/rx
-                * buffers are flushed. Make sure to skip them.
-                */
-               regmap_write(dev->map, DW_IC_INTR_MASK, 0);
-               goto tx_aborted;
-       }
-
-       if (stat & DW_IC_INTR_RX_FULL)
-               i2c_dw_read(dev);
-
-       if (stat & DW_IC_INTR_TX_EMPTY)
-               i2c_dw_xfer_msg(dev);
-
-       /*
-        * No need to modify or disable the interrupt mask here.
-        * i2c_dw_xfer_msg() will take care of it according to
-        * the current transmit status.
-        */
-
-tx_aborted:
-       if (((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err) &&
-            (dev->rx_outstanding == 0))
-               complete(&dev->cmd_complete);
-       else if (unlikely(dev->flags & ACCESS_INTR_MASK)) {
-               /* Workaround to trigger pending interrupt */
-               regmap_read(dev->map, DW_IC_INTR_MASK, &stat);
-               regmap_write(dev->map, DW_IC_INTR_MASK, 0);
-               regmap_write(dev->map, DW_IC_INTR_MASK, stat);
-       }
-
-       return IRQ_HANDLED;
-}
-
 void i2c_dw_configure_master(struct dw_i2c_dev *dev)
 {
        struct i2c_timings *t = &dev->timings;
@@ -953,31 +921,6 @@ static int i2c_dw_init_recovery_info(struct dw_i2c_dev *dev)
        return 0;
 }
 
-static int i2c_dw_poll_adap_quirk(struct dw_i2c_dev *dev)
-{
-       struct i2c_adapter *adap = &dev->adapter;
-       int ret;
-
-       pm_runtime_get_noresume(dev->dev);
-       ret = i2c_add_numbered_adapter(adap);
-       if (ret)
-               dev_err(dev->dev, "Failed to add adapter: %d\n", ret);
-       pm_runtime_put_noidle(dev->dev);
-
-       return ret;
-}
-
-static bool i2c_dw_is_model_poll(struct dw_i2c_dev *dev)
-{
-       switch (dev->flags & MODEL_MASK) {
-       case MODEL_AMD_NAVI_GPU:
-       case MODEL_WANGXUN_SP:
-               return true;
-       default:
-               return false;
-       }
-}
-
 int i2c_dw_probe_master(struct dw_i2c_dev *dev)
 {
        struct i2c_adapter *adap = &dev->adapter;
@@ -1033,9 +976,6 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev)
        adap->dev.parent = dev->dev;
        i2c_set_adapdata(adap, dev);
 
-       if (i2c_dw_is_model_poll(dev))
-               return i2c_dw_poll_adap_quirk(dev);
-
        if (dev->flags & ACCESS_NO_IRQ_SUSPEND) {
                irq_flags = IRQF_NO_SUSPEND;
        } else {
@@ -1046,15 +986,17 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev)
        if (ret)
                return ret;
 
-       regmap_write(dev->map, DW_IC_INTR_MASK, 0);
+       __i2c_dw_write_intr_mask(dev, 0);
        i2c_dw_release_lock(dev);
 
-       ret = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, irq_flags,
-                              dev_name(dev->dev), dev);
-       if (ret) {
-               dev_err(dev->dev, "failure requesting irq %i: %d\n",
-                       dev->irq, ret);
-               return ret;
+       if (!(dev->flags & ACCESS_POLLING)) {
+               ret = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr,
+                                      irq_flags, dev_name(dev->dev), dev);
+               if (ret) {
+                       dev_err(dev->dev, "failure requesting irq %i: %d\n",
+                               dev->irq, ret);
+                       return ret;
+               }
        }
 
        ret = i2c_dw_init_recovery_info(dev);
index 61d7a27aa070184db6a7754561ad42c25c79de6c..9be9a2658e1f6ede697906827b26da319b098902 100644 (file)
@@ -154,7 +154,7 @@ static int navi_amd_setup(struct pci_dev *pdev, struct dw_pci_controller *c)
 {
        struct dw_i2c_dev *dev = dev_get_drvdata(&pdev->dev);
 
-       dev->flags |= MODEL_AMD_NAVI_GPU;
+       dev->flags |= MODEL_AMD_NAVI_GPU | ACCESS_POLLING;
        dev->timings.bus_freq_hz = I2C_MAX_STANDARD_MODE_FREQ;
        return 0;
 }
index 855b698e99c08004df29a5da485722974de63335..4ab41ba39d55fb6312c6ecc8068d894d7e37e936 100644 (file)
@@ -290,7 +290,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev)
 
        dev->flags = (uintptr_t)device_get_match_data(&pdev->dev);
        if (device_property_present(&pdev->dev, "wx,i2c-snps-model"))
-               dev->flags = MODEL_WANGXUN_SP;
+               dev->flags = MODEL_WANGXUN_SP | ACCESS_POLLING;
 
        dev->dev = &pdev->dev;
        dev->irq = irq;
index dfad5bad507550d00efa0ac282e18c4e4de44310..975c0b1c44deb61ae84bfbe877de390f01502c60 100644 (file)
@@ -57,6 +57,8 @@
 #define   HISI_I2C_FS_SPK_LEN_CNT      GENMASK(7, 0)
 #define HISI_I2C_HS_SPK_LEN            0x003c
 #define   HISI_I2C_HS_SPK_LEN_CNT      GENMASK(7, 0)
+#define HISI_I2C_TX_INT_CLR            0x0040
+#define   HISI_I2C_TX_AEMPTY_INT               BIT(0)
 #define HISI_I2C_INT_MSTAT             0x0044
 #define HISI_I2C_INT_CLR               0x0048
 #define HISI_I2C_INT_MASK              0x004C
@@ -124,6 +126,11 @@ static void hisi_i2c_clear_int(struct hisi_i2c_controller *ctlr, u32 mask)
        writel_relaxed(mask, ctlr->iobase + HISI_I2C_INT_CLR);
 }
 
+static void hisi_i2c_clear_tx_int(struct hisi_i2c_controller *ctlr, u32 mask)
+{
+       writel_relaxed(mask, ctlr->iobase + HISI_I2C_TX_INT_CLR);
+}
+
 static void hisi_i2c_handle_errors(struct hisi_i2c_controller *ctlr)
 {
        u32 int_err = ctlr->xfer_err, reg;
@@ -168,6 +175,7 @@ static int hisi_i2c_start_xfer(struct hisi_i2c_controller *ctlr)
        writel(reg, ctlr->iobase + HISI_I2C_FIFO_CTRL);
 
        hisi_i2c_clear_int(ctlr, HISI_I2C_INT_ALL);
+       hisi_i2c_clear_tx_int(ctlr, HISI_I2C_TX_AEMPTY_INT);
        hisi_i2c_enable_int(ctlr, HISI_I2C_INT_ALL);
 
        return 0;
@@ -266,7 +274,7 @@ static int hisi_i2c_read_rx_fifo(struct hisi_i2c_controller *ctlr)
 
 static void hisi_i2c_xfer_msg(struct hisi_i2c_controller *ctlr)
 {
-       int max_write = HISI_I2C_TX_FIFO_DEPTH;
+       int max_write = HISI_I2C_TX_FIFO_DEPTH - HISI_I2C_TX_F_AE_THRESH;
        bool need_restart = false, last_msg;
        struct i2c_msg *cur_msg;
        u32 cmd, fifo_state;
@@ -323,6 +331,8 @@ static void hisi_i2c_xfer_msg(struct hisi_i2c_controller *ctlr)
         */
        if (ctlr->msg_tx_idx == ctlr->msg_num)
                hisi_i2c_disable_int(ctlr, HISI_I2C_INT_TX_EMPTY);
+
+       hisi_i2c_clear_tx_int(ctlr, HISI_I2C_TX_AEMPTY_INT);
 }
 
 static irqreturn_t hisi_i2c_irq(int irq, void *context)
@@ -363,6 +373,7 @@ out:
        if (int_stat & HISI_I2C_INT_TRANS_CPLT) {
                hisi_i2c_disable_int(ctlr, HISI_I2C_INT_ALL);
                hisi_i2c_clear_int(ctlr, HISI_I2C_INT_ALL);
+               hisi_i2c_clear_tx_int(ctlr, HISI_I2C_TX_AEMPTY_INT);
                complete(ctlr->completion);
        }
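
HISI_I2C_TX_INT_CLR behaves as a write-1-to-clear register: only the bit positions written as 1 are acknowledged, so the TX almost-empty interrupt can be cleared without disturbing any other source. A generic sketch of the W1C pattern (the register offset and mapping here are placeholders, not the driver's actual layout):

	/* Sketch: W1C acknowledge; writing a 1 clears just that interrupt. */
	static void ack_one_irq(void __iomem *base)
	{
		writel_relaxed(BIT(0), base + 0x0040);	/* placeholder offset */
	}
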
 
index 274e987e4cfa0f9b90a576b83d2a96368b7f50a3..a6861660cb8ca771f38415c08561f0cbb232588b 100644 (file)
 #define STATUS_FLAGS           (SMBHSTSTS_BYTE_DONE | SMBHSTSTS_INTR | \
                                 STATUS_ERROR_FLAGS)
 
+#define SMBUS_LEN_SENTINEL (I2C_SMBUS_BLOCK_MAX + 1)
+
 /* Older devices have their ID defined in <linux/pci_ids.h> */
 #define PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS            0x02a3
 #define PCI_DEVICE_ID_INTEL_COMETLAKE_H_SMBUS          0x06a3
@@ -328,11 +330,39 @@ MODULE_PARM_DESC(disable_features, "Disable selected driver features:\n"
        "\t\t  0x10  don't use interrupts\n"
        "\t\t  0x20  disable SMBus Host Notify ");
 
+static int i801_get_block_len(struct i801_priv *priv)
+{
+       u8 len = inb_p(SMBHSTDAT0(priv));
+
+       if (len < 1 || len > I2C_SMBUS_BLOCK_MAX) {
+               pci_err(priv->pci_dev, "Illegal SMBus block read size %u\n", len);
+               return -EPROTO;
+       }
+
+       return len;
+}
+
+static int i801_check_and_clear_pec_error(struct i801_priv *priv)
+{
+       u8 status;
+
+       if (!(priv->features & FEATURE_SMBUS_PEC))
+               return 0;
+
+       status = inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE;
+       if (status) {
+               outb_p(status, SMBAUXSTS(priv));
+               return -EBADMSG;
+       }
+
+       return 0;
+}
+
 /* Make sure the SMBus host is ready to start transmitting.
    Return 0 if it is, -EBUSY if it is not. */
 static int i801_check_pre(struct i801_priv *priv)
 {
-       int status;
+       int status, result;
 
        status = inb_p(SMBHSTSTS(priv));
        if (status & SMBHSTSTS_HOST_BUSY) {
@@ -353,13 +383,9 @@ static int i801_check_pre(struct i801_priv *priv)
         * the hardware was already in this state when the driver
         * started.
         */
-       if (priv->features & FEATURE_SMBUS_PEC) {
-               status = inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE;
-               if (status) {
-                       pci_dbg(priv->pci_dev, "Clearing aux status flags (%02x)\n", status);
-                       outb_p(status, SMBAUXSTS(priv));
-               }
-       }
+       result = i801_check_and_clear_pec_error(priv);
+       if (result)
+               pci_dbg(priv->pci_dev, "Clearing aux status flag CRCE\n");
 
        return 0;
 }
@@ -408,14 +434,12 @@ static int i801_check_post(struct i801_priv *priv, int status)
                 * bit is harmless as long as it's cleared before
                 * the next operation.
                 */
-               if ((priv->features & FEATURE_SMBUS_PEC) &&
-                   (inb_p(SMBAUXSTS(priv)) & SMBAUXSTS_CRCE)) {
-                       outb_p(SMBAUXSTS_CRCE, SMBAUXSTS(priv));
-                       result = -EBADMSG;
-                       dev_dbg(&priv->pci_dev->dev, "PEC error\n");
+               result = i801_check_and_clear_pec_error(priv);
+               if (result) {
+                       pci_dbg(priv->pci_dev, "PEC error\n");
                } else {
                        result = -ENXIO;
-                       dev_dbg(&priv->pci_dev->dev, "No response\n");
+                       pci_dbg(priv->pci_dev, "No response\n");
                }
        }
        if (status & SMBHSTSTS_BUS_ERR) {
@@ -512,12 +536,11 @@ static int i801_block_transaction_by_block(struct i801_priv *priv,
 
        if (read_write == I2C_SMBUS_READ ||
            command == I2C_SMBUS_BLOCK_PROC_CALL) {
-               len = inb_p(SMBHSTDAT0(priv));
-               if (len < 1 || len > I2C_SMBUS_BLOCK_MAX) {
-                       status = -EPROTO;
+               status = i801_get_block_len(priv);
+               if (status < 0)
                        goto out;
-               }
 
+               len = status;
                data->block[0] = len;
                inb_p(SMBHSTCNT(priv)); /* reset the data buffer index */
                for (i = 0; i < len; i++)
@@ -531,17 +554,17 @@ out:
 static void i801_isr_byte_done(struct i801_priv *priv)
 {
        if (priv->is_read) {
-               /* For SMBus block reads, length is received with first byte */
-               if (((priv->cmd & 0x1c) == I801_BLOCK_DATA) &&
-                   (priv->count == 0)) {
-                       priv->len = inb_p(SMBHSTDAT0(priv));
-                       if (priv->len < 1 || priv->len > I2C_SMBUS_BLOCK_MAX) {
-                               dev_err(&priv->pci_dev->dev,
-                                       "Illegal SMBus block read size %d\n",
-                                       priv->len);
+               /*
+                * At transfer start i801_smbus_block_transaction() marks
+                * the block length as invalid. Check for this sentinel value
+                * and read the block length from SMBHSTDAT0.
+                */
+               if (priv->len == SMBUS_LEN_SENTINEL) {
+                       priv->len = i801_get_block_len(priv);
+                       if (priv->len < 0)
                                /* FIXME: Recover */
                                priv->len = I2C_SMBUS_BLOCK_MAX;
-                       }
+
                        priv->data[-1] = priv->len;
                }
 
@@ -688,13 +711,14 @@ static int i801_block_transaction_byte_by_byte(struct i801_priv *priv,
                if (status)
                        return status;
 
-               if (i == 1 && read_write == I2C_SMBUS_READ
-                && command != I2C_SMBUS_I2C_BLOCK_DATA) {
-                       len = inb_p(SMBHSTDAT0(priv));
-                       if (len < 1 || len > I2C_SMBUS_BLOCK_MAX) {
-                               dev_err(&priv->pci_dev->dev,
-                                       "Illegal SMBus block read size %d\n",
-                                       len);
+               /*
+                * At transfer start i801_smbus_block_transaction() marks
+                * the block length as invalid. Check for this sentinel value
+                * and read the block length from SMBHSTDAT0.
+                */
+               if (len == SMBUS_LEN_SENTINEL) {
+                       len = i801_get_block_len(priv);
+                       if (len < 0) {
                                /* Recover */
                                while (inb_p(SMBHSTSTS(priv)) &
                                       SMBHSTSTS_HOST_BUSY)
@@ -792,77 +816,66 @@ static int i801_simple_transaction(struct i801_priv *priv, union i2c_smbus_data
        return 0;
 }
 
-/* Block transaction function */
-static int i801_block_transaction(struct i801_priv *priv, union i2c_smbus_data *data,
-                                 u8 addr, u8 hstcmd, char read_write, int command)
+static int i801_smbus_block_transaction(struct i801_priv *priv, union i2c_smbus_data *data,
+                                       u8 addr, u8 hstcmd, char read_write, int command)
 {
-       int result = 0;
-       unsigned char hostc;
-
        if (read_write == I2C_SMBUS_READ && command == I2C_SMBUS_BLOCK_DATA)
-               data->block[0] = I2C_SMBUS_BLOCK_MAX;
+               /* Mark block length as invalid */
+               data->block[0] = SMBUS_LEN_SENTINEL;
        else if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX)
                return -EPROTO;
 
-       switch (command) {
-       case I2C_SMBUS_BLOCK_DATA:
-               i801_set_hstadd(priv, addr, read_write);
-               outb_p(hstcmd, SMBHSTCMD(priv));
-               break;
-       case I2C_SMBUS_I2C_BLOCK_DATA:
-               /*
-                * NB: page 240 of ICH5 datasheet shows that the R/#W
-                * bit should be cleared here, even when reading.
-                * However if SPD Write Disable is set (Lynx Point and later),
-                * the read will fail if we don't set the R/#W bit.
-                */
-               i801_set_hstadd(priv, addr,
-                               priv->original_hstcfg & SMBHSTCFG_SPD_WD ?
-                               read_write : I2C_SMBUS_WRITE);
-               if (read_write == I2C_SMBUS_READ) {
-                       /* NB: page 240 of ICH5 datasheet also shows
-                        * that DATA1 is the cmd field when reading
-                        */
-                       outb_p(hstcmd, SMBHSTDAT1(priv));
-               } else
-                       outb_p(hstcmd, SMBHSTCMD(priv));
-
-               if (read_write == I2C_SMBUS_WRITE) {
-                       /* set I2C_EN bit in configuration register */
-                       pci_read_config_byte(priv->pci_dev, SMBHSTCFG, &hostc);
-                       pci_write_config_byte(priv->pci_dev, SMBHSTCFG,
-                                             hostc | SMBHSTCFG_I2C_EN);
-               } else if (!(priv->features & FEATURE_I2C_BLOCK_READ)) {
-                       dev_err(&priv->pci_dev->dev,
-                               "I2C block read is unsupported!\n");
-                       return -EOPNOTSUPP;
-               }
-               break;
-       case I2C_SMBUS_BLOCK_PROC_CALL:
+       if (command == I2C_SMBUS_BLOCK_PROC_CALL)
                /* Needs to be flagged as write transaction */
                i801_set_hstadd(priv, addr, I2C_SMBUS_WRITE);
+       else
+               i801_set_hstadd(priv, addr, read_write);
+       outb_p(hstcmd, SMBHSTCMD(priv));
+
+       if (priv->features & FEATURE_BLOCK_BUFFER)
+               return i801_block_transaction_by_block(priv, data, read_write, command);
+       else
+               return i801_block_transaction_byte_by_byte(priv, data, read_write, command);
+}
+
+static int i801_i2c_block_transaction(struct i801_priv *priv, union i2c_smbus_data *data,
+                                     u8 addr, u8 hstcmd, char read_write, int command)
+{
+       int result;
+       u8 hostc;
+
+       if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX)
+               return -EPROTO;
+       /*
+        * NB: page 240 of ICH5 datasheet shows that the R/#W bit should be cleared here,
+        * even when reading. However if SPD Write Disable is set (Lynx Point and later),
+        * the read will fail if we don't set the R/#W bit.
+        */
+       i801_set_hstadd(priv, addr,
+                       priv->original_hstcfg & SMBHSTCFG_SPD_WD ? read_write : I2C_SMBUS_WRITE);
+
+       /* NB: page 240 of ICH5 datasheet shows that DATA1 is the cmd field when reading */
+       if (read_write == I2C_SMBUS_READ)
+               outb_p(hstcmd, SMBHSTDAT1(priv));
+       else
                outb_p(hstcmd, SMBHSTCMD(priv));
-               break;
+
+       if (read_write == I2C_SMBUS_WRITE) {
+               /* set I2C_EN bit in configuration register */
+               pci_read_config_byte(priv->pci_dev, SMBHSTCFG, &hostc);
+               pci_write_config_byte(priv->pci_dev, SMBHSTCFG, hostc | SMBHSTCFG_I2C_EN);
+       } else if (!(priv->features & FEATURE_I2C_BLOCK_READ)) {
+               pci_err(priv->pci_dev, "I2C block read is unsupported!\n");
+               return -EOPNOTSUPP;
        }
 
-       /* Experience has shown that the block buffer can only be used for
-          SMBus (not I2C) block transactions, even though the datasheet
-          doesn't mention this limitation. */
-       if ((priv->features & FEATURE_BLOCK_BUFFER) &&
-           command != I2C_SMBUS_I2C_BLOCK_DATA)
-               result = i801_block_transaction_by_block(priv, data,
-                                                        read_write,
-                                                        command);
-       else
-               result = i801_block_transaction_byte_by_byte(priv, data,
-                                                            read_write,
-                                                            command);
+       /* Block buffer isn't supported for I2C block transactions */
+       result = i801_block_transaction_byte_by_byte(priv, data, read_write, command);
 
-       if (command == I2C_SMBUS_I2C_BLOCK_DATA
-        && read_write == I2C_SMBUS_WRITE) {
-               /* restore saved configuration register value */
+       /* restore saved configuration register value */
+       if (read_write == I2C_SMBUS_WRITE)
                pci_write_config_byte(priv->pci_dev, SMBHSTCFG, hostc);
-       }
+
        return result;
 }
 
@@ -893,10 +906,10 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
                outb_p(inb_p(SMBAUXCTL(priv)) & (~SMBAUXCTL_CRC),
                       SMBAUXCTL(priv));
 
-       if (size == I2C_SMBUS_BLOCK_DATA ||
-           size == I2C_SMBUS_I2C_BLOCK_DATA ||
-           size == I2C_SMBUS_BLOCK_PROC_CALL)
-               ret = i801_block_transaction(priv, data, addr, command, read_write, size);
+       if (size == I2C_SMBUS_BLOCK_DATA || size == I2C_SMBUS_BLOCK_PROC_CALL)
+               ret = i801_smbus_block_transaction(priv, data, addr, command, read_write, size);
+       else if (size == I2C_SMBUS_I2C_BLOCK_DATA)
+               ret = i801_i2c_block_transaction(priv, data, addr, command, read_write, size);
        else
                ret = i801_simple_transaction(priv, data, addr, command, read_write, size);
 
@@ -969,11 +982,10 @@ static const struct i2c_algorithm smbus_algorithm = {
        .functionality  = i801_func,
 };
 
-#define FEATURES_ICH5  (FEATURE_BLOCK_PROC | FEATURE_I2C_BLOCK_READ    | \
-                        FEATURE_IRQ | FEATURE_SMBUS_PEC                | \
-                        FEATURE_BLOCK_BUFFER | FEATURE_HOST_NOTIFY)
 #define FEATURES_ICH4  (FEATURE_SMBUS_PEC | FEATURE_BLOCK_BUFFER | \
                         FEATURE_HOST_NOTIFY)
+#define FEATURES_ICH5  (FEATURES_ICH4 | FEATURE_BLOCK_PROC | \
+                        FEATURE_I2C_BLOCK_READ | FEATURE_IRQ)
 
 static const struct pci_device_id i801_ids[] = {
        { PCI_DEVICE_DATA(INTEL, 82801AA_3,                     0)                               },
@@ -1117,7 +1129,7 @@ static void dmi_check_onboard_devices(const struct dmi_header *dm, void *adap)
 {
        int i, count;
 
-       if (dm->type != 10)
+       if (dm->type != DMI_ENTRY_ONBOARD_DEVICE)
                return;
 
        count = (dm->length - sizeof(struct dmi_header)) / 2;
@@ -1447,8 +1459,7 @@ static inline void i801_del_mux(struct i801_priv *priv) { }
 #endif
 
 static struct platform_device *
-i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
-                struct resource *tco_res)
+i801_add_tco_spt(struct pci_dev *pci_dev, struct resource *tco_res)
 {
        static const struct itco_wdt_platform_data pldata = {
                .name = "Intel PCH",
@@ -1479,8 +1490,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev,
 }
 
 static struct platform_device *
-i801_add_tco_cnl(struct i801_priv *priv, struct pci_dev *pci_dev,
-                struct resource *tco_res)
+i801_add_tco_cnl(struct pci_dev *pci_dev, struct resource *tco_res)
 {
        static const struct itco_wdt_platform_data pldata = {
                .name = "Intel PCH",
@@ -1520,9 +1530,9 @@ static void i801_add_tco(struct i801_priv *priv)
        res->flags = IORESOURCE_IO;
 
        if (priv->features & FEATURE_TCO_CNL)
-               priv->tco_pdev = i801_add_tco_cnl(priv, pci_dev, tco_res);
+               priv->tco_pdev = i801_add_tco_cnl(pci_dev, tco_res);
        else
-               priv->tco_pdev = i801_add_tco_spt(priv, pci_dev, tco_res);
+               priv->tco_pdev = i801_add_tco_spt(pci_dev, tco_res);
 
        if (IS_ERR(priv->tco_pdev))
                dev_warn(&pci_dev->dev, "failed to create iTCO device\n");
index 678b30e90492ad17294453a5e6c482007946e8f5..6d72e4e126dde6ba07770b20b358ba74dbda78b8 100644 (file)
@@ -106,6 +106,7 @@ struct lpi2c_imx_struct {
        unsigned int            txfifosize;
        unsigned int            rxfifosize;
        enum lpi2c_imx_mode     mode;
+       struct i2c_bus_recovery_info rinfo;
 };
 
 static void lpi2c_imx_intctrl(struct lpi2c_imx_struct *lpi2c_imx,
@@ -133,6 +134,8 @@ static int lpi2c_imx_bus_busy(struct lpi2c_imx_struct *lpi2c_imx)
 
                if (time_after(jiffies, orig_jiffies + msecs_to_jiffies(500))) {
                        dev_dbg(&lpi2c_imx->adapter.dev, "bus not working\n");
+                       if (lpi2c_imx->adapter.bus_recovery_info)
+                               i2c_recover_bus(&lpi2c_imx->adapter);
                        return -ETIMEDOUT;
                }
                schedule();
@@ -190,6 +193,8 @@ static void lpi2c_imx_stop(struct lpi2c_imx_struct *lpi2c_imx)
 
                if (time_after(jiffies, orig_jiffies + msecs_to_jiffies(500))) {
                        dev_dbg(&lpi2c_imx->adapter.dev, "stop timeout\n");
+                       if (lpi2c_imx->adapter.bus_recovery_info)
+                               i2c_recover_bus(&lpi2c_imx->adapter);
                        break;
                }
                schedule();
@@ -325,6 +330,8 @@ static int lpi2c_imx_txfifo_empty(struct lpi2c_imx_struct *lpi2c_imx)
 
                if (time_after(jiffies, orig_jiffies + msecs_to_jiffies(500))) {
                        dev_dbg(&lpi2c_imx->adapter.dev, "txfifo empty timeout\n");
+                       if (lpi2c_imx->adapter.bus_recovery_info)
+                               i2c_recover_bus(&lpi2c_imx->adapter);
                        return -ETIMEDOUT;
                }
                schedule();
@@ -526,6 +533,20 @@ static irqreturn_t lpi2c_imx_isr(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static int lpi2c_imx_init_recovery_info(struct lpi2c_imx_struct *lpi2c_imx,
+                                 struct platform_device *pdev)
+{
+       struct i2c_bus_recovery_info *bri = &lpi2c_imx->rinfo;
+
+       bri->pinctrl = devm_pinctrl_get(&pdev->dev);
+       if (IS_ERR(bri->pinctrl))
+               return PTR_ERR(bri->pinctrl);
+
+       lpi2c_imx->adapter.bus_recovery_info = bri;
+
+       return 0;
+}
+
 static u32 lpi2c_imx_func(struct i2c_adapter *adapter)
 {
        return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL |
@@ -600,6 +621,12 @@ static int lpi2c_imx_probe(struct platform_device *pdev)
        lpi2c_imx->txfifosize = 1 << (temp & 0x0f);
        lpi2c_imx->rxfifosize = 1 << ((temp >> 8) & 0x0f);
 
+       /* Init optional bus recovery function */
+       ret = lpi2c_imx_init_recovery_info(lpi2c_imx, pdev);
+       /* Give it another chance if the pinctrl in use is not ready yet */
+       if (ret == -EPROBE_DEFER)
+               goto rpm_disable;
+
        ret = i2c_add_adapter(&lpi2c_imx->adapter);
        if (ret)
                goto rpm_disable;
index 60e813137f8442895b19c6e9d871252cc32c7f24..3842e527116b799b02860a8807c44d49b5c72375 100644 (file)
@@ -212,10 +212,6 @@ struct imx_i2c_struct {
        const struct imx_i2c_hwdata     *hwdata;
        struct i2c_bus_recovery_info rinfo;
 
-       struct pinctrl *pinctrl;
-       struct pinctrl_state *pinctrl_pins_default;
-       struct pinctrl_state *pinctrl_pins_gpio;
-
        struct imx_i2c_dma      *dma;
        struct i2c_client       *slave;
        enum i2c_slave_event last_slave_event;
@@ -1362,24 +1358,6 @@ static int i2c_imx_xfer_atomic(struct i2c_adapter *adapter,
        return result;
 }
 
-static void i2c_imx_prepare_recovery(struct i2c_adapter *adap)
-{
-       struct imx_i2c_struct *i2c_imx;
-
-       i2c_imx = container_of(adap, struct imx_i2c_struct, adapter);
-
-       pinctrl_select_state(i2c_imx->pinctrl, i2c_imx->pinctrl_pins_gpio);
-}
-
-static void i2c_imx_unprepare_recovery(struct i2c_adapter *adap)
-{
-       struct imx_i2c_struct *i2c_imx;
-
-       i2c_imx = container_of(adap, struct imx_i2c_struct, adapter);
-
-       pinctrl_select_state(i2c_imx->pinctrl, i2c_imx->pinctrl_pins_default);
-}
-
 /*
  * We switch SCL and SDA to their GPIO function and do some bitbanging
  * for bus recovery. These alternative pinmux settings can be
@@ -1390,43 +1368,13 @@ static void i2c_imx_unprepare_recovery(struct i2c_adapter *adap)
 static int i2c_imx_init_recovery_info(struct imx_i2c_struct *i2c_imx,
                struct platform_device *pdev)
 {
-       struct i2c_bus_recovery_info *rinfo = &i2c_imx->rinfo;
-
-       i2c_imx->pinctrl = devm_pinctrl_get(&pdev->dev);
-       if (!i2c_imx->pinctrl) {
-               dev_info(&pdev->dev, "pinctrl unavailable, bus recovery not supported\n");
-               return 0;
-       }
-       if (IS_ERR(i2c_imx->pinctrl)) {
-               dev_info(&pdev->dev, "can't get pinctrl, bus recovery not supported\n");
-               return PTR_ERR(i2c_imx->pinctrl);
-       }
-
-       i2c_imx->pinctrl_pins_default = pinctrl_lookup_state(i2c_imx->pinctrl,
-                       PINCTRL_STATE_DEFAULT);
-       i2c_imx->pinctrl_pins_gpio = pinctrl_lookup_state(i2c_imx->pinctrl,
-                       "gpio");
-       rinfo->sda_gpiod = devm_gpiod_get_optional(&pdev->dev, "sda", GPIOD_IN);
-       rinfo->scl_gpiod = devm_gpiod_get(&pdev->dev, "scl", GPIOD_OUT_HIGH_OPEN_DRAIN);
-
-       if (PTR_ERR(rinfo->sda_gpiod) == -EPROBE_DEFER ||
-           PTR_ERR(rinfo->scl_gpiod) == -EPROBE_DEFER) {
-               return -EPROBE_DEFER;
-       } else if (IS_ERR(rinfo->sda_gpiod) ||
-                  IS_ERR(rinfo->scl_gpiod) ||
-                  IS_ERR(i2c_imx->pinctrl_pins_default) ||
-                  IS_ERR(i2c_imx->pinctrl_pins_gpio)) {
-               dev_dbg(&pdev->dev, "recovery information incomplete\n");
-               return 0;
-       }
+       struct i2c_bus_recovery_info *bri = &i2c_imx->rinfo;
 
-       dev_dbg(&pdev->dev, "using scl%s for recovery\n",
-               rinfo->sda_gpiod ? ",sda" : "");
+       bri->pinctrl = devm_pinctrl_get(&pdev->dev);
+       if (IS_ERR(bri->pinctrl))
+               return PTR_ERR(bri->pinctrl);
 
-       rinfo->prepare_recovery = i2c_imx_prepare_recovery;
-       rinfo->unprepare_recovery = i2c_imx_unprepare_recovery;
-       rinfo->recover_bus = i2c_generic_scl_recovery;
-       i2c_imx->adapter.bus_recovery_info = rinfo;
+       i2c_imx->adapter.bus_recovery_info = bri;
 
        return 0;
 }
index e4e4995ab22430e0cdb50b19167feb3d638bb92a..8d73c0f405ed567ca980331d86ed56eee29d4f53 100644 (file)
@@ -30,8 +30,6 @@
 #include <asm/mpc85xx.h>
 #include <sysdev/fsl_soc.h>
 
-#define DRV_NAME "mpc-i2c"
-
 #define MPC_I2C_CLOCK_LEGACY   0
 #define MPC_I2C_CLOCK_PRESERVE (~0U)
 
@@ -844,14 +842,14 @@ static int fsl_i2c_probe(struct platform_device *op)
                        mpc_i2c_setup_8xxx(op->dev.of_node, i2c, clock);
        }
 
-       /*
-        * "fsl,timeout" has been marked as deprecated and, to maintain
-        * backward compatibility, we will only look for it if
-        * "i2c-scl-clk-low-timeout-us" is not present.
-        */
+       /* Sadly, we have to support two deprecated bindings here */
        result = of_property_read_u32(op->dev.of_node,
-                                     "i2c-scl-clk-low-timeout-us",
+                                     "i2c-transfer-timeout-us",
                                      &mpc_ops.timeout);
+       if (result == -EINVAL)
+               result = of_property_read_u32(op->dev.of_node,
+                                             "i2c-scl-clk-low-timeout-us",
+                                             &mpc_ops.timeout);
        if (result == -EINVAL)
                result = of_property_read_u32(op->dev.of_node,
                                              "fsl,timeout", &mpc_ops.timeout);
@@ -960,7 +958,7 @@ static struct platform_driver mpc_i2c_driver = {
        .probe          = fsl_i2c_probe,
        .remove_new     = fsl_i2c_remove,
        .driver = {
-               .name = DRV_NAME,
+               .name = "mpc-i2c",
                .of_match_table = mpc_i2c_of_match,
                .pm = &mpc_i2c_pm_ops,
        },
index 54181b3f1919625a83010efbb7b439f8fd2316b7..2fe68615942efeb3b3b6d8df28ac53a1a38d4136 100644 (file)
@@ -1264,9 +1264,6 @@ static int npcm_i2c_reg_slave(struct i2c_client *client)
 
        bus->slave = client;
 
-       if (!bus->slave)
-               return -EINVAL;
-
        if (client->flags & I2C_CLIENT_TEN)
                return -EAFNOSUPPORT;
 
index 5adbe62cf6212865aa6df5bb532f9def35196e4a..c65ac3d7eadc5b58c5d9e29d9a6ec0d894e2e844 100644 (file)
@@ -773,7 +773,7 @@ static int sh_mobile_i2c_r8a7740_workaround(struct sh_mobile_i2c_data *pd)
        iic_wr(pd, ICCR, ICCR_TRS);
        udelay(10);
 
-       return sh_mobile_i2c_init(pd);
+       return sh_mobile_i2c_v2_init(pd);
 }
 
 static const struct sh_mobile_dt_config default_dt_config = {
@@ -782,11 +782,6 @@ static const struct sh_mobile_dt_config default_dt_config = {
 };
 
 static const struct sh_mobile_dt_config fast_clock_dt_config = {
-       .clks_per_count = 2,
-       .setup = sh_mobile_i2c_init,
-};
-
-static const struct sh_mobile_dt_config v2_freq_calc_dt_config = {
        .clks_per_count = 2,
        .setup = sh_mobile_i2c_v2_init,
 };
@@ -799,17 +794,17 @@ static const struct sh_mobile_dt_config r8a7740_dt_config = {
 static const struct of_device_id sh_mobile_i2c_dt_ids[] = {
        { .compatible = "renesas,iic-r8a73a4", .data = &fast_clock_dt_config },
        { .compatible = "renesas,iic-r8a7740", .data = &r8a7740_dt_config },
-       { .compatible = "renesas,iic-r8a774c0", .data = &v2_freq_calc_dt_config },
-       { .compatible = "renesas,iic-r8a7790", .data = &v2_freq_calc_dt_config },
-       { .compatible = "renesas,iic-r8a7791", .data = &v2_freq_calc_dt_config },
-       { .compatible = "renesas,iic-r8a7792", .data = &v2_freq_calc_dt_config },
-       { .compatible = "renesas,iic-r8a7793", .data = &v2_freq_calc_dt_config },
-       { .compatible = "renesas,iic-r8a7794", .data = &v2_freq_calc_dt_config },
-       { .compatible = "renesas,iic-r8a7795", .data = &v2_freq_calc_dt_config },
-       { .compatible = "renesas,iic-r8a77990", .data = &v2_freq_calc_dt_config },
+       { .compatible = "renesas,iic-r8a774c0", .data = &fast_clock_dt_config },
+       { .compatible = "renesas,iic-r8a7790", .data = &fast_clock_dt_config },
+       { .compatible = "renesas,iic-r8a7791", .data = &fast_clock_dt_config },
+       { .compatible = "renesas,iic-r8a7792", .data = &fast_clock_dt_config },
+       { .compatible = "renesas,iic-r8a7793", .data = &fast_clock_dt_config },
+       { .compatible = "renesas,iic-r8a7794", .data = &fast_clock_dt_config },
+       { .compatible = "renesas,iic-r8a7795", .data = &fast_clock_dt_config },
+       { .compatible = "renesas,iic-r8a77990", .data = &fast_clock_dt_config },
        { .compatible = "renesas,iic-sh73a0", .data = &fast_clock_dt_config },
-       { .compatible = "renesas,rcar-gen2-iic", .data = &v2_freq_calc_dt_config },
-       { .compatible = "renesas,rcar-gen3-iic", .data = &v2_freq_calc_dt_config },
+       { .compatible = "renesas,rcar-gen2-iic", .data = &fast_clock_dt_config },
+       { .compatible = "renesas,rcar-gen3-iic", .data = &fast_clock_dt_config },
        { .compatible = "renesas,rmobile-iic", .data = &default_dt_config },
        {},
 };
index c52d1bec60b4cd4090c5acf11783e4cbc88ec308..28c88901d9bcd1db6e856983c8a81194e3eb163e 100644 (file)
@@ -570,7 +570,7 @@ err_rpm_put:
        return ret;
 }
 
-static int sprd_i2c_remove(struct platform_device *pdev)
+static void sprd_i2c_remove(struct platform_device *pdev)
 {
        struct sprd_i2c *i2c_dev = platform_get_drvdata(pdev);
        int ret;
@@ -586,8 +586,6 @@ static int sprd_i2c_remove(struct platform_device *pdev)
 
        pm_runtime_put_noidle(i2c_dev->dev);
        pm_runtime_disable(i2c_dev->dev);
-
-       return 0;
 }
 
 static int __maybe_unused sprd_i2c_suspend_noirq(struct device *dev)
@@ -645,7 +643,7 @@ MODULE_DEVICE_TABLE(of, sprd_i2c_of_match);
 
 static struct platform_driver sprd_i2c_driver = {
        .probe = sprd_i2c_probe,
-       .remove = sprd_i2c_remove,
+       .remove_new = sprd_i2c_remove,
        .driver = {
                   .name = "sprd-i2c",
                   .of_match_table = sprd_i2c_of_match,
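
The .remove_new switch above follows the tree-wide platform-driver conversion: the int returned by the old .remove callback was ignored by the driver core, so the new callback returns void and drivers drop their trailing "return 0;". The converted shape, sketched for a hypothetical driver (needs <linux/platform_device.h>):

	static int my_probe(struct platform_device *pdev)
	{
		return 0;	/* hypothetical, for completeness */
	}

	static void my_remove(struct platform_device *pdev)
	{
		/* tear down; there is nothing meaningful to return */
	}

	static struct platform_driver my_driver = {
		.probe		= my_probe,
		.remove_new	= my_remove,	/* void-returning remove */
		.driver		= { .name = "my-driver" },
	};
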
index 3bd48d4b6318fe1fe83e3718c59713f354ff9878..ff5c486a1dbb1fa4a17d0d3f74885a7e3a0a69ec 100644 (file)
@@ -701,7 +701,7 @@ const struct bus_type i2c_bus_type = {
 };
 EXPORT_SYMBOL_GPL(i2c_bus_type);
 
-struct device_type i2c_client_type = {
+const struct device_type i2c_client_type = {
        .groups         = i2c_dev_groups,
        .uevent         = i2c_device_uevent,
        .release        = i2c_client_dev_release,
@@ -1343,7 +1343,7 @@ static struct attribute *i2c_adapter_attrs[] = {
 };
 ATTRIBUTE_GROUPS(i2c_adapter);
 
-struct device_type i2c_adapter_type = {
+const struct device_type i2c_adapter_type = {
        .groups         = i2c_adapter_groups,
        .release        = i2c_adapter_dev_release,
 };
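
Marking i2c_client_type and i2c_adapter_type const is possible because the driver core now accepts these descriptors through const pointers; the objects then land in read-only data. The same shape for a hypothetical type (my_groups and my_release are placeholders):

	static const struct device_type my_type = {
		.groups		= my_groups,	/* hypothetical attribute groups */
		.release	= my_release,	/* hypothetical release hook */
	};
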
index 74807c6db596d810fffe035268875a61b1074881..97f338b123b11872402df5d84ef7c20ef33454f2 100644 (file)
@@ -351,13 +351,18 @@ void i2c_register_spd(struct i2c_adapter *adap)
        if (!dimm_count)
                return;
 
-       dev_info(&adap->dev, "%d/%d memory slots populated (from DMI)\n",
-                dimm_count, slot_count);
-
-       if (slot_count > 8) {
-               dev_warn(&adap->dev,
-                        "Systems with more than 8 memory slots not supported yet, not instantiating SPD\n");
-               return;
+       /*
+        * If we're a child adapter on a muxed segment, then limit slots to 8,
+        * as this is the max number of SPD EEPROMs that can be addressed per bus.
+        */
+       if (i2c_parent_is_i2c_adapter(adap)) {
+               slot_count = 8;
+       } else {
+               if (slot_count > 8) {
+                       dev_warn(&adap->dev,
+                                "More than 8 memory slots on a single bus, contact i801 maintainer to add missing mux config\n");
+                       return;
+               }
        }
 
        /*
index 3dda00f1df78da36e29a335d8da13181ea16df7c..4c6ed1d58c79a311bee5634bd6d7fbc6140676d3 100644 (file)
@@ -187,7 +187,7 @@ static struct platform_driver mlxcpld_mux_driver = {
 
 module_platform_driver(mlxcpld_mux_driver);
 
-MODULE_AUTHOR("Michael Shych (michaels@mellanox.com)");
+MODULE_AUTHOR("Michael Shych <michaels@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox I2C-CPLD-MUX driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_ALIAS("platform:i2c-mux-mlxcpld");
index 2219062104fbca4fcd23b89012090c7c30602790..f5dfc33b97c0ab94dfee90ae29d81c69395f180b 100644 (file)
 
 #define PCA954X_IRQ_OFFSET 4
 
+/*
+ * The MAX7357's configuration register is writeable after POR, but
+ * can be locked by setting the basic mode bit. The MAX7358's
+ * configuration register is locked by default and needs to be
+ * unlocked first. The configuration register holds the following
+ * settings:
+ */
+#define MAX7357_CONF_INT_ENABLE                        BIT(0)
+#define MAX7357_CONF_FLUSH_OUT                 BIT(1)
+#define MAX7357_CONF_RELEASE_INT               BIT(2)
+#define MAX7357_CONF_DISCON_SINGLE_CHAN                BIT(4)
+#define MAX7357_CONF_PRECONNECT_TEST           BIT(7)
+
+#define MAX7357_POR_DEFAULT_CONF               MAX7357_CONF_INT_ENABLE
+
 enum pca_type {
        max_7356,
        max_7357,
@@ -470,7 +484,34 @@ static int pca954x_init(struct i2c_client *client, struct pca954x *data)
        else
                data->last_chan = 0; /* Disconnect multiplexer */
 
-       ret = i2c_smbus_write_byte(client, data->last_chan);
+       if (device_is_compatible(&client->dev, "maxim,max7357")) {
+               if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) {
+                       u8 conf = MAX7357_POR_DEFAULT_CONF;
+                       /*
+                        * The interrupt signal is shared with the reset pin. Release the
+                        * interrupt after 1.6 seconds to allow using the pin as reset.
+                        */
+                       conf |= MAX7357_CONF_RELEASE_INT;
+
+                       if (device_property_read_bool(&client->dev, "maxim,isolate-stuck-channel"))
+                               conf |= MAX7357_CONF_DISCON_SINGLE_CHAN;
+                       if (device_property_read_bool(&client->dev,
+                                                     "maxim,send-flush-out-sequence"))
+                               conf |= MAX7357_CONF_FLUSH_OUT;
+                       if (device_property_read_bool(&client->dev,
+                                                     "maxim,preconnection-wiggle-test-enable"))
+                               conf |= MAX7357_CONF_PRECONNECT_TEST;
+
+                       ret = i2c_smbus_write_byte_data(client, data->last_chan, conf);
+               } else {
+                       dev_warn(&client->dev, "Write byte data not supported. "
+                                "Cannot enable enhanced mode features\n");
+                       ret = i2c_smbus_write_byte(client, data->last_chan);
+               }
+       } else {
+               ret = i2c_smbus_write_byte(client, data->last_chan);
+       }
+
        if (ret < 0)
                data->last_chan = 0;
 
index 908a807badaf9c0266bc33756d66091b1ee8368c..4d99a3524171b9908315bb243fee9c1a3666eb8c 100644 (file)
@@ -10,7 +10,7 @@
 
 #include <linux/i3c/master.h>
 
-extern struct bus_type i3c_bus_type;
+extern const struct bus_type i3c_bus_type;
 
 void i3c_bus_normaluse_lock(struct i3c_bus *bus);
 void i3c_bus_normaluse_unlock(struct i3c_bus *bus);
index 3afa530c5e3220fa96c7af96a4c90f7c673393f4..f32c591ae325cf555625c8b2d1a42678cee42ee0 100644 (file)
@@ -335,7 +335,7 @@ static void i3c_device_remove(struct device *dev)
        i3c_device_free_ibi(i3cdev);
 }
 
-struct bus_type i3c_bus_type = {
+const struct bus_type i3c_bus_type = {
        .name = "i3c",
        .match = i3c_device_match,
        .probe = i3c_device_probe,
index ef5751e91cc9eff0a9a75e663e6d408a87209070..276153e10f5a404a0453e42e37f3b4ea3896b98d 100644 (file)
@@ -1163,8 +1163,10 @@ static void dw_i3c_master_set_sir_enabled(struct dw_i3c_master *master,
                global = reg == 0xffffffff;
                reg &= ~BIT(idx);
        } else {
-               global = reg == 0;
+               bool hj_rejected = !!(readl(master->regs + DEVICE_CTRL) & DEV_CTRL_HOT_JOIN_NACK);
+
                reg |= BIT(idx);
+               global = (reg == 0xffffffff) && hj_rejected;
        }
        writel(reg, master->regs + IBI_SIR_REQ_REJECT);
 
index 34f416a3ebcb7c8af4cf0252e3235f41a97e7892..cfcc81c47b50f5b70a28c629dd71e72caeeca1c6 100644 (file)
@@ -38,7 +38,7 @@ static DEFINE_MUTEX(gameport_mutex);
 
 static LIST_HEAD(gameport_list);
 
-static struct bus_type gameport_bus;
+static const struct bus_type gameport_bus;
 
 static void gameport_add_port(struct gameport *gameport);
 static void gameport_attach_driver(struct gameport_driver *drv);
@@ -813,7 +813,7 @@ static int gameport_bus_match(struct device *dev, struct device_driver *drv)
        return !gameport_drv->ignore;
 }
 
-static struct bus_type gameport_bus = {
+static const struct bus_type gameport_bus = {
        .name           = "gameport",
        .dev_groups     = gameport_device_groups,
        .drv_groups     = gameport_driver_groups,
index 0e935914bc3aa1ba7cea72a2a6c1e8e01a4a321c..6bbf3806ea37214564f881669dcbd47405e6c6ec 100644 (file)
 #define VT_TRIGGER(_name)      .trigger = NULL
 #endif
 
+#if IS_ENABLED(CONFIG_SND_CTL_LED)
+#define AUDIO_TRIGGER(_name)   .trigger = _name
+#else
+#define AUDIO_TRIGGER(_name)   .trigger = NULL
+#endif
+
 static const struct {
        const char *name;
        const char *trigger;
@@ -29,7 +35,7 @@ static const struct {
        [LED_KANA]      = { "kana", VT_TRIGGER("kbd-kanalock") },
        [LED_SLEEP]     = { "sleep" } ,
        [LED_SUSPEND]   = { "suspend" },
-       [LED_MUTE]      = { "mute" },
+       [LED_MUTE]      = { "mute", AUDIO_TRIGGER("audio-mute") },
        [LED_MISC]      = { "misc" },
        [LED_MAIL]      = { "mail" },
        [LED_CHARGING]  = { "charging" },
index f71ea4fb173fdd2950cd6a271e1975e930578ce1..7114854375678a71a007a4b06d0413a1dda275c6 100644 (file)
@@ -1918,7 +1918,7 @@ static char *input_devnode(const struct device *dev, umode_t *mode)
        return kasprintf(GFP_KERNEL, "input/%s", dev_name(dev));
 }
 
-struct class input_class = {
+const struct class input_class = {
        .name           = "input",
        .devnode        = input_devnode,
 };
@@ -2629,17 +2629,15 @@ int input_get_new_minor(int legacy_base, unsigned int legacy_num,
         * locking is needed here.
         */
        if (legacy_base >= 0) {
-               int minor = ida_simple_get(&input_ida,
-                                          legacy_base,
-                                          legacy_base + legacy_num,
-                                          GFP_KERNEL);
+               int minor = ida_alloc_range(&input_ida, legacy_base,
+                                           legacy_base + legacy_num - 1,
+                                           GFP_KERNEL);
                if (minor >= 0 || !allow_dynamic)
                        return minor;
        }
 
-       return ida_simple_get(&input_ida,
-                             INPUT_FIRST_DYNAMIC_DEV, INPUT_MAX_CHAR_DEVICES,
-                             GFP_KERNEL);
+       return ida_alloc_range(&input_ida, INPUT_FIRST_DYNAMIC_DEV,
+                              INPUT_MAX_CHAR_DEVICES - 1, GFP_KERNEL);
 }
 EXPORT_SYMBOL(input_get_new_minor);
 
@@ -2652,7 +2650,7 @@ EXPORT_SYMBOL(input_get_new_minor);
  */
 void input_free_minor(unsigned int minor)
 {
-       ida_simple_remove(&input_ida, minor);
+       ida_free(&input_ida, minor);
 }
 EXPORT_SYMBOL(input_free_minor);
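
One subtlety in the ida_simple_get() to ida_alloc_range() conversion above: the old helper's upper bound was exclusive, while ida_alloc_range() takes an inclusive max, hence the new "- 1" on both upper bounds. Side by side (sketch, needs <linux/idr.h>):

	/* Both calls allocate from the same ID range [base, base + num - 1]: */
	id = ida_simple_get(&ida, base, base + num, GFP_KERNEL);	/* end is exclusive */
	id = ida_alloc_range(&ida, base, base + num - 1, GFP_KERNEL);	/* max is inclusive */
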
 
index 14c828adebf7829269b7bace9b1bcbda0c7c506c..f50848ed5575dbe143b159f5c16864fc0cc7b25c 100644 (file)
@@ -127,6 +127,7 @@ static const struct xpad_device {
        u8 mapping;
        u8 xtype;
 } xpad_device[] = {
+       /* Please keep this list sorted by vendor and product ID. */
        { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 },
        { 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 },
        { 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 },
@@ -152,9 +153,9 @@ static const struct xpad_device {
        { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE },
        { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE },
        { 0x045e, 0x02e3, "Microsoft X-Box One Elite pad", MAP_PADDLES, XTYPE_XBOXONE },
-       { 0x045e, 0x0b00, "Microsoft X-Box One Elite 2 pad", MAP_PADDLES, XTYPE_XBOXONE },
        { 0x045e, 0x02ea, "Microsoft X-Box One S pad", 0, XTYPE_XBOXONE },
        { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
+       { 0x045e, 0x0b00, "Microsoft X-Box One Elite 2 pad", MAP_PADDLES, XTYPE_XBOXONE },
        { 0x045e, 0x0b0a, "Microsoft X-Box Adaptive Controller", MAP_PROFILE_BUTTON, XTYPE_XBOXONE },
        { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE },
        { 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 },
@@ -340,7 +341,6 @@ static const struct xpad_device {
        { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE },
        { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE },
        { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 },
-       { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE },
        { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 },
        { 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 },
@@ -355,9 +355,9 @@ static const struct xpad_device {
        { 0x24c6, 0x5502, "Hori Fighting Stick VX Alt", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x24c6, 0x5503, "Hori Fighting Edge", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 },
-       { 0x24c6, 0x5510, "Hori Fighting Commander ONE (Xbox 360/PC Mode)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x24c6, 0x550d, "Hori GEM Xbox controller", 0, XTYPE_XBOX360 },
        { 0x24c6, 0x550e, "Hori Real Arcade Pro V Kai 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
+       { 0x24c6, 0x5510, "Hori Fighting Commander ONE (Xbox 360/PC Mode)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
        { 0x24c6, 0x551a, "PowerA FUSION Pro Controller", 0, XTYPE_XBOXONE },
        { 0x24c6, 0x561a, "PowerA FUSION Controller", 0, XTYPE_XBOXONE },
        { 0x24c6, 0x5b00, "ThrustMaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 },
@@ -366,8 +366,11 @@ static const struct xpad_device {
        { 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 },
        { 0x24c6, 0xfafe, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 },
        { 0x2563, 0x058d, "OneXPlayer Gamepad", 0, XTYPE_XBOX360 },
+       { 0x294b, 0x3303, "Snakebyte GAMEPAD BASE X", 0, XTYPE_XBOXONE },
+       { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE },
        { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller for Xbox", 0, XTYPE_XBOXONE },
        { 0x2dc8, 0x3106, "8BitDo Pro 2 Wired Controller", 0, XTYPE_XBOX360 },
+       { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE },
        { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 },
        { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 },
        { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 },
@@ -465,6 +468,10 @@ static const signed short xpad_btn_paddles[] = {
        { XPAD_XBOXONE_VENDOR_PROTOCOL((vend), 208) }
 
 static const struct usb_device_id xpad_table[] = {
+       /*
+        * Please keep this list sorted by vendor ID. Note that there are 2
+        * macros - XPAD_XBOX360_VENDOR and XPAD_XBOXONE_VENDOR.
+        */
        { USB_INTERFACE_INFO('X', 'B', 0) },    /* Xbox USB-IF not-approved class */
        XPAD_XBOX360_VENDOR(0x0079),            /* GPD Win 2 controller */
        XPAD_XBOX360_VENDOR(0x03eb),            /* Wooting Keyboards (Legacy) */
@@ -507,6 +514,7 @@ static const struct usb_device_id xpad_table[] = {
        XPAD_XBOXONE_VENDOR(0x24c6),            /* PowerA controllers */
        XPAD_XBOX360_VENDOR(0x2563),            /* OneXPlayer Gamepad */
        XPAD_XBOX360_VENDOR(0x260d),            /* Dareu H101 */
+       XPAD_XBOXONE_VENDOR(0x294b),            /* Snakebyte */
        XPAD_XBOX360_VENDOR(0x2c22),            /* Qanba Controllers */
        XPAD_XBOX360_VENDOR(0x2dc8),            /* 8BitDo Pro 2 Wired Controller */
        XPAD_XBOXONE_VENDOR(0x2dc8),            /* 8BitDo Pro 2 Wired Controller for Xbox */
index f3c3746acd4cf96a0ecdc905550bafb608cda4d9..6b46f83a9edb8e2ad09e04a7902845a2f78db5c1 100644 (file)
@@ -418,7 +418,7 @@ static struct platform_driver bcm_kp_device_driver = {
        .probe          = bcm_kp_probe,
        .driver         = {
                .name   = "bcm-keypad",
-               .of_match_table = of_match_ptr(bcm_kp_of_match),
+               .of_match_table = bcm_kp_of_match,
        }
 };
 
index 50fa764c82d2b3e9bb0e6670066dd18a7c938a84..695c03e075b56fd29e40667df22bdbffa56f1cca 100644 (file)
@@ -28,7 +28,9 @@ struct matrix_keypad {
        struct input_dev *input_dev;
        unsigned int row_shift;
 
-       DECLARE_BITMAP(disabled_gpios, MATRIX_MAX_ROWS);
+       unsigned int row_irqs[MATRIX_MAX_ROWS];
+       unsigned int num_row_irqs;
+       DECLARE_BITMAP(wakeup_enabled_irqs, MATRIX_MAX_ROWS);
 
        uint32_t last_key_state[MATRIX_MAX_COLS];
        struct delayed_work work;
@@ -85,28 +87,18 @@ static bool row_asserted(const struct matrix_keypad_platform_data *pdata,
 
 static void enable_row_irqs(struct matrix_keypad *keypad)
 {
-       const struct matrix_keypad_platform_data *pdata = keypad->pdata;
        int i;
 
-       if (pdata->clustered_irq > 0)
-               enable_irq(pdata->clustered_irq);
-       else {
-               for (i = 0; i < pdata->num_row_gpios; i++)
-                       enable_irq(gpio_to_irq(pdata->row_gpios[i]));
-       }
+       for (i = 0; i < keypad->num_row_irqs; i++)
+               enable_irq(keypad->row_irqs[i]);
 }
 
 static void disable_row_irqs(struct matrix_keypad *keypad)
 {
-       const struct matrix_keypad_platform_data *pdata = keypad->pdata;
        int i;
 
-       if (pdata->clustered_irq > 0)
-               disable_irq_nosync(pdata->clustered_irq);
-       else {
-               for (i = 0; i < pdata->num_row_gpios; i++)
-                       disable_irq_nosync(gpio_to_irq(pdata->row_gpios[i]));
-       }
+       for (i = 0; i < keypad->num_row_irqs; i++)
+               disable_irq_nosync(keypad->row_irqs[i]);
 }
 
 /*
@@ -232,44 +224,20 @@ static void matrix_keypad_stop(struct input_dev *dev)
 
 static void matrix_keypad_enable_wakeup(struct matrix_keypad *keypad)
 {
-       const struct matrix_keypad_platform_data *pdata = keypad->pdata;
-       unsigned int gpio;
        int i;
 
-       if (pdata->clustered_irq > 0) {
-               if (enable_irq_wake(pdata->clustered_irq) == 0)
-                       keypad->gpio_all_disabled = true;
-       } else {
-
-               for (i = 0; i < pdata->num_row_gpios; i++) {
-                       if (!test_bit(i, keypad->disabled_gpios)) {
-                               gpio = pdata->row_gpios[i];
-
-                               if (enable_irq_wake(gpio_to_irq(gpio)) == 0)
-                                       __set_bit(i, keypad->disabled_gpios);
-                       }
-               }
-       }
+       for_each_clear_bit(i, keypad->wakeup_enabled_irqs, keypad->num_row_irqs)
+               if (enable_irq_wake(keypad->row_irqs[i]) == 0)
+                       __set_bit(i, keypad->wakeup_enabled_irqs);
 }
 
 static void matrix_keypad_disable_wakeup(struct matrix_keypad *keypad)
 {
-       const struct matrix_keypad_platform_data *pdata = keypad->pdata;
-       unsigned int gpio;
        int i;
 
-       if (pdata->clustered_irq > 0) {
-               if (keypad->gpio_all_disabled) {
-                       disable_irq_wake(pdata->clustered_irq);
-                       keypad->gpio_all_disabled = false;
-               }
-       } else {
-               for (i = 0; i < pdata->num_row_gpios; i++) {
-                       if (test_and_clear_bit(i, keypad->disabled_gpios)) {
-                               gpio = pdata->row_gpios[i];
-                               disable_irq_wake(gpio_to_irq(gpio));
-                       }
-               }
+       for_each_set_bit(i, keypad->wakeup_enabled_irqs, keypad->num_row_irqs) {
+               disable_irq_wake(keypad->row_irqs[i]);
+               __clear_bit(i, keypad->wakeup_enabled_irqs);
        }
 }
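For context, a minimal sketch of the for_each_set_bit()/for_each_clear_bit() iterators (from <linux/find.h>) that drive the wakeup bookkeeping above; the bitmap size and bit values are illustrative only:

	DECLARE_BITMAP(map, 4);		/* bits 0..3 */
	unsigned int bit;

	bitmap_zero(map, 4);
	__set_bit(2, map);

	for_each_set_bit(bit, map, 4)	/* visits bit 2 only */
		pr_info("armed row %u\n", bit);

	for_each_clear_bit(bit, map, 4)	/* visits bits 0, 1 and 3 */
		pr_info("idle row %u\n", bit);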
 
@@ -306,96 +274,83 @@ static int matrix_keypad_init_gpio(struct platform_device *pdev,
                                   struct matrix_keypad *keypad)
 {
        const struct matrix_keypad_platform_data *pdata = keypad->pdata;
-       int i, err;
+       int i, irq, err;
 
        /* initialize strobe lines as outputs, activated */
        for (i = 0; i < pdata->num_col_gpios; i++) {
-               err = gpio_request(pdata->col_gpios[i], "matrix_kbd_col");
+               err = devm_gpio_request(&pdev->dev,
+                                       pdata->col_gpios[i], "matrix_kbd_col");
                if (err) {
                        dev_err(&pdev->dev,
                                "failed to request GPIO%d for COL%d\n",
                                pdata->col_gpios[i], i);
-                       goto err_free_cols;
+                       return err;
                }
 
                gpio_direction_output(pdata->col_gpios[i], !pdata->active_low);
        }
 
        for (i = 0; i < pdata->num_row_gpios; i++) {
-               err = gpio_request(pdata->row_gpios[i], "matrix_kbd_row");
+               err = devm_gpio_request(&pdev->dev,
+                                       pdata->row_gpios[i], "matrix_kbd_row");
                if (err) {
                        dev_err(&pdev->dev,
                                "failed to request GPIO%d for ROW%d\n",
                                pdata->row_gpios[i], i);
-                       goto err_free_rows;
+                       return err;
                }
 
                gpio_direction_input(pdata->row_gpios[i]);
        }
 
        if (pdata->clustered_irq > 0) {
-               err = request_any_context_irq(pdata->clustered_irq,
+               err = devm_request_any_context_irq(&pdev->dev,
+                               pdata->clustered_irq,
                                matrix_keypad_interrupt,
                                pdata->clustered_irq_flags,
                                "matrix-keypad", keypad);
                if (err < 0) {
                        dev_err(&pdev->dev,
                                "Unable to acquire clustered interrupt\n");
-                       goto err_free_rows;
+                       return err;
                }
+
+               keypad->row_irqs[0] = pdata->clustered_irq;
+               keypad->num_row_irqs = 1;
        } else {
                for (i = 0; i < pdata->num_row_gpios; i++) {
-                       err = request_any_context_irq(
-                                       gpio_to_irq(pdata->row_gpios[i]),
+                       irq = gpio_to_irq(pdata->row_gpios[i]);
+                       if (irq < 0) {
+                               err = irq;
+                               dev_err(&pdev->dev,
+                                       "Unable to convert GPIO line %i to irq: %d\n",
+                                       pdata->row_gpios[i], err);
+                               return err;
+                       }
+
+                       err = devm_request_any_context_irq(&pdev->dev,
+                                       irq,
                                        matrix_keypad_interrupt,
                                        IRQF_TRIGGER_RISING |
-                                       IRQF_TRIGGER_FALLING,
+                                               IRQF_TRIGGER_FALLING,
                                        "matrix-keypad", keypad);
                        if (err < 0) {
                                dev_err(&pdev->dev,
                                        "Unable to acquire interrupt for GPIO line %i\n",
                                        pdata->row_gpios[i]);
-                               goto err_free_irqs;
+                               return err;
                        }
+
+                       keypad->row_irqs[i] = irq;
                }
+
+               keypad->num_row_irqs = pdata->num_row_gpios;
        }
 
        /* initialized as disabled - enabled by input->open */
        disable_row_irqs(keypad);
-       return 0;
-
-err_free_irqs:
-       while (--i >= 0)
-               free_irq(gpio_to_irq(pdata->row_gpios[i]), keypad);
-       i = pdata->num_row_gpios;
-err_free_rows:
-       while (--i >= 0)
-               gpio_free(pdata->row_gpios[i]);
-       i = pdata->num_col_gpios;
-err_free_cols:
-       while (--i >= 0)
-               gpio_free(pdata->col_gpios[i]);
-
-       return err;
-}
-
-static void matrix_keypad_free_gpio(struct matrix_keypad *keypad)
-{
-       const struct matrix_keypad_platform_data *pdata = keypad->pdata;
-       int i;
 
-       if (pdata->clustered_irq > 0) {
-               free_irq(pdata->clustered_irq, keypad);
-       } else {
-               for (i = 0; i < pdata->num_row_gpios; i++)
-                       free_irq(gpio_to_irq(pdata->row_gpios[i]), keypad);
-       }
-
-       for (i = 0; i < pdata->num_row_gpios; i++)
-               gpio_free(pdata->row_gpios[i]);
-
-       for (i = 0; i < pdata->num_col_gpios; i++)
-               gpio_free(pdata->col_gpios[i]);
+       return 0;
 }
 
 #ifdef CONFIG_OF
@@ -494,12 +449,13 @@ static int matrix_keypad_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       keypad = kzalloc(sizeof(struct matrix_keypad), GFP_KERNEL);
-       input_dev = input_allocate_device();
-       if (!keypad || !input_dev) {
-               err = -ENOMEM;
-               goto err_free_mem;
-       }
+       keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad), GFP_KERNEL);
+       if (!keypad)
+               return -ENOMEM;
+
+       input_dev = devm_input_allocate_device(&pdev->dev);
+       if (!input_dev)
+               return -ENOMEM;
 
        keypad->input_dev = input_dev;
        keypad->pdata = pdata;
@@ -510,7 +466,6 @@ static int matrix_keypad_probe(struct platform_device *pdev)
 
        input_dev->name         = pdev->name;
        input_dev->id.bustype   = BUS_HOST;
-       input_dev->dev.parent   = &pdev->dev;
        input_dev->open         = matrix_keypad_start;
        input_dev->close        = matrix_keypad_stop;
 
@@ -520,7 +475,7 @@ static int matrix_keypad_probe(struct platform_device *pdev)
                                         NULL, input_dev);
        if (err) {
                dev_err(&pdev->dev, "failed to build keymap\n");
-               goto err_free_mem;
+               return -ENOMEM;
        }
 
        if (!pdata->no_autorepeat)
@@ -530,32 +485,16 @@ static int matrix_keypad_probe(struct platform_device *pdev)
 
        err = matrix_keypad_init_gpio(pdev, keypad);
        if (err)
-               goto err_free_mem;
+               return err;
 
        err = input_register_device(keypad->input_dev);
        if (err)
-               goto err_free_gpio;
+               return err;
 
        device_init_wakeup(&pdev->dev, pdata->wakeup);
        platform_set_drvdata(pdev, keypad);
 
        return 0;
-
-err_free_gpio:
-       matrix_keypad_free_gpio(keypad);
-err_free_mem:
-       input_free_device(input_dev);
-       kfree(keypad);
-       return err;
-}
-
-static void matrix_keypad_remove(struct platform_device *pdev)
-{
-       struct matrix_keypad *keypad = platform_get_drvdata(pdev);
-
-       matrix_keypad_free_gpio(keypad);
-       input_unregister_device(keypad->input_dev);
-       kfree(keypad);
 }
 
 #ifdef CONFIG_OF
@@ -568,7 +507,6 @@ MODULE_DEVICE_TABLE(of, matrix_keypad_dt_match);
 
 static struct platform_driver matrix_keypad_driver = {
        .probe          = matrix_keypad_probe,
-       .remove_new     = matrix_keypad_remove,
        .driver         = {
                .name   = "matrix-keypad",
                .pm     = pm_sleep_ptr(&matrix_keypad_pm_ops),
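The conversion above leans entirely on devres: every resource acquired in probe() is device-managed, so each error path simply returns and the core releases everything in reverse order on failure or unbind, which is what let matrix_keypad_free_gpio() and the .remove_new callback be deleted. A minimal sketch of the pattern, with hypothetical names:

	static int example_probe(struct platform_device *pdev)
	{
		struct example *ex;
		int irq, err;

		ex = devm_kzalloc(&pdev->dev, sizeof(*ex), GFP_KERNEL);
		if (!ex)
			return -ENOMEM;

		irq = platform_get_irq(pdev, 0);
		if (irq < 0)
			return irq;		/* ex is freed by devres */

		err = devm_request_irq(&pdev->dev, irq, example_isr, 0,
				       "example", ex);
		if (err)
			return err;		/* likewise */

		return 0;			/* no .remove needed */
	}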
index 31f0702c3d01e54f74ad3b28b8d9469745145600..4b0685f9611389a6e5de313179b08458a53b9ebd 100644 (file)
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Marvell 88PM80x ONKEY driver
  *
  * Copyright (C) 2012 Marvell International Ltd.
  * Haojian Zhuang <haojian.zhuang@marvell.com>
  * Qiao Zhou <zhouqiao@marvell.com>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file "COPYING" in the main directory of this
- * archive for more details.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #include <linux/kernel.h>
index 36aeeae7761101de277ffbfcbc93fba402c51590..9ca5a743f19feb1cd3f466299af1b8861a3f9374 100644 (file)
@@ -620,6 +620,118 @@ static const struct iqs7222_dev_desc iqs7222_devs[] = {
                        },
                },
        },
+       {
+               .prod_num = IQS7222_PROD_NUM_D,
+               .fw_major = 1,
+               .fw_minor = 2,
+               .touch_link = 1770,
+               .allow_offset = 9,
+               .event_offset = 10,
+               .comms_offset = 11,
+               .reg_grps = {
+                       [IQS7222_REG_GRP_STAT] = {
+                               .base = IQS7222_SYS_STATUS,
+                               .num_row = 1,
+                               .num_col = 7,
+                       },
+                       [IQS7222_REG_GRP_CYCLE] = {
+                               .base = 0x8000,
+                               .num_row = 7,
+                               .num_col = 2,
+                       },
+                       [IQS7222_REG_GRP_GLBL] = {
+                               .base = 0x8700,
+                               .num_row = 1,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_BTN] = {
+                               .base = 0x9000,
+                               .num_row = 14,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_CHAN] = {
+                               .base = 0xA000,
+                               .num_row = 14,
+                               .num_col = 4,
+                       },
+                       [IQS7222_REG_GRP_FILT] = {
+                               .base = 0xAE00,
+                               .num_row = 1,
+                               .num_col = 2,
+                       },
+                       [IQS7222_REG_GRP_TPAD] = {
+                               .base = 0xB000,
+                               .num_row = 1,
+                               .num_col = 24,
+                       },
+                       [IQS7222_REG_GRP_GPIO] = {
+                               .base = 0xC000,
+                               .num_row = 3,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_SYS] = {
+                               .base = IQS7222_SYS_SETUP,
+                               .num_row = 1,
+                               .num_col = 12,
+                       },
+               },
+       },
+       {
+               .prod_num = IQS7222_PROD_NUM_D,
+               .fw_major = 1,
+               .fw_minor = 1,
+               .touch_link = 1774,
+               .allow_offset = 9,
+               .event_offset = 10,
+               .comms_offset = 11,
+               .reg_grps = {
+                       [IQS7222_REG_GRP_STAT] = {
+                               .base = IQS7222_SYS_STATUS,
+                               .num_row = 1,
+                               .num_col = 7,
+                       },
+                       [IQS7222_REG_GRP_CYCLE] = {
+                               .base = 0x8000,
+                               .num_row = 7,
+                               .num_col = 2,
+                       },
+                       [IQS7222_REG_GRP_GLBL] = {
+                               .base = 0x8700,
+                               .num_row = 1,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_BTN] = {
+                               .base = 0x9000,
+                               .num_row = 14,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_CHAN] = {
+                               .base = 0xA000,
+                               .num_row = 14,
+                               .num_col = 4,
+                       },
+                       [IQS7222_REG_GRP_FILT] = {
+                               .base = 0xAE00,
+                               .num_row = 1,
+                               .num_col = 2,
+                       },
+                       [IQS7222_REG_GRP_TPAD] = {
+                               .base = 0xB000,
+                               .num_row = 1,
+                               .num_col = 24,
+                       },
+                       [IQS7222_REG_GRP_GPIO] = {
+                               .base = 0xC000,
+                               .num_row = 3,
+                               .num_col = 3,
+                       },
+                       [IQS7222_REG_GRP_SYS] = {
+                               .base = IQS7222_SYS_SETUP,
+                               .num_row = 1,
+                               .num_col = 12,
+                       },
+               },
+       },
        {
                .prod_num = IQS7222_PROD_NUM_D,
                .fw_major = 0,
index 32cc4c62a716cdd25d02aa154f0f6830b41d8220..833b643f06164920eda67705356e7c9321ac512b 100644 (file)
@@ -439,16 +439,4 @@ config MOUSE_SYNAPTICS_USB
          To compile this driver as a module, choose M here: the
          module will be called synaptics_usb.
 
-config MOUSE_NAVPOINT_PXA27x
-       tristate "Synaptics NavPoint (PXA27x SSP/SPI)"
-       depends on PXA27x && PXA_SSP
-       help
-         This driver adds support for the Synaptics NavPoint touchpad connected
-         to a PXA27x SSP port in SPI slave mode. The device emulates a mouse;
-         a tap or tap-and-a-half drag gesture emulates the left mouse button.
-         For example, use the xf86-input-evdev driver for an X pointing device.
-
-         To compile this driver as a module, choose M here: the
-         module will be called navpoint.
-
 endif
index 92b3204ce84e3c43f7ff8f21f9823dff2dd04574..a1336d5bee6f33dd1733b66e77d6229134f14fdb 100644 (file)
@@ -15,7 +15,6 @@ obj-$(CONFIG_MOUSE_GPIO)              += gpio_mouse.o
 obj-$(CONFIG_MOUSE_INPORT)             += inport.o
 obj-$(CONFIG_MOUSE_LOGIBM)             += logibm.o
 obj-$(CONFIG_MOUSE_MAPLE)              += maplemouse.o
-obj-$(CONFIG_MOUSE_NAVPOINT_PXA27x)    += navpoint.o
 obj-$(CONFIG_MOUSE_PC110PAD)           += pc110pad.o
 obj-$(CONFIG_MOUSE_PS2)                        += psmouse.o
 obj-$(CONFIG_MOUSE_RISCPC)             += rpcmouse.o
diff --git a/drivers/input/mouse/navpoint.c b/drivers/input/mouse/navpoint.c
deleted file mode 100644 (file)
index ba75778..0000000
+++ /dev/null
@@ -1,350 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Synaptics NavPoint (PXA27x SSP/SPI) driver.
- *
- * Copyright (C) 2012 Paul Parsons <lost.distance@yahoo.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/clk.h>
-#include <linux/delay.h>
-#include <linux/gpio/consumer.h>
-#include <linux/input.h>
-#include <linux/input/navpoint.h>
-#include <linux/interrupt.h>
-#include <linux/mutex.h>
-#include <linux/pxa2xx_ssp.h>
-#include <linux/slab.h>
-
-/*
- * Synaptics Modular Embedded Protocol: Module Packet Format.
- * Module header byte 2:0 = Length (# bytes that follow)
- * Module header byte 4:3 = Control
- * Module header byte 7:5 = Module Address
- */
-#define HEADER_LENGTH(byte)    ((byte) & 0x07)
-#define HEADER_CONTROL(byte)   (((byte) >> 3) & 0x03)
-#define HEADER_ADDRESS(byte)   ((byte) >> 5)
-
-struct navpoint {
-       struct ssp_device       *ssp;
-       struct input_dev        *input;
-       struct device           *dev;
-       struct gpio_desc        *gpiod;
-       int                     index;
-       u8                      data[1 + HEADER_LENGTH(0xff)];
-};
-
-/*
- * Initialization values for SSCR0_x, SSCR1_x, SSSR_x.
- */
-static const u32 sscr0 = 0
-       | SSCR0_TUM             /* TIM = 1; No TUR interrupts */
-       | SSCR0_RIM             /* RIM = 1; No ROR interrupts */
-       | SSCR0_SSE             /* SSE = 1; SSP enabled */
-       | SSCR0_Motorola        /* FRF = 0; Motorola SPI */
-       | SSCR0_DataSize(16)    /* DSS = 15; Data size = 16-bit */
-       ;
-static const u32 sscr1 = 0
-       | SSCR1_SCFR            /* SCFR = 1; SSPSCLK only during transfers */
-       | SSCR1_SCLKDIR         /* SCLKDIR = 1; Slave mode */
-       | SSCR1_SFRMDIR         /* SFRMDIR = 1; Slave mode */
-       | SSCR1_RWOT            /* RWOT = 1; Receive without transmit mode */
-       | SSCR1_RxTresh(1)      /* RFT = 0; Receive FIFO threshold = 1 */
-       | SSCR1_SPH             /* SPH = 1; SSPSCLK inactive 0.5 + 1 cycles */
-       | SSCR1_RIE             /* RIE = 1; Receive FIFO interrupt enabled */
-       ;
-static const u32 sssr = 0
-       | SSSR_BCE              /* BCE = 1; Clear BCE */
-       | SSSR_TUR              /* TUR = 1; Clear TUR */
-       | SSSR_EOC              /* EOC = 1; Clear EOC */
-       | SSSR_TINT             /* TINT = 1; Clear TINT */
-       | SSSR_PINT             /* PINT = 1; Clear PINT */
-       | SSSR_ROR              /* ROR = 1; Clear ROR */
-       ;
-
-/*
- * MEP Query $22: Touchpad Coordinate Range Query is not supported by
- * the NavPoint module, so sampled values provide the default limits.
- */
-#define NAVPOINT_X_MIN         1278
-#define NAVPOINT_X_MAX         5340
-#define NAVPOINT_Y_MIN         1572
-#define NAVPOINT_Y_MAX         4396
-#define NAVPOINT_PRESSURE_MIN  0
-#define NAVPOINT_PRESSURE_MAX  255
-
-static void navpoint_packet(struct navpoint *navpoint)
-{
-       int finger;
-       int gesture;
-       int x, y, z;
-
-       switch (navpoint->data[0]) {
-       case 0xff:      /* Garbage (packet?) between reset and Hello packet */
-       case 0x00:      /* Module 0, NULL packet */
-               break;
-
-       case 0x0e:      /* Module 0, Absolute packet */
-               finger = (navpoint->data[1] & 0x01);
-               gesture = (navpoint->data[1] & 0x02);
-               x = ((navpoint->data[2] & 0x1f) << 8) | navpoint->data[3];
-               y = ((navpoint->data[4] & 0x1f) << 8) | navpoint->data[5];
-               z = navpoint->data[6];
-               input_report_key(navpoint->input, BTN_TOUCH, finger);
-               input_report_abs(navpoint->input, ABS_X, x);
-               input_report_abs(navpoint->input, ABS_Y, y);
-               input_report_abs(navpoint->input, ABS_PRESSURE, z);
-               input_report_key(navpoint->input, BTN_TOOL_FINGER, finger);
-               input_report_key(navpoint->input, BTN_LEFT, gesture);
-               input_sync(navpoint->input);
-               break;
-
-       case 0x19:      /* Module 0, Hello packet */
-               if ((navpoint->data[1] & 0xf0) == 0x10)
-                       break;
-               fallthrough;
-       default:
-               dev_warn(navpoint->dev,
-                        "spurious packet: data=0x%02x,0x%02x,...\n",
-                        navpoint->data[0], navpoint->data[1]);
-               break;
-       }
-}
-
-static irqreturn_t navpoint_irq(int irq, void *dev_id)
-{
-       struct navpoint *navpoint = dev_id;
-       struct ssp_device *ssp = navpoint->ssp;
-       irqreturn_t ret = IRQ_NONE;
-       u32 status;
-
-       status = pxa_ssp_read_reg(ssp, SSSR);
-       if (status & sssr) {
-               dev_warn(navpoint->dev,
-                        "unexpected interrupt: status=0x%08x\n", status);
-               pxa_ssp_write_reg(ssp, SSSR, (status & sssr));
-               ret = IRQ_HANDLED;
-       }
-
-       while (status & SSSR_RNE) {
-               u32 data;
-
-               data = pxa_ssp_read_reg(ssp, SSDR);
-               navpoint->data[navpoint->index + 0] = (data >> 8);
-               navpoint->data[navpoint->index + 1] = data;
-               navpoint->index += 2;
-               if (HEADER_LENGTH(navpoint->data[0]) < navpoint->index) {
-                       navpoint_packet(navpoint);
-                       navpoint->index = 0;
-               }
-               status = pxa_ssp_read_reg(ssp, SSSR);
-               ret = IRQ_HANDLED;
-       }
-
-       return ret;
-}
-
-static void navpoint_up(struct navpoint *navpoint)
-{
-       struct ssp_device *ssp = navpoint->ssp;
-       int timeout;
-
-       clk_prepare_enable(ssp->clk);
-
-       pxa_ssp_write_reg(ssp, SSCR1, sscr1);
-       pxa_ssp_write_reg(ssp, SSSR, sssr);
-       pxa_ssp_write_reg(ssp, SSTO, 0);
-       pxa_ssp_write_reg(ssp, SSCR0, sscr0);   /* SSCR0_SSE written last */
-
-       /* Wait until SSP port is ready for slave clock operations */
-       for (timeout = 100; timeout != 0; --timeout) {
-               if (!(pxa_ssp_read_reg(ssp, SSSR) & SSSR_CSS))
-                       break;
-               msleep(1);
-       }
-
-       if (timeout == 0)
-               dev_err(navpoint->dev,
-                       "timeout waiting for SSSR[CSS] to clear\n");
-
-       gpiod_set_value(navpoint->gpiod, 1);
-}
-
-static void navpoint_down(struct navpoint *navpoint)
-{
-       struct ssp_device *ssp = navpoint->ssp;
-
-       gpiod_set_value(navpoint->gpiod, 0);
-
-       pxa_ssp_write_reg(ssp, SSCR0, 0);
-
-       clk_disable_unprepare(ssp->clk);
-}
-
-static int navpoint_open(struct input_dev *input)
-{
-       struct navpoint *navpoint = input_get_drvdata(input);
-
-       navpoint_up(navpoint);
-
-       return 0;
-}
-
-static void navpoint_close(struct input_dev *input)
-{
-       struct navpoint *navpoint = input_get_drvdata(input);
-
-       navpoint_down(navpoint);
-}
-
-static int navpoint_probe(struct platform_device *pdev)
-{
-       const struct navpoint_platform_data *pdata =
-                                       dev_get_platdata(&pdev->dev);
-       struct ssp_device *ssp;
-       struct input_dev *input;
-       struct navpoint *navpoint;
-       int error;
-
-       if (!pdata) {
-               dev_err(&pdev->dev, "no platform data\n");
-               return -EINVAL;
-       }
-
-       ssp = pxa_ssp_request(pdata->port, pdev->name);
-       if (!ssp)
-               return -ENODEV;
-
-       /* HaRET does not disable devices before jumping into Linux */
-       if (pxa_ssp_read_reg(ssp, SSCR0) & SSCR0_SSE) {
-               pxa_ssp_write_reg(ssp, SSCR0, 0);
-               dev_warn(&pdev->dev, "ssp%d already enabled\n", pdata->port);
-       }
-
-       navpoint = kzalloc(sizeof(*navpoint), GFP_KERNEL);
-       input = input_allocate_device();
-       if (!navpoint || !input) {
-               error = -ENOMEM;
-               goto err_free_mem;
-       }
-
-       navpoint->gpiod = gpiod_get_optional(&pdev->dev,
-                                            NULL, GPIOD_OUT_LOW);
-       if (IS_ERR(navpoint->gpiod)) {
-               error = PTR_ERR(navpoint->gpiod);
-               dev_err(&pdev->dev, "error getting GPIO\n");
-               goto err_free_mem;
-       }
-       gpiod_set_consumer_name(navpoint->gpiod, "SYNAPTICS_ON");
-
-       navpoint->ssp = ssp;
-       navpoint->input = input;
-       navpoint->dev = &pdev->dev;
-
-       input->name = pdev->name;
-       input->dev.parent = &pdev->dev;
-
-       __set_bit(EV_KEY, input->evbit);
-       __set_bit(EV_ABS, input->evbit);
-       __set_bit(BTN_LEFT, input->keybit);
-       __set_bit(BTN_TOUCH, input->keybit);
-       __set_bit(BTN_TOOL_FINGER, input->keybit);
-
-       input_set_abs_params(input, ABS_X,
-                            NAVPOINT_X_MIN, NAVPOINT_X_MAX, 0, 0);
-       input_set_abs_params(input, ABS_Y,
-                            NAVPOINT_Y_MIN, NAVPOINT_Y_MAX, 0, 0);
-       input_set_abs_params(input, ABS_PRESSURE,
-                            NAVPOINT_PRESSURE_MIN, NAVPOINT_PRESSURE_MAX,
-                            0, 0);
-
-       input->open = navpoint_open;
-       input->close = navpoint_close;
-
-       input_set_drvdata(input, navpoint);
-
-       error = request_irq(ssp->irq, navpoint_irq, 0, pdev->name, navpoint);
-       if (error)
-               goto err_free_mem;
-
-       error = input_register_device(input);
-       if (error)
-               goto err_free_irq;
-
-       platform_set_drvdata(pdev, navpoint);
-       dev_dbg(&pdev->dev, "ssp%d, irq %d\n", pdata->port, ssp->irq);
-
-       return 0;
-
-err_free_irq:
-       free_irq(ssp->irq, navpoint);
-err_free_mem:
-       input_free_device(input);
-       kfree(navpoint);
-       pxa_ssp_free(ssp);
-
-       return error;
-}
-
-static void navpoint_remove(struct platform_device *pdev)
-{
-       struct navpoint *navpoint = platform_get_drvdata(pdev);
-       struct ssp_device *ssp = navpoint->ssp;
-
-       free_irq(ssp->irq, navpoint);
-
-       input_unregister_device(navpoint->input);
-       kfree(navpoint);
-
-       pxa_ssp_free(ssp);
-}
-
-static int navpoint_suspend(struct device *dev)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct navpoint *navpoint = platform_get_drvdata(pdev);
-       struct input_dev *input = navpoint->input;
-
-       mutex_lock(&input->mutex);
-       if (input_device_enabled(input))
-               navpoint_down(navpoint);
-       mutex_unlock(&input->mutex);
-
-       return 0;
-}
-
-static int navpoint_resume(struct device *dev)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-       struct navpoint *navpoint = platform_get_drvdata(pdev);
-       struct input_dev *input = navpoint->input;
-
-       mutex_lock(&input->mutex);
-       if (input_device_enabled(input))
-               navpoint_up(navpoint);
-       mutex_unlock(&input->mutex);
-
-       return 0;
-}
-
-static DEFINE_SIMPLE_DEV_PM_OPS(navpoint_pm_ops,
-                               navpoint_suspend, navpoint_resume);
-
-static struct platform_driver navpoint_driver = {
-       .probe          = navpoint_probe,
-       .remove_new     = navpoint_remove,
-       .driver = {
-               .name   = "navpoint",
-               .pm     = pm_sleep_ptr(&navpoint_pm_ops),
-       },
-};
-
-module_platform_driver(navpoint_driver);
-
-MODULE_AUTHOR("Paul Parsons <lost.distance@yahoo.com>");
-MODULE_DESCRIPTION("Synaptics NavPoint (PXA27x SSP/SPI) driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:navpoint");
index 1b45b1d3077de72d6f78c608287c52f0d09773f3..343030290d78899a1c964e3e0e83b69aa58a10ab 100644 (file)
@@ -344,7 +344,7 @@ static int rmi_bus_match(struct device *dev, struct device_driver *drv)
        return physical || rmi_function_match(dev, drv);
 }
 
-struct bus_type rmi_bus_type = {
+const struct bus_type rmi_bus_type = {
        .match          = rmi_bus_match,
        .name           = "rmi4",
 };
index 25df6320f9f1d92fd6b972de489b9c4db5dfd040..ea46ad9447eceba35398107fec880253089065df 100644 (file)
@@ -185,7 +185,7 @@ static inline int rmi_write_block(struct rmi_device *d, u16 addr,
 
 int rmi_for_each_dev(void *data, int (*func)(struct device *dev, void *data));
 
-extern struct bus_type rmi_bus_type;
+extern const struct bus_type rmi_bus_type;
 
 int rmi_of_property_read_u32(struct device *dev, u32 *result,
                                const char *prop, bool optional);
index 42eaebb3bf5cc82efabccff777a8ee23b016bf49..ef9ea295f9e035fd3bb5ea733a009ba72f32a833 100644 (file)
@@ -1196,7 +1196,11 @@ static int rmi_driver_probe(struct device *dev)
                }
                rmi_driver_set_input_params(rmi_dev, data->input);
                data->input->phys = devm_kasprintf(dev, GFP_KERNEL,
-                                               "%s/input0", dev_name(dev));
+                                                  "%s/input0", dev_name(dev));
+               if (!data->input->phys) {
+                       retval = -ENOMEM;
+                       goto err;
+               }
        }
 
        retval = rmi_init_functions(data);
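devm_kasprintf() returns NULL on allocation failure like the rest of the kasprintf() family, so the check added above is required; the general pattern for any such call:

	phys = devm_kasprintf(dev, GFP_KERNEL, "%s/input0", dev_name(dev));
	if (!phys)
		return -ENOMEM;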
index 767fc9efb4a863ce773354045470aab723159af6..a8838b52262762848af4267a0e15fd41e34aefe7 100644 (file)
@@ -1007,7 +1007,7 @@ irqreturn_t serio_interrupt(struct serio *serio,
 }
 EXPORT_SYMBOL(serio_interrupt);
 
-struct bus_type serio_bus = {
+const struct bus_type serio_bus = {
        .name           = "serio",
        .drv_groups     = serio_driver_groups,
        .match          = serio_bus_match,
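The rmi4 and serio hunks are part of the tree-wide move to const struct bus_type: bus_register() now accepts a const pointer, so bus descriptors can live in read-only data. A sketch with a hypothetical bus:

	static const struct bus_type example_bus = {
		.name	= "example",
	};

	/* int bus_register(const struct bus_type *bus); */
	err = bus_register(&example_bus);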
index d8f9faf2b52902a76527abd0a844b386cfb0a677..bb758346a33d8d5559c8cd61b3ec91817d9d0fd1 100644 (file)
@@ -219,8 +219,7 @@ static void sxps2_close(struct serio *pserio)
 
 /**
  * xps2_of_probe - probe method for the PS/2 device.
- * @of_dev:    pointer to OF device structure
- * @match:     pointer to the structure used for matching a device
+ * @ofdev:     pointer to OF device structure
  *
  * This function probes the PS/2 device in the device tree.
  * It initializes the driver data structure and the hardware.
index e3e2324547b909d0dc1f0e57a3b32db0c519ddbc..c821fe3ee794e3db7a68a44dec3c23747b8ba5f6 100644 (file)
@@ -416,6 +416,37 @@ config TOUCHSCREEN_GOODIX
          To compile this driver as a module, choose M here: the
          module will be called goodix.
 
+config TOUCHSCREEN_GOODIX_BERLIN_CORE
+       tristate
+
+config TOUCHSCREEN_GOODIX_BERLIN_I2C
+       tristate "Goodix Berlin I2C touchscreen"
+       depends on I2C
+       select REGMAP_I2C
+       select TOUCHSCREEN_GOODIX_BERLIN_CORE
+       help
+         Say Y here if you have a Goodix Berlin IC connected to
+         your system via I2C.
+
+         If unsure, say N.
+
+         To compile this driver as a module, choose M here: the
+         module will be called goodix_berlin_i2c.
+
+config TOUCHSCREEN_GOODIX_BERLIN_SPI
+       tristate "Goodix Berlin SPI touchscreen"
+       depends on SPI_MASTER
+       select REGMAP
+       select TOUCHSCREEN_GOODIX_BERLIN_CORE
+       help
+         Say Y here if you have a Goodix Berlin IC connected to
+         your system via SPI.
+
+         If unsure, say N.
+
+         To compile this driver as a module, choose M here: the
+         module will be called goodix_berlin_spi.
+
 config TOUCHSCREEN_HIDEEP
        tristate "HiDeep Touch IC"
        depends on I2C
index 62bd24f3ac8e0d382d041325c0de3c53e1cbc118..a81cb5aa21a5b931b8bb91bfaab27241e9967a77 100644 (file)
@@ -47,6 +47,9 @@ obj-$(CONFIG_TOUCHSCREEN_EGALAX_SERIAL)       += egalax_ts_serial.o
 obj-$(CONFIG_TOUCHSCREEN_EXC3000)      += exc3000.o
 obj-$(CONFIG_TOUCHSCREEN_FUJITSU)      += fujitsu_ts.o
 obj-$(CONFIG_TOUCHSCREEN_GOODIX)       += goodix_ts.o
+obj-$(CONFIG_TOUCHSCREEN_GOODIX_BERLIN_CORE)   += goodix_berlin_core.o
+obj-$(CONFIG_TOUCHSCREEN_GOODIX_BERLIN_I2C)    += goodix_berlin_i2c.o
+obj-$(CONFIG_TOUCHSCREEN_GOODIX_BERLIN_SPI)    += goodix_berlin_spi.o
 obj-$(CONFIG_TOUCHSCREEN_HIDEEP)       += hideep.o
 obj-$(CONFIG_TOUCHSCREEN_HYNITRON_CSTXXX)      += hynitron_cstxxx.o
 obj-$(CONFIG_TOUCHSCREEN_ILI210X)      += ili210x.o
diff --git a/drivers/input/touchscreen/goodix_berlin.h b/drivers/input/touchscreen/goodix_berlin.h
new file mode 100644 (file)
index 0000000..1fd77eb
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Goodix Touchscreen Driver
+ * Copyright (C) 2020 - 2021 Goodix, Inc.
+ * Copyright (C) 2023 Linaro Ltd.
+ *
+ * Based on goodix_ts_berlin driver.
+ */
+
+#ifndef __GOODIX_BERLIN_H_
+#define __GOODIX_BERLIN_H_
+
+#include <linux/pm.h>
+
+struct device;
+struct input_id;
+struct regmap;
+
+int goodix_berlin_probe(struct device *dev, int irq, const struct input_id *id,
+                       struct regmap *regmap);
+
+extern const struct dev_pm_ops goodix_berlin_pm_ops;
+
+#endif
diff --git a/drivers/input/touchscreen/goodix_berlin_core.c b/drivers/input/touchscreen/goodix_berlin_core.c
new file mode 100644 (file)
index 0000000..e7b41a9
--- /dev/null
@@ -0,0 +1,755 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Goodix "Berlin" Touchscreen IC driver
+ * Copyright (C) 2020 - 2021 Goodix, Inc.
+ * Copyright (C) 2023 Linaro Ltd.
+ *
+ * Based on goodix_ts_berlin driver.
+ *
+ * This driver is distinct from goodix.c since the hardware interface
+ * is different enough to require a new driver.
+ * None of the register addresses or data structures are close enough
+ * to the previous generations.
+ *
+ * Currently the driver only handles Multitouch events with already
+ * programmed firmware and "config" for "Revision D" Berlin IC.
+ *
+ * Support is missing for:
+ * - ESD Management
+ * - Firmware update/flashing
+ * - "Config" update/flashing
+ * - Stylus Events
+ * - Gesture Events
+ * - Support for older revisions (A & B)
+ */
+
+#include <linux/bitfield.h>
+#include <linux/gpio/consumer.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/input/touchscreen.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/sizes.h>
+#include <asm/unaligned.h>
+
+#include "goodix_berlin.h"
+
+#define GOODIX_BERLIN_MAX_TOUCH                        10
+
+#define GOODIX_BERLIN_NORMAL_RESET_DELAY_MS    100
+
+#define GOODIX_BERLIN_TOUCH_EVENT              BIT(7)
+#define GOODIX_BERLIN_REQUEST_EVENT            BIT(6)
+#define GOODIX_BERLIN_TOUCH_COUNT_MASK         GENMASK(3, 0)
+
+#define GOODIX_BERLIN_REQUEST_CODE_RESET       3
+
+#define GOODIX_BERLIN_POINT_TYPE_MASK          GENMASK(3, 0)
+#define GOODIX_BERLIN_POINT_TYPE_STYLUS_HOVER  1
+#define GOODIX_BERLIN_POINT_TYPE_STYLUS                3
+
+#define GOODIX_BERLIN_TOUCH_ID_MASK            GENMASK(7, 4)
+
+#define GOODIX_BERLIN_DEV_CONFIRM_VAL          0xAA
+#define GOODIX_BERLIN_BOOTOPTION_ADDR          0x10000
+#define GOODIX_BERLIN_FW_VERSION_INFO_ADDR     0x10014
+
+#define GOODIX_BERLIN_IC_INFO_MAX_LEN          SZ_1K
+#define GOODIX_BERLIN_IC_INFO_ADDR             0x10070
+
+#define GOODIX_BERLIN_CHECKSUM_SIZE            sizeof(u16)
+
+struct goodix_berlin_fw_version {
+       u8 rom_pid[6];
+       u8 rom_vid[3];
+       u8 rom_vid_reserved;
+       u8 patch_pid[8];
+       u8 patch_vid[4];
+       u8 patch_vid_reserved;
+       u8 sensor_id;
+       u8 reserved[2];
+       __le16 checksum;
+};
+
+struct goodix_berlin_ic_info_version {
+       u8 info_customer_id;
+       u8 info_version_id;
+       u8 ic_die_id;
+       u8 ic_version_id;
+       __le32 config_id;
+       u8 config_version;
+       u8 frame_data_customer_id;
+       u8 frame_data_version_id;
+       u8 touch_data_customer_id;
+       u8 touch_data_version_id;
+       u8 reserved[3];
+} __packed;
+
+struct goodix_berlin_ic_info_feature {
+       __le16 freqhop_feature;
+       __le16 calibration_feature;
+       __le16 gesture_feature;
+       __le16 side_touch_feature;
+       __le16 stylus_feature;
+} __packed;
+
+struct goodix_berlin_ic_info_misc {
+       __le32 cmd_addr;
+       __le16 cmd_max_len;
+       __le32 cmd_reply_addr;
+       __le16 cmd_reply_len;
+       __le32 fw_state_addr;
+       __le16 fw_state_len;
+       __le32 fw_buffer_addr;
+       __le16 fw_buffer_max_len;
+       __le32 frame_data_addr;
+       __le16 frame_data_head_len;
+       __le16 fw_attr_len;
+       __le16 fw_log_len;
+       u8 pack_max_num;
+       u8 pack_compress_version;
+       __le16 stylus_struct_len;
+       __le16 mutual_struct_len;
+       __le16 self_struct_len;
+       __le16 noise_struct_len;
+       __le32 touch_data_addr;
+       __le16 touch_data_head_len;
+       __le16 point_struct_len;
+       __le16 reserved1;
+       __le16 reserved2;
+       __le32 mutual_rawdata_addr;
+       __le32 mutual_diffdata_addr;
+       __le32 mutual_refdata_addr;
+       __le32 self_rawdata_addr;
+       __le32 self_diffdata_addr;
+       __le32 self_refdata_addr;
+       __le32 iq_rawdata_addr;
+       __le32 iq_refdata_addr;
+       __le32 im_rawdata_addr;
+       __le16 im_readata_len;
+       __le32 noise_rawdata_addr;
+       __le16 noise_rawdata_len;
+       __le32 stylus_rawdata_addr;
+       __le16 stylus_rawdata_len;
+       __le32 noise_data_addr;
+       __le32 esd_addr;
+} __packed;
+
+struct goodix_berlin_touch {
+       u8 status;
+       u8 reserved;
+       __le16 x;
+       __le16 y;
+       __le16 w;
+};
+#define GOODIX_BERLIN_TOUCH_SIZE       sizeof(struct goodix_berlin_touch)
+
+struct goodix_berlin_header {
+       u8 status;
+       u8 reserved1;
+       u8 request_type;
+       u8 reserved2[3];
+       __le16 checksum;
+};
+#define GOODIX_BERLIN_HEADER_SIZE      sizeof(struct goodix_berlin_header)
+
+struct goodix_berlin_event {
+       struct goodix_berlin_header hdr;
+       /* The data below is u16/__le16 aligned */
+       u8 data[GOODIX_BERLIN_TOUCH_SIZE * GOODIX_BERLIN_MAX_TOUCH +
+               GOODIX_BERLIN_CHECKSUM_SIZE];
+};
+
+struct goodix_berlin_core {
+       struct device *dev;
+       struct regmap *regmap;
+       struct regulator *avdd;
+       struct regulator *iovdd;
+       struct gpio_desc *reset_gpio;
+       struct touchscreen_properties props;
+       struct goodix_berlin_fw_version fw_version;
+       struct input_dev *input_dev;
+       int irq;
+
+       /* Runtime parameters extracted from IC_INFO buffer */
+       u32 touch_data_addr;
+
+       struct goodix_berlin_event event;
+};
+
+static bool goodix_berlin_checksum_valid(const u8 *data, int size)
+{
+       u32 cal_checksum = 0;
+       u16 r_checksum;
+       int i;
+
+       if (size < GOODIX_BERLIN_CHECKSUM_SIZE)
+               return false;
+
+       for (i = 0; i < size - GOODIX_BERLIN_CHECKSUM_SIZE; i++)
+               cal_checksum += data[i];
+
+       r_checksum = get_unaligned_le16(&data[i]);
+
+       return (u16)cal_checksum == r_checksum;
+}
+
+static bool goodix_berlin_is_dummy_data(struct goodix_berlin_core *cd,
+                                       const u8 *data, int size)
+{
+       int i;
+
+       /*
+        * If the device is missing or doesn't respond, the buffer
+        * could be filled with bus default line state, 0x00 or 0xff,
+        * so declare success the first time we encounter neither.
+        */
+       for (i = 0; i < size; i++)
+               if (data[i] > 0 && data[i] < 0xff)
+                       return false;
+
+       return true;
+}
+
+static int goodix_berlin_dev_confirm(struct goodix_berlin_core *cd)
+{
+       u8 tx_buf[8], rx_buf[8];
+       int retry = 3;
+       int error;
+
+       memset(tx_buf, GOODIX_BERLIN_DEV_CONFIRM_VAL, sizeof(tx_buf));
+       while (retry--) {
+               error = regmap_raw_write(cd->regmap,
+                                        GOODIX_BERLIN_BOOTOPTION_ADDR,
+                                        tx_buf, sizeof(tx_buf));
+               if (error)
+                       return error;
+
+               error = regmap_raw_read(cd->regmap,
+                                       GOODIX_BERLIN_BOOTOPTION_ADDR,
+                                       rx_buf, sizeof(rx_buf));
+               if (error)
+                       return error;
+
+               if (!memcmp(tx_buf, rx_buf, sizeof(tx_buf)))
+                       return 0;
+
+               usleep_range(5000, 5100);
+       }
+
+       dev_err(cd->dev, "device confirm failed, rx_buf: %*ph\n",
+               (int)sizeof(rx_buf), rx_buf);
+
+       return -EINVAL;
+}
+
+static int goodix_berlin_power_on(struct goodix_berlin_core *cd)
+{
+       int error;
+
+       error = regulator_enable(cd->iovdd);
+       if (error) {
+               dev_err(cd->dev, "Failed to enable iovdd: %d\n", error);
+               return error;
+       }
+
+       /* Vendor waits 3ms for IOVDD to settle */
+       usleep_range(3000, 3100);
+
+       error = regulator_enable(cd->avdd);
+       if (error) {
+               dev_err(cd->dev, "Failed to enable avdd: %d\n", error);
+               goto err_iovdd_disable;
+       }
+
+       /* Vendor waits 15ms for AVDD to settle */
+       usleep_range(15000, 15100);
+
+       gpiod_set_value_cansleep(cd->reset_gpio, 0);
+
+       /* Vendor waits 4ms for Firmware to initialize */
+       usleep_range(4000, 4100);
+
+       error = goodix_berlin_dev_confirm(cd);
+       if (error)
+               goto err_dev_reset;
+
+       /* Vendor waits 100ms for Firmware to fully boot */
+       msleep(GOODIX_BERLIN_NORMAL_RESET_DELAY_MS);
+
+       return 0;
+
+err_dev_reset:
+       gpiod_set_value_cansleep(cd->reset_gpio, 1);
+       regulator_disable(cd->avdd);
+err_iovdd_disable:
+       regulator_disable(cd->iovdd);
+       return error;
+}
+
+static void goodix_berlin_power_off(struct goodix_berlin_core *cd)
+{
+       gpiod_set_value_cansleep(cd->reset_gpio, 1);
+       regulator_disable(cd->avdd);
+       regulator_disable(cd->iovdd);
+}
+
+static int goodix_berlin_read_version(struct goodix_berlin_core *cd)
+{
+       int error;
+
+       error = regmap_raw_read(cd->regmap, GOODIX_BERLIN_FW_VERSION_INFO_ADDR,
+                               &cd->fw_version, sizeof(cd->fw_version));
+       if (error) {
+               dev_err(cd->dev, "error reading fw version, %d\n", error);
+               return error;
+       }
+
+       if (!goodix_berlin_checksum_valid((u8 *)&cd->fw_version,
+                                         sizeof(cd->fw_version))) {
+               dev_err(cd->dev, "invalid fw version: checksum error\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Only extract necessary data for runtime */
+static int goodix_berlin_parse_ic_info(struct goodix_berlin_core *cd,
+                                      const u8 *data, u16 length)
+{
+       struct goodix_berlin_ic_info_misc *misc;
+       unsigned int offset = 0;
+
+       offset += sizeof(__le16); /* length */
+       offset += sizeof(struct goodix_berlin_ic_info_version);
+       offset += sizeof(struct goodix_berlin_ic_info_feature);
+
+       /* IC_INFO Parameters, variable width structure */
+       offset += 4 * sizeof(u8); /* drv_num, sen_num, button_num, force_num */
+       if (offset >= length)
+               goto invalid_offset;
+
+#define ADVANCE_LE16_PARAMS()                          \
+       do {                                            \
+               u8 param_num = data[offset++];          \
+               offset += param_num * sizeof(__le16);   \
+               if (offset >= length)                   \
+                       goto invalid_offset;            \
+       } while (0)
+       ADVANCE_LE16_PARAMS(); /* active_scan_rate_num */
+       ADVANCE_LE16_PARAMS(); /* mutual_freq_num */
+       ADVANCE_LE16_PARAMS(); /* self_tx_freq_num */
+       ADVANCE_LE16_PARAMS(); /* self_rx_freq_num */
+       ADVANCE_LE16_PARAMS(); /* stylus_freq_num */
+#undef ADVANCE_LE16_PARAMS
+
+       misc = (struct goodix_berlin_ic_info_misc *)&data[offset];
+       cd->touch_data_addr = le32_to_cpu(misc->touch_data_addr);
+
+       return 0;
+
+invalid_offset:
+       dev_err(cd->dev, "ic_info length is invalid (offset %d length %d)\n",
+               offset, length);
+       return -EINVAL;
+}
+
+static int goodix_berlin_get_ic_info(struct goodix_berlin_core *cd)
+{
+       u8 *afe_data __free(kfree) = NULL;
+       __le16 length_raw;
+       u16 length;
+       int error;
+
+       afe_data = kzalloc(GOODIX_BERLIN_IC_INFO_MAX_LEN, GFP_KERNEL);
+       if (!afe_data)
+               return -ENOMEM;
+
+       error = regmap_raw_read(cd->regmap, GOODIX_BERLIN_IC_INFO_ADDR,
+                               &length_raw, sizeof(length_raw));
+       if (error) {
+               dev_err(cd->dev, "failed get ic info length, %d\n", error);
+               return error;
+       }
+
+       length = le16_to_cpu(length_raw);
+       if (length >= GOODIX_BERLIN_IC_INFO_MAX_LEN) {
+               dev_err(cd->dev, "invalid ic info length %d\n", length);
+               return -EINVAL;
+       }
+
+       error = regmap_raw_read(cd->regmap, GOODIX_BERLIN_IC_INFO_ADDR,
+                               afe_data, length);
+       if (error) {
+               dev_err(cd->dev, "failed get ic info data, %d\n", error);
+               return error;
+       }
+
+       /* check whether the data is valid (e.g. bus default values) */
+       if (goodix_berlin_is_dummy_data(cd, afe_data, length)) {
+               dev_err(cd->dev, "fw info data invalid\n");
+               return -EINVAL;
+       }
+
+       if (!goodix_berlin_checksum_valid(afe_data, length)) {
+               dev_err(cd->dev, "fw info checksum error\n");
+               return -EINVAL;
+       }
+
+       error = goodix_berlin_parse_ic_info(cd, afe_data, length);
+       if (error)
+               return error;
+
+       /* check some key info */
+       if (!cd->touch_data_addr) {
+               dev_err(cd->dev, "touch_data_addr is null\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int goodix_berlin_get_remaining_contacts(struct goodix_berlin_core *cd,
+                                               int n)
+{
+       size_t offset = 2 * GOODIX_BERLIN_TOUCH_SIZE +
+                               GOODIX_BERLIN_CHECKSUM_SIZE;
+       u32 addr = cd->touch_data_addr + GOODIX_BERLIN_HEADER_SIZE + offset;
+       int error;
+
+       error = regmap_raw_read(cd->regmap, addr,
+                               &cd->event.data[offset],
+                               (n - 2) * GOODIX_BERLIN_TOUCH_SIZE);
+       if (error) {
+               dev_err_ratelimited(cd->dev, "failed to get touch data, %d\n",
+                                   error);
+               return error;
+       }
+
+       return 0;
+}
+
+static void goodix_berlin_report_state(struct goodix_berlin_core *cd, int n)
+{
+       struct goodix_berlin_touch *touch_data =
+                       (struct goodix_berlin_touch *)cd->event.data;
+       struct goodix_berlin_touch *t;
+       int i;
+       u8 type, id;
+
+       for (i = 0; i < n; i++) {
+               t = &touch_data[i];
+
+               type = FIELD_GET(GOODIX_BERLIN_POINT_TYPE_MASK, t->status);
+               if (type == GOODIX_BERLIN_POINT_TYPE_STYLUS ||
+                   type == GOODIX_BERLIN_POINT_TYPE_STYLUS_HOVER) {
+                       dev_warn_once(cd->dev, "Stylus event type not handled\n");
+                       continue;
+               }
+
+               id = FIELD_GET(GOODIX_BERLIN_TOUCH_ID_MASK, t->status);
+               if (id >= GOODIX_BERLIN_MAX_TOUCH) {
+                       dev_warn_ratelimited(cd->dev, "invalid finger id %d\n", id);
+                       continue;
+               }
+
+               input_mt_slot(cd->input_dev, id);
+               input_mt_report_slot_state(cd->input_dev, MT_TOOL_FINGER, true);
+
+               touchscreen_report_pos(cd->input_dev, &cd->props,
+                                      __le16_to_cpu(t->x), __le16_to_cpu(t->y),
+                                      true);
+               input_report_abs(cd->input_dev, ABS_MT_TOUCH_MAJOR,
+                                __le16_to_cpu(t->w));
+       }
+
+       input_mt_sync_frame(cd->input_dev);
+       input_sync(cd->input_dev);
+}
+
+static void goodix_berlin_touch_handler(struct goodix_berlin_core *cd)
+{
+       u8 touch_num;
+       int error;
+
+       touch_num = FIELD_GET(GOODIX_BERLIN_TOUCH_COUNT_MASK,
+                             cd->event.hdr.request_type);
+       if (touch_num > GOODIX_BERLIN_MAX_TOUCH) {
+               dev_warn(cd->dev, "invalid touch num %d\n", touch_num);
+               return;
+       }
+
+       if (touch_num > 2) {
+               /* read additional contact data if more than 2 touch events */
+               error = goodix_berlin_get_remaining_contacts(cd, touch_num);
+               if (error)
+                       return;
+       }
+
+       if (touch_num) {
+               int len = touch_num * GOODIX_BERLIN_TOUCH_SIZE +
+                         GOODIX_BERLIN_CHECKSUM_SIZE;
+               if (!goodix_berlin_checksum_valid(cd->event.data, len)) {
+                       dev_err(cd->dev, "touch data checksum error: %*ph\n",
+                               len, cd->event.data);
+                       return;
+               }
+       }
+
+       goodix_berlin_report_state(cd, touch_num);
+}
+
+static int goodix_berlin_request_handle_reset(struct goodix_berlin_core *cd)
+{
+       gpiod_set_value_cansleep(cd->reset_gpio, 1);
+       usleep_range(2000, 2100);
+       gpiod_set_value_cansleep(cd->reset_gpio, 0);
+
+       msleep(GOODIX_BERLIN_NORMAL_RESET_DELAY_MS);
+
+       return 0;
+}
+
+static irqreturn_t goodix_berlin_irq(int irq, void *data)
+{
+       struct goodix_berlin_core *cd = data;
+       int error;
+
+       /*
+        * First, read buffer with space for 2 touch events:
+        * - GOODIX_BERLIN_HEADER_SIZE = 8 bytes
+        * - GOODIX_BERLIN_TOUCH_SIZE * 2 = 16 bytes
+        * - GOODIX_BERLIN_CHECKSUM_SIZE = 2 bytes
+        * For a total of 26 bytes.
+        *
+        * If only a single finger is reported, we will read 8 bytes more than
+        * needed:
+        * - bytes 0-7:   Header (GOODIX_BERLIN_HEADER_SIZE)
+        * - bytes 8-15:  Finger 0 Data
+        * - bytes 16-17: Checksum
+        * - bytes 18-25: Unused 8 bytes
+        *
+        * If 2 fingers are reported, we would have read the exact needed
+        * amount of data and checksum would be at the end of the buffer:
+        * - bytes 0-7:   Header (GOODIX_BERLIN_HEADER_SIZE)
+        * - bytes 8-15:  Finger 0 Bytes 0-7
+        * - bytes 16-23: Finger 1 Bytes 0-7
+        * - bytes 24-25: Checksum
+        *
+        * If more than 2 fingers were reported, the "Checksum" bytes would
+        * in fact contain part of the next finger data, and then
+        * goodix_berlin_get_remaining_contacts() would complete the buffer
+        * with the missing bytes, including the trailing checksum.
+        * For example, if 3 fingers are reported, then we would do:
+        * Read 1:
+        * - bytes 0-7:   Header (GOODIX_BERLIN_HEADER_SIZE)
+        * - bytes 8-15:  Finger 0 Bytes 0-7
+        * - bytes 16-23: Finger 1 Bytes 0-7
+        * - bytes 24-25: Finger 2 Bytes 0-1
+        * Read 2 (with length of (3 - 2) * 8 = 8 bytes):
+        * - bytes 26-31: Finger 2 Bytes 2-7
+        * - bytes 32-33: Checksum
+        */
+       error = regmap_raw_read(cd->regmap, cd->touch_data_addr,
+                               &cd->event,
+                               GOODIX_BERLIN_HEADER_SIZE +
+                                       2 * GOODIX_BERLIN_TOUCH_SIZE +
+                                       GOODIX_BERLIN_CHECKSUM_SIZE);
+       if (error) {
+               dev_warn_ratelimited(cd->dev,
+                                    "failed get event head data: %d\n", error);
+               goto out;
+       }
+
+       if (cd->event.hdr.status == 0)
+               goto out;
+
+       if (!goodix_berlin_checksum_valid((u8 *)&cd->event.hdr,
+                                         GOODIX_BERLIN_HEADER_SIZE)) {
+               dev_warn_ratelimited(cd->dev,
+                                    "touch head checksum error: %*ph\n",
+                                    (int)GOODIX_BERLIN_HEADER_SIZE,
+                                    &cd->event.hdr);
+               goto out_clear;
+       }
+
+       if (cd->event.hdr.status & GOODIX_BERLIN_TOUCH_EVENT)
+               goodix_berlin_touch_handler(cd);
+
+       if (cd->event.hdr.status & GOODIX_BERLIN_REQUEST_EVENT) {
+               switch (cd->event.hdr.request_type) {
+               case GOODIX_BERLIN_REQUEST_CODE_RESET:
+                       if (cd->reset_gpio)
+                               goodix_berlin_request_handle_reset(cd);
+                       break;
+
+               default:
+                       dev_warn(cd->dev, "unsupported request code 0x%x\n",
+                                cd->event.hdr.request_type);
+               }
+       }
+
+out_clear:
+       /* Clear up status field */
+       regmap_write(cd->regmap, cd->touch_data_addr, 0);
+
+out:
+       return IRQ_HANDLED;
+}
+
+static int goodix_berlin_input_dev_config(struct goodix_berlin_core *cd,
+                                         const struct input_id *id)
+{
+       struct input_dev *input_dev;
+       int error;
+
+       input_dev = devm_input_allocate_device(cd->dev);
+       if (!input_dev)
+               return -ENOMEM;
+
+       cd->input_dev = input_dev;
+       input_set_drvdata(input_dev, cd);
+
+       input_dev->name = "Goodix Berlin Capacitive TouchScreen";
+       input_dev->phys = "input/ts";
+
+       input_dev->id = *id;
+
+       input_set_abs_params(cd->input_dev, ABS_MT_POSITION_X,
+                            0, SZ_64K - 1, 0, 0);
+       input_set_abs_params(cd->input_dev, ABS_MT_POSITION_Y,
+                            0, SZ_64K - 1, 0, 0);
+       input_set_abs_params(cd->input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0);
+
+       touchscreen_parse_properties(cd->input_dev, true, &cd->props);
+
+       error = input_mt_init_slots(cd->input_dev, GOODIX_BERLIN_MAX_TOUCH,
+                                   INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
+       if (error)
+               return error;
+
+       error = input_register_device(cd->input_dev);
+       if (error)
+               return error;
+
+       return 0;
+}
+
+static int goodix_berlin_suspend(struct device *dev)
+{
+       struct goodix_berlin_core *cd = dev_get_drvdata(dev);
+
+       disable_irq(cd->irq);
+       goodix_berlin_power_off(cd);
+
+       return 0;
+}
+
+static int goodix_berlin_resume(struct device *dev)
+{
+       struct goodix_berlin_core *cd = dev_get_drvdata(dev);
+       int error;
+
+       error = goodix_berlin_power_on(cd);
+       if (error)
+               return error;
+
+       enable_irq(cd->irq);
+
+       return 0;
+}
+
+EXPORT_GPL_SIMPLE_DEV_PM_OPS(goodix_berlin_pm_ops,
+                            goodix_berlin_suspend, goodix_berlin_resume);
+
+static void goodix_berlin_power_off_act(void *data)
+{
+       struct goodix_berlin_core *cd = data;
+
+       goodix_berlin_power_off(cd);
+}
+
+int goodix_berlin_probe(struct device *dev, int irq, const struct input_id *id,
+                       struct regmap *regmap)
+{
+       struct goodix_berlin_core *cd;
+       int error;
+
+       if (irq <= 0) {
+               dev_err(dev, "Missing interrupt number\n");
+               return -EINVAL;
+       }
+
+       cd = devm_kzalloc(dev, sizeof(*cd), GFP_KERNEL);
+       if (!cd)
+               return -ENOMEM;
+
+       cd->dev = dev;
+       cd->regmap = regmap;
+       cd->irq = irq;
+
+       cd->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(cd->reset_gpio))
+               return dev_err_probe(dev, PTR_ERR(cd->reset_gpio),
+                                    "Failed to request reset gpio\n");
+
+       cd->avdd = devm_regulator_get(dev, "avdd");
+       if (IS_ERR(cd->avdd))
+               return dev_err_probe(dev, PTR_ERR(cd->avdd),
+                                    "Failed to request avdd regulator\n");
+
+       cd->iovdd = devm_regulator_get(dev, "iovdd");
+       if (IS_ERR(cd->iovdd))
+               return dev_err_probe(dev, PTR_ERR(cd->iovdd),
+                                    "Failed to request iovdd regulator\n");
+
+       error = goodix_berlin_power_on(cd);
+       if (error) {
+               dev_err(dev, "failed power on");
+               return error;
+       }
+
+       error = devm_add_action_or_reset(dev, goodix_berlin_power_off_act, cd);
+       if (error)
+               return error;
+
+       error = goodix_berlin_read_version(cd);
+       if (error) {
+               dev_err(dev, "failed to get version info");
+               return error;
+       }
+
+       error = goodix_berlin_get_ic_info(cd);
+       if (error) {
+               dev_err(dev, "invalid ic info, abort");
+               return error;
+       }
+
+       error = goodix_berlin_input_dev_config(cd, id);
+       if (error) {
+               dev_err(dev, "failed set input device");
+               return error;
+       }
+
+       error = devm_request_threaded_irq(dev, cd->irq, NULL, goodix_berlin_irq,
+                                         IRQF_ONESHOT, "goodix-berlin", cd);
+       if (error) {
+               dev_err(dev, "request threaded irq failed: %d\n", error);
+               return error;
+       }
+
+       dev_set_drvdata(dev, cd);
+
+       dev_dbg(dev, "Goodix Berlin %s Touchscreen Controller",
+               cd->fw_version.patch_pid);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(goodix_berlin_probe);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Goodix Berlin Core Touchscreen driver");
+MODULE_AUTHOR("Neil Armstrong <neil.armstrong@linaro.org>");
diff --git a/drivers/input/touchscreen/goodix_berlin_i2c.c b/drivers/input/touchscreen/goodix_berlin_i2c.c
new file mode 100644 (file)
index 0000000..6ed9aa8
--- /dev/null
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Goodix Berlin Touchscreen Driver
+ *
+ * Copyright (C) 2020 - 2021 Goodix, Inc.
+ * Copyright (C) 2023 Linaro Ltd.
+ *
+ * Based on goodix_ts_berlin driver.
+ */
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/input.h>
+
+#include "goodix_berlin.h"
+
+#define I2C_MAX_TRANSFER_SIZE          256
+
+static const struct regmap_config goodix_berlin_i2c_regmap_conf = {
+       .reg_bits = 32,
+       .val_bits = 8,
+       .max_raw_read = I2C_MAX_TRANSFER_SIZE,
+       .max_raw_write = I2C_MAX_TRANSFER_SIZE,
+};
+
+/* vendor & product left unassigned here, should probably be updated from fw info */
+static const struct input_id goodix_berlin_i2c_input_id = {
+       .bustype = BUS_I2C,
+};
+
+static int goodix_berlin_i2c_probe(struct i2c_client *client)
+{
+       struct regmap *regmap;
+       int error;
+
+       regmap = devm_regmap_init_i2c(client, &goodix_berlin_i2c_regmap_conf);
+       if (IS_ERR(regmap))
+               return PTR_ERR(regmap);
+
+       error = goodix_berlin_probe(&client->dev, client->irq,
+                                   &goodix_berlin_i2c_input_id, regmap);
+       if (error)
+               return error;
+
+       return 0;
+}
+
+static const struct i2c_device_id goodix_berlin_i2c_id[] = {
+       { "gt9916", 0 },
+       { }
+};
+
+MODULE_DEVICE_TABLE(i2c, goodix_berlin_i2c_id);
+
+static const struct of_device_id goodix_berlin_i2c_of_match[] = {
+       { .compatible = "goodix,gt9916", },
+       { }
+};
+MODULE_DEVICE_TABLE(of, goodix_berlin_i2c_of_match);
+
+static struct i2c_driver goodix_berlin_i2c_driver = {
+       .driver = {
+               .name = "goodix-berlin-i2c",
+               .of_match_table = goodix_berlin_i2c_of_match,
+               .pm = pm_sleep_ptr(&goodix_berlin_pm_ops),
+       },
+       .probe = goodix_berlin_i2c_probe,
+       .id_table = goodix_berlin_i2c_id,
+};
+module_i2c_driver(goodix_berlin_i2c_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Goodix Berlin I2C Touchscreen driver");
+MODULE_AUTHOR("Neil Armstrong <neil.armstrong@linaro.org>");
diff --git a/drivers/input/touchscreen/goodix_berlin_spi.c b/drivers/input/touchscreen/goodix_berlin_spi.c
new file mode 100644 (file)
index 0000000..4cc557d
--- /dev/null
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Goodix Berlin Touchscreen Driver
+ *
+ * Copyright (C) 2020 - 2021 Goodix, Inc.
+ * Copyright (C) 2023 Linaro Ltd.
+ *
+ * Based on goodix_ts_berlin driver.
+ */
+#include <asm/unaligned.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/spi/spi.h>
+#include <linux/input.h>
+
+#include "goodix_berlin.h"
+
+#define GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN     1
+#define GOODIX_BERLIN_REGISTER_WIDTH           4
+#define GOODIX_BERLIN_SPI_READ_DUMMY_LEN       3
+#define GOODIX_BERLIN_SPI_READ_PREFIX_LEN      (GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN + \
+                                                GOODIX_BERLIN_REGISTER_WIDTH + \
+                                                GOODIX_BERLIN_SPI_READ_DUMMY_LEN)
+#define GOODIX_BERLIN_SPI_WRITE_PREFIX_LEN     (GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN + \
+                                                GOODIX_BERLIN_REGISTER_WIDTH)
+
+#define GOODIX_BERLIN_SPI_WRITE_FLAG           0xF0
+#define GOODIX_BERLIN_SPI_READ_FLAG            0xF1
+
+static int goodix_berlin_spi_read(void *context, const void *reg_buf,
+                                 size_t reg_size, void *val_buf,
+                                 size_t val_size)
+{
+       struct spi_device *spi = context;
+       struct spi_transfer xfers;
+       struct spi_message spi_msg;
+       const u32 *reg = reg_buf; /* reg is stored as native u32 at start of buffer */
+       u8 *buf;
+       int error;
+
+       if (reg_size != GOODIX_BERLIN_REGISTER_WIDTH)
+               return -EINVAL;
+
+       buf = kzalloc(GOODIX_BERLIN_SPI_READ_PREFIX_LEN + val_size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       spi_message_init(&spi_msg);
+       memset(&xfers, 0, sizeof(xfers));
+
+       /* buffer format: 0xF1 + addr(4bytes) + dummy(3bytes) + data */
+       buf[0] = GOODIX_BERLIN_SPI_READ_FLAG;
+       put_unaligned_be32(*reg, buf + GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN);
+       memset(buf + GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN + GOODIX_BERLIN_REGISTER_WIDTH,
+              0xff, GOODIX_BERLIN_SPI_READ_DUMMY_LEN);
+
+       xfers.tx_buf = buf;
+       xfers.rx_buf = buf;
+       xfers.len = GOODIX_BERLIN_SPI_READ_PREFIX_LEN + val_size;
+       xfers.cs_change = 0;
+       spi_message_add_tail(&xfers, &spi_msg);
+
+       error = spi_sync(spi, &spi_msg);
+       if (error < 0)
+               dev_err(&spi->dev, "spi transfer error, %d", error);
+       else
+               memcpy(val_buf, buf + GOODIX_BERLIN_SPI_READ_PREFIX_LEN, val_size);
+
+       kfree(buf);
+       return error;
+}
+
+static int goodix_berlin_spi_write(void *context, const void *data,
+                                  size_t count)
+{
+       unsigned int len = count - GOODIX_BERLIN_REGISTER_WIDTH;
+       struct spi_device *spi = context;
+       struct spi_transfer xfers;
+       struct spi_message spi_msg;
+       const u32 *reg = data; /* reg is stored as native u32 at start of buffer */
+       u8 *buf;
+       int error;
+
+       buf = kzalloc(GOODIX_BERLIN_SPI_WRITE_PREFIX_LEN + len, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       spi_message_init(&spi_msg);
+       memset(&xfers, 0, sizeof(xfers));
+
+       buf[0] = GOODIX_BERLIN_SPI_WRITE_FLAG;
+       put_unaligned_be32(*reg, buf + GOODIX_BERLIN_SPI_TRANS_PREFIX_LEN);
+       memcpy(buf + GOODIX_BERLIN_SPI_WRITE_PREFIX_LEN,
+              data + GOODIX_BERLIN_REGISTER_WIDTH, len);
+
+       xfers.tx_buf = buf;
+       xfers.len = GOODIX_BERLIN_SPI_WRITE_PREFIX_LEN + len;
+       xfers.cs_change = 0;
+       spi_message_add_tail(&xfers, &spi_msg);
+
+       error = spi_sync(spi, &spi_msg);
+       if (error < 0)
+               dev_err(&spi->dev, "spi transfer error, %d", error);
+
+       kfree(buf);
+       return error;
+}
+
+static const struct regmap_config goodix_berlin_spi_regmap_conf = {
+       .reg_bits = 32,
+       .val_bits = 8,
+       .read = goodix_berlin_spi_read,
+       .write = goodix_berlin_spi_write,
+};
+
+/* vendor & product left unassigned here, should probably be updated from fw info */
+static const struct input_id goodix_berlin_spi_input_id = {
+       .bustype = BUS_SPI,
+};
+
+static int goodix_berlin_spi_probe(struct spi_device *spi)
+{
+       struct regmap_config regmap_config;
+       struct regmap *regmap;
+       size_t max_size;
+       int error = 0;
+
+       spi->mode = SPI_MODE_0;
+       spi->bits_per_word = 8;
+       error = spi_setup(spi);
+       if (error)
+               return error;
+
+       max_size = spi_max_transfer_size(spi);
+
+       regmap_config = goodix_berlin_spi_regmap_conf;
+       regmap_config.max_raw_read = max_size - GOODIX_BERLIN_SPI_READ_PREFIX_LEN;
+       regmap_config.max_raw_write = max_size - GOODIX_BERLIN_SPI_WRITE_PREFIX_LEN;
+
+       regmap = devm_regmap_init(&spi->dev, NULL, spi, &regmap_config);
+       if (IS_ERR(regmap))
+               return PTR_ERR(regmap);
+
+       error = goodix_berlin_probe(&spi->dev, spi->irq,
+                                   &goodix_berlin_spi_input_id, regmap);
+       if (error)
+               return error;
+
+       return 0;
+}
+
+static const struct spi_device_id goodix_berlin_spi_ids[] = {
+       { "gt9916" },
+       { },
+};
+MODULE_DEVICE_TABLE(spi, goodix_berlin_spi_ids);
+
+static const struct of_device_id goodix_berlin_spi_of_match[] = {
+       { .compatible = "goodix,gt9916", },
+       { }
+};
+MODULE_DEVICE_TABLE(of, goodix_berlin_spi_of_match);
+
+static struct spi_driver goodix_berlin_spi_driver = {
+       .driver = {
+               .name = "goodix-berlin-spi",
+               .of_match_table = goodix_berlin_spi_of_match,
+               .pm = pm_sleep_ptr(&goodix_berlin_pm_ops),
+       },
+       .probe = goodix_berlin_spi_probe,
+       .id_table = goodix_berlin_spi_ids,
+};
+module_spi_driver(goodix_berlin_spi_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Goodix Berlin SPI Touchscreen driver");
+MODULE_AUTHOR("Neil Armstrong <neil.armstrong@linaro.org>");
index 07111ca2445561702980070c2afe0655919bd808..074dd6c342ecb9291fc225b3d22c639ae29fb508 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
+#include <linux/bitfield.h>
 #include <linux/bits.h>
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/property.h>
 #include <linux/regulator/consumer.h>
 
+#define IST3032C_WHOAMI                        0x32c
+
+#define IST3038B_REG_STATUS            0x20
+#define IST3038B_REG_CHIPID            0x30
+#define IST3038B_WHOAMI                        0x30380b
+
 #define IST3038C_HIB_ACCESS            (0x800B << 16)
 #define IST3038C_DIRECT_ACCESS         BIT(31)
-#define IST3038C_REG_CHIPID            0x40001000
+#define IST3038C_REG_CHIPID            (0x40001000 | IST3038C_DIRECT_ACCESS)
 #define IST3038C_REG_HIB_BASE          0x30000100
 #define IST3038C_REG_TOUCH_STATUS      (IST3038C_REG_HIB_BASE | IST3038C_HIB_ACCESS)
 #define IST3038C_REG_TOUCH_COORD       (IST3038C_REG_HIB_BASE | IST3038C_HIB_ACCESS | 0x8)
 #define IST3038C_I2C_RETRY_COUNT       3
 #define IST3038C_MAX_FINGER_NUM                10
 #define IST3038C_X_MASK                        GENMASK(23, 12)
-#define IST3038C_X_SHIFT               12
 #define IST3038C_Y_MASK                        GENMASK(11, 0)
 #define IST3038C_AREA_MASK             GENMASK(27, 24)
-#define IST3038C_AREA_SHIFT            24
 #define IST3038C_FINGER_COUNT_MASK     GENMASK(15, 12)
-#define IST3038C_FINGER_COUNT_SHIFT    12
 #define IST3038C_FINGER_STATUS_MASK    GENMASK(9, 0)
+#define IST3032C_KEY_STATUS_MASK       GENMASK(20, 16)
+
+struct imagis_properties {
+       unsigned int interrupt_msg_cmd;
+       unsigned int touch_coord_cmd;
+       unsigned int whoami_cmd;
+       unsigned int whoami_val;
+       bool protocol_b;
+       bool touch_keys_supported;
+};
 
 struct imagis_ts {
        struct i2c_client *client;
+       const struct imagis_properties *tdata;
        struct input_dev *input_dev;
        struct touchscreen_properties prop;
        struct regulator_bulk_data supplies[2];
+       u32 keycodes[5];
+       int num_keycodes;
 };
 
 static int imagis_i2c_read_reg(struct imagis_ts *ts,
@@ -80,20 +97,18 @@ static irqreturn_t imagis_interrupt(int irq, void *dev_id)
 {
        struct imagis_ts *ts = dev_id;
        u32 intr_message, finger_status;
-       unsigned int finger_count, finger_pressed;
+       unsigned int finger_count, finger_pressed, key_pressed;
        int i;
        int error;
 
-       error = imagis_i2c_read_reg(ts, IST3038C_REG_INTR_MESSAGE,
-                                   &intr_message);
+       error = imagis_i2c_read_reg(ts, ts->tdata->interrupt_msg_cmd, &intr_message);
        if (error) {
                dev_err(&ts->client->dev,
                        "failed to read the interrupt message: %d\n", error);
                goto out;
        }
 
-       finger_count = (intr_message & IST3038C_FINGER_COUNT_MASK) >>
-                               IST3038C_FINGER_COUNT_SHIFT;
+       finger_count = FIELD_GET(IST3038C_FINGER_COUNT_MASK, intr_message);
        if (finger_count > IST3038C_MAX_FINGER_NUM) {
                dev_err(&ts->client->dev,
                        "finger count %d is more than maximum supported\n",
@@ -101,12 +116,16 @@ static irqreturn_t imagis_interrupt(int irq, void *dev_id)
                goto out;
        }
 
-       finger_pressed = intr_message & IST3038C_FINGER_STATUS_MASK;
+       finger_pressed = FIELD_GET(IST3038C_FINGER_STATUS_MASK, intr_message);
 
        for (i = 0; i < finger_count; i++) {
-               error = imagis_i2c_read_reg(ts,
-                                           IST3038C_REG_TOUCH_COORD + (i * 4),
-                                           &finger_status);
+               if (ts->tdata->protocol_b)
+                       error = imagis_i2c_read_reg(ts,
+                                                   ts->tdata->touch_coord_cmd, &finger_status);
+               else
+                       error = imagis_i2c_read_reg(ts,
+                                                   ts->tdata->touch_coord_cmd + (i * 4),
+                                                   &finger_status);
                if (error) {
                        dev_err(&ts->client->dev,
                                "failed to read coordinates for finger %d: %d\n",
@@ -118,14 +137,19 @@ static irqreturn_t imagis_interrupt(int irq, void *dev_id)
                input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER,
                                           finger_pressed & BIT(i));
                touchscreen_report_pos(ts->input_dev, &ts->prop,
-                                      (finger_status & IST3038C_X_MASK) >>
-                                               IST3038C_X_SHIFT,
-                                      finger_status & IST3038C_Y_MASK, 1);
+                                      FIELD_GET(IST3038C_X_MASK, finger_status),
+                                      FIELD_GET(IST3038C_Y_MASK, finger_status),
+                                      true);
                input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR,
-                                (finger_status & IST3038C_AREA_MASK) >>
-                                       IST3038C_AREA_SHIFT);
+                                FIELD_GET(IST3038C_AREA_MASK, finger_status));
        }
 
+       key_pressed = FIELD_GET(IST3032C_KEY_STATUS_MASK, intr_message);
+
+       for (int i = 0; i < ts->num_keycodes; i++)
+               input_report_key(ts->input_dev, ts->keycodes[i],
+                                key_pressed & BIT(i));
+
        input_mt_sync_frame(ts->input_dev);
        input_sync(ts->input_dev);
 
@@ -210,7 +234,24 @@ static int imagis_init_input_dev(struct imagis_ts *ts)
 
        input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_X);
        input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_Y);
-       input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0);
+       input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, 16, 0, 0);
+       if (ts->tdata->touch_keys_supported) {
+               ts->num_keycodes = of_property_read_variable_u32_array(
+                               ts->client->dev.of_node, "linux,keycodes",
+                               ts->keycodes, 0, ARRAY_SIZE(ts->keycodes));
+               if (ts->num_keycodes <= 0) {
+                       ts->keycodes[0] = KEY_APPSELECT;
+                       ts->keycodes[1] = KEY_BACK;
+                       ts->num_keycodes = 2;
+               }
+
+               input_dev->keycodemax = ts->num_keycodes;
+               input_dev->keycodesize = sizeof(ts->keycodes[0]);
+               input_dev->keycode = ts->keycodes;
+       }
+
+       for (int i = 0; i < ts->num_keycodes; i++)
+               input_set_capability(input_dev, EV_KEY, ts->keycodes[i]);
 
        touchscreen_parse_properties(input_dev, true, &ts->prop);
        if (!ts->prop.max_x || !ts->prop.max_y) {
@@ -261,6 +302,12 @@ static int imagis_probe(struct i2c_client *i2c)
 
        ts->client = i2c;
 
+       ts->tdata = device_get_match_data(dev);
+       if (!ts->tdata) {
+               dev_err(dev, "missing chip data\n");
+               return -EINVAL;
+       }
+
        error = imagis_init_regulators(ts);
        if (error) {
                dev_err(dev, "regulator init error: %d\n", error);
@@ -279,15 +326,13 @@ static int imagis_probe(struct i2c_client *i2c)
                return error;
        }
 
-       error = imagis_i2c_read_reg(ts,
-                       IST3038C_REG_CHIPID | IST3038C_DIRECT_ACCESS,
-                       &chip_id);
+       error = imagis_i2c_read_reg(ts, ts->tdata->whoami_cmd, &chip_id);
        if (error) {
                dev_err(dev, "chip ID read failure: %d\n", error);
                return error;
        }
 
-       if (chip_id != IST3038C_WHOAMI) {
+       if (chip_id != ts->tdata->whoami_val) {
                dev_err(dev, "unknown chip ID: 0x%x\n", chip_id);
                return -EINVAL;
        }
@@ -343,9 +388,34 @@ static int imagis_resume(struct device *dev)
 
 static DEFINE_SIMPLE_DEV_PM_OPS(imagis_pm_ops, imagis_suspend, imagis_resume);
 
+static const struct imagis_properties imagis_3032c_data = {
+       .interrupt_msg_cmd = IST3038C_REG_INTR_MESSAGE,
+       .touch_coord_cmd = IST3038C_REG_TOUCH_COORD,
+       .whoami_cmd = IST3038C_REG_CHIPID,
+       .whoami_val = IST3032C_WHOAMI,
+       .touch_keys_supported = true,
+};
+
+static const struct imagis_properties imagis_3038b_data = {
+       .interrupt_msg_cmd = IST3038B_REG_STATUS,
+       .touch_coord_cmd = IST3038B_REG_STATUS,
+       .whoami_cmd = IST3038B_REG_CHIPID,
+       .whoami_val = IST3038B_WHOAMI,
+       .protocol_b = true,
+};
+
+static const struct imagis_properties imagis_3038c_data = {
+       .interrupt_msg_cmd = IST3038C_REG_INTR_MESSAGE,
+       .touch_coord_cmd = IST3038C_REG_TOUCH_COORD,
+       .whoami_cmd = IST3038C_REG_CHIPID,
+       .whoami_val = IST3038C_WHOAMI,
+};
+
 #ifdef CONFIG_OF
 static const struct of_device_id imagis_of_match[] = {
-       { .compatible = "imagis,ist3038c", },
+       { .compatible = "imagis,ist3032c", .data = &imagis_3032c_data },
+       { .compatible = "imagis,ist3038b", .data = &imagis_3038b_data },
+       { .compatible = "imagis,ist3038c", .data = &imagis_3038c_data },
        { },
 };
 MODULE_DEVICE_TABLE(of, imagis_of_match);
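The mask handling in this patch is a straight conversion from open-coded shifts to FIELD_GET(), which derives the shift from the mask at compile time and lets the *_SHIFT constants go. A self-contained sketch of the equivalence, using a made-up mask name:

#include <linux/bitfield.h>
#include <linux/bits.h>

#define EXAMPLE_X_MASK	GENMASK(23, 12)

/* (val & EXAMPLE_X_MASK) >> 12 and FIELD_GET(EXAMPLE_X_MASK, val)
 * produce the same result; only the latter survives a mask change
 * without touching a separate shift constant. */
static inline u16 example_get_x(u32 finger_status)
{
	return FIELD_GET(EXAMPLE_X_MASK, finger_status);
}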
index 34324f8512ac2d40d15321551ef2dee0cca00d74..294b7ceded2728e7ca02145c12391848d1c5d1f8 100644 (file)
@@ -157,7 +157,6 @@ static void titsc_step_config(struct titsc *ts_dev)
                             n++ == 0 ? STEPCONFIG_OPENDLY : 0);
        }
 
-       config = 0;
        config = STEPCONFIG_MODE_HWSYNC |
                        STEPCONFIG_AVG_16 | ts_dev->bit_yn |
                        STEPCONFIG_INM_ADCREFM;
index f87aeab460eb8bb5419e3c74fdbc8f3034665a2b..9e71c442881412d7eb40bde0cc0e139a41093ff8 100644 (file)
@@ -149,8 +149,9 @@ static int riscv_intc_domain_alloc(struct irq_domain *domain,
         * Only allow hwirq for which we have corresponding standard or
         * custom interrupt enable register.
         */
-       if ((hwirq >= riscv_intc_nr_irqs && hwirq < riscv_intc_custom_base) ||
-           (hwirq >= riscv_intc_custom_base + riscv_intc_custom_nr_irqs))
+       if (hwirq >= riscv_intc_nr_irqs &&
+           (hwirq < riscv_intc_custom_base ||
+            hwirq >= riscv_intc_custom_base + riscv_intc_custom_nr_irqs))
                return -EINVAL;
 
        for (i = 0; i < nr_irqs; i++) {
@@ -183,10 +184,12 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn, struct irq_ch
                return -ENXIO;
        }
 
-       if (riscv_isa_extension_available(NULL, SxAIA))
+       if (riscv_isa_extension_available(NULL, SxAIA)) {
+               riscv_intc_nr_irqs = 64;
                rc = set_handle_irq(&riscv_intc_aia_irq);
-       else
+       } else {
                rc = set_handle_irq(&riscv_intc_irq);
+       }
        if (rc) {
                pr_err("failed to set irq handler\n");
                return rc;
@@ -195,7 +198,7 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn, struct irq_ch
        riscv_set_intc_hwnode_fn(riscv_intc_hwnode);
 
        pr_info("%d local interrupts mapped%s\n",
-               riscv_isa_extension_available(NULL, SxAIA) ? 64 : riscv_intc_nr_irqs,
+               riscv_intc_nr_irqs,
                riscv_isa_extension_available(NULL, SxAIA) ? " using AIA" : "");
        if (riscv_intc_custom_nr_irqs)
                pr_info("%d custom local interrupts mapped\n", riscv_intc_custom_nr_irqs);
index 6fd89efb420aa3b08401d0c617d0fad0a154c0f5..a621e5e5c75c5fe2966a002c7998e5b1ed8b137c 100644 (file)
@@ -17,7 +17,7 @@
 #include <asm/fhc.h>
 #include <asm/upa.h>
 
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("Sun Fire LED driver");
 MODULE_LICENSE("GPL");
 
index 057b0221f695a06d26f00baf17fe77ee46da5da2..b0407c5fadb2a94d65347ac05452253f3717a49a 100644 (file)
@@ -74,7 +74,9 @@ static struct adb_driver *adb_driver_list[] = {
        NULL
 };
 
-static struct class *adb_dev_class;
+static const struct class adb_dev_class = {
+       .name = "adb",
+};
 
 static struct adb_driver *adb_controller;
 BLOCKING_NOTIFIER_HEAD(adb_client_list);
@@ -888,10 +890,10 @@ adbdev_init(void)
                return;
        }
 
-       adb_dev_class = class_create("adb");
-       if (IS_ERR(adb_dev_class))
+       if (class_register(&adb_dev_class))
                return;
-       device_create(adb_dev_class, NULL, MKDEV(ADB_MAJOR, 0), NULL, "adb");
+
+       device_create(&adb_dev_class, NULL, MKDEV(ADB_MAJOR, 0), NULL, "adb");
 
        platform_device_register(&adb_pfdev);
        platform_driver_probe(&adb_pfdrv, adb_dummy_probe);
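This hunk follows the ongoing migration away from class_create() toward statically defined, const struct class objects. A minimal sketch of the registration pattern; the name and major number below are placeholders:

static const struct class example_class = {
	.name = "example",
};

static int __init example_init(void)
{
	int error;

	error = class_register(&example_class);
	if (error)
		return error;

	/* 42 is a hypothetical major, not a real assignment */
	device_create(&example_class, NULL, MKDEV(42, 0), NULL, "example");
	return 0;
}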
index a5ee8f736a8e006af30d372aa6e6343577206428..565f1e21ff7dc0802c8e378a02bc723af84cb58b 100644 (file)
@@ -136,7 +136,7 @@ static int macio_device_modalias(const struct device *dev, struct kobj_uevent_en
 
 extern const struct attribute_group *macio_dev_groups[];
 
-struct bus_type macio_bus_type = {
+const struct bus_type macio_bus_type = {
        .name   = "macio",
        .match  = macio_bus_match,
        .uevent = macio_device_modalias,
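Constifying bus_type, as done here and again for cec_bus_type further down, works because the driver core now takes const pointers for bus registration. A sketch, with a hypothetical match hook:

/* bus_type instances are read-only after registration, so they can
 * live in rodata; example_bus_match stands in for a real match hook. */
static const struct bus_type example_bus_type = {
	.name	= "example",
	.match	= example_bus_match,
};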
index 40240bce77b01e288e3ef7ba94bc0a07c6a1421f..896a43bd819f7102ba29cbbc6441174f2e03edde 100644 (file)
@@ -523,7 +523,7 @@ static int rackmeter_probe(struct macio_dev* mdev,
        return rc;
 }
 
-static int rackmeter_remove(struct macio_dev* mdev)
+static void rackmeter_remove(struct macio_dev *mdev)
 {
        struct rackmeter *rm = dev_get_drvdata(&mdev->ofdev.dev);
 
@@ -558,8 +558,6 @@ static int rackmeter_remove(struct macio_dev* mdev)
 
        /* Get rid of me */
        kfree(rm);
-
-       return 0;
 }
 
 static int rackmeter_shutdown(struct macio_dev* mdev)
index 3c1b29476ce24abd99415c4fd51c86f6eb5279a0..37cdc6931f6d02cc435be824304a94c68ddde50d 100644 (file)
@@ -481,11 +481,9 @@ static int therm_of_probe(struct platform_device *dev)
        return -ENODEV;
 }
 
-static int
-therm_of_remove( struct platform_device *dev )
+static void therm_of_remove(struct platform_device *dev)
 {
        i2c_del_driver( &g4fan_driver );
-       return 0;
 }
 
 static const struct of_device_id therm_of_match[] = {{
@@ -501,7 +499,7 @@ static struct platform_driver therm_of_driver = {
                .of_match_table = therm_of_match,
        },
        .probe          = therm_of_probe,
-       .remove         = therm_of_remove,
+       .remove_new     = therm_of_remove,
 };
 
 struct apple_thermal_info {
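The .remove_new conversion seen here repeats across the windfarm patches below: the platform core ignored the int return of .remove(), so callbacks are being migrated to a void-returning .remove_new. A generic sketch with hypothetical example_* helpers:

static void example_remove(struct platform_device *pdev)
{
	example_teardown(platform_get_drvdata(pdev));
	/* nothing to return; an error here could not be acted on anyway */
}

static struct platform_driver example_driver = {
	.probe		= example_probe,
	.remove_new	= example_remove,
	.driver = {
		.name = "example",
	},
};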
index d1dec314ae305948668a5216ca9d80982973ec91..876b4d8cbe378ad609f3b64bc75eb70449bd9f19 100644 (file)
@@ -662,16 +662,14 @@ static int wf_pm112_probe(struct platform_device *dev)
        return 0;
 }
 
-static int wf_pm112_remove(struct platform_device *dev)
+static void wf_pm112_remove(struct platform_device *dev)
 {
        wf_unregister_client(&pm112_events);
-       /* should release all sensors and controls */
-       return 0;
 }
 
 static struct platform_driver wf_pm112_driver = {
        .probe = wf_pm112_probe,
-       .remove = wf_pm112_remove,
+       .remove_new = wf_pm112_remove,
        .driver = {
                .name = "windfarm",
        },
index 82500417ebeec8546fc85d091d277e1ef448306a..cd45fbc4fe1cec25dd5c288ee58001fcccac56db 100644 (file)
@@ -992,15 +992,14 @@ static int pm121_probe(struct platform_device *ddev)
        return 0;
 }
 
-static int pm121_remove(struct platform_device *ddev)
+static void pm121_remove(struct platform_device *ddev)
 {
        wf_unregister_client(&pm121_events);
-       return 0;
 }
 
 static struct platform_driver pm121_driver = {
        .probe = pm121_probe,
-       .remove = pm121_remove,
+       .remove_new = pm121_remove,
        .driver = {
                .name = "windfarm",
                .bus = &platform_bus_type,
index e21f973551cc25206687861323ab219948774a39..14fa1e9ac3e00489ce2340f5f72405d50c1f1c94 100644 (file)
@@ -775,17 +775,14 @@ static int wf_pm72_probe(struct platform_device *dev)
        return 0;
 }
 
-static int wf_pm72_remove(struct platform_device *dev)
+static void wf_pm72_remove(struct platform_device *dev)
 {
        wf_unregister_client(&pm72_events);
-
-       /* should release all sensors and controls */
-       return 0;
 }
 
 static struct platform_driver wf_pm72_driver = {
        .probe  = wf_pm72_probe,
-       .remove = wf_pm72_remove,
+       .remove_new = wf_pm72_remove,
        .driver = {
                .name = "windfarm",
        },
index 257fb2c695c53c1a246104395b000f1065fdf6ad..404d2454e33de622c1926442aa9733d3d451bfca 100644 (file)
@@ -724,7 +724,7 @@ static int wf_smu_probe(struct platform_device *ddev)
        return 0;
 }
 
-static int wf_smu_remove(struct platform_device *ddev)
+static void wf_smu_remove(struct platform_device *ddev)
 {
        wf_unregister_client(&wf_smu_events);
 
@@ -761,13 +761,11 @@ static int wf_smu_remove(struct platform_device *ddev)
        /* Destroy control loops state structures */
        kfree(wf_smu_sys_fans);
        kfree(wf_smu_cpu_fans);
-
-       return 0;
 }
 
 static struct platform_driver wf_smu_driver = {
-        .probe = wf_smu_probe,
-        .remove = wf_smu_remove,
+       .probe = wf_smu_probe,
+       .remove_new = wf_smu_remove,
        .driver = {
                .name = "windfarm",
        },
index 120a9cfba0c54e994564e0fe6064f691263c7651..fba02a375435e40956fa6f6565f616dc0b20ff06 100644 (file)
@@ -647,7 +647,7 @@ static int wf_smu_probe(struct platform_device *ddev)
        return 0;
 }
 
-static int wf_smu_remove(struct platform_device *ddev)
+static void wf_smu_remove(struct platform_device *ddev)
 {
        wf_unregister_client(&wf_smu_events);
 
@@ -691,13 +691,11 @@ static int wf_smu_remove(struct platform_device *ddev)
        kfree(wf_smu_slots_fans);
        kfree(wf_smu_drive_fans);
        kfree(wf_smu_cpu_fans);
-
-       return 0;
 }
 
 static struct platform_driver wf_smu_driver = {
-        .probe = wf_smu_probe,
-        .remove = wf_smu_remove,
+       .probe = wf_smu_probe,
+       .remove_new = wf_smu_remove,
        .driver = {
                .name = "windfarm",
        },
index e9eb7fdde48c1f6fb07df32e678fab72d54a030c..dc8f2c7ef10315edf556b1a461a0acfed463d884 100644 (file)
@@ -668,17 +668,14 @@ static int wf_rm31_probe(struct platform_device *dev)
        return 0;
 }
 
-static int wf_rm31_remove(struct platform_device *dev)
+static void wf_rm31_remove(struct platform_device *dev)
 {
        wf_unregister_client(&rm31_events);
-
-       /* should release all sensors and controls */
-       return 0;
 }
 
 static struct platform_driver wf_rm31_driver = {
        .probe  = wf_rm31_probe,
-       .remove = wf_rm31_remove,
+       .remove_new = wf_rm31_remove,
        .driver = {
                .name = "windfarm",
        },
index a438efb660699b1f1a9dbb162310bfd1fa8e7258..6956beb55326f584ec66b425f35f8f6bf5b1f89f 100644 (file)
@@ -702,13 +702,7 @@ static unsigned int bch_cache_max_chain(struct cache_set *c)
        for (h = c->bucket_hash;
             h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
             h++) {
-               unsigned int i = 0;
-               struct hlist_node *p;
-
-               hlist_for_each(p, h)
-                       i++;
-
-               ret = max(ret, i);
+               ret = max(ret, hlist_count_nodes(h));
        }
 
        mutex_unlock(&c->bucket_lock);
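hlist_count_nodes() folds the open-coded walk removed above into one helper. Its semantics, re-derived as a sketch (the real helper lives in <linux/list.h>):

static inline unsigned int example_hlist_count(struct hlist_head *head)
{
	struct hlist_node *pos;
	unsigned int count = 0;

	hlist_for_each(pos, head)
		count++;

	return count;
}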
index 06263b0a7b5881192bef4a063ba54ae3d8955671..56aa2a8b9d7153ac0792bbdc626ab5250cf074c7 100644 (file)
@@ -2057,6 +2057,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
 static struct mapped_device *alloc_dev(int minor)
 {
        int r, numa_node_id = dm_get_numa_node();
+       struct dax_device *dax_dev;
        struct mapped_device *md;
        void *old_md;
 
@@ -2125,15 +2126,15 @@ static struct mapped_device *alloc_dev(int minor)
        md->disk->private_data = md;
        sprintf(md->disk->disk_name, "dm-%d", minor);
 
-       if (IS_ENABLED(CONFIG_FS_DAX)) {
-               md->dax_dev = alloc_dax(md, &dm_dax_ops);
-               if (IS_ERR(md->dax_dev)) {
-                       md->dax_dev = NULL;
+       dax_dev = alloc_dax(md, &dm_dax_ops);
+       if (IS_ERR(dax_dev)) {
+               if (PTR_ERR(dax_dev) != -EOPNOTSUPP)
                        goto bad;
-               }
-               set_dax_nocache(md->dax_dev);
-               set_dax_nomc(md->dax_dev);
-               if (dax_add_host(md->dax_dev, md->disk))
+       } else {
+               set_dax_nocache(dax_dev);
+               set_dax_nomc(dax_dev);
+               md->dax_dev = dax_dev;
+               if (dax_add_host(dax_dev, md->disk))
                        goto bad;
        }
 
index 5741adf09a2ef9b2e968364a7687a0b1d4ae5b7e..559a172ebc6cbbffaea3abbd5981113c63d6daf6 100644 (file)
@@ -1151,20 +1151,6 @@ void cec_received_msg_ts(struct cec_adapter *adap,
        if (valid_la && min_len) {
                /* These messages have special length requirements */
                switch (cmd) {
-               case CEC_MSG_TIMER_STATUS:
-                       if (msg->msg[2] & 0x10) {
-                               switch (msg->msg[2] & 0xf) {
-                               case CEC_OP_PROG_INFO_NOT_ENOUGH_SPACE:
-                               case CEC_OP_PROG_INFO_MIGHT_NOT_BE_ENOUGH_SPACE:
-                                       if (msg->len < 5)
-                                               valid_la = false;
-                                       break;
-                               }
-                       } else if ((msg->msg[2] & 0xf) == CEC_OP_PROG_ERROR_DUPLICATE) {
-                               if (msg->len < 5)
-                                       valid_la = false;
-                       }
-                       break;
                case CEC_MSG_RECORD_ON:
                        switch (msg->msg[2]) {
                        case CEC_OP_RECORD_SRC_OWN:
index 7e153c5cad04f50d58824da520cc019409bd4a87..5a54db839e5d641db9d635a43a74a71e1cfe2bfd 100644 (file)
@@ -93,7 +93,7 @@ static void cec_devnode_release(struct device *cd)
        cec_delete_adapter(to_cec_adapter(devnode));
 }
 
-static struct bus_type cec_bus_type = {
+static const struct bus_type cec_bus_type = {
        .name = CEC_NAME,
 };
 
index 52ec0ba4b3393f77af6f093be0599933156e3cb7..48ed2993d2f0d3f23e04df67ed813f33f89c0300 100644 (file)
@@ -326,6 +326,8 @@ static const struct cec_dmi_match cec_dmi_match_table[] = {
        { "Google", "Taranza", "0000:00:02.0", port_db_conns },
        /* Google Dexi */
        { "Google", "Dexi", "0000:00:02.0", port_db_conns },
+       /* Google Dita */
+       { "Google", "Dita", "0000:00:02.0", port_db_conns },
 };
 
 static struct device *cros_ec_cec_find_hdmi_dev(struct device *dev,
index 7d4bc2733f2b074e0d2cba7157ee297bc4110a55..7ebcb10126c9c39a6fc6c3169c5832a54b6e6321 100644 (file)
@@ -2155,7 +2155,7 @@ module_init(smscore_module_init);
 module_exit(smscore_module_exit);
 
 MODULE_DESCRIPTION("Siano MDTV Core module");
-MODULE_AUTHOR("Siano Mobile Silicon, Inc. (uris@siano-ms.com)");
+MODULE_AUTHOR("Siano Mobile Silicon, Inc. <uris@siano-ms.com>");
 MODULE_LICENSE("GPL");
 
 /* This should match what's defined at smscoreapi.h */
index f80caaa333daf53372598375ad5fc5a8aaa949e1..d893a0e4672b2f9c9de87b5a19f7dcac0e421890 100644 (file)
@@ -1267,5 +1267,5 @@ module_init(smsdvb_module_init);
 module_exit(smsdvb_module_exit);
 
 MODULE_DESCRIPTION("SMS DVB subsystem adaptation module");
-MODULE_AUTHOR("Siano Mobile Silicon, Inc. (uris@siano-ms.com)");
+MODULE_AUTHOR("Siano Mobile Silicon, Inc. <uris@siano-ms.com>");
 MODULE_LICENSE("GPL");
index a366566f22c3b7a1dc634f172d8c5fb89a47f62a..642c48e8c1f5842717c1e25a97335eb52bdb8344 100644 (file)
@@ -113,6 +113,7 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w)
 {
        unsigned pat;
        unsigned plane;
+       int ret = 0;
 
        tpg->max_line_width = max_w;
        for (pat = 0; pat < TPG_MAX_PAT_LINES; pat++) {
@@ -121,14 +122,18 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w)
 
                        tpg->lines[pat][plane] =
                                vzalloc(array3_size(max_w, 2, pixelsz));
-                       if (!tpg->lines[pat][plane])
-                               return -ENOMEM;
+                       if (!tpg->lines[pat][plane]) {
+                               ret = -ENOMEM;
+                               goto free_lines;
+                       }
                        if (plane == 0)
                                continue;
                        tpg->downsampled_lines[pat][plane] =
                                vzalloc(array3_size(max_w, 2, pixelsz));
-                       if (!tpg->downsampled_lines[pat][plane])
-                               return -ENOMEM;
+                       if (!tpg->downsampled_lines[pat][plane]) {
+                               ret = -ENOMEM;
+                               goto free_lines;
+                       }
                }
        }
        for (plane = 0; plane < TPG_MAX_PLANES; plane++) {
@@ -136,18 +141,45 @@ int tpg_alloc(struct tpg_data *tpg, unsigned max_w)
 
                tpg->contrast_line[plane] =
                        vzalloc(array_size(pixelsz, max_w));
-               if (!tpg->contrast_line[plane])
-                       return -ENOMEM;
+               if (!tpg->contrast_line[plane]) {
+                       ret = -ENOMEM;
+                       goto free_contrast_line;
+               }
                tpg->black_line[plane] =
                        vzalloc(array_size(pixelsz, max_w));
-               if (!tpg->black_line[plane])
-                       return -ENOMEM;
+               if (!tpg->black_line[plane]) {
+                       ret = -ENOMEM;
+                       goto free_contrast_line;
+               }
                tpg->random_line[plane] =
                        vzalloc(array3_size(max_w, 2, pixelsz));
-               if (!tpg->random_line[plane])
-                       return -ENOMEM;
+               if (!tpg->random_line[plane]) {
+                       ret = -ENOMEM;
+                       goto free_contrast_line;
+               }
        }
        return 0;
+
+free_contrast_line:
+       for (plane = 0; plane < TPG_MAX_PLANES; plane++) {
+               vfree(tpg->contrast_line[plane]);
+               vfree(tpg->black_line[plane]);
+               vfree(tpg->random_line[plane]);
+               tpg->contrast_line[plane] = NULL;
+               tpg->black_line[plane] = NULL;
+               tpg->random_line[plane] = NULL;
+       }
+free_lines:
+       for (pat = 0; pat < TPG_MAX_PAT_LINES; pat++)
+               for (plane = 0; plane < TPG_MAX_PLANES; plane++) {
+                       vfree(tpg->lines[pat][plane]);
+                       tpg->lines[pat][plane] = NULL;
+                       if (plane == 0)
+                               continue;
+                       vfree(tpg->downsampled_lines[pat][plane]);
+                       tpg->downsampled_lines[pat][plane] = NULL;
+               }
+       return ret;
 }
 EXPORT_SYMBOL_GPL(tpg_alloc);
 
index 9293b058ab9974885da8938095597dabd96e603c..4f78f30b3646e4715a2a6e92a29f9fdf321bf8f4 100644 (file)
@@ -679,12 +679,10 @@ static int dvb_frontend_thread(void *data)
        set_freezable();
        while (1) {
                up(&fepriv->sem);           /* is locked when we enter the thread... */
-restart:
-               wait_event_interruptible_timeout(fepriv->wait_queue,
-                                                dvb_frontend_should_wakeup(fe) ||
-                                                kthread_should_stop() ||
-                                                freezing(current),
-                       fepriv->delay);
+               wait_event_freezable_timeout(fepriv->wait_queue,
+                                            dvb_frontend_should_wakeup(fe) ||
+                                            kthread_should_stop(),
+                                            fepriv->delay);
 
                if (kthread_should_stop() || dvb_frontend_is_exiting(fe)) {
                        /* got signal or quitting */
@@ -694,9 +692,6 @@ restart:
                        break;
                }
 
-               if (try_to_freeze())
-                       goto restart;
-
                if (down_interruptible(&fepriv->sem))
                        break;
 
@@ -2168,7 +2163,8 @@ static int dvb_frontend_handle_compat_ioctl(struct file *file, unsigned int cmd,
                if (!tvps->num || (tvps->num > DTV_IOCTL_MAX_MSGS))
                        return -EINVAL;
 
-               tvp = memdup_user(compat_ptr(tvps->props), tvps->num * sizeof(*tvp));
+               tvp = memdup_array_user(compat_ptr(tvps->props),
+                                       tvps->num, sizeof(*tvp));
                if (IS_ERR(tvp))
                        return PTR_ERR(tvp);
 
@@ -2199,7 +2195,8 @@ static int dvb_frontend_handle_compat_ioctl(struct file *file, unsigned int cmd,
                if (!tvps->num || (tvps->num > DTV_IOCTL_MAX_MSGS))
                        return -EINVAL;
 
-               tvp = memdup_user(compat_ptr(tvps->props), tvps->num * sizeof(*tvp));
+               tvp = memdup_array_user(compat_ptr(tvps->props),
+                                       tvps->num, sizeof(*tvp));
                if (IS_ERR(tvp))
                        return PTR_ERR(tvp);
 
@@ -2379,7 +2376,8 @@ static int dvb_get_property(struct dvb_frontend *fe, struct file *file,
        if (!tvps->num || tvps->num > DTV_IOCTL_MAX_MSGS)
                return -EINVAL;
 
-       tvp = memdup_user((void __user *)tvps->props, tvps->num * sizeof(*tvp));
+       tvp = memdup_array_user((void __user *)tvps->props,
+                               tvps->num, sizeof(*tvp));
        if (IS_ERR(tvp))
                return PTR_ERR(tvp);
 
@@ -2457,7 +2455,8 @@ static int dvb_frontend_handle_ioctl(struct file *file,
                if (!tvps->num || (tvps->num > DTV_IOCTL_MAX_MSGS))
                        return -EINVAL;
 
-               tvp = memdup_user((void __user *)tvps->props, tvps->num * sizeof(*tvp));
+               tvp = memdup_array_user((void __user *)tvps->props,
+                                       tvps->num, sizeof(*tvp));
                if (IS_ERR(tvp))
                        return PTR_ERR(tvp);
 
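Two independent cleanups meet in this file: the freezer-aware wait collapses the old restart loop into wait_event_freezable_timeout(), and the property copies move to memdup_array_user(), which checks the count * size multiplication. A sketch of the second point (user_ptr is a stand-in for the ioctl argument):

struct dtv_property *tvp;

/* memdup_user(ptr, num * size) can under-allocate if the product
 * wraps; memdup_array_user() checks the multiplication and returns
 * ERR_PTR(-EOVERFLOW) instead. */
tvp = memdup_array_user(user_ptr, tvps->num, sizeof(*tvp));
if (IS_ERR(tvp))
	return PTR_ERR(tvp);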
index 49f0eb7d0b9d3b16e1410f061b57b2914a3b852a..733d0bc4b4cc3accbb8123947f7ea1edd69016db 100644 (file)
@@ -490,6 +490,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
                dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL);
                if (!dvbdevfops) {
                        kfree(dvbdev);
+                       *pdvbdev = NULL;
                        mutex_unlock(&dvbdev_register_lock);
                        return -ENOMEM;
                }
@@ -498,6 +499,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
                if (!new_node) {
                        kfree(dvbdevfops);
                        kfree(dvbdev);
+                       *pdvbdev = NULL;
                        mutex_unlock(&dvbdev_register_lock);
                        return -ENOMEM;
                }
@@ -531,6 +533,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
                }
                list_del(&dvbdev->list_head);
                kfree(dvbdev);
+               *pdvbdev = NULL;
                up_write(&minor_rwsem);
                mutex_unlock(&dvbdev_register_lock);
                return -EINVAL;
@@ -553,6 +556,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
                dvb_media_device_free(dvbdev);
                list_del(&dvbdev->list_head);
                kfree(dvbdev);
+               *pdvbdev = NULL;
                mutex_unlock(&dvbdev_register_lock);
                return ret;
        }
@@ -571,6 +575,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
                dvb_media_device_free(dvbdev);
                list_del(&dvbdev->list_head);
                kfree(dvbdev);
+               *pdvbdev = NULL;
                mutex_unlock(&dvbdev_register_lock);
                return PTR_ERR(clsdev);
        }
index b3f5c49accafd2b335bbee8c59760e182f9c241f..27f1de21f5717551cf87656bee02ae9d72f1a8fb 100644 (file)
@@ -797,7 +797,6 @@ struct dvb_frontend* bcm3510_attach(const struct bcm3510_config *config,
                                   struct i2c_adapter *i2c)
 {
        struct bcm3510_state* state = NULL;
-       int ret;
        bcm3510_register_value v;
 
        /* allocate memory for the internal state */
@@ -816,7 +815,7 @@ struct dvb_frontend* bcm3510_attach(const struct bcm3510_config *config,
 
        mutex_init(&state->hab_mutex);
 
-       if ((ret = bcm3510_readB(state,0xe0,&v)) < 0)
+       if (bcm3510_readB(state, 0xe0, &v) < 0)
                goto error;
 
        deb_info("Revision: 0x%1x, Layer: 0x%1x.\n",v.REVID_e0.REV,v.REVID_e0.LAYER);
index 2c9f3c430a82a9f94a708e28f86d782d91fcea8d..89c71bc42a0fd2936225db8061c7cc24e20d6d7e 100644 (file)
 #define PACKED __attribute__((packed))
 
 #undef err
-#define err(format, arg...)  printk(KERN_ERR     "bcm3510: " format "\n" , ## arg)
+#define err(format, arg...)  printk(KERN_ERR     "bcm3510: " format "\n", ## arg)
 #undef info
-#define info(format, arg...) printk(KERN_INFO    "bcm3510: " format "\n" , ## arg)
+#define info(format, arg...) printk(KERN_INFO    "bcm3510: " format "\n", ## arg)
 #undef warn
-#define warn(format, arg...) printk(KERN_WARNING "bcm3510: " format "\n" , ## arg)
+#define warn(format, arg...) printk(KERN_WARNING "bcm3510: " format "\n", ## arg)
 
 
 #define PANASONIC_FIRST_IF_BASE_IN_KHz  1407500
index 9aeea089756fe1c9dbb0073a0fec8540d8a9383e..65dd9b72ea55556782eff0d7748948bfa417f8a0 100644 (file)
@@ -224,13 +224,13 @@ static enum fe_code_rate cx24110_get_fec(struct cx24110_state *state)
        }
 }
 
-static int cx24110_set_symbolrate (struct cx24110_state* state, u32 srate)
+static int cx24110_set_symbolrate (struct cx24110_state *state, u32 srate)
 {
 /* fixme (low): add error handling */
        u32 ratio;
        u32 tmp, fclk, BDRI;
 
-       static const u32 bands[]={5000000UL,15000000UL,90999000UL/2};
+       static const u32 bands[] = {5000000UL, 15000000UL, 90999000UL/2};
        int i;
 
        dprintk("cx24110 debug: entering %s(%d)\n",__func__,srate);
index 834b011d3462db32dcf5b310c1e6a7b548b2e4b4..839551841893746c365bedb8a3648912a7701ad4 100644 (file)
@@ -34,11 +34,11 @@ static inline int cx24110_pll_write(struct dvb_frontend *fe, u32 val)
 }
 
 #if IS_REACHABLE(CONFIG_DVB_CX24110)
-extern struct dvb_frontend* cx24110_attach(const struct cx24110_config* config,
-extern struct dvb_frontend* cx24110_attach(const struct cx24110_config* config,
                                           struct i2c_adapter* i2c);
+extern struct dvb_frontend *cx24110_attach(const struct cx24110_config *config,
+                                          struct i2c_adapter *i2c);
 #else
-static inline struct dvb_frontend* cx24110_attach(const struct cx24110_config* config,
-                                                 struct i2c_adapter* i2c)
+static inline struct dvb_frontend *cx24110_attach(const struct cx24110_config *config,
+                                                 struct i2c_adapter *i2c)
 {
        printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
        return NULL;
index ac6e47d81b9ebdaad735cb88edcdb5aea47e580e..75fc7ad263d05cd70b9b58a925be56eea528eeec 100644 (file)
@@ -1647,7 +1647,7 @@ static const struct dvb_frontend_ops cx24117_ops = {
 
 
 MODULE_DESCRIPTION("DVB Frontend module for Conexant cx24117/cx24132 hardware");
-MODULE_AUTHOR("Luis Alves (ljalvs@gmail.com)");
+MODULE_AUTHOR("Luis Alves <ljalvs@gmail.com>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1.1");
 MODULE_FIRMWARE(CX24117_DEFAULT_FIRMWARE);
index ef697ab6bc2e5287d02c5af55bc8f0359cf98523..1775a4aa0a186850789e45ee355a552f8cf94577 100644 (file)
@@ -796,7 +796,7 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr,
        b1[0] = 0;
        msg.buf = b1;
 
-       nr = ida_simple_get(&pll_ida, 0, DVB_PLL_MAX, GFP_KERNEL);
+       nr = ida_alloc_max(&pll_ida, DVB_PLL_MAX - 1, GFP_KERNEL);
        if (nr < 0) {
                kfree(b1);
                return NULL;
@@ -862,7 +862,7 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr,
        return fe;
 out:
        kfree(b1);
-       ida_simple_remove(&pll_ida, nr);
+       ida_free(&pll_ida, nr);
 
        return NULL;
 }
@@ -905,7 +905,7 @@ static void dvb_pll_remove(struct i2c_client *client)
        struct dvb_frontend *fe = i2c_get_clientdata(client);
        struct dvb_pll_priv *priv = fe->tuner_priv;
 
-       ida_simple_remove(&pll_ida, priv->nr);
+       ida_free(&pll_ida, priv->nr);
        dvb_pll_release(fe);
 }
 
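Note the bound change that comes with the ida conversion above: ida_simple_get() took an exclusive end, ida_alloc_max() takes an inclusive maximum, hence DVB_PLL_MAX - 1. In sketch form:

/* old: exclusive end, allocates in [0, DVB_PLL_MAX) */
nr = ida_simple_get(&pll_ida, 0, DVB_PLL_MAX, GFP_KERNEL);
/* new: inclusive max, same range */
nr = ida_alloc_max(&pll_ida, DVB_PLL_MAX - 1, GFP_KERNEL);
if (nr < 0)
	return nr;	/* -ENOMEM or -ENOSPC */

/* and on teardown, ida_simple_remove() becomes: */
ida_free(&pll_ida, nr);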
index 48326434488c4fd32457d0b99638da9d6dea67b4..72540ef4e5f889b07e1731b58977e17bcb6228ab 100644 (file)
@@ -118,50 +118,32 @@ static const s32 stv0367cab_RF_LookUp2[RF_LOOKUP_TABLE2_SIZE][RF_LOOKUP_TABLE2_S
        }
 };
 
-static
-int stv0367_writeregs(struct stv0367_state *state, u16 reg, u8 *data, int len)
+static noinline_for_stack
+int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data)
 {
-       u8 buf[MAX_XFER_SIZE];
+       u8 buf[3] = { MSB(reg), LSB(reg), data };
        struct i2c_msg msg = {
                .addr = state->config->demod_address,
                .flags = 0,
                .buf = buf,
-               .len = len + 2
+               .len = 3,
        };
        int ret;
 
-       if (2 + len > sizeof(buf)) {
-               printk(KERN_WARNING
-                      "%s: i2c wr reg=%04x: len=%d is too big!\n",
-                      KBUILD_MODNAME, reg, len);
-               return -EINVAL;
-       }
-
-
-       buf[0] = MSB(reg);
-       buf[1] = LSB(reg);
-       memcpy(buf + 2, data, len);
-
        if (i2cdebug)
                printk(KERN_DEBUG "%s: [%02x] %02x: %02x\n", __func__,
-                       state->config->demod_address, reg, buf[2]);
+                       state->config->demod_address, reg, data);
 
        ret = i2c_transfer(state->i2c, &msg, 1);
        if (ret != 1)
                printk(KERN_ERR "%s: i2c write error! ([%02x] %02x: %02x)\n",
-                       __func__, state->config->demod_address, reg, buf[2]);
+                       __func__, state->config->demod_address, reg, data);
 
        return (ret != 1) ? -EREMOTEIO : 0;
 }
 
-static int stv0367_writereg(struct stv0367_state *state, u16 reg, u8 data)
-{
-       u8 tmp = data; /* see gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 */
-
-       return stv0367_writeregs(state, reg, &tmp, 1);
-}
-
-static u8 stv0367_readreg(struct stv0367_state *state, u16 reg)
+static noinline_for_stack
+u8 stv0367_readreg(struct stv0367_state *state, u16 reg)
 {
        u8 b0[] = { 0, 0 };
        u8 b1[] = { 0 };
index b27769558f789db0485d518b561c9b8709189187..81410595820a16192be953b48901a026b76b0529 100644 (file)
 #define dprintk(__y, __z, format, arg...) do {                                         \
        if (__z) {                                                                      \
                if      ((verbose > FE_ERROR) && (verbose > __y))                       \
-                       printk(KERN_ERR "%s: " format "\n", __func__ , ##arg);          \
+                       printk(KERN_ERR "%s: " format "\n", __func__, ##arg);           \
                else if ((verbose > FE_NOTICE) && (verbose > __y))                      \
-                       printk(KERN_NOTICE "%s: " format "\n", __func__ , ##arg);       \
+                       printk(KERN_NOTICE "%s: " format "\n", __func__, ##arg);        \
                else if ((verbose > FE_INFO) && (verbose > __y))                        \
-                       printk(KERN_INFO "%s: " format "\n", __func__ , ##arg);         \
+                       printk(KERN_INFO "%s: " format "\n", __func__, ##arg);          \
                else if ((verbose > FE_DEBUG) && (verbose > __y))                       \
-                       printk(KERN_DEBUG "%s: " format "\n", __func__ , ##arg);        \
+                       printk(KERN_DEBUG "%s: " format "\n", __func__, ##arg); \
        } else {                                                                        \
                if (verbose > __y)                                                      \
                        printk(format, ##arg);                                          \
index 3a671ec3f45e714ef9b104ca1da70319d4b7c7e2..b635ac7ef688e89c85fd988017947ce0be258e11 100644 (file)
@@ -24,11 +24,11 @@ struct tda8083_config
 };
 
 #if IS_REACHABLE(CONFIG_DVB_TDA8083)
-extern struct dvb_frontend* tda8083_attach(const struct tda8083_config* config,
-                                          struct i2c_adapter* i2c);
+extern struct dvb_frontend *tda8083_attach(const struct tda8083_config *config,
+                                          struct i2c_adapter *i2c);
 #else
-static inline struct dvb_frontend* tda8083_attach(const struct tda8083_config* config,
-                                          struct i2c_adapter* i2c)
+static inline struct dvb_frontend *tda8083_attach(const struct tda8083_config *config,
+                                                 struct i2c_adapter *i2c)
 {
        printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
        return NULL;
index 7ba575e9c55f4356af0d7ff920849169f5bb554a..3df055be66d6cb69315ed67e08468fe8d3805b4e 100644 (file)
@@ -3,7 +3,7 @@
  * Driver for Zarlink zl10036 DVB-S silicon tuner
  *
  * Copyright (C) 2006 Tino Reichardt
- * Copyright (C) 2007-2009 Matthias Schwarzott <zzam@gentoo.de>
+ * Copyright (C) 2007-2009 Matthias Schwarzott <zzam@gentoo.org>
  *
  **
  * The data sheet for this tuner can be found at:
index ad83e6344e7fd360418d3638c8b9b9cf039925f4..23c2964a928c48bca98662ca74280283ab36b2f1 100644 (file)
@@ -3,7 +3,7 @@
  * Driver for Zarlink ZL10036 DVB-S silicon tuner
  *
  * Copyright (C) 2006 Tino Reichardt
- * Copyright (C) 2007-2009 Matthias Schwarzott <zzam@gentoo.de>
+ * Copyright (C) 2007-2009 Matthias Schwarzott <zzam@gentoo.org>
  */
 
 #ifndef DVB_ZL10036_H
index 4c3435921f19ee91d6da02e650e8a0fa0f94433d..56f276b920ab7bd8705f2d69e414bfb8a6b0d5b6 100644 (file)
@@ -224,6 +224,7 @@ config VIDEO_IMX412
 config VIDEO_IMX415
        tristate "Sony IMX415 sensor support"
        depends on OF_GPIO
+       select V4L2_CCI_I2C
        help
          This is a Video4Linux2 sensor driver for the Sony
          IMX415 camera.
@@ -658,6 +659,7 @@ config VIDEO_S5K6A3
 
 config VIDEO_ST_VGXY61
        tristate "ST VGXY61 sensor support"
+       select V4L2_CCI_I2C
        depends on OF && GPIOLIB
        help
          This is a Video4Linux2 sensor driver for the ST VGXY61
index 409b9a37f0185c7ecf7f0cc91e4fd897d7137eb7..4829cbe3241986f5a1760bfd6d4428f68f314b56 100644 (file)
@@ -1057,11 +1057,11 @@ static int adv7182_init(struct adv7180_state *state)
                                              ADV7180_REG_EXTENDED_OUTPUT_CONTROL,
                                              0x17);
                        }
-               }
-               else
+               } else {
                        adv7180_write(state,
                                      ADV7180_REG_EXTENDED_OUTPUT_CONTROL,
                                      0x07);
+               }
                adv7180_write(state, ADV7180_REG_OUTPUT_CONTROL, 0x0c);
                adv7180_write(state, ADV7180_REG_CTRL_2, 0x40);
        }
index ff21cd4744d3d5b383dabe9e63963a83792750f0..4fbe4e18570e9586fe899f7ad3581bbe0277f95d 100644 (file)
@@ -403,7 +403,7 @@ adv7343_get_pdata(struct i2c_client *client)
        if (!IS_ENABLED(CONFIG_OF) || !client->dev.of_node)
                return client->dev.platform_data;
 
-       np = of_graph_get_next_endpoint(client->dev.of_node, NULL);
+       np = of_graph_get_endpoint_by_regs(client->dev.of_node, 0, -1);
        if (!np)
                return NULL;
 
index 6f90f78f58cfac7e62681f51196007bb57eb7ca6..d2b5e722e997f38a57196f5035fe102190acd533 100644 (file)
@@ -173,7 +173,6 @@ struct adv748x_afe {
  *
  * @endpoints:         parsed device node endpoints for each port
  *
- * @i2c_addresses:     I2C Page addresses
  * @i2c_clients:       I2C clients for the page accesses
  * @regmap:            regmap configuration pages.
  *
index 810fa8826f308ff9a08b180e2ed013dfaba3128a..319db3e847c4e8269be3216f0f52a2644ad35982 100644 (file)
@@ -3204,8 +3204,8 @@ static int adv76xx_parse_dt(struct adv76xx_state *state)
 
        np = state->i2c_clients[ADV76XX_PAGE_IO]->dev.of_node;
 
-       /* Parse the endpoint. */
-       endpoint = of_graph_get_next_endpoint(np, NULL);
+       /* FIXME: Parse the endpoint. */
+       endpoint = of_graph_get_endpoint_by_regs(np, -1, -1);
        if (!endpoint)
                return -EINVAL;
 
index 34ff7fad38774d428fd8612d64da95486465703d..e65702e3f73e845977349eec1292a9eb3ea15b2e 100644 (file)
@@ -1170,40 +1170,32 @@ static int alvium_set_bayer_pattern(struct alvium_dev *alvium,
        return 0;
 }
 
-static int alvium_get_frame_interval(struct alvium_dev *alvium)
+static int alvium_get_frame_interval(struct alvium_dev *alvium,
+                                    u64 *min_fr, u64 *max_fr)
 {
-       u64 dft_fr, min_fr, max_fr;
        int ret = 0;
 
-       alvium_read(alvium, REG_BCRM_ACQUISITION_FRAME_RATE_RW,
-                   &dft_fr, &ret);
        alvium_read(alvium, REG_BCRM_ACQUISITION_FRAME_RATE_MIN_R,
-                   &min_fr, &ret);
+                   min_fr, &ret);
        alvium_read(alvium, REG_BCRM_ACQUISITION_FRAME_RATE_MAX_R,
-                   &max_fr, &ret);
-       if (ret)
-               return ret;
-
-       alvium->dft_fr = dft_fr;
-       alvium->min_fr = min_fr;
-       alvium->max_fr = max_fr;
+                   max_fr, &ret);
 
-       return 0;
+       return ret;
 }
 
-static int alvium_set_frame_rate(struct alvium_dev *alvium)
+static int alvium_set_frame_rate(struct alvium_dev *alvium, u64 fr)
 {
        struct device *dev = &alvium->i2c_client->dev;
        int ret;
 
        ret = alvium_write_hshake(alvium, REG_BCRM_ACQUISITION_FRAME_RATE_RW,
-                                 alvium->fr);
+                                 fr);
        if (ret) {
                dev_err(dev, "Fail to set frame rate lanes reg\n");
                return ret;
        }
 
-       dev_dbg(dev, "set frame rate: %llu us\n", alvium->fr);
+       dev_dbg(dev, "set frame rate: %llu us\n", fr);
 
        return 0;
 }
@@ -1472,7 +1464,7 @@ static int alvium_get_hw_features_params(struct alvium_dev *alvium)
 
        ret = alvium_get_img_height_params(alvium);
        if (ret) {
-               dev_err(dev, "Fail to read img heigth regs\n");
+               dev_err(dev, "Fail to read img height regs\n");
                return ret;
        }
 
@@ -1647,44 +1639,28 @@ static int alvium_hw_init(struct alvium_dev *alvium)
 }
 
 /* --------------- Subdev Operations --------------- */
-
-static int alvium_g_frame_interval(struct v4l2_subdev *sd,
+static int alvium_s_frame_interval(struct v4l2_subdev *sd,
                                   struct v4l2_subdev_state *sd_state,
                                   struct v4l2_subdev_frame_interval *fi)
 {
        struct alvium_dev *alvium = sd_to_alvium(sd);
-
-       /*
-        * FIXME: Implement support for V4L2_SUBDEV_FORMAT_TRY, using the V4L2
-        * subdev active state API.
-        */
-       if (fi->which != V4L2_SUBDEV_FORMAT_ACTIVE)
-               return -EINVAL;
-
-       fi->interval = alvium->frame_interval;
-
-       return 0;
-}
-
-static int alvium_set_frame_interval(struct alvium_dev *alvium,
-                                    struct v4l2_subdev_frame_interval *fi)
-{
        struct device *dev = &alvium->i2c_client->dev;
        u64 req_fr, min_fr, max_fr;
+       struct v4l2_fract *interval;
        int ret;
 
+       if (alvium->streaming)
+               return -EBUSY;
+
        if (fi->interval.denominator == 0)
                return -EINVAL;
 
-       ret = alvium_get_frame_interval(alvium);
+       ret = alvium_get_frame_interval(alvium, &min_fr, &max_fr);
        if (ret) {
                dev_err(dev, "Fail to get frame interval\n");
                return ret;
        }
 
-       min_fr = alvium->min_fr;
-       max_fr = alvium->max_fr;
-
        dev_dbg(dev, "fi->interval.numerator = %d\n",
                fi->interval.numerator);
        dev_dbg(dev, "fi->interval.denominator = %d\n",
@@ -1692,39 +1668,17 @@ static int alvium_set_frame_interval(struct alvium_dev *alvium,
 
        req_fr = (u64)((fi->interval.denominator * USEC_PER_SEC) /
                       fi->interval.numerator);
+       req_fr = clamp(req_fr, min_fr, max_fr);
 
-       if (req_fr >= max_fr && req_fr <= min_fr)
-               req_fr = alvium->dft_fr;
+       interval = v4l2_subdev_state_get_interval(sd_state, 0);
 
-       alvium->fr = req_fr;
-       alvium->frame_interval.numerator = fi->interval.numerator;
-       alvium->frame_interval.denominator = fi->interval.denominator;
+       interval->numerator = fi->interval.numerator;
+       interval->denominator = fi->interval.denominator;
 
-       return 0;
-}
-
-static int alvium_s_frame_interval(struct v4l2_subdev *sd,
-                                  struct v4l2_subdev_state *sd_state,
-                                  struct v4l2_subdev_frame_interval *fi)
-{
-       struct alvium_dev *alvium = sd_to_alvium(sd);
-       int ret;
-
-       /*
-        * FIXME: Implement support for V4L2_SUBDEV_FORMAT_TRY, using the V4L2
-        * subdev active state API.
-        */
        if (fi->which != V4L2_SUBDEV_FORMAT_ACTIVE)
-               return -EINVAL;
-
-       if (alvium->streaming)
-               return -EBUSY;
-
-       ret = alvium_set_frame_interval(alvium, fi);
-       if (!ret)
-               ret = alvium_set_frame_rate(alvium);
+               return 0;
 
-       return ret;
+       return alvium_set_frame_rate(alvium, req_fr);
 }
 
 static int alvium_enum_mbus_code(struct v4l2_subdev *sd,
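
The conversion above turns a V4L2 fraction (seconds per frame) into the register's rate unit before clamping. A worked example, assuming the BCRM rate registers count in microhertz:

/*
 * fi->interval = 1/30 s  ->  numerator = 1, denominator = 30
 * req_fr = (30 * USEC_PER_SEC) / 1 = 30000000, i.e. 30 Hz in uHz,
 * then clamp(req_fr, min_fr, max_fr) bounds it to the sensor limits.
 */
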
@@ -1872,6 +1826,7 @@ static int alvium_init_state(struct v4l2_subdev *sd,
 {
        struct alvium_dev *alvium = sd_to_alvium(sd);
        struct alvium_mode *mode = &alvium->mode;
+       struct v4l2_fract *interval;
        struct v4l2_subdev_format sd_fmt = {
                .which = V4L2_SUBDEV_FORMAT_TRY,
                .format = alvium_csi2_default_fmt,
@@ -1889,6 +1844,11 @@ static int alvium_init_state(struct v4l2_subdev *sd,
        *v4l2_subdev_state_get_crop(state, 0) = sd_crop.rect;
        *v4l2_subdev_state_get_format(state, 0) = sd_fmt.format;
 
+       /* Setup initial frame interval */
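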
+       interval = v4l2_subdev_state_get_interval(state, 0);
+       interval->numerator = 1;
+       interval->denominator = ALVIUM_DEFAULT_FR_HZ;
+
        return 0;
 }
 
@@ -2258,7 +2218,7 @@ static const struct v4l2_subdev_pad_ops alvium_pad_ops = {
        .set_fmt = alvium_set_fmt,
        .get_selection = alvium_get_selection,
        .set_selection = alvium_set_selection,
-       .get_frame_interval = alvium_g_frame_interval,
+       .get_frame_interval = v4l2_subdev_get_frame_interval,
        .set_frame_interval = alvium_s_frame_interval,
 };
 
@@ -2279,11 +2239,6 @@ static int alvium_subdev_init(struct alvium_dev *alvium)
        struct v4l2_subdev *sd = &alvium->sd;
        int ret;
 
-       /* Setup initial frame interval*/
-       alvium->frame_interval.numerator = 1;
-       alvium->frame_interval.denominator = ALVIUM_DEFAULT_FR_HZ;
-       alvium->fr = ALVIUM_DEFAULT_FR_HZ;
-
        /* Setup the initial mode */
        alvium->mode.fmt = alvium_csi2_default_fmt;
        alvium->mode.width = alvium_csi2_default_fmt.width;
index b85a25169e79d537c5ffca0986f41a04fcc33617..9463f8604fbccda443c69c96ba22d787062b0025 100644 (file)
@@ -442,11 +442,6 @@ struct alvium_dev {
        s32 inc_sharp;
 
        struct alvium_mode mode;
-       struct v4l2_fract frame_interval;
-       u64 dft_fr;
-       u64 min_fr;
-       u64 max_fr;
-       u64 fr;
 
        u8 h_sup_csi_lanes;
        u64 link_freq;
index c7d5fa532ae1c14325ccb16968c1cafdf799b0f4..09331cf95c62d06e3720c55b5b89fb41f162875d 100644 (file)
@@ -314,7 +314,7 @@ static void ar0521_calc_pll(struct ar0521_dev *sensor)
         * In the clock tree:
         * MIPI_CLK = PIXEL_CLOCK * bpp / 2 / 2
         *
-        * Generic pixel_rate to bus clock frequencey equation:
+        * Generic pixel_rate to bus clock frequency equation:
         * MIPI_CLK = V4L2_CID_PIXEL_RATE * bpp / lanes / 2
         *
         * From which we derive the PIXEL_CLOCK to use in the clock tree:
@@ -327,7 +327,7 @@ static void ar0521_calc_pll(struct ar0521_dev *sensor)
         *
         * TODO: in case we have less data lanes we have to reduce the desired
         * VCO not to exceed the limits specified by the datasheet and
-        * consequentially reduce the obtained pixel clock.
+        * consequently reduce the obtained pixel clock.
         */
        pixel_clock = AR0521_PIXEL_CLOCK_RATE * 2 / sensor->lane_count;
        bpp = ar0521_code_to_bpp(sensor);
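
Equating the two MIPI_CLK expressions from the comment yields the assignment above; a short derivation using only the relations the comment states:

/*
 * PIXEL_CLOCK * bpp / 4 = PIXEL_RATE * bpp / lanes / 2
 *   => PIXEL_CLOCK = PIXEL_RATE * 2 / lanes
 * hence: pixel_clock = AR0521_PIXEL_CLOCK_RATE * 2 / sensor->lane_count;
 */
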
@@ -806,7 +806,7 @@ static const struct initial_reg {
        REGS(be(0x3F00),
             be(0x0017),  /* 3F00: BM_T0 */
             be(0x02DD),  /* 3F02: BM_T1 */
-            /* 3F04: if Ana_gain less than 2, use noise_floor0, multipl */
+            /* 3F04: if Ana_gain less than 2, use noise_floor0, multiply */
             be(0x0020),
             /* 3F06: if Ana_gain between 4 and 7, use noise_floor2 and */
             be(0x0040),
index 0b1a64958d714ed783e60b6fadbcae6a716deaa9..392c97109617700fb7e1f8bfa950c3a8cd5d90bb 100644 (file)
@@ -28,11 +28,11 @@ struct ccs_sensor;
  * @reg_access: Register access quirk. The quirk may divert the access
  *             to another register, or no register at all.
  *
- *             @write: Is this read (false) or write (true) access?
- *             @reg: Pointer to the register to access
- *             @value: Register value, set by the caller on write, or
+ *             -write: Is this read (false) or write (true) access?
+ *             -reg:   Pointer to the register to access
+ *             -val:   Register value, set by the caller on write, or
  *                     by the quirk on read
- *             @return: 0 on success, -ENOIOCTLCMD if no register
+ *             -return: 0 on success, -ENOIOCTLCMD if no register
  *                      access may be done by the caller (default read
  *                      value is zero), else negative error code on error
  * @flags: Quirk flags
index cc09b32ede601d2709a48ac855abfb545a883af2..84d29bcf0ccd47a6c360bc4495531c6c3ff9d1ae 100644 (file)
@@ -157,6 +157,8 @@ static int dw9714_probe(struct i2c_client *client)
                return rval;
        }
 
+       usleep_range(1000, 2000);
+
        v4l2_i2c_subdev_init(&dw9714_dev->sd, client, &dw9714_ops);
        dw9714_dev->sd.flags |= V4L2_SUBDEV_FL_HAS_DEVNODE |
                                V4L2_SUBDEV_FL_HAS_EVENTS;
index b148b1bd2bc3b28a3fb658903d01b94d1522635a..10b6ad66d12616a56bdc8e525a7c7994a209bc05 100644 (file)
@@ -968,7 +968,7 @@ static const struct v4l2_subdev_internal_ops imx214_internal_ops = {
 static const struct regmap_config sensor_regmap_config = {
        .reg_bits = 16,
        .val_bits = 8,
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 
 static int imx214_get_regulators(struct device *dev, struct imx214 *imx214)
index 352da68b8b41b28828bc5ab04f5e1b331d356eda..3800de974e8a9fbd8815c28795db7a8f87a7ad76 100644 (file)
@@ -151,7 +151,7 @@ struct reg_8 {
 static const struct regmap_config imx274_regmap_config = {
        .reg_bits = 16,
        .val_bits = 8,
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 
 /*
index 9967f34774334725c02df9c54a48a4f3e00940be..4150e6e4b9a6358be79950d1865427ef194ba44c 100644 (file)
 
 #define IMX290_PIXEL_ARRAY_WIDTH                       1945
 #define IMX290_PIXEL_ARRAY_HEIGHT                      1097
-#define IMX920_PIXEL_ARRAY_MARGIN_LEFT                 12
-#define IMX920_PIXEL_ARRAY_MARGIN_RIGHT                        13
-#define IMX920_PIXEL_ARRAY_MARGIN_TOP                  8
-#define IMX920_PIXEL_ARRAY_MARGIN_BOTTOM               9
+#define IMX290_PIXEL_ARRAY_MARGIN_LEFT                 12
+#define IMX290_PIXEL_ARRAY_MARGIN_RIGHT                        13
+#define IMX290_PIXEL_ARRAY_MARGIN_TOP                  8
+#define IMX290_PIXEL_ARRAY_MARGIN_BOTTOM               9
 #define IMX290_PIXEL_ARRAY_RECORDING_WIDTH             1920
 #define IMX290_PIXEL_ARRAY_RECORDING_HEIGHT            1080
 
@@ -1161,10 +1161,10 @@ static int imx290_get_selection(struct v4l2_subdev *sd,
                 * The sensor moves the readout by 1 pixel based on flips to
                 * keep the Bayer order the same.
                 */
-               sel->r.top = IMX920_PIXEL_ARRAY_MARGIN_TOP
+               sel->r.top = IMX290_PIXEL_ARRAY_MARGIN_TOP
                           + (IMX290_PIXEL_ARRAY_RECORDING_HEIGHT - format->height) / 2
                           + imx290->vflip->val;
-               sel->r.left = IMX920_PIXEL_ARRAY_MARGIN_LEFT
+               sel->r.left = IMX290_PIXEL_ARRAY_MARGIN_LEFT
                            + (IMX290_PIXEL_ARRAY_RECORDING_WIDTH - format->width) / 2
                            + imx290->hflip->val;
                sel->r.width = format->width;
@@ -1183,8 +1183,8 @@ static int imx290_get_selection(struct v4l2_subdev *sd,
                return 0;
 
        case V4L2_SEL_TGT_CROP_DEFAULT:
-               sel->r.top = IMX920_PIXEL_ARRAY_MARGIN_TOP;
-               sel->r.left = IMX920_PIXEL_ARRAY_MARGIN_LEFT;
+               sel->r.top = IMX290_PIXEL_ARRAY_MARGIN_TOP;
+               sel->r.left = IMX290_PIXEL_ARRAY_MARGIN_LEFT;
                sel->r.width = IMX290_PIXEL_ARRAY_RECORDING_WIDTH;
                sel->r.height = IMX290_PIXEL_ARRAY_RECORDING_HEIGHT;
 
index e47eff672e0c536b5c861c20394a6859aadef4e8..8fe3933f31467cb67ccd10abaa6a02dae03c1cd9 100644 (file)
@@ -70,7 +70,7 @@
 #define IMX319_REG_ORIENTATION         0x0101
 
 /* default link frequency and external clock */
-#define IMX319_LINK_FREQ_DEFAULT       482400000
+#define IMX319_LINK_FREQ_DEFAULT       482400000LL
 #define IMX319_EXT_CLK                 19200000
 #define IMX319_LINK_FREQ_INDEX         0
 
@@ -107,8 +107,7 @@ struct imx319_mode {
 
 struct imx319_hwcfg {
        u32 ext_clk;                    /* sensor external clk */
-       s64 *link_freqs;                /* CSI-2 link frequencies */
-       unsigned int nr_of_link_freqs;
+       unsigned long link_freq_bitmap;
 };
 
 struct imx319 {
@@ -129,7 +128,6 @@ struct imx319 {
        const struct imx319_mode *cur_mode;
 
        struct imx319_hwcfg *hwcfg;
-       s64 link_def_freq;      /* CSI-2 link default frequency */
 
        /*
         * Mutex for serialized access:
@@ -1654,7 +1652,10 @@ static const char * const imx319_test_pattern_menu[] = {
        "Pseudorandom Sequence (PN9)",
 };
 
-/* supported link frequencies */
+/*
+ * When adding more than the one below, make sure the disallowed ones will
+ * actually be disabled in the LINK_FREQ control.
+ */
 static const s64 link_freq_menu_items[] = {
        IMX319_LINK_FREQ_DEFAULT,
 };
@@ -2058,7 +2059,7 @@ imx319_set_pad_format(struct v4l2_subdev *sd,
                *framefmt = fmt->format;
        } else {
                imx319->cur_mode = mode;
-               pixel_rate = imx319->link_def_freq * 2 * 4;
+               pixel_rate = IMX319_LINK_FREQ_DEFAULT * 2 * 4;
                do_div(pixel_rate, 10);
                __v4l2_ctrl_s_ctrl_int64(imx319->pixel_rate, pixel_rate);
                /* Update limits and set FPS to default */
@@ -2255,7 +2256,7 @@ static int imx319_init_controls(struct imx319 *imx319)
                imx319->link_freq->flags |= V4L2_CTRL_FLAG_READ_ONLY;
 
        /* pixel_rate = link_freq * 2 * nr_of_lanes / bits_per_sample */
-       pixel_rate = imx319->link_def_freq * 2 * 4;
+       pixel_rate = IMX319_LINK_FREQ_DEFAULT * 2 * 4;
        do_div(pixel_rate, 10);
        /* By default, PIXEL_RATE is read only */
        imx319->pixel_rate = v4l2_ctrl_new_std(ctrl_hdlr, &imx319_ctrl_ops,
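
Evaluating the comment's formula for the one supported link frequency gives the value the control ends up with:

/*
 * pixel_rate = link_freq * 2 * nr_of_lanes / bits_per_sample
 *            = 482400000 * 2 * 4 / 10
 *            = 385920000 pixels per second
 */
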
@@ -2332,7 +2333,6 @@ static struct imx319_hwcfg *imx319_get_hwcfg(struct device *dev)
        };
        struct fwnode_handle *ep;
        struct fwnode_handle *fwnode = dev_fwnode(dev);
-       unsigned int i;
        int ret;
 
        if (!fwnode)
@@ -2364,24 +2364,14 @@ static struct imx319_hwcfg *imx319_get_hwcfg(struct device *dev)
                goto out_err;
        }
 
-       dev_dbg(dev, "num of link freqs: %d", bus_cfg.nr_of_link_frequencies);
-       if (!bus_cfg.nr_of_link_frequencies) {
-               dev_warn(dev, "no link frequencies defined");
-               goto out_err;
-       }
-
-       cfg->nr_of_link_freqs = bus_cfg.nr_of_link_frequencies;
-       cfg->link_freqs = devm_kcalloc(dev,
-                                      bus_cfg.nr_of_link_frequencies + 1,
-                                      sizeof(*cfg->link_freqs), GFP_KERNEL);
-       if (!cfg->link_freqs)
+       ret = v4l2_link_freq_to_bitmap(dev, bus_cfg.link_frequencies,
+                                      bus_cfg.nr_of_link_frequencies,
+                                      link_freq_menu_items,
+                                      ARRAY_SIZE(link_freq_menu_items),
+                                      &cfg->link_freq_bitmap);
+       if (ret)
                goto out_err;
 
-       for (i = 0; i < bus_cfg.nr_of_link_frequencies; i++) {
-               cfg->link_freqs[i] = bus_cfg.link_frequencies[i];
-               dev_dbg(dev, "link_freq[%d] = %lld", i, cfg->link_freqs[i]);
-       }
-
        v4l2_fwnode_endpoint_free(&bus_cfg);
        fwnode_handle_put(ep);
        return cfg;
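
v4l2_link_freq_to_bitmap() replaces the hand-rolled matching loop: it intersects the firmware-provided link frequencies with the driver's menu and reports the result as a bitmap, one bit per menu index. A condensed sketch of the call pattern (endpoint parsing elided):

static const s64 menu[] = { 482400000LL };
unsigned long bitmap;
int ret;

/* bus_cfg filled by v4l2_fwnode_endpoint_alloc_parse() beforehand */
ret = v4l2_link_freq_to_bitmap(dev, bus_cfg.link_frequencies,
			       bus_cfg.nr_of_link_frequencies,
			       menu, ARRAY_SIZE(menu), &bitmap);
if (ret)
	return ret;	/* no usable frequency in common */

/* bit n set in bitmap <=> menu[n] is allowed on this board */
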
@@ -2397,7 +2387,6 @@ static int imx319_probe(struct i2c_client *client)
        struct imx319 *imx319;
        bool full_power;
        int ret;
-       u32 i;
 
        imx319 = devm_kzalloc(&client->dev, sizeof(*imx319), GFP_KERNEL);
        if (!imx319)
@@ -2425,20 +2414,6 @@ static int imx319_probe(struct i2c_client *client)
                goto error_probe;
        }
 
-       imx319->link_def_freq = link_freq_menu_items[IMX319_LINK_FREQ_INDEX];
-       for (i = 0; i < imx319->hwcfg->nr_of_link_freqs; i++) {
-               if (imx319->hwcfg->link_freqs[i] == imx319->link_def_freq) {
-                       dev_dbg(&client->dev, "link freq index %d matched", i);
-                       break;
-               }
-       }
-
-       if (i == imx319->hwcfg->nr_of_link_freqs) {
-               dev_err(&client->dev, "no link frequency supported");
-               ret = -EINVAL;
-               goto error_probe;
-       }
-
        /* Set default mode to max resolution */
        imx319->cur_mode = &supported_modes[0];
 
index 6725b3e2a73e103514b3c37f275c753fea9d0f20..40863d87d3413a5ce3527e94e1ca51d02e994321 100644 (file)
@@ -136,7 +136,7 @@ struct imx334_mode {
  * @vblank: Vertical blanking in lines
  * @cur_mode: Pointer to current selected sensor mode
  * @mutex: Mutex for serializing sensor controls
- * @menu_skip_mask: Menu skip mask for link_freq_ctrl
+ * @link_freq_bitmap: Menu bitmap for link_freq_ctrl
  * @cur_code: current selected format code
  */
 struct imx334 {
@@ -158,7 +158,7 @@ struct imx334 {
        u32 vblank;
        const struct imx334_mode *cur_mode;
        struct mutex mutex;
-       unsigned long menu_skip_mask;
+       unsigned long link_freq_bitmap;
        u32 cur_code;
 };
 
@@ -954,9 +954,9 @@ static int imx334_init_state(struct v4l2_subdev *sd,
        imx334_fill_pad_format(imx334, imx334->cur_mode, &fmt);
 
        __v4l2_ctrl_modify_range(imx334->link_freq_ctrl, 0,
-                                __fls(imx334->menu_skip_mask),
-                                ~(imx334->menu_skip_mask),
-                                __ffs(imx334->menu_skip_mask));
+                                __fls(imx334->link_freq_bitmap),
+                                ~(imx334->link_freq_bitmap),
+                                __ffs(imx334->link_freq_bitmap));
 
        mutex_unlock(&imx334->mutex);
 
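For an int-menu control, the third argument of __v4l2_ctrl_modify_range() acts as a skip mask: set bits hide menu entries, which is why the complement of the supported bitmap is passed. A worked example with only menu entry 0 validated against firmware:

/*
 * link_freq_bitmap = 0b01:
 *   max  = __fls(0b01) = 0
 *   skip = ~0b01          (every entry except 0 is hidden)
 *   def  = __ffs(0b01) = 0
 */
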
@@ -1112,7 +1112,6 @@ static int imx334_parse_hw_config(struct imx334 *imx334)
        };
        struct fwnode_handle *ep;
        unsigned long rate;
-       unsigned int i, j;
        int ret;
 
        if (!fwnode)
@@ -1157,26 +1156,10 @@ static int imx334_parse_hw_config(struct imx334 *imx334)
                goto done_endpoint_free;
        }
 
-       if (!bus_cfg.nr_of_link_frequencies) {
-               dev_err(imx334->dev, "no link frequencies defined");
-               ret = -EINVAL;
-               goto done_endpoint_free;
-       }
-
-       for (i = 0; i < bus_cfg.nr_of_link_frequencies; i++) {
-               for (j = 0; j < ARRAY_SIZE(link_freq); j++) {
-                       if (bus_cfg.link_frequencies[i] == link_freq[j]) {
-                               set_bit(j, &imx334->menu_skip_mask);
-                               break;
-                       }
-               }
-
-               if (j == ARRAY_SIZE(link_freq)) {
-                       ret = dev_err_probe(imx334->dev, -EINVAL,
-                                           "no supported link freq found\n");
-                       goto done_endpoint_free;
-               }
-       }
+       ret = v4l2_link_freq_to_bitmap(imx334->dev, bus_cfg.link_frequencies,
+                                      bus_cfg.nr_of_link_frequencies,
+                                      link_freq, ARRAY_SIZE(link_freq),
+                                      &imx334->link_freq_bitmap);
 
 done_endpoint_free:
        v4l2_fwnode_endpoint_free(&bus_cfg);
@@ -1310,8 +1293,8 @@ static int imx334_init_controls(struct imx334 *imx334)
        imx334->link_freq_ctrl = v4l2_ctrl_new_int_menu(ctrl_hdlr,
                                                        &imx334_ctrl_ops,
                                                        V4L2_CID_LINK_FREQ,
-                                                       __fls(imx334->menu_skip_mask),
-                                                       __ffs(imx334->menu_skip_mask),
+                                                       __fls(imx334->link_freq_bitmap),
+                                                       __ffs(imx334->link_freq_bitmap),
                                                        link_freq);
 
        if (imx334->link_freq_ctrl)
@@ -1386,7 +1369,7 @@ static int imx334_probe(struct i2c_client *client)
        }
 
        /* Set default mode to max resolution */
-       imx334->cur_mode = &supported_modes[__ffs(imx334->menu_skip_mask)];
+       imx334->cur_mode = &supported_modes[__ffs(imx334->link_freq_bitmap)];
        imx334->cur_code = imx334_mbus_codes[0];
        imx334->vblank = imx334->cur_mode->vblank;
 
index 7a37eb327ff40bf7bf701ef5995222c5b372221c..dab6d080bc4c9b1b28bd0ab71a2266b90761de8b 100644 (file)
 /* Group hold register */
 #define IMX335_REG_HOLD                0x3001
 
+/* Test pattern generator */
+#define IMX335_REG_TPG         0x329e
+#define IMX335_TPG_ALL_000     0
+#define IMX335_TPG_ALL_FFF     1
+#define IMX335_TPG_ALL_555     2
+#define IMX335_TPG_ALL_AAA     3
+#define IMX335_TPG_TOG_555_AAA 4
+#define IMX335_TPG_TOG_AAA_555 5
+#define IMX335_TPG_TOG_000_555 6
+#define IMX335_TPG_TOG_555_000 7
+#define IMX335_TPG_TOG_000_FFF 8
+#define IMX335_TPG_TOG_FFF_000 9
+#define IMX335_TPG_H_COLOR_BARS 10
+#define IMX335_TPG_V_COLOR_BARS 11
+
 /* Input clock rate */
 #define IMX335_INCLK_RATE      24000000
 
 /* CSI2 HW configuration */
-#define IMX335_LINK_FREQ       594000000
+#define IMX335_LINK_FREQ_594MHz                594000000LL
+#define IMX335_LINK_FREQ_445MHz                445500000LL
+
 #define IMX335_NUM_DATA_LANES  4
 
 #define IMX335_REG_MIN         0x00
@@ -99,7 +116,6 @@ static const char * const imx335_supply_name[] = {
  * @vblank_min: Minimum vertical blanking in lines
  * @vblank_max: Maximum vertical blanking in lines
  * @pclk: Sensor pixel clock
- * @link_freq_idx: Link frequency index
  * @reg_list: Register list for sensor mode
  */
 struct imx335_mode {
@@ -111,7 +127,6 @@ struct imx335_mode {
        u32 vblank_min;
        u32 vblank_max;
        u64 pclk;
-       u32 link_freq_idx;
        struct imx335_reg_list reg_list;
 };
 
@@ -134,6 +149,7 @@ struct imx335_mode {
  * @vblank: Vertical blanking in lines
  * @cur_mode: Pointer to current selected sensor mode
  * @mutex: Mutex for serializing sensor controls
+ * @link_freq_bitmap: Menu bitmap for link_freq_ctrl
  * @cur_mbus_code: Currently selected media bus format code
  */
 struct imx335 {
@@ -157,19 +173,46 @@ struct imx335 {
        u32 vblank;
        const struct imx335_mode *cur_mode;
        struct mutex mutex;
+       unsigned long link_freq_bitmap;
        u32 cur_mbus_code;
 };
 
-static const s64 link_freq[] = {
-       IMX335_LINK_FREQ,
+static const char * const imx335_tpg_menu[] = {
+       "Disabled",
+       "All 000h",
+       "All FFFh",
+       "All 555h",
+       "All AAAh",
+       "Toggle 555/AAAh",
+       "Toggle AAA/555h",
+       "Toggle 000/555h",
+       "Toggle 555/000h",
+       "Toggle 000/FFFh",
+       "Toggle FFF/000h",
+       "Horizontal color bars",
+       "Vertical color bars",
+};
+
+static const int imx335_tpg_val[] = {
+       IMX335_TPG_ALL_000,
+       IMX335_TPG_ALL_000,
+       IMX335_TPG_ALL_FFF,
+       IMX335_TPG_ALL_555,
+       IMX335_TPG_ALL_AAA,
+       IMX335_TPG_TOG_555_AAA,
+       IMX335_TPG_TOG_AAA_555,
+       IMX335_TPG_TOG_000_555,
+       IMX335_TPG_TOG_555_000,
+       IMX335_TPG_TOG_000_FFF,
+       IMX335_TPG_TOG_FFF_000,
+       IMX335_TPG_H_COLOR_BARS,
+       IMX335_TPG_V_COLOR_BARS,
 };
 
 /* Sensor mode registers */
 static const struct imx335_reg mode_2592x1940_regs[] = {
        {0x3000, 0x01},
        {0x3002, 0x00},
-       {0x300c, 0x3b},
-       {0x300d, 0x2a},
        {0x3018, 0x04},
        {0x302c, 0x3c},
        {0x302e, 0x20},
@@ -177,10 +220,6 @@ static const struct imx335_reg mode_2592x1940_regs[] = {
        {0x3074, 0xc8},
        {0x3076, 0x28},
        {0x304c, 0x00},
-       {0x314c, 0xc6},
-       {0x315a, 0x02},
-       {0x3168, 0xa0},
-       {0x316a, 0x7e},
        {0x31a1, 0x00},
        {0x3288, 0x21},
        {0x328a, 0x02},
@@ -249,7 +288,7 @@ static const struct imx335_reg mode_2592x1940_regs[] = {
        {0x3794, 0x7a},
        {0x3796, 0xa1},
        {0x37b0, 0x36},
-       {0x3a00, 0x01},
+       {0x3a00, 0x00},
 };
 
 static const struct imx335_reg raw10_framefmt_regs[] = {
@@ -266,6 +305,65 @@ static const struct imx335_reg raw12_framefmt_regs[] = {
        {0x341d, 0x00},
 };
 
+static const struct imx335_reg mipi_data_rate_1188Mbps[] = {
+       {0x300c, 0x3b},
+       {0x300d, 0x2a},
+       {0x314c, 0xc6},
+       {0x314d, 0x00},
+       {0x315a, 0x02},
+       {0x3168, 0xa0},
+       {0x316a, 0x7e},
+       {0x319e, 0x01},
+       {0x3a18, 0x8f},
+       {0x3a1a, 0x4f},
+       {0x3a1c, 0x47},
+       {0x3a1e, 0x37},
+       {0x3a1f, 0x01},
+       {0x3a20, 0x4f},
+       {0x3a22, 0x87},
+       {0x3a24, 0x4f},
+       {0x3a26, 0x7f},
+       {0x3a28, 0x3f},
+};
+
+static const struct imx335_reg mipi_data_rate_891Mbps[] = {
+       {0x300c, 0x3b},
+       {0x300d, 0x2a},
+       {0x314c, 0x29},
+       {0x314d, 0x01},
+       {0x315a, 0x06},
+       {0x3168, 0xa0},
+       {0x316a, 0x7e},
+       {0x319e, 0x02},
+       {0x3a18, 0x7f},
+       {0x3a1a, 0x37},
+       {0x3a1c, 0x37},
+       {0x3a1e, 0xf7},
+       {0x3a20, 0x3f},
+       {0x3a22, 0x6f},
+       {0x3a24, 0x3f},
+       {0x3a26, 0x5f},
+       {0x3a28, 0x2f},
+};
+
+static const s64 link_freq[] = {
+       /* Corresponds to 1188Mbps data lane rate */
+       IMX335_LINK_FREQ_594MHz,
+       /* Corresponds to 891Mbps data lane rate */
+       IMX335_LINK_FREQ_445MHz,
+};
+
+static const struct imx335_reg_list link_freq_reglist[] = {
+       {
+               .num_of_regs = ARRAY_SIZE(mipi_data_rate_1188Mbps),
+               .regs = mipi_data_rate_1188Mbps,
+       },
+       {
+               .num_of_regs = ARRAY_SIZE(mipi_data_rate_891Mbps),
+               .regs = mipi_data_rate_891Mbps,
+       },
+};
+
 static const u32 imx335_mbus_codes[] = {
        MEDIA_BUS_FMT_SRGGB12_1X12,
        MEDIA_BUS_FMT_SRGGB10_1X10,
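
The pairing of link_freq[] with link_freq_reglist[] follows from CSI-2 D-PHY transferring data on both clock edges, so the per-lane data rate is twice the link frequency:

/*
 * 594000000 Hz * 2 = 1188 Mbps -> mipi_data_rate_1188Mbps
 * 445500000 Hz * 2 =  891 Mbps -> mipi_data_rate_891Mbps
 */
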
@@ -280,7 +378,6 @@ static const struct imx335_mode supported_mode = {
        .vblank_min = 2560,
        .vblank_max = 133060,
        .pclk = 396000000,
-       .link_freq_idx = 0,
        .reg_list = {
                .num_of_regs = ARRAY_SIZE(mode_2592x1940_regs),
                .regs = mode_2592x1940_regs,
@@ -405,7 +502,8 @@ static int imx335_update_controls(struct imx335 *imx335,
 {
        int ret;
 
-       ret = __v4l2_ctrl_s_ctrl(imx335->link_freq_ctrl, mode->link_freq_idx);
+       ret = __v4l2_ctrl_s_ctrl(imx335->link_freq_ctrl,
+                                __ffs(imx335->link_freq_bitmap));
        if (ret)
                return ret;
 
@@ -456,6 +554,49 @@ error_release_group_hold:
        return ret;
 }
 
+static int imx335_update_test_pattern(struct imx335 *imx335, u32 pattern_index)
+{
+       int ret;
+
+       if (pattern_index >= ARRAY_SIZE(imx335_tpg_val))
+               return -EINVAL;
+
+       if (pattern_index) {
+               const struct imx335_reg tpg_enable_regs[] = {
+                       { 0x3148, 0x10 },
+                       { 0x3280, 0x00 },
+                       { 0x329c, 0x01 },
+                       { 0x32a0, 0x11 },
+                       { 0x3302, 0x00 },
+                       { 0x3303, 0x00 },
+                       { 0x336c, 0x00 },
+               };
+
+               ret = imx335_write_reg(imx335, IMX335_REG_TPG, 1,
+                                      imx335_tpg_val[pattern_index]);
+               if (ret)
+                       return ret;
+
+               ret = imx335_write_regs(imx335, tpg_enable_regs,
+                                       ARRAY_SIZE(tpg_enable_regs));
+       } else {
+               const struct imx335_reg tpg_disable_regs[] = {
+                       { 0x3148, 0x00 },
+                       { 0x3280, 0x01 },
+                       { 0x329c, 0x00 },
+                       { 0x32a0, 0x10 },
+                       { 0x3302, 0x32 },
+                       { 0x3303, 0x00 },
+                       { 0x336c, 0x01 },
+               };
+
+               ret = imx335_write_regs(imx335, tpg_disable_regs,
+                                       ARRAY_SIZE(tpg_disable_regs));
+       }
+
+       return ret;
+}
+
 /**
  * imx335_set_ctrl() - Set subdevice control
  * @ctrl: pointer to v4l2_ctrl structure
@@ -476,26 +617,31 @@ static int imx335_set_ctrl(struct v4l2_ctrl *ctrl)
        u32 exposure;
        int ret;
 
-       switch (ctrl->id) {
-       case V4L2_CID_VBLANK:
+       /* Propagate change of current control to all related controls */
+       if (ctrl->id == V4L2_CID_VBLANK) {
                imx335->vblank = imx335->vblank_ctrl->val;
 
                dev_dbg(imx335->dev, "Received vblank %u, new lpfr %u\n",
                        imx335->vblank,
                        imx335->vblank + imx335->cur_mode->height);
 
-               ret = __v4l2_ctrl_modify_range(imx335->exp_ctrl,
-                                              IMX335_EXPOSURE_MIN,
-                                              imx335->vblank +
-                                              imx335->cur_mode->height -
-                                              IMX335_EXPOSURE_OFFSET,
-                                              1, IMX335_EXPOSURE_DEFAULT);
-               break;
-       case V4L2_CID_EXPOSURE:
-               /* Set controls only if sensor is in power on state */
-               if (!pm_runtime_get_if_in_use(imx335->dev))
-                       return 0;
+               return __v4l2_ctrl_modify_range(imx335->exp_ctrl,
+                                               IMX335_EXPOSURE_MIN,
+                                               imx335->vblank +
+                                               imx335->cur_mode->height -
+                                               IMX335_EXPOSURE_OFFSET,
+                                               1, IMX335_EXPOSURE_DEFAULT);
+       }
 
+       /*
+        * Applying V4L2 control values only happens
+        * when the sensor is powered up for streaming.
+        */
+       if (pm_runtime_get_if_in_use(imx335->dev) == 0)
+               return 0;
+
+       switch (ctrl->id) {
+       case V4L2_CID_EXPOSURE:
                exposure = ctrl->val;
                analog_gain = imx335->again_ctrl->val;
 
@@ -504,7 +650,9 @@ static int imx335_set_ctrl(struct v4l2_ctrl *ctrl)
 
                ret = imx335_update_exp_gain(imx335, exposure, analog_gain);
 
-               pm_runtime_put(imx335->dev);
+               break;
+       case V4L2_CID_TEST_PATTERN:
+               ret = imx335_update_test_pattern(imx335, ctrl->val);
 
                break;
        default:
@@ -512,6 +660,8 @@ static int imx335_set_ctrl(struct v4l2_ctrl *ctrl)
                ret = -EINVAL;
        }
 
+       pm_runtime_put(imx335->dev);
+
        return ret;
 }
 
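The rework funnels every hardware-touching control through one powered-up guard: the V4L2 framework caches control values, so writes can simply be skipped while the sensor is off and replayed at power-up. A minimal sketch of the pattern; example_write_ctrl_to_hw() is a hypothetical stand-in:

static int example_set_ctrl(struct v4l2_ctrl *ctrl)
{
	struct imx335 *imx335 = container_of(ctrl->handler,
					     struct imx335, ctrl_handler);
	int ret;

	/* returns 0 when not powered: just cache the value and succeed */
	if (pm_runtime_get_if_in_use(imx335->dev) == 0)
		return 0;

	ret = example_write_ctrl_to_hw(imx335, ctrl);	/* hypothetical */

	pm_runtime_put(imx335->dev);
	return ret;
}
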
@@ -691,6 +841,13 @@ static int imx335_init_state(struct v4l2_subdev *sd,
        fmt.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
        imx335_fill_pad_format(imx335, &supported_mode, &fmt);
 
+       mutex_lock(&imx335->mutex);
+       __v4l2_ctrl_modify_range(imx335->link_freq_ctrl, 0,
+                                __fls(imx335->link_freq_bitmap),
+                                ~(imx335->link_freq_bitmap),
+                                __ffs(imx335->link_freq_bitmap));
+       mutex_unlock(&imx335->mutex);
+
        return imx335_set_pad_format(sd, sd_state, &fmt);
 }
 
@@ -755,6 +912,14 @@ static int imx335_start_streaming(struct imx335 *imx335)
        const struct imx335_reg_list *reg_list;
        int ret;
 
+       /* Setup PLL */
+       reg_list = &link_freq_reglist[__ffs(imx335->link_freq_bitmap)];
+       ret = imx335_write_regs(imx335, reg_list->regs, reg_list->num_of_regs);
+       if (ret) {
+               dev_err(imx335->dev, "%s failed to set plls\n", __func__);
+               return ret;
+       }
+
        /* Write sensor mode registers */
        reg_list = &imx335->cur_mode->reg_list;
        ret = imx335_write_regs(imx335, reg_list->regs,
@@ -939,19 +1104,10 @@ static int imx335_parse_hw_config(struct imx335 *imx335)
                goto done_endpoint_free;
        }
 
-       if (!bus_cfg.nr_of_link_frequencies) {
-               dev_err(imx335->dev, "no link frequencies defined\n");
-               ret = -EINVAL;
-               goto done_endpoint_free;
-       }
-
-       for (i = 0; i < bus_cfg.nr_of_link_frequencies; i++)
-               if (bus_cfg.link_frequencies[i] == IMX335_LINK_FREQ)
-                       goto done_endpoint_free;
-
-       dev_err(imx335->dev, "no compatible link frequencies found\n");
-
-       ret = -EINVAL;
+       ret = v4l2_link_freq_to_bitmap(imx335->dev, bus_cfg.link_frequencies,
+                                      bus_cfg.nr_of_link_frequencies,
+                                      link_freq, ARRAY_SIZE(link_freq),
+                                      &imx335->link_freq_bitmap);
 
 done_endpoint_free:
        v4l2_fwnode_endpoint_free(&bus_cfg);
@@ -1055,7 +1211,7 @@ static int imx335_init_controls(struct imx335 *imx335)
        u32 lpfr;
        int ret;
 
-       ret = v4l2_ctrl_handler_init(ctrl_hdlr, 6);
+       ret = v4l2_ctrl_handler_init(ctrl_hdlr, 7);
        if (ret)
                return ret;
 
@@ -1089,6 +1245,12 @@ static int imx335_init_controls(struct imx335 *imx335)
                                                mode->vblank_max,
                                                1, mode->vblank);
 
+       v4l2_ctrl_new_std_menu_items(ctrl_hdlr,
+                                    &imx335_ctrl_ops,
+                                    V4L2_CID_TEST_PATTERN,
+                                    ARRAY_SIZE(imx335_tpg_menu) - 1,
+                                    0, 0, imx335_tpg_menu);
+
        /* Read only controls */
        imx335->pclk_ctrl = v4l2_ctrl_new_std(ctrl_hdlr,
                                              &imx335_ctrl_ops,
@@ -1099,9 +1261,8 @@ static int imx335_init_controls(struct imx335 *imx335)
        imx335->link_freq_ctrl = v4l2_ctrl_new_int_menu(ctrl_hdlr,
                                                        &imx335_ctrl_ops,
                                                        V4L2_CID_LINK_FREQ,
-                                                       ARRAY_SIZE(link_freq) -
-                                                       1,
-                                                       mode->link_freq_idx,
+                                                       __fls(imx335->link_freq_bitmap),
+                                                       __ffs(imx335->link_freq_bitmap),
                                                        link_freq);
        if (imx335->link_freq_ctrl)
                imx335->link_freq_ctrl->flags |= V4L2_CTRL_FLAG_READ_ONLY;
index 8c995c58743a2ed994fa6d4693e389df56316f93..7e9c2f65fa0812bf0ba32f8feea612d393725b96 100644 (file)
@@ -56,7 +56,7 @@
 #define IMX355_REG_ORIENTATION         0x0101
 
 /* default link frequency and external clock */
-#define IMX355_LINK_FREQ_DEFAULT       360000000
+#define IMX355_LINK_FREQ_DEFAULT       360000000LL
 #define IMX355_EXT_CLK                 19200000
 #define IMX355_LINK_FREQ_INDEX         0
 
@@ -93,8 +93,7 @@ struct imx355_mode {
 
 struct imx355_hwcfg {
        u32 ext_clk;                    /* sensor external clk */
-       s64 *link_freqs;                /* CSI-2 link frequencies */
-       unsigned int nr_of_link_freqs;
+       unsigned long link_freq_bitmap;
 };
 
 struct imx355 {
@@ -115,7 +114,6 @@ struct imx355 {
        const struct imx355_mode *cur_mode;
 
        struct imx355_hwcfg *hwcfg;
-       s64 link_def_freq;      /* CSI-2 link default frequency */
 
        /*
         * Mutex for serialized access:
@@ -879,7 +877,10 @@ static const char * const imx355_test_pattern_menu[] = {
        "Pseudorandom Sequence (PN9)",
 };
 
-/* supported link frequencies */
+/*
+ * When adding more than the one below, make sure the disallowed ones will
+ * actually be disabled in the LINK_FREQ control.
+ */
 static const s64 link_freq_menu_items[] = {
        IMX355_LINK_FREQ_DEFAULT,
 };
@@ -1356,7 +1357,7 @@ imx355_set_pad_format(struct v4l2_subdev *sd,
                *framefmt = fmt->format;
        } else {
                imx355->cur_mode = mode;
-               pixel_rate = imx355->link_def_freq * 2 * 4;
+               pixel_rate = IMX355_LINK_FREQ_DEFAULT * 2 * 4;
                do_div(pixel_rate, 10);
                __v4l2_ctrl_s_ctrl_int64(imx355->pixel_rate, pixel_rate);
                /* Update limits and set FPS to default */
@@ -1543,7 +1544,7 @@ static int imx355_init_controls(struct imx355 *imx355)
                imx355->link_freq->flags |= V4L2_CTRL_FLAG_READ_ONLY;
 
        /* pixel_rate = link_freq * 2 * nr_of_lanes / bits_per_sample */
-       pixel_rate = imx355->link_def_freq * 2 * 4;
+       pixel_rate = IMX355_LINK_FREQ_DEFAULT * 2 * 4;
        do_div(pixel_rate, 10);
        /* By default, PIXEL_RATE is read only */
        imx355->pixel_rate = v4l2_ctrl_new_std(ctrl_hdlr, &imx355_ctrl_ops,
@@ -1620,7 +1621,6 @@ static struct imx355_hwcfg *imx355_get_hwcfg(struct device *dev)
        };
        struct fwnode_handle *ep;
        struct fwnode_handle *fwnode = dev_fwnode(dev);
-       unsigned int i;
        int ret;
 
        if (!fwnode)
@@ -1652,24 +1652,14 @@ static struct imx355_hwcfg *imx355_get_hwcfg(struct device *dev)
                goto out_err;
        }
 
-       dev_dbg(dev, "num of link freqs: %d", bus_cfg.nr_of_link_frequencies);
-       if (!bus_cfg.nr_of_link_frequencies) {
-               dev_warn(dev, "no link frequencies defined");
-               goto out_err;
-       }
-
-       cfg->nr_of_link_freqs = bus_cfg.nr_of_link_frequencies;
-       cfg->link_freqs = devm_kcalloc(dev,
-                                      bus_cfg.nr_of_link_frequencies + 1,
-                                      sizeof(*cfg->link_freqs), GFP_KERNEL);
-       if (!cfg->link_freqs)
+       ret = v4l2_link_freq_to_bitmap(dev, bus_cfg.link_frequencies,
+                                      bus_cfg.nr_of_link_frequencies,
+                                      link_freq_menu_items,
+                                      ARRAY_SIZE(link_freq_menu_items),
+                                      &cfg->link_freq_bitmap);
+       if (ret)
                goto out_err;
 
-       for (i = 0; i < bus_cfg.nr_of_link_frequencies; i++) {
-               cfg->link_freqs[i] = bus_cfg.link_frequencies[i];
-               dev_dbg(dev, "link_freq[%d] = %lld", i, cfg->link_freqs[i]);
-       }
-
        v4l2_fwnode_endpoint_free(&bus_cfg);
        fwnode_handle_put(ep);
        return cfg;
@@ -1684,7 +1674,6 @@ static int imx355_probe(struct i2c_client *client)
 {
        struct imx355 *imx355;
        int ret;
-       u32 i;
 
        imx355 = devm_kzalloc(&client->dev, sizeof(*imx355), GFP_KERNEL);
        if (!imx355)
@@ -1709,20 +1698,6 @@ static int imx355_probe(struct i2c_client *client)
                goto error_probe;
        }
 
-       imx355->link_def_freq = link_freq_menu_items[IMX355_LINK_FREQ_INDEX];
-       for (i = 0; i < imx355->hwcfg->nr_of_link_freqs; i++) {
-               if (imx355->hwcfg->link_freqs[i] == imx355->link_def_freq) {
-                       dev_dbg(&client->dev, "link freq index %d matched", i);
-                       break;
-               }
-       }
-
-       if (i == imx355->hwcfg->nr_of_link_freqs) {
-               dev_err(&client->dev, "no link frequency supported");
-               ret = -EINVAL;
-               goto error_probe;
-       }
-
        /* Set default mode to max resolution */
        imx355->cur_mode = &supported_modes[0];
 
index 1e5f20c3ed82414249e95ea4897b374e500b95e8..a20b0db330d3458137f54b3ba31b72a9baa2c366 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/videodev2.h>
 
+#include <media/v4l2-cci.h>
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-fwnode.h>
 #include <media/v4l2-subdev.h>
 
 #define IMX415_NUM_CLK_PARAM_REGS 11
 
-#define IMX415_REG_8BIT(n)       ((1 << 16) | (n))
-#define IMX415_REG_16BIT(n)      ((2 << 16) | (n))
-#define IMX415_REG_24BIT(n)      ((3 << 16) | (n))
-#define IMX415_REG_SIZE_SHIFT    16
-#define IMX415_REG_ADDR_MASK     0xffff
-
-#define IMX415_MODE              IMX415_REG_8BIT(0x3000)
+#define IMX415_MODE              CCI_REG8(0x3000)
 #define IMX415_MODE_OPERATING    (0)
 #define IMX415_MODE_STANDBY      BIT(0)
-#define IMX415_REGHOLD           IMX415_REG_8BIT(0x3001)
+#define IMX415_REGHOLD           CCI_REG8(0x3001)
 #define IMX415_REGHOLD_INVALID   (0)
 #define IMX415_REGHOLD_VALID     BIT(0)
-#define IMX415_XMSTA             IMX415_REG_8BIT(0x3002)
+#define IMX415_XMSTA             CCI_REG8(0x3002)
 #define IMX415_XMSTA_START       (0)
 #define IMX415_XMSTA_STOP        BIT(0)
-#define IMX415_BCWAIT_TIME       IMX415_REG_16BIT(0x3008)
-#define IMX415_CPWAIT_TIME       IMX415_REG_16BIT(0x300A)
-#define IMX415_WINMODE           IMX415_REG_8BIT(0x301C)
-#define IMX415_ADDMODE           IMX415_REG_8BIT(0x3022)
-#define IMX415_REVERSE           IMX415_REG_8BIT(0x3030)
+#define IMX415_BCWAIT_TIME       CCI_REG16_LE(0x3008)
+#define IMX415_CPWAIT_TIME       CCI_REG16_LE(0x300a)
+#define IMX415_WINMODE           CCI_REG8(0x301c)
+#define IMX415_ADDMODE           CCI_REG8(0x3022)
+#define IMX415_REVERSE           CCI_REG8(0x3030)
 #define IMX415_HREVERSE_SHIFT    (0)
 #define IMX415_VREVERSE_SHIFT    BIT(0)
-#define IMX415_ADBIT             IMX415_REG_8BIT(0x3031)
-#define IMX415_MDBIT             IMX415_REG_8BIT(0x3032)
-#define IMX415_SYS_MODE                  IMX415_REG_8BIT(0x3033)
-#define IMX415_OUTSEL            IMX415_REG_8BIT(0x30C0)
-#define IMX415_DRV               IMX415_REG_8BIT(0x30C1)
-#define IMX415_VMAX              IMX415_REG_24BIT(0x3024)
-#define IMX415_HMAX              IMX415_REG_16BIT(0x3028)
-#define IMX415_SHR0              IMX415_REG_24BIT(0x3050)
-#define IMX415_GAIN_PCG_0        IMX415_REG_16BIT(0x3090)
+#define IMX415_ADBIT             CCI_REG8(0x3031)
+#define IMX415_MDBIT             CCI_REG8(0x3032)
+#define IMX415_SYS_MODE                  CCI_REG8(0x3033)
+#define IMX415_OUTSEL            CCI_REG8(0x30c0)
+#define IMX415_DRV               CCI_REG8(0x30c1)
+#define IMX415_VMAX              CCI_REG24_LE(0x3024)
+#define IMX415_HMAX              CCI_REG16_LE(0x3028)
+#define IMX415_SHR0              CCI_REG24_LE(0x3050)
+#define IMX415_GAIN_PCG_0        CCI_REG16_LE(0x3090)
 #define IMX415_AGAIN_MIN         0
 #define IMX415_AGAIN_MAX         100
 #define IMX415_AGAIN_STEP        1
-#define IMX415_BLKLEVEL                  IMX415_REG_16BIT(0x30E2)
+#define IMX415_BLKLEVEL                  CCI_REG16_LE(0x30e2)
 #define IMX415_BLKLEVEL_DEFAULT          50
-#define IMX415_TPG_EN_DUOUT      IMX415_REG_8BIT(0x30E4)
-#define IMX415_TPG_PATSEL_DUOUT          IMX415_REG_8BIT(0x30E6)
-#define IMX415_TPG_COLORWIDTH    IMX415_REG_8BIT(0x30E8)
-#define IMX415_TESTCLKEN_MIPI    IMX415_REG_8BIT(0x3110)
-#define IMX415_INCKSEL1                  IMX415_REG_8BIT(0x3115)
-#define IMX415_INCKSEL2                  IMX415_REG_8BIT(0x3116)
-#define IMX415_INCKSEL3                  IMX415_REG_16BIT(0x3118)
-#define IMX415_INCKSEL4                  IMX415_REG_16BIT(0x311A)
-#define IMX415_INCKSEL5                  IMX415_REG_8BIT(0x311E)
-#define IMX415_DIG_CLP_MODE      IMX415_REG_8BIT(0x32C8)
-#define IMX415_WRJ_OPEN                  IMX415_REG_8BIT(0x3390)
-#define IMX415_SENSOR_INFO       IMX415_REG_16BIT(0x3F12)
-#define IMX415_SENSOR_INFO_MASK          0xFFF
+#define IMX415_TPG_EN_DUOUT      CCI_REG8(0x30e4)
+#define IMX415_TPG_PATSEL_DUOUT          CCI_REG8(0x30e6)
+#define IMX415_TPG_COLORWIDTH    CCI_REG8(0x30e8)
+#define IMX415_TESTCLKEN_MIPI    CCI_REG8(0x3110)
+#define IMX415_INCKSEL1                  CCI_REG8(0x3115)
+#define IMX415_INCKSEL2                  CCI_REG8(0x3116)
+#define IMX415_INCKSEL3                  CCI_REG16_LE(0x3118)
+#define IMX415_INCKSEL4                  CCI_REG16_LE(0x311a)
+#define IMX415_INCKSEL5                  CCI_REG8(0x311e)
+#define IMX415_DIG_CLP_MODE      CCI_REG8(0x32c8)
+#define IMX415_WRJ_OPEN                  CCI_REG8(0x3390)
+#define IMX415_SENSOR_INFO       CCI_REG16_LE(0x3f12)
+#define IMX415_SENSOR_INFO_MASK          0xfff
 #define IMX415_CHIP_ID           0x514
-#define IMX415_LANEMODE                  IMX415_REG_16BIT(0x4001)
+#define IMX415_LANEMODE                  CCI_REG16_LE(0x4001)
 #define IMX415_LANEMODE_2        1
 #define IMX415_LANEMODE_4        3
-#define IMX415_TXCLKESC_FREQ     IMX415_REG_16BIT(0x4004)
-#define IMX415_INCKSEL6                  IMX415_REG_8BIT(0x400C)
-#define IMX415_TCLKPOST                  IMX415_REG_16BIT(0x4018)
-#define IMX415_TCLKPREPARE       IMX415_REG_16BIT(0x401A)
-#define IMX415_TCLKTRAIL         IMX415_REG_16BIT(0x401C)
-#define IMX415_TCLKZERO                  IMX415_REG_16BIT(0x401E)
-#define IMX415_THSPREPARE        IMX415_REG_16BIT(0x4020)
-#define IMX415_THSZERO           IMX415_REG_16BIT(0x4022)
-#define IMX415_THSTRAIL                  IMX415_REG_16BIT(0x4024)
-#define IMX415_THSEXIT           IMX415_REG_16BIT(0x4026)
-#define IMX415_TLPX              IMX415_REG_16BIT(0x4028)
-#define IMX415_INCKSEL7                  IMX415_REG_8BIT(0x4074)
-
-struct imx415_reg {
-       u32 address;
-       u32 val;
-};
+#define IMX415_TXCLKESC_FREQ     CCI_REG16_LE(0x4004)
+#define IMX415_INCKSEL6                  CCI_REG8(0x400c)
+#define IMX415_TCLKPOST                  CCI_REG16_LE(0x4018)
+#define IMX415_TCLKPREPARE       CCI_REG16_LE(0x401a)
+#define IMX415_TCLKTRAIL         CCI_REG16_LE(0x401c)
+#define IMX415_TCLKZERO                  CCI_REG16_LE(0x401e)
+#define IMX415_THSPREPARE        CCI_REG16_LE(0x4020)
+#define IMX415_THSZERO           CCI_REG16_LE(0x4022)
+#define IMX415_THSTRAIL                  CCI_REG16_LE(0x4024)
+#define IMX415_THSEXIT           CCI_REG16_LE(0x4026)
+#define IMX415_TLPX              CCI_REG16_LE(0x4028)
+#define IMX415_INCKSEL7                  CCI_REG8(0x4074)
 
 static const char *const imx415_supply_names[] = {
        "dvdd",
@@ -118,13 +108,13 @@ static const s64 link_freq_menu_items[] = {
 struct imx415_clk_params {
        u64 lane_rate;
        u64 inck;
-       struct imx415_reg regs[IMX415_NUM_CLK_PARAM_REGS];
+       struct cci_reg_sequence regs[IMX415_NUM_CLK_PARAM_REGS];
 };
 
 /* INCK Settings - includes all lane rate and INCK dependent registers */
 static const struct imx415_clk_params imx415_clk_params[] = {
        {
-               .lane_rate = 594000000,
+               .lane_rate = 594000000UL,
                .inck = 27000000,
                .regs[0] = { IMX415_BCWAIT_TIME, 0x05D },
                .regs[1] = { IMX415_CPWAIT_TIME, 0x042 },
@@ -139,7 +129,37 @@ static const struct imx415_clk_params imx415_clk_params[] = {
                .regs[10] = { IMX415_TXCLKESC_FREQ, 0x06C0 },
        },
        {
-               .lane_rate = 720000000,
+               .lane_rate = 594000000UL,
+               .inck = 37125000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x07F },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x05B },
+               .regs[2] = { IMX415_SYS_MODE, 0x7 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x24 },
+               .regs[5] = { IMX415_INCKSEL3, 0x080 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x24 },
+               .regs[8] = { IMX415_INCKSEL6, 0x0 },
+               .regs[9] = { IMX415_INCKSEL7, 0x1 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x0984 },
+       },
+       {
+               .lane_rate = 594000000UL,
+               .inck = 74250000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x0FF },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x0B6 },
+               .regs[2] = { IMX415_SYS_MODE, 0x7 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x28 },
+               .regs[5] = { IMX415_INCKSEL3, 0x080 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x28 },
+               .regs[8] = { IMX415_INCKSEL6, 0x0 },
+               .regs[9] = { IMX415_INCKSEL7, 0x1 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x1290 },
+       },
+       {
+               .lane_rate = 720000000UL,
                .inck = 24000000,
                .regs[0] = { IMX415_BCWAIT_TIME, 0x054 },
                .regs[1] = { IMX415_CPWAIT_TIME, 0x03B },
@@ -154,7 +174,22 @@ static const struct imx415_clk_params imx415_clk_params[] = {
                .regs[10] = { IMX415_TXCLKESC_FREQ, 0x0600 },
        },
        {
-               .lane_rate = 891000000,
+               .lane_rate = 720000000UL,
+               .inck = 72000000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x0F8 },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x0B0 },
+               .regs[2] = { IMX415_SYS_MODE, 0x9 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x28 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0A0 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x28 },
+               .regs[8] = { IMX415_INCKSEL6, 0x0 },
+               .regs[9] = { IMX415_INCKSEL7, 0x1 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x1200 },
+       },
+       {
+               .lane_rate = 891000000UL,
                .inck = 27000000,
                .regs[0] = { IMX415_BCWAIT_TIME, 0x05D },
                .regs[1] = { IMX415_CPWAIT_TIME, 0x042 },
@@ -169,7 +204,37 @@ static const struct imx415_clk_params imx415_clk_params[] = {
                .regs[10] = { IMX415_TXCLKESC_FREQ, 0x06C0 },
        },
        {
-               .lane_rate = 1440000000,
+               .lane_rate = 891000000UL,
+               .inck = 37125000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x07F },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x05B },
+               .regs[2] = { IMX415_SYS_MODE, 0x5 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x24 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0C0 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x24 },
+               .regs[8] = { IMX415_INCKSEL6, 0x0 },
+               .regs[9] = { IMX415_INCKSEL7, 0x1 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x0948 },
+       },
+       {
+               .lane_rate = 891000000UL,
+               .inck = 74250000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x0FF },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x0B6 },
+               .regs[2] = { IMX415_SYS_MODE, 0x5 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x28 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0C0 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x28 },
+               .regs[8] = { IMX415_INCKSEL6, 0x0 },
+               .regs[9] = { IMX415_INCKSEL7, 0x1 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x1290 },
+       },
+       {
+               .lane_rate = 1440000000UL,
                .inck = 24000000,
                .regs[0] = { IMX415_BCWAIT_TIME, 0x054 },
                .regs[1] = { IMX415_CPWAIT_TIME, 0x03B },
@@ -184,7 +249,22 @@ static const struct imx415_clk_params imx415_clk_params[] = {
                .regs[10] = { IMX415_TXCLKESC_FREQ, 0x0600 },
        },
        {
-               .lane_rate = 1485000000,
+               .lane_rate = 1440000000UL,
+               .inck = 72000000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x0F8 },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x0B0 },
+               .regs[2] = { IMX415_SYS_MODE, 0x8 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x28 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0A0 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x28 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x1200 },
+       },
+       {
+               .lane_rate = 1485000000UL,
                .inck = 27000000,
                .regs[0] = { IMX415_BCWAIT_TIME, 0x05D },
                .regs[1] = { IMX415_CPWAIT_TIME, 0x042 },
@@ -198,10 +278,175 @@ static const struct imx415_clk_params imx415_clk_params[] = {
                .regs[9] = { IMX415_INCKSEL7, 0x0 },
                .regs[10] = { IMX415_TXCLKESC_FREQ, 0x06C0 },
        },
+       {
+               .lane_rate = 1485000000UL,
+               .inck = 37125000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x07F },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x05B },
+               .regs[2] = { IMX415_SYS_MODE, 0x8 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x24 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0A0 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x24 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x0948 },
+       },
+       {
+               .lane_rate = 1485000000UL,
+               .inck = 74250000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x0FF },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x0B6 },
+               .regs[2] = { IMX415_SYS_MODE, 0x8 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x28 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0A0 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x28 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x1290 },
+       },
+       {
+               .lane_rate = 1782000000UL,
+               .inck = 27000000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x05D },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x042 },
+               .regs[2] = { IMX415_SYS_MODE, 0x4 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x23 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0C6 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E7 },
+               .regs[7] = { IMX415_INCKSEL5, 0x23 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x06C0 },
+       },
+       {
+               .lane_rate = 1782000000UL,
+               .inck = 37125000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x07F },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x05B },
+               .regs[2] = { IMX415_SYS_MODE, 0x4 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x24 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0C0 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x24 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x0948 },
+       },
+       {
+               .lane_rate = 1782000000UL,
+               .inck = 74250000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x0FF },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x0B6 },
+               .regs[2] = { IMX415_SYS_MODE, 0x4 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x28 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0C0 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x28 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x1290 },
+       },
+       {
+               .lane_rate = 2079000000UL,
+               .inck = 27000000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x05D },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x042 },
+               .regs[2] = { IMX415_SYS_MODE, 0x2 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x23 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0E7 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E7 },
+               .regs[7] = { IMX415_INCKSEL5, 0x23 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x06C0 },
+       },
+       {
+               .lane_rate = 2079000000UL,
+               .inck = 37125000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x07F },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x05B },
+               .regs[2] = { IMX415_SYS_MODE, 0x2 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x24 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0E0 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x24 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x0948 },
+       },
+       {
+               .lane_rate = 2079000000UL,
+               .inck = 74250000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x0FF },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x0B6 },
+               .regs[2] = { IMX415_SYS_MODE, 0x2 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x28 },
+               .regs[5] = { IMX415_INCKSEL3, 0x0E0 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x28 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x1290 },
+       },
+       {
+               .lane_rate = 2376000000UL,
+               .inck = 27000000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x05D },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x042 },
+               .regs[2] = { IMX415_SYS_MODE, 0x0 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x23 },
+               .regs[5] = { IMX415_INCKSEL3, 0x108 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E7 },
+               .regs[7] = { IMX415_INCKSEL5, 0x23 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x06C0 },
+       },
+       {
+               .lane_rate = 2376000000UL,
+               .inck = 37125000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x07F },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x05B },
+               .regs[2] = { IMX415_SYS_MODE, 0x0 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x24 },
+               .regs[5] = { IMX415_INCKSEL3, 0x100 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x24 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x0948 },
+       },
+       {
+               .lane_rate = 2376000000UL,
+               .inck = 74250000,
+               .regs[0] = { IMX415_BCWAIT_TIME, 0x0FF },
+               .regs[1] = { IMX415_CPWAIT_TIME, 0x0B6 },
+               .regs[2] = { IMX415_SYS_MODE, 0x0 },
+               .regs[3] = { IMX415_INCKSEL1, 0x00 },
+               .regs[4] = { IMX415_INCKSEL2, 0x28 },
+               .regs[5] = { IMX415_INCKSEL3, 0x100 },
+               .regs[6] = { IMX415_INCKSEL4, 0x0E0 },
+               .regs[7] = { IMX415_INCKSEL5, 0x28 },
+               .regs[8] = { IMX415_INCKSEL6, 0x1 },
+               .regs[9] = { IMX415_INCKSEL7, 0x0 },
+               .regs[10] = { IMX415_TXCLKESC_FREQ, 0x1290 },
+       },
 };
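
For context, the driver presumably selects one of these PLL parameter sets by
matching both the requested lane rate and the external clock (INCK). A minimal
lookup sketch, assuming the table and struct are named imx415_clk_params as
the initialisers above suggest:

        static const struct imx415_clk_params *
        imx415_find_clk_params(u64 lane_rate, u32 inck)
        {
                unsigned int i;

                for (i = 0; i < ARRAY_SIZE(imx415_clk_params); ++i) {
                        if (imx415_clk_params[i].lane_rate == lane_rate &&
                            imx415_clk_params[i].inck == inck)
                                return &imx415_clk_params[i];
                }

                /* no PLL configuration for this lane rate / INCK pair */
                return NULL;
        }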
 
 /* all-pixel 2-lane 720 Mbps 15.74 Hz mode */
-static const struct imx415_reg imx415_mode_2_720[] = {
+static const struct cci_reg_sequence imx415_mode_2_720[] = {
        { IMX415_VMAX, 0x08CA },
        { IMX415_HMAX, 0x07F0 },
        { IMX415_LANEMODE, IMX415_LANEMODE_2 },
@@ -217,7 +462,7 @@ static const struct imx415_reg imx415_mode_2_720[] = {
 };
 
 /* all-pixel 2-lane 1440 Mbps 30.01 Hz mode */
-static const struct imx415_reg imx415_mode_2_1440[] = {
+static const struct cci_reg_sequence imx415_mode_2_1440[] = {
        { IMX415_VMAX, 0x08CA },
        { IMX415_HMAX, 0x042A },
        { IMX415_LANEMODE, IMX415_LANEMODE_2 },
@@ -233,7 +478,7 @@ static const struct imx415_reg imx415_mode_2_1440[] = {
 };
 
 /* all-pixel 4-lane 891 Mbps 30 Hz mode */
-static const struct imx415_reg imx415_mode_4_891[] = {
+static const struct cci_reg_sequence imx415_mode_4_891[] = {
        { IMX415_VMAX, 0x08CA },
        { IMX415_HMAX, 0x044C },
        { IMX415_LANEMODE, IMX415_LANEMODE_4 },
@@ -250,7 +495,7 @@ static const struct imx415_reg imx415_mode_4_891[] = {
 
 struct imx415_mode_reg_list {
        u32 num_of_regs;
-       const struct imx415_reg *regs;
+       const struct cci_reg_sequence *regs;
 };
 
 /*
@@ -323,11 +568,6 @@ static const struct imx415_mode supported_modes[] = {
        },
 };
 
-static const struct regmap_config imx415_regmap_config = {
-       .reg_bits = 16,
-       .val_bits = 8,
-};
-
 static const char *const imx415_test_pattern_menu[] = {
        "disabled",
        "solid black",
@@ -369,7 +609,7 @@ struct imx415 {
  * This table includes fixed register settings and a bunch of undocumented
  * registers that have to be set to another value than default.
  */
-static const struct imx415_reg imx415_init_table[] = {
+static const struct cci_reg_sequence imx415_init_table[] = {
        /* use all-pixel readout mode, no flip */
        { IMX415_WINMODE, 0x00 },
        { IMX415_ADDMODE, 0x00 },
@@ -382,77 +622,77 @@ static const struct imx415_reg imx415_init_table[] = {
        { IMX415_DRV, 0x00 },
 
        /* SONY magic registers */
-       { IMX415_REG_8BIT(0x32D4), 0x21 },
-       { IMX415_REG_8BIT(0x32EC), 0xA1 },
-       { IMX415_REG_8BIT(0x3452), 0x7F },
-       { IMX415_REG_8BIT(0x3453), 0x03 },
-       { IMX415_REG_8BIT(0x358A), 0x04 },
-       { IMX415_REG_8BIT(0x35A1), 0x02 },
-       { IMX415_REG_8BIT(0x36BC), 0x0C },
-       { IMX415_REG_8BIT(0x36CC), 0x53 },
-       { IMX415_REG_8BIT(0x36CD), 0x00 },
-       { IMX415_REG_8BIT(0x36CE), 0x3C },
-       { IMX415_REG_8BIT(0x36D0), 0x8C },
-       { IMX415_REG_8BIT(0x36D1), 0x00 },
-       { IMX415_REG_8BIT(0x36D2), 0x71 },
-       { IMX415_REG_8BIT(0x36D4), 0x3C },
-       { IMX415_REG_8BIT(0x36D6), 0x53 },
-       { IMX415_REG_8BIT(0x36D7), 0x00 },
-       { IMX415_REG_8BIT(0x36D8), 0x71 },
-       { IMX415_REG_8BIT(0x36DA), 0x8C },
-       { IMX415_REG_8BIT(0x36DB), 0x00 },
-       { IMX415_REG_8BIT(0x3724), 0x02 },
-       { IMX415_REG_8BIT(0x3726), 0x02 },
-       { IMX415_REG_8BIT(0x3732), 0x02 },
-       { IMX415_REG_8BIT(0x3734), 0x03 },
-       { IMX415_REG_8BIT(0x3736), 0x03 },
-       { IMX415_REG_8BIT(0x3742), 0x03 },
-       { IMX415_REG_8BIT(0x3862), 0xE0 },
-       { IMX415_REG_8BIT(0x38CC), 0x30 },
-       { IMX415_REG_8BIT(0x38CD), 0x2F },
-       { IMX415_REG_8BIT(0x395C), 0x0C },
-       { IMX415_REG_8BIT(0x3A42), 0xD1 },
-       { IMX415_REG_8BIT(0x3A4C), 0x77 },
-       { IMX415_REG_8BIT(0x3AE0), 0x02 },
-       { IMX415_REG_8BIT(0x3AEC), 0x0C },
-       { IMX415_REG_8BIT(0x3B00), 0x2E },
-       { IMX415_REG_8BIT(0x3B06), 0x29 },
-       { IMX415_REG_8BIT(0x3B98), 0x25 },
-       { IMX415_REG_8BIT(0x3B99), 0x21 },
-       { IMX415_REG_8BIT(0x3B9B), 0x13 },
-       { IMX415_REG_8BIT(0x3B9C), 0x13 },
-       { IMX415_REG_8BIT(0x3B9D), 0x13 },
-       { IMX415_REG_8BIT(0x3B9E), 0x13 },
-       { IMX415_REG_8BIT(0x3BA1), 0x00 },
-       { IMX415_REG_8BIT(0x3BA2), 0x06 },
-       { IMX415_REG_8BIT(0x3BA3), 0x0B },
-       { IMX415_REG_8BIT(0x3BA4), 0x10 },
-       { IMX415_REG_8BIT(0x3BA5), 0x14 },
-       { IMX415_REG_8BIT(0x3BA6), 0x18 },
-       { IMX415_REG_8BIT(0x3BA7), 0x1A },
-       { IMX415_REG_8BIT(0x3BA8), 0x1A },
-       { IMX415_REG_8BIT(0x3BA9), 0x1A },
-       { IMX415_REG_8BIT(0x3BAC), 0xED },
-       { IMX415_REG_8BIT(0x3BAD), 0x01 },
-       { IMX415_REG_8BIT(0x3BAE), 0xF6 },
-       { IMX415_REG_8BIT(0x3BAF), 0x02 },
-       { IMX415_REG_8BIT(0x3BB0), 0xA2 },
-       { IMX415_REG_8BIT(0x3BB1), 0x03 },
-       { IMX415_REG_8BIT(0x3BB2), 0xE0 },
-       { IMX415_REG_8BIT(0x3BB3), 0x03 },
-       { IMX415_REG_8BIT(0x3BB4), 0xE0 },
-       { IMX415_REG_8BIT(0x3BB5), 0x03 },
-       { IMX415_REG_8BIT(0x3BB6), 0xE0 },
-       { IMX415_REG_8BIT(0x3BB7), 0x03 },
-       { IMX415_REG_8BIT(0x3BB8), 0xE0 },
-       { IMX415_REG_8BIT(0x3BBA), 0xE0 },
-       { IMX415_REG_8BIT(0x3BBC), 0xDA },
-       { IMX415_REG_8BIT(0x3BBE), 0x88 },
-       { IMX415_REG_8BIT(0x3BC0), 0x44 },
-       { IMX415_REG_8BIT(0x3BC2), 0x7B },
-       { IMX415_REG_8BIT(0x3BC4), 0xA2 },
-       { IMX415_REG_8BIT(0x3BC8), 0xBD },
-       { IMX415_REG_8BIT(0x3BCA), 0xBD },
+       { CCI_REG8(0x32D4), 0x21 },
+       { CCI_REG8(0x32EC), 0xA1 },
+       { CCI_REG8(0x3452), 0x7F },
+       { CCI_REG8(0x3453), 0x03 },
+       { CCI_REG8(0x358A), 0x04 },
+       { CCI_REG8(0x35A1), 0x02 },
+       { CCI_REG8(0x36BC), 0x0C },
+       { CCI_REG8(0x36CC), 0x53 },
+       { CCI_REG8(0x36CD), 0x00 },
+       { CCI_REG8(0x36CE), 0x3C },
+       { CCI_REG8(0x36D0), 0x8C },
+       { CCI_REG8(0x36D1), 0x00 },
+       { CCI_REG8(0x36D2), 0x71 },
+       { CCI_REG8(0x36D4), 0x3C },
+       { CCI_REG8(0x36D6), 0x53 },
+       { CCI_REG8(0x36D7), 0x00 },
+       { CCI_REG8(0x36D8), 0x71 },
+       { CCI_REG8(0x36DA), 0x8C },
+       { CCI_REG8(0x36DB), 0x00 },
+       { CCI_REG8(0x3724), 0x02 },
+       { CCI_REG8(0x3726), 0x02 },
+       { CCI_REG8(0x3732), 0x02 },
+       { CCI_REG8(0x3734), 0x03 },
+       { CCI_REG8(0x3736), 0x03 },
+       { CCI_REG8(0x3742), 0x03 },
+       { CCI_REG8(0x3862), 0xE0 },
+       { CCI_REG8(0x38CC), 0x30 },
+       { CCI_REG8(0x38CD), 0x2F },
+       { CCI_REG8(0x395C), 0x0C },
+       { CCI_REG8(0x3A42), 0xD1 },
+       { CCI_REG8(0x3A4C), 0x77 },
+       { CCI_REG8(0x3AE0), 0x02 },
+       { CCI_REG8(0x3AEC), 0x0C },
+       { CCI_REG8(0x3B00), 0x2E },
+       { CCI_REG8(0x3B06), 0x29 },
+       { CCI_REG8(0x3B98), 0x25 },
+       { CCI_REG8(0x3B99), 0x21 },
+       { CCI_REG8(0x3B9B), 0x13 },
+       { CCI_REG8(0x3B9C), 0x13 },
+       { CCI_REG8(0x3B9D), 0x13 },
+       { CCI_REG8(0x3B9E), 0x13 },
+       { CCI_REG8(0x3BA1), 0x00 },
+       { CCI_REG8(0x3BA2), 0x06 },
+       { CCI_REG8(0x3BA3), 0x0B },
+       { CCI_REG8(0x3BA4), 0x10 },
+       { CCI_REG8(0x3BA5), 0x14 },
+       { CCI_REG8(0x3BA6), 0x18 },
+       { CCI_REG8(0x3BA7), 0x1A },
+       { CCI_REG8(0x3BA8), 0x1A },
+       { CCI_REG8(0x3BA9), 0x1A },
+       { CCI_REG8(0x3BAC), 0xED },
+       { CCI_REG8(0x3BAD), 0x01 },
+       { CCI_REG8(0x3BAE), 0xF6 },
+       { CCI_REG8(0x3BAF), 0x02 },
+       { CCI_REG8(0x3BB0), 0xA2 },
+       { CCI_REG8(0x3BB1), 0x03 },
+       { CCI_REG8(0x3BB2), 0xE0 },
+       { CCI_REG8(0x3BB3), 0x03 },
+       { CCI_REG8(0x3BB4), 0xE0 },
+       { CCI_REG8(0x3BB5), 0x03 },
+       { CCI_REG8(0x3BB6), 0xE0 },
+       { CCI_REG8(0x3BB7), 0x03 },
+       { CCI_REG8(0x3BB8), 0xE0 },
+       { CCI_REG8(0x3BBA), 0xE0 },
+       { CCI_REG8(0x3BBC), 0xDA },
+       { CCI_REG8(0x3BBE), 0x88 },
+       { CCI_REG8(0x3BC0), 0x44 },
+       { CCI_REG8(0x3BC2), 0x7B },
+       { CCI_REG8(0x3BC4), 0xA2 },
+       { CCI_REG8(0x3BC8), 0xBD },
+       { CCI_REG8(0x3BCA), 0xBD },
 };
 
 static inline struct imx415 *to_imx415(struct v4l2_subdev *sd)
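
A note on the table conversion above: the CCI_REG8()/CCI_REG16()/... macros
from <media/v4l2-cci.h> encode the transfer width into the register address
itself, so cci_write() and cci_read() need no separate size argument. A sketch
with illustrative addresses and values:

        int ret = 0;

        /* the width is part of the address constant, not a parameter */
        cci_write(sensor->regmap, CCI_REG8(0x32D4), 0x21, &ret);    /* 1 byte */
        cci_write(sensor->regmap, CCI_REG16(0x3030), 0x1234, &ret); /* 2 bytes */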
@@ -460,74 +700,26 @@ static inline struct imx415 *to_imx415(struct v4l2_subdev *sd)
        return container_of(sd, struct imx415, subdev);
 }
 
-static int imx415_read(struct imx415 *sensor, u32 addr)
-{
-       u8 data[3] = { 0 };
-       int ret;
-
-       ret = regmap_raw_read(sensor->regmap, addr & IMX415_REG_ADDR_MASK, data,
-                             (addr >> IMX415_REG_SIZE_SHIFT) & 3);
-       if (ret < 0)
-               return ret;
-
-       return (data[2] << 16) | (data[1] << 8) | data[0];
-}
-
-static int imx415_write(struct imx415 *sensor, u32 addr, u32 value)
-{
-       u8 data[3] = { value & 0xff, (value >> 8) & 0xff, value >> 16 };
-       int ret;
-
-       ret = regmap_raw_write(sensor->regmap, addr & IMX415_REG_ADDR_MASK,
-                              data, (addr >> IMX415_REG_SIZE_SHIFT) & 3);
-       if (ret < 0)
-               dev_err_ratelimited(sensor->dev,
-                                   "%u-bit write to 0x%04x failed: %d\n",
-                                   ((addr >> IMX415_REG_SIZE_SHIFT) & 3) * 8,
-                                   addr & IMX415_REG_ADDR_MASK, ret);
-
-       return 0;
-}
-
 static int imx415_set_testpattern(struct imx415 *sensor, int val)
 {
-       int ret;
+       int ret = 0;
 
        if (val) {
-               ret = imx415_write(sensor, IMX415_BLKLEVEL, 0x00);
-               if (ret)
-                       return ret;
-               ret = imx415_write(sensor, IMX415_TPG_EN_DUOUT, 0x01);
-               if (ret)
-                       return ret;
-               ret = imx415_write(sensor, IMX415_TPG_PATSEL_DUOUT, val - 1);
-               if (ret)
-                       return ret;
-               ret = imx415_write(sensor, IMX415_TPG_COLORWIDTH, 0x01);
-               if (ret)
-                       return ret;
-               ret = imx415_write(sensor, IMX415_TESTCLKEN_MIPI, 0x20);
-               if (ret)
-                       return ret;
-               ret = imx415_write(sensor, IMX415_DIG_CLP_MODE, 0x00);
-               if (ret)
-                       return ret;
-               ret = imx415_write(sensor, IMX415_WRJ_OPEN, 0x00);
+               cci_write(sensor->regmap, IMX415_BLKLEVEL, 0x00, &ret);
+               cci_write(sensor->regmap, IMX415_TPG_EN_DUOUT, 0x01, &ret);
+               cci_write(sensor->regmap, IMX415_TPG_PATSEL_DUOUT,
+                         val - 1, &ret);
+               cci_write(sensor->regmap, IMX415_TPG_COLORWIDTH, 0x01, &ret);
+               cci_write(sensor->regmap, IMX415_TESTCLKEN_MIPI, 0x20, &ret);
+               cci_write(sensor->regmap, IMX415_DIG_CLP_MODE, 0x00, &ret);
+               cci_write(sensor->regmap, IMX415_WRJ_OPEN, 0x00, &ret);
        } else {
-               ret = imx415_write(sensor, IMX415_BLKLEVEL,
-                                  IMX415_BLKLEVEL_DEFAULT);
-               if (ret)
-                       return ret;
-               ret = imx415_write(sensor, IMX415_TPG_EN_DUOUT, 0x00);
-               if (ret)
-                       return ret;
-               ret = imx415_write(sensor, IMX415_TESTCLKEN_MIPI, 0x00);
-               if (ret)
-                       return ret;
-               ret = imx415_write(sensor, IMX415_DIG_CLP_MODE, 0x01);
-               if (ret)
-                       return ret;
-               ret = imx415_write(sensor, IMX415_WRJ_OPEN, 0x01);
+               cci_write(sensor->regmap, IMX415_BLKLEVEL,
+                         IMX415_BLKLEVEL_DEFAULT, &ret);
+               cci_write(sensor->regmap, IMX415_TPG_EN_DUOUT, 0x00, &ret);
+               cci_write(sensor->regmap, IMX415_TESTCLKEN_MIPI, 0x00, &ret);
+               cci_write(sensor->regmap, IMX415_DIG_CLP_MODE, 0x01, &ret);
+               cci_write(sensor->regmap, IMX415_WRJ_OPEN, 0x01, &ret);
        }
        return ret;
 }
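
cci_write() takes an optional int *err as its last argument: if *err is
already non-zero the access is skipped, otherwise a failure is stored in it.
A chain of writes therefore needs only a single check at the end, which is why
the accumulated ret is returned above. A minimal sketch, with illustrative
register addresses:

        int ret = 0;

        cci_write(map, CCI_REG8(0x0100), 0x01, &ret);
        cci_write(map, CCI_REG8(0x0101), 0x00, &ret); /* skipped if ret != 0 */

        return ret; /* first error encountered, or 0 */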
@@ -553,19 +745,21 @@ static int imx415_s_ctrl(struct v4l2_ctrl *ctrl)
                /* clamp the exposure value to VMAX. */
                vmax = format->height + sensor->vblank->cur.val;
                ctrl->val = min_t(int, ctrl->val, vmax);
-               ret = imx415_write(sensor, IMX415_SHR0, vmax - ctrl->val);
+               ret = cci_write(sensor->regmap, IMX415_SHR0,
+                               vmax - ctrl->val, NULL);
                break;
 
        case V4L2_CID_ANALOGUE_GAIN:
                /* analogue gain in 0.3 dB step size */
-               ret = imx415_write(sensor, IMX415_GAIN_PCG_0, ctrl->val);
+               ret = cci_write(sensor->regmap, IMX415_GAIN_PCG_0,
+                               ctrl->val, NULL);
                break;
 
        case V4L2_CID_HFLIP:
        case V4L2_CID_VFLIP:
                flip = (sensor->hflip->val << IMX415_HREVERSE_SHIFT) |
                       (sensor->vflip->val << IMX415_VREVERSE_SHIFT);
-               ret = imx415_write(sensor, IMX415_REVERSE, flip);
+               ret = cci_write(sensor->regmap, IMX415_REVERSE, flip, NULL);
                break;
 
        case V4L2_CID_TEST_PATTERN:
@@ -679,8 +873,6 @@ static int imx415_ctrls_init(struct imx415 *sensor)
 
 static int imx415_set_mode(struct imx415 *sensor, int mode)
 {
-       const struct imx415_reg *reg;
-       unsigned int i;
        int ret = 0;
 
        if (mode >= ARRAY_SIZE(supported_modes)) {
@@ -688,34 +880,29 @@ static int imx415_set_mode(struct imx415 *sensor, int mode)
                return -EINVAL;
        }
 
-       for (i = 0; i < supported_modes[mode].reg_list.num_of_regs; ++i) {
-               reg = &supported_modes[mode].reg_list.regs[i];
-               ret = imx415_write(sensor, reg->address, reg->val);
-               if (ret)
-                       return ret;
-       }
+       cci_multi_reg_write(sensor->regmap,
+                           supported_modes[mode].reg_list.regs,
+                           supported_modes[mode].reg_list.num_of_regs,
+                           &ret);
 
-       for (i = 0; i < IMX415_NUM_CLK_PARAM_REGS; ++i) {
-               reg = &sensor->clk_params->regs[i];
-               ret = imx415_write(sensor, reg->address, reg->val);
-               if (ret)
-                       return ret;
-       }
+       cci_multi_reg_write(sensor->regmap,
+                           sensor->clk_params->regs,
+                           IMX415_NUM_CLK_PARAM_REGS,
+                           &ret);
 
        return ret;
 }
 
 static int imx415_setup(struct imx415 *sensor, struct v4l2_subdev_state *state)
 {
-       unsigned int i;
        int ret;
 
-       for (i = 0; i < ARRAY_SIZE(imx415_init_table); ++i) {
-               ret = imx415_write(sensor, imx415_init_table[i].address,
-                                  imx415_init_table[i].val);
-               if (ret)
-                       return ret;
-       }
+       ret = cci_multi_reg_write(sensor->regmap,
+                                 imx415_init_table,
+                                 ARRAY_SIZE(imx415_init_table),
+                                 NULL);
+       if (ret)
+               return ret;
 
        return imx415_set_mode(sensor, sensor->cur_mode);
 }
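
cci_multi_reg_write() applies the same error-pointer convention to a whole
table of { register, value } pairs, stopping at the first failed access. A
short sketch with an illustrative sequence:

        static const struct cci_reg_sequence seq[] = {
                { CCI_REG8(0x3000), 0x01 },
                { CCI_REG8(0x3001), 0x00 },
        };
        int ret = 0;

        cci_multi_reg_write(map, seq, ARRAY_SIZE(seq), &ret);
        return ret;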
@@ -724,7 +911,8 @@ static int imx415_wakeup(struct imx415 *sensor)
 {
        int ret;
 
-       ret = imx415_write(sensor, IMX415_MODE, IMX415_MODE_OPERATING);
+       ret = cci_write(sensor->regmap, IMX415_MODE,
+                       IMX415_MODE_OPERATING, NULL);
        if (ret)
                return ret;
 
@@ -743,21 +931,18 @@ static int imx415_stream_on(struct imx415 *sensor)
        int ret;
 
        ret = imx415_wakeup(sensor);
-       if (ret)
-               return ret;
-
-       return imx415_write(sensor, IMX415_XMSTA, IMX415_XMSTA_START);
+       return cci_write(sensor->regmap, IMX415_XMSTA,
+                        IMX415_XMSTA_START, &ret);
 }
 
 static int imx415_stream_off(struct imx415 *sensor)
 {
        int ret;
 
-       ret = imx415_write(sensor, IMX415_XMSTA, IMX415_XMSTA_STOP);
-       if (ret)
-               return ret;
-
-       return imx415_write(sensor, IMX415_MODE, IMX415_MODE_STANDBY);
+       ret = cci_write(sensor->regmap, IMX415_XMSTA,
+                       IMX415_XMSTA_STOP, NULL);
+       return cci_write(sensor->regmap, IMX415_MODE,
+                        IMX415_MODE_STANDBY, &ret);
 }
 
 static int imx415_s_stream(struct v4l2_subdev *sd, int enable)
@@ -992,6 +1177,7 @@ static void imx415_power_off(struct imx415 *sensor)
 static int imx415_identify_model(struct imx415 *sensor)
 {
        int model, ret;
+       u64 chip_id;
 
        /*
         * While most registers can be read when the sensor is in standby, this
@@ -1002,14 +1188,14 @@ static int imx415_identify_model(struct imx415 *sensor)
                return dev_err_probe(sensor->dev, ret,
                                     "failed to get sensor out of standby\n");
 
-       ret = imx415_read(sensor, IMX415_SENSOR_INFO);
+       ret = cci_read(sensor->regmap, IMX415_SENSOR_INFO, &chip_id, NULL);
        if (ret < 0) {
                dev_err_probe(sensor->dev, ret,
                              "failed to read sensor information\n");
                goto done;
        }
 
-       model = ret & IMX415_SENSOR_INFO_MASK;
+       model = chip_id & IMX415_SENSOR_INFO_MASK;
 
        switch (model) {
        case IMX415_CHIP_ID:
@@ -1024,7 +1210,7 @@ static int imx415_identify_model(struct imx415 *sensor)
        ret = 0;
 
 done:
-       imx415_write(sensor, IMX415_MODE, IMX415_MODE_STANDBY);
+       cci_write(sensor->regmap, IMX415_MODE, IMX415_MODE_STANDBY, &ret);
        return ret;
 }
 
@@ -1173,7 +1359,7 @@ static int imx415_probe(struct i2c_client *client)
        if (ret)
                return ret;
 
-       sensor->regmap = devm_regmap_init_i2c(client, &imx415_regmap_config);
+       sensor->regmap = devm_cci_regmap_init_i2c(client, 16);
        if (IS_ERR(sensor->regmap))
                return PTR_ERR(sensor->regmap);
 
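The devm_cci_regmap_init_i2c() helper builds the regmap internally, which is
why the hand-rolled imx415_regmap_config is deleted earlier in this patch. The
second argument is the register address width in bits; values are 8-bit on the
wire, with wider logical registers handled by the CCI_REGx() width encoding.
The pattern, as a sketch:

        struct regmap *map = devm_cci_regmap_init_i2c(client, 16);

        if (IS_ERR(map))
                return PTR_ERR(map);
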
index 89e13ebbce0c2193cd568f1ef2556dbc3a519bdd..c7089035bbc10bf3cb281a27ede42f1029307d7e 100644 (file)
@@ -1337,7 +1337,7 @@ static const struct regmap_config isl7998x_regmap = {
        .rd_table       = &isl7998x_readable_table,
        .wr_table       = &isl7998x_writeable_table,
        .volatile_table = &isl7998x_volatile_table,
-       .cache_type     = REGCACHE_RBTREE,
+       .cache_type     = REGCACHE_MAPLE,
 };
 
 static int isl7998x_mc_init(struct isl7998x *isl7998x)
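
Several drivers in this pull switch their register cache from the red-black
tree to the maple tree, which is the preferred cache type for new regmap
users; the conversion is a single field in the config. Illustratively:

        static const struct regmap_config demo_config = {
                .reg_bits   = 8,
                .val_bits   = 8,
                .cache_type = REGCACHE_MAPLE, /* was REGCACHE_RBTREE */
        };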
index 70c2a2948fd4ea2ef2e5be0b0fa1d0c24b8d2986..cd73d2096ae451648991a4b52ae7edbb06be7fac 100644 (file)
@@ -257,7 +257,7 @@ static const struct regmap_config max2175_regmap_config = {
        .reg_defaults = max2175_reg_defaults,
        .num_reg_defaults = ARRAY_SIZE(max2175_reg_defaults),
        .volatile_table = &max2175_volatile_regs,
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 
 struct max2175 {
index 0ed8561edfee6bfaa3f3d9b824ddd741edb794f7..599a5bc7cbb35250e74034eec4386014f4c6a9d9 100644 (file)
@@ -309,23 +309,15 @@ static void msp_wake_thread(struct i2c_client *client)
        wake_up_interruptible(&state->wq);
 }
 
-int msp_sleep(struct msp_state *state, int timeout)
+int msp_sleep(struct msp_state *state, int msec)
 {
-       DECLARE_WAITQUEUE(wait, current);
-
-       add_wait_queue(&state->wq, &wait);
-       if (!kthread_should_stop()) {
-               if (timeout < 0) {
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       schedule();
-               } else {
-                       schedule_timeout_interruptible
-                                               (msecs_to_jiffies(timeout));
-               }
-       }
+       long timeout;
+
+       timeout = msec < 0 ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(msec);
+
+       wait_event_freezable_timeout(state->wq, kthread_should_stop() ||
+                                    state->restart, timeout);
 
-       remove_wait_queue(&state->wq, &wait);
-       try_to_freeze();
        return state->restart;
 }
 
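The rewrite above replaces an open-coded waitqueue plus try_to_freeze() with
wait_event_freezable_timeout(), which sleeps until the condition becomes true,
a signal arrives, or the timeout expires, keeping the task freezable
throughout; MAX_SCHEDULE_TIMEOUT expresses an unbounded wait. The general
shape, as a sketch with an assumed waitqueue and condition:

        long remaining;

        /* wake early when cond becomes true, give up after 100 ms */
        remaining = wait_event_freezable_timeout(wq, cond,
                                                 msecs_to_jiffies(100));
        if (!remaining)
                ; /* timed out with cond still false */
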
index 2bb9d5ff1bbde5936ccf9a277c39aa20ae0c01e6..7d391714ea52fee02f5a88322322183e9f09de28 100644 (file)
@@ -134,7 +134,7 @@ int msp_read_dsp(struct i2c_client *client, int addr);
 int msp_reset(struct i2c_client *client);
 void msp_set_scart(struct i2c_client *client, int in, int out);
 void msp_update_volume(struct msp_state *state);
-int msp_sleep(struct msp_state *state, int timeout);
+int msp_sleep(struct msp_state *state, int msec);
 
 /* msp3400-kthreads.c */
 const char *msp_standard_std_name(int std);
index 596200d0248cf0b15517fa46f7c12abb7a8eac97..f4b48121235633482332983c5dd8b6c482696d98 100644 (file)
@@ -1078,7 +1078,7 @@ mt9p031_get_pdata(struct i2c_client *client)
        if (!IS_ENABLED(CONFIG_OF) || !client->dev.of_node)
                return client->dev.platform_data;
 
-       np = of_graph_get_next_endpoint(client->dev.of_node, NULL);
+       np = of_graph_get_endpoint_by_regs(client->dev.of_node, 0, -1);
        if (!np)
                return NULL;
 
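Unlike the of_graph_get_next_endpoint(..., NULL) idiom it replaces, which
simply returned the first endpoint found, of_graph_get_endpoint_by_regs()
looks an endpoint up by its port and endpoint reg values, with -1 acting as a
wildcard; (0, -1) therefore means "any endpoint under port@0". A sketch of the
usual calling pattern:

        struct device_node *ep;

        ep = of_graph_get_endpoint_by_regs(dev->of_node, 0, -1);
        if (!ep)
                return -ENODEV;
        /* ... parse the endpoint properties here ... */
        of_node_put(ep); /* the lookup returns a refcounted node */
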
index 3ca76eeae7ffd5034099942c0354ce65b7ad20f9..302120ff125e0b42d9713b564c3ffd7fb0ea4030 100644 (file)
@@ -988,7 +988,7 @@ static const struct regmap_config mt9v032_regmap_config = {
        .reg_bits = 8,
        .val_bits = 16,
        .max_register = 0xff,
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 };
 
 /* -----------------------------------------------------------------------------
@@ -1006,7 +1006,7 @@ mt9v032_get_pdata(struct i2c_client *client)
        if (!IS_ENABLED(CONFIG_OF) || !client->dev.of_node)
                return client->dev.platform_data;
 
-       np = of_graph_get_next_endpoint(client->dev.of_node, NULL);
+       np = of_graph_get_endpoint_by_regs(client->dev.of_node, 0, -1);
        if (!np)
                return NULL;
 
index abbb0b774d43f5710f3d0d6a2224a1b3e3ab6aa3..48df077522ad0bb2b5f64a6def8844c02af6a193 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2022 Intel Corporation.
 
+#include <asm-generic/unaligned.h>
 #include <linux/acpi.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
@@ -34,7 +35,7 @@
 
 /* V_TIMING internal */
 #define OV08X40_REG_VTS                        0x380e
-#define OV08X40_VTS_30FPS              0x1388
+#define OV08X40_VTS_30FPS              0x09c4  /* the VTS needs to be halved in normal mode */
 #define OV08X40_VTS_BIN_30FPS          0x115c
 #define OV08X40_VTS_MAX                        0x7fff
 
@@ -44,8 +45,9 @@
 
 /* Exposure control */
 #define OV08X40_REG_EXPOSURE           0x3500
-#define OV08X40_EXPOSURE_MAX_MARGIN 31
-#define OV08X40_EXPOSURE_MIN           1
+#define OV08X40_EXPOSURE_MAX_MARGIN    8
+#define OV08X40_EXPOSURE_BIN_MAX_MARGIN        2
+#define OV08X40_EXPOSURE_MIN           4
 #define OV08X40_EXPOSURE_STEP          1
 #define OV08X40_EXPOSURE_DEFAULT       0x40
 
 /* Vertical Window Offset */
 #define OV08X40_REG_V_WIN_OFFSET       0x3813
 
+/* Burst Register */
+#define OV08X40_REG_XTALK_FIRST_A      0x5a80
+#define OV08X40_REG_XTALK_LAST_A       0x5b9f
+#define OV08X40_REG_XTALK_FIRST_B      0x5bc0
+#define OV08X40_REG_XTALK_LAST_B       0x5f1f
+
 enum {
        OV08X40_LINK_FREQ_400MHZ_INDEX,
 };
@@ -126,13 +134,17 @@ struct ov08x40_mode {
        u32 vts_def;
        u32 vts_min;
 
-       /* HTS */
-       u32 hts;
+       /* Line Length Pixels */
+       u32 llp;
 
        /* Index of Link frequency config to be used */
        u32 link_freq_index;
        /* Default register values */
        struct ov08x40_reg_list reg_list;
+
+       /* Exposure calculation */
+       u16 exposure_margin;
+       u16 exposure_shift;
 };
 
 static const struct ov08x40_reg mipi_data_rate_800mbps[] = {
@@ -665,1158 +677,6 @@ static const struct ov08x40_reg mode_3856x2416_regs[] = {
        {0x3502, 0x10},
        {0x3508, 0x0f},
        {0x3509, 0x80},
-       {0x5a80, 0x75},
-       {0x5a81, 0x75},
-       {0x5a82, 0x75},
-       {0x5a83, 0x75},
-       {0x5a84, 0x75},
-       {0x5a85, 0x75},
-       {0x5a86, 0x75},
-       {0x5a87, 0x75},
-       {0x5a88, 0x75},
-       {0x5a89, 0x75},
-       {0x5a8a, 0x75},
-       {0x5a8b, 0x75},
-       {0x5a8c, 0x75},
-       {0x5a8d, 0x75},
-       {0x5a8e, 0x75},
-       {0x5a8f, 0x75},
-       {0x5a90, 0x75},
-       {0x5a91, 0x75},
-       {0x5a92, 0x75},
-       {0x5a93, 0x75},
-       {0x5a94, 0x75},
-       {0x5a95, 0x75},
-       {0x5a96, 0x75},
-       {0x5a97, 0x75},
-       {0x5a98, 0x75},
-       {0x5a99, 0x75},
-       {0x5a9a, 0x75},
-       {0x5a9b, 0x75},
-       {0x5a9c, 0x75},
-       {0x5a9d, 0x75},
-       {0x5a9e, 0x75},
-       {0x5a9f, 0x75},
-       {0x5aa0, 0x75},
-       {0x5aa1, 0x75},
-       {0x5aa2, 0x75},
-       {0x5aa3, 0x75},
-       {0x5aa4, 0x75},
-       {0x5aa5, 0x75},
-       {0x5aa6, 0x75},
-       {0x5aa7, 0x75},
-       {0x5aa8, 0x75},
-       {0x5aa9, 0x75},
-       {0x5aaa, 0x75},
-       {0x5aab, 0x75},
-       {0x5aac, 0x75},
-       {0x5aad, 0x75},
-       {0x5aae, 0x75},
-       {0x5aaf, 0x75},
-       {0x5ab0, 0x75},
-       {0x5ab1, 0x75},
-       {0x5ab2, 0x75},
-       {0x5ab3, 0x75},
-       {0x5ab4, 0x75},
-       {0x5ab5, 0x75},
-       {0x5ab6, 0x75},
-       {0x5ab7, 0x75},
-       {0x5ab8, 0x75},
-       {0x5ab9, 0x75},
-       {0x5aba, 0x75},
-       {0x5abb, 0x75},
-       {0x5abc, 0x75},
-       {0x5abd, 0x75},
-       {0x5abe, 0x75},
-       {0x5abf, 0x75},
-       {0x5ac0, 0x75},
-       {0x5ac1, 0x75},
-       {0x5ac2, 0x75},
-       {0x5ac3, 0x75},
-       {0x5ac4, 0x75},
-       {0x5ac5, 0x75},
-       {0x5ac6, 0x75},
-       {0x5ac7, 0x75},
-       {0x5ac8, 0x75},
-       {0x5ac9, 0x75},
-       {0x5aca, 0x75},
-       {0x5acb, 0x75},
-       {0x5acc, 0x75},
-       {0x5acd, 0x75},
-       {0x5ace, 0x75},
-       {0x5acf, 0x75},
-       {0x5ad0, 0x75},
-       {0x5ad1, 0x75},
-       {0x5ad2, 0x75},
-       {0x5ad3, 0x75},
-       {0x5ad4, 0x75},
-       {0x5ad5, 0x75},
-       {0x5ad6, 0x75},
-       {0x5ad7, 0x75},
-       {0x5ad8, 0x75},
-       {0x5ad9, 0x75},
-       {0x5ada, 0x75},
-       {0x5adb, 0x75},
-       {0x5adc, 0x75},
-       {0x5add, 0x75},
-       {0x5ade, 0x75},
-       {0x5adf, 0x75},
-       {0x5ae0, 0x75},
-       {0x5ae1, 0x75},
-       {0x5ae2, 0x75},
-       {0x5ae3, 0x75},
-       {0x5ae4, 0x75},
-       {0x5ae5, 0x75},
-       {0x5ae6, 0x75},
-       {0x5ae7, 0x75},
-       {0x5ae8, 0x75},
-       {0x5ae9, 0x75},
-       {0x5aea, 0x75},
-       {0x5aeb, 0x75},
-       {0x5aec, 0x75},
-       {0x5aed, 0x75},
-       {0x5aee, 0x75},
-       {0x5aef, 0x75},
-       {0x5af0, 0x75},
-       {0x5af1, 0x75},
-       {0x5af2, 0x75},
-       {0x5af3, 0x75},
-       {0x5af4, 0x75},
-       {0x5af5, 0x75},
-       {0x5af6, 0x75},
-       {0x5af7, 0x75},
-       {0x5af8, 0x75},
-       {0x5af9, 0x75},
-       {0x5afa, 0x75},
-       {0x5afb, 0x75},
-       {0x5afc, 0x75},
-       {0x5afd, 0x75},
-       {0x5afe, 0x75},
-       {0x5aff, 0x75},
-       {0x5b00, 0x75},
-       {0x5b01, 0x75},
-       {0x5b02, 0x75},
-       {0x5b03, 0x75},
-       {0x5b04, 0x75},
-       {0x5b05, 0x75},
-       {0x5b06, 0x75},
-       {0x5b07, 0x75},
-       {0x5b08, 0x75},
-       {0x5b09, 0x75},
-       {0x5b0a, 0x75},
-       {0x5b0b, 0x75},
-       {0x5b0c, 0x75},
-       {0x5b0d, 0x75},
-       {0x5b0e, 0x75},
-       {0x5b0f, 0x75},
-       {0x5b10, 0x75},
-       {0x5b11, 0x75},
-       {0x5b12, 0x75},
-       {0x5b13, 0x75},
-       {0x5b14, 0x75},
-       {0x5b15, 0x75},
-       {0x5b16, 0x75},
-       {0x5b17, 0x75},
-       {0x5b18, 0x75},
-       {0x5b19, 0x75},
-       {0x5b1a, 0x75},
-       {0x5b1b, 0x75},
-       {0x5b1c, 0x75},
-       {0x5b1d, 0x75},
-       {0x5b1e, 0x75},
-       {0x5b1f, 0x75},
-       {0x5b20, 0x75},
-       {0x5b21, 0x75},
-       {0x5b22, 0x75},
-       {0x5b23, 0x75},
-       {0x5b24, 0x75},
-       {0x5b25, 0x75},
-       {0x5b26, 0x75},
-       {0x5b27, 0x75},
-       {0x5b28, 0x75},
-       {0x5b29, 0x75},
-       {0x5b2a, 0x75},
-       {0x5b2b, 0x75},
-       {0x5b2c, 0x75},
-       {0x5b2d, 0x75},
-       {0x5b2e, 0x75},
-       {0x5b2f, 0x75},
-       {0x5b30, 0x75},
-       {0x5b31, 0x75},
-       {0x5b32, 0x75},
-       {0x5b33, 0x75},
-       {0x5b34, 0x75},
-       {0x5b35, 0x75},
-       {0x5b36, 0x75},
-       {0x5b37, 0x75},
-       {0x5b38, 0x75},
-       {0x5b39, 0x75},
-       {0x5b3a, 0x75},
-       {0x5b3b, 0x75},
-       {0x5b3c, 0x75},
-       {0x5b3d, 0x75},
-       {0x5b3e, 0x75},
-       {0x5b3f, 0x75},
-       {0x5b40, 0x75},
-       {0x5b41, 0x75},
-       {0x5b42, 0x75},
-       {0x5b43, 0x75},
-       {0x5b44, 0x75},
-       {0x5b45, 0x75},
-       {0x5b46, 0x75},
-       {0x5b47, 0x75},
-       {0x5b48, 0x75},
-       {0x5b49, 0x75},
-       {0x5b4a, 0x75},
-       {0x5b4b, 0x75},
-       {0x5b4c, 0x75},
-       {0x5b4d, 0x75},
-       {0x5b4e, 0x75},
-       {0x5b4f, 0x75},
-       {0x5b50, 0x75},
-       {0x5b51, 0x75},
-       {0x5b52, 0x75},
-       {0x5b53, 0x75},
-       {0x5b54, 0x75},
-       {0x5b55, 0x75},
-       {0x5b56, 0x75},
-       {0x5b57, 0x75},
-       {0x5b58, 0x75},
-       {0x5b59, 0x75},
-       {0x5b5a, 0x75},
-       {0x5b5b, 0x75},
-       {0x5b5c, 0x75},
-       {0x5b5d, 0x75},
-       {0x5b5e, 0x75},
-       {0x5b5f, 0x75},
-       {0x5b60, 0x75},
-       {0x5b61, 0x75},
-       {0x5b62, 0x75},
-       {0x5b63, 0x75},
-       {0x5b64, 0x75},
-       {0x5b65, 0x75},
-       {0x5b66, 0x75},
-       {0x5b67, 0x75},
-       {0x5b68, 0x75},
-       {0x5b69, 0x75},
-       {0x5b6a, 0x75},
-       {0x5b6b, 0x75},
-       {0x5b6c, 0x75},
-       {0x5b6d, 0x75},
-       {0x5b6e, 0x75},
-       {0x5b6f, 0x75},
-       {0x5b70, 0x75},
-       {0x5b71, 0x75},
-       {0x5b72, 0x75},
-       {0x5b73, 0x75},
-       {0x5b74, 0x75},
-       {0x5b75, 0x75},
-       {0x5b76, 0x75},
-       {0x5b77, 0x75},
-       {0x5b78, 0x75},
-       {0x5b79, 0x75},
-       {0x5b7a, 0x75},
-       {0x5b7b, 0x75},
-       {0x5b7c, 0x75},
-       {0x5b7d, 0x75},
-       {0x5b7e, 0x75},
-       {0x5b7f, 0x75},
-       {0x5b80, 0x75},
-       {0x5b81, 0x75},
-       {0x5b82, 0x75},
-       {0x5b83, 0x75},
-       {0x5b84, 0x75},
-       {0x5b85, 0x75},
-       {0x5b86, 0x75},
-       {0x5b87, 0x75},
-       {0x5b88, 0x75},
-       {0x5b89, 0x75},
-       {0x5b8a, 0x75},
-       {0x5b8b, 0x75},
-       {0x5b8c, 0x75},
-       {0x5b8d, 0x75},
-       {0x5b8e, 0x75},
-       {0x5b8f, 0x75},
-       {0x5b90, 0x75},
-       {0x5b91, 0x75},
-       {0x5b92, 0x75},
-       {0x5b93, 0x75},
-       {0x5b94, 0x75},
-       {0x5b95, 0x75},
-       {0x5b96, 0x75},
-       {0x5b97, 0x75},
-       {0x5b98, 0x75},
-       {0x5b99, 0x75},
-       {0x5b9a, 0x75},
-       {0x5b9b, 0x75},
-       {0x5b9c, 0x75},
-       {0x5b9d, 0x75},
-       {0x5b9e, 0x75},
-       {0x5b9f, 0x75},
-       {0x5bc0, 0x75},
-       {0x5bc1, 0x75},
-       {0x5bc2, 0x75},
-       {0x5bc3, 0x75},
-       {0x5bc4, 0x75},
-       {0x5bc5, 0x75},
-       {0x5bc6, 0x75},
-       {0x5bc7, 0x75},
-       {0x5bc8, 0x75},
-       {0x5bc9, 0x75},
-       {0x5bca, 0x75},
-       {0x5bcb, 0x75},
-       {0x5bcc, 0x75},
-       {0x5bcd, 0x75},
-       {0x5bce, 0x75},
-       {0x5bcf, 0x75},
-       {0x5bd0, 0x75},
-       {0x5bd1, 0x75},
-       {0x5bd2, 0x75},
-       {0x5bd3, 0x75},
-       {0x5bd4, 0x75},
-       {0x5bd5, 0x75},
-       {0x5bd6, 0x75},
-       {0x5bd7, 0x75},
-       {0x5bd8, 0x75},
-       {0x5bd9, 0x75},
-       {0x5bda, 0x75},
-       {0x5bdb, 0x75},
-       {0x5bdc, 0x75},
-       {0x5bdd, 0x75},
-       {0x5bde, 0x75},
-       {0x5bdf, 0x75},
-       {0x5be0, 0x75},
-       {0x5be1, 0x75},
-       {0x5be2, 0x75},
-       {0x5be3, 0x75},
-       {0x5be4, 0x75},
-       {0x5be5, 0x75},
-       {0x5be6, 0x75},
-       {0x5be7, 0x75},
-       {0x5be8, 0x75},
-       {0x5be9, 0x75},
-       {0x5bea, 0x75},
-       {0x5beb, 0x75},
-       {0x5bec, 0x75},
-       {0x5bed, 0x75},
-       {0x5bee, 0x75},
-       {0x5bef, 0x75},
-       {0x5bf0, 0x75},
-       {0x5bf1, 0x75},
-       {0x5bf2, 0x75},
-       {0x5bf3, 0x75},
-       {0x5bf4, 0x75},
-       {0x5bf5, 0x75},
-       {0x5bf6, 0x75},
-       {0x5bf7, 0x75},
-       {0x5bf8, 0x75},
-       {0x5bf9, 0x75},
-       {0x5bfa, 0x75},
-       {0x5bfb, 0x75},
-       {0x5bfc, 0x75},
-       {0x5bfd, 0x75},
-       {0x5bfe, 0x75},
-       {0x5bff, 0x75},
-       {0x5c00, 0x75},
-       {0x5c01, 0x75},
-       {0x5c02, 0x75},
-       {0x5c03, 0x75},
-       {0x5c04, 0x75},
-       {0x5c05, 0x75},
-       {0x5c06, 0x75},
-       {0x5c07, 0x75},
-       {0x5c08, 0x75},
-       {0x5c09, 0x75},
-       {0x5c0a, 0x75},
-       {0x5c0b, 0x75},
-       {0x5c0c, 0x75},
-       {0x5c0d, 0x75},
-       {0x5c0e, 0x75},
-       {0x5c0f, 0x75},
-       {0x5c10, 0x75},
-       {0x5c11, 0x75},
-       {0x5c12, 0x75},
-       {0x5c13, 0x75},
-       {0x5c14, 0x75},
-       {0x5c15, 0x75},
-       {0x5c16, 0x75},
-       {0x5c17, 0x75},
-       {0x5c18, 0x75},
-       {0x5c19, 0x75},
-       {0x5c1a, 0x75},
-       {0x5c1b, 0x75},
-       {0x5c1c, 0x75},
-       {0x5c1d, 0x75},
-       {0x5c1e, 0x75},
-       {0x5c1f, 0x75},
-       {0x5c20, 0x75},
-       {0x5c21, 0x75},
-       {0x5c22, 0x75},
-       {0x5c23, 0x75},
-       {0x5c24, 0x75},
-       {0x5c25, 0x75},
-       {0x5c26, 0x75},
-       {0x5c27, 0x75},
-       {0x5c28, 0x75},
-       {0x5c29, 0x75},
-       {0x5c2a, 0x75},
-       {0x5c2b, 0x75},
-       {0x5c2c, 0x75},
-       {0x5c2d, 0x75},
-       {0x5c2e, 0x75},
-       {0x5c2f, 0x75},
-       {0x5c30, 0x75},
-       {0x5c31, 0x75},
-       {0x5c32, 0x75},
-       {0x5c33, 0x75},
-       {0x5c34, 0x75},
-       {0x5c35, 0x75},
-       {0x5c36, 0x75},
-       {0x5c37, 0x75},
-       {0x5c38, 0x75},
-       {0x5c39, 0x75},
-       {0x5c3a, 0x75},
-       {0x5c3b, 0x75},
-       {0x5c3c, 0x75},
-       {0x5c3d, 0x75},
-       {0x5c3e, 0x75},
-       {0x5c3f, 0x75},
-       {0x5c40, 0x75},
-       {0x5c41, 0x75},
-       {0x5c42, 0x75},
-       {0x5c43, 0x75},
-       {0x5c44, 0x75},
-       {0x5c45, 0x75},
-       {0x5c46, 0x75},
-       {0x5c47, 0x75},
-       {0x5c48, 0x75},
-       {0x5c49, 0x75},
-       {0x5c4a, 0x75},
-       {0x5c4b, 0x75},
-       {0x5c4c, 0x75},
-       {0x5c4d, 0x75},
-       {0x5c4e, 0x75},
-       {0x5c4f, 0x75},
-       {0x5c50, 0x75},
-       {0x5c51, 0x75},
-       {0x5c52, 0x75},
-       {0x5c53, 0x75},
-       {0x5c54, 0x75},
-       {0x5c55, 0x75},
-       {0x5c56, 0x75},
-       {0x5c57, 0x75},
-       {0x5c58, 0x75},
-       {0x5c59, 0x75},
-       {0x5c5a, 0x75},
-       {0x5c5b, 0x75},
-       {0x5c5c, 0x75},
-       {0x5c5d, 0x75},
-       {0x5c5e, 0x75},
-       {0x5c5f, 0x75},
-       {0x5c60, 0x75},
-       {0x5c61, 0x75},
-       {0x5c62, 0x75},
-       {0x5c63, 0x75},
-       {0x5c64, 0x75},
-       {0x5c65, 0x75},
-       {0x5c66, 0x75},
-       {0x5c67, 0x75},
-       {0x5c68, 0x75},
-       {0x5c69, 0x75},
-       {0x5c6a, 0x75},
-       {0x5c6b, 0x75},
-       {0x5c6c, 0x75},
-       {0x5c6d, 0x75},
-       {0x5c6e, 0x75},
-       {0x5c6f, 0x75},
-       {0x5c70, 0x75},
-       {0x5c71, 0x75},
-       {0x5c72, 0x75},
-       {0x5c73, 0x75},
-       {0x5c74, 0x75},
-       {0x5c75, 0x75},
-       {0x5c76, 0x75},
-       {0x5c77, 0x75},
-       {0x5c78, 0x75},
-       {0x5c79, 0x75},
-       {0x5c7a, 0x75},
-       {0x5c7b, 0x75},
-       {0x5c7c, 0x75},
-       {0x5c7d, 0x75},
-       {0x5c7e, 0x75},
-       {0x5c7f, 0x75},
-       {0x5c80, 0x75},
-       {0x5c81, 0x75},
-       {0x5c82, 0x75},
-       {0x5c83, 0x75},
-       {0x5c84, 0x75},
-       {0x5c85, 0x75},
-       {0x5c86, 0x75},
-       {0x5c87, 0x75},
-       {0x5c88, 0x75},
-       {0x5c89, 0x75},
-       {0x5c8a, 0x75},
-       {0x5c8b, 0x75},
-       {0x5c8c, 0x75},
-       {0x5c8d, 0x75},
-       {0x5c8e, 0x75},
-       {0x5c8f, 0x75},
-       {0x5c90, 0x75},
-       {0x5c91, 0x75},
-       {0x5c92, 0x75},
-       {0x5c93, 0x75},
-       {0x5c94, 0x75},
-       {0x5c95, 0x75},
-       {0x5c96, 0x75},
-       {0x5c97, 0x75},
-       {0x5c98, 0x75},
-       {0x5c99, 0x75},
-       {0x5c9a, 0x75},
-       {0x5c9b, 0x75},
-       {0x5c9c, 0x75},
-       {0x5c9d, 0x75},
-       {0x5c9e, 0x75},
-       {0x5c9f, 0x75},
-       {0x5ca0, 0x75},
-       {0x5ca1, 0x75},
-       {0x5ca2, 0x75},
-       {0x5ca3, 0x75},
-       {0x5ca4, 0x75},
-       {0x5ca5, 0x75},
-       {0x5ca6, 0x75},
-       {0x5ca7, 0x75},
-       {0x5ca8, 0x75},
-       {0x5ca9, 0x75},
-       {0x5caa, 0x75},
-       {0x5cab, 0x75},
-       {0x5cac, 0x75},
-       {0x5cad, 0x75},
-       {0x5cae, 0x75},
-       {0x5caf, 0x75},
-       {0x5cb0, 0x75},
-       {0x5cb1, 0x75},
-       {0x5cb2, 0x75},
-       {0x5cb3, 0x75},
-       {0x5cb4, 0x75},
-       {0x5cb5, 0x75},
-       {0x5cb6, 0x75},
-       {0x5cb7, 0x75},
-       {0x5cb8, 0x75},
-       {0x5cb9, 0x75},
-       {0x5cba, 0x75},
-       {0x5cbb, 0x75},
-       {0x5cbc, 0x75},
-       {0x5cbd, 0x75},
-       {0x5cbe, 0x75},
-       {0x5cbf, 0x75},
-       {0x5cc0, 0x75},
-       {0x5cc1, 0x75},
-       {0x5cc2, 0x75},
-       {0x5cc3, 0x75},
-       {0x5cc4, 0x75},
-       {0x5cc5, 0x75},
-       {0x5cc6, 0x75},
-       {0x5cc7, 0x75},
-       {0x5cc8, 0x75},
-       {0x5cc9, 0x75},
-       {0x5cca, 0x75},
-       {0x5ccb, 0x75},
-       {0x5ccc, 0x75},
-       {0x5ccd, 0x75},
-       {0x5cce, 0x75},
-       {0x5ccf, 0x75},
-       {0x5cd0, 0x75},
-       {0x5cd1, 0x75},
-       {0x5cd2, 0x75},
-       {0x5cd3, 0x75},
-       {0x5cd4, 0x75},
-       {0x5cd5, 0x75},
-       {0x5cd6, 0x75},
-       {0x5cd7, 0x75},
-       {0x5cd8, 0x75},
-       {0x5cd9, 0x75},
-       {0x5cda, 0x75},
-       {0x5cdb, 0x75},
-       {0x5cdc, 0x75},
-       {0x5cdd, 0x75},
-       {0x5cde, 0x75},
-       {0x5cdf, 0x75},
-       {0x5ce0, 0x75},
-       {0x5ce1, 0x75},
-       {0x5ce2, 0x75},
-       {0x5ce3, 0x75},
-       {0x5ce4, 0x75},
-       {0x5ce5, 0x75},
-       {0x5ce6, 0x75},
-       {0x5ce7, 0x75},
-       {0x5ce8, 0x75},
-       {0x5ce9, 0x75},
-       {0x5cea, 0x75},
-       {0x5ceb, 0x75},
-       {0x5cec, 0x75},
-       {0x5ced, 0x75},
-       {0x5cee, 0x75},
-       {0x5cef, 0x75},
-       {0x5cf0, 0x75},
-       {0x5cf1, 0x75},
-       {0x5cf2, 0x75},
-       {0x5cf3, 0x75},
-       {0x5cf4, 0x75},
-       {0x5cf5, 0x75},
-       {0x5cf6, 0x75},
-       {0x5cf7, 0x75},
-       {0x5cf8, 0x75},
-       {0x5cf9, 0x75},
-       {0x5cfa, 0x75},
-       {0x5cfb, 0x75},
-       {0x5cfc, 0x75},
-       {0x5cfd, 0x75},
-       {0x5cfe, 0x75},
-       {0x5cff, 0x75},
-       {0x5d00, 0x75},
-       {0x5d01, 0x75},
-       {0x5d02, 0x75},
-       {0x5d03, 0x75},
-       {0x5d04, 0x75},
-       {0x5d05, 0x75},
-       {0x5d06, 0x75},
-       {0x5d07, 0x75},
-       {0x5d08, 0x75},
-       {0x5d09, 0x75},
-       {0x5d0a, 0x75},
-       {0x5d0b, 0x75},
-       {0x5d0c, 0x75},
-       {0x5d0d, 0x75},
-       {0x5d0e, 0x75},
-       {0x5d0f, 0x75},
-       {0x5d10, 0x75},
-       {0x5d11, 0x75},
-       {0x5d12, 0x75},
-       {0x5d13, 0x75},
-       {0x5d14, 0x75},
-       {0x5d15, 0x75},
-       {0x5d16, 0x75},
-       {0x5d17, 0x75},
-       {0x5d18, 0x75},
-       {0x5d19, 0x75},
-       {0x5d1a, 0x75},
-       {0x5d1b, 0x75},
-       {0x5d1c, 0x75},
-       {0x5d1d, 0x75},
-       {0x5d1e, 0x75},
-       {0x5d1f, 0x75},
-       {0x5d20, 0x75},
-       {0x5d21, 0x75},
-       {0x5d22, 0x75},
-       {0x5d23, 0x75},
-       {0x5d24, 0x75},
-       {0x5d25, 0x75},
-       {0x5d26, 0x75},
-       {0x5d27, 0x75},
-       {0x5d28, 0x75},
-       {0x5d29, 0x75},
-       {0x5d2a, 0x75},
-       {0x5d2b, 0x75},
-       {0x5d2c, 0x75},
-       {0x5d2d, 0x75},
-       {0x5d2e, 0x75},
-       {0x5d2f, 0x75},
-       {0x5d30, 0x75},
-       {0x5d31, 0x75},
-       {0x5d32, 0x75},
-       {0x5d33, 0x75},
-       {0x5d34, 0x75},
-       {0x5d35, 0x75},
-       {0x5d36, 0x75},
-       {0x5d37, 0x75},
-       {0x5d38, 0x75},
-       {0x5d39, 0x75},
-       {0x5d3a, 0x75},
-       {0x5d3b, 0x75},
-       {0x5d3c, 0x75},
-       {0x5d3d, 0x75},
-       {0x5d3e, 0x75},
-       {0x5d3f, 0x75},
-       {0x5d40, 0x75},
-       {0x5d41, 0x75},
-       {0x5d42, 0x75},
-       {0x5d43, 0x75},
-       {0x5d44, 0x75},
-       {0x5d45, 0x75},
-       {0x5d46, 0x75},
-       {0x5d47, 0x75},
-       {0x5d48, 0x75},
-       {0x5d49, 0x75},
-       {0x5d4a, 0x75},
-       {0x5d4b, 0x75},
-       {0x5d4c, 0x75},
-       {0x5d4d, 0x75},
-       {0x5d4e, 0x75},
-       {0x5d4f, 0x75},
-       {0x5d50, 0x75},
-       {0x5d51, 0x75},
-       {0x5d52, 0x75},
-       {0x5d53, 0x75},
-       {0x5d54, 0x75},
-       {0x5d55, 0x75},
-       {0x5d56, 0x75},
-       {0x5d57, 0x75},
-       {0x5d58, 0x75},
-       {0x5d59, 0x75},
-       {0x5d5a, 0x75},
-       {0x5d5b, 0x75},
-       {0x5d5c, 0x75},
-       {0x5d5d, 0x75},
-       {0x5d5e, 0x75},
-       {0x5d5f, 0x75},
-       {0x5d60, 0x75},
-       {0x5d61, 0x75},
-       {0x5d62, 0x75},
-       {0x5d63, 0x75},
-       {0x5d64, 0x75},
-       {0x5d65, 0x75},
-       {0x5d66, 0x75},
-       {0x5d67, 0x75},
-       {0x5d68, 0x75},
-       {0x5d69, 0x75},
-       {0x5d6a, 0x75},
-       {0x5d6b, 0x75},
-       {0x5d6c, 0x75},
-       {0x5d6d, 0x75},
-       {0x5d6e, 0x75},
-       {0x5d6f, 0x75},
-       {0x5d70, 0x75},
-       {0x5d71, 0x75},
-       {0x5d72, 0x75},
-       {0x5d73, 0x75},
-       {0x5d74, 0x75},
-       {0x5d75, 0x75},
-       {0x5d76, 0x75},
-       {0x5d77, 0x75},
-       {0x5d78, 0x75},
-       {0x5d79, 0x75},
-       {0x5d7a, 0x75},
-       {0x5d7b, 0x75},
-       {0x5d7c, 0x75},
-       {0x5d7d, 0x75},
-       {0x5d7e, 0x75},
-       {0x5d7f, 0x75},
-       {0x5d80, 0x75},
-       {0x5d81, 0x75},
-       {0x5d82, 0x75},
-       {0x5d83, 0x75},
-       {0x5d84, 0x75},
-       {0x5d85, 0x75},
-       {0x5d86, 0x75},
-       {0x5d87, 0x75},
-       {0x5d88, 0x75},
-       {0x5d89, 0x75},
-       {0x5d8a, 0x75},
-       {0x5d8b, 0x75},
-       {0x5d8c, 0x75},
-       {0x5d8d, 0x75},
-       {0x5d8e, 0x75},
-       {0x5d8f, 0x75},
-       {0x5d90, 0x75},
-       {0x5d91, 0x75},
-       {0x5d92, 0x75},
-       {0x5d93, 0x75},
-       {0x5d94, 0x75},
-       {0x5d95, 0x75},
-       {0x5d96, 0x75},
-       {0x5d97, 0x75},
-       {0x5d98, 0x75},
-       {0x5d99, 0x75},
-       {0x5d9a, 0x75},
-       {0x5d9b, 0x75},
-       {0x5d9c, 0x75},
-       {0x5d9d, 0x75},
-       {0x5d9e, 0x75},
-       {0x5d9f, 0x75},
-       {0x5da0, 0x75},
-       {0x5da1, 0x75},
-       {0x5da2, 0x75},
-       {0x5da3, 0x75},
-       {0x5da4, 0x75},
-       {0x5da5, 0x75},
-       {0x5da6, 0x75},
-       {0x5da7, 0x75},
-       {0x5da8, 0x75},
-       {0x5da9, 0x75},
-       {0x5daa, 0x75},
-       {0x5dab, 0x75},
-       {0x5dac, 0x75},
-       {0x5dad, 0x75},
-       {0x5dae, 0x75},
-       {0x5daf, 0x75},
-       {0x5db0, 0x75},
-       {0x5db1, 0x75},
-       {0x5db2, 0x75},
-       {0x5db3, 0x75},
-       {0x5db4, 0x75},
-       {0x5db5, 0x75},
-       {0x5db6, 0x75},
-       {0x5db7, 0x75},
-       {0x5db8, 0x75},
-       {0x5db9, 0x75},
-       {0x5dba, 0x75},
-       {0x5dbb, 0x75},
-       {0x5dbc, 0x75},
-       {0x5dbd, 0x75},
-       {0x5dbe, 0x75},
-       {0x5dbf, 0x75},
-       {0x5dc0, 0x75},
-       {0x5dc1, 0x75},
-       {0x5dc2, 0x75},
-       {0x5dc3, 0x75},
-       {0x5dc4, 0x75},
-       {0x5dc5, 0x75},
-       {0x5dc6, 0x75},
-       {0x5dc7, 0x75},
-       {0x5dc8, 0x75},
-       {0x5dc9, 0x75},
-       {0x5dca, 0x75},
-       {0x5dcb, 0x75},
-       {0x5dcc, 0x75},
-       {0x5dcd, 0x75},
-       {0x5dce, 0x75},
-       {0x5dcf, 0x75},
-       {0x5dd0, 0x75},
-       {0x5dd1, 0x75},
-       {0x5dd2, 0x75},
-       {0x5dd3, 0x75},
-       {0x5dd4, 0x75},
-       {0x5dd5, 0x75},
-       {0x5dd6, 0x75},
-       {0x5dd7, 0x75},
-       {0x5dd8, 0x75},
-       {0x5dd9, 0x75},
-       {0x5dda, 0x75},
-       {0x5ddb, 0x75},
-       {0x5ddc, 0x75},
-       {0x5ddd, 0x75},
-       {0x5dde, 0x75},
-       {0x5ddf, 0x75},
-       {0x5de0, 0x75},
-       {0x5de1, 0x75},
-       {0x5de2, 0x75},
-       {0x5de3, 0x75},
-       {0x5de4, 0x75},
-       {0x5de5, 0x75},
-       {0x5de6, 0x75},
-       {0x5de7, 0x75},
-       {0x5de8, 0x75},
-       {0x5de9, 0x75},
-       {0x5dea, 0x75},
-       {0x5deb, 0x75},
-       {0x5dec, 0x75},
-       {0x5ded, 0x75},
-       {0x5dee, 0x75},
-       {0x5def, 0x75},
-       {0x5df0, 0x75},
-       {0x5df1, 0x75},
-       {0x5df2, 0x75},
-       {0x5df3, 0x75},
-       {0x5df4, 0x75},
-       {0x5df5, 0x75},
-       {0x5df6, 0x75},
-       {0x5df7, 0x75},
-       {0x5df8, 0x75},
-       {0x5df9, 0x75},
-       {0x5dfa, 0x75},
-       {0x5dfb, 0x75},
-       {0x5dfc, 0x75},
-       {0x5dfd, 0x75},
-       {0x5dfe, 0x75},
-       {0x5dff, 0x75},
-       {0x5e00, 0x75},
-       {0x5e01, 0x75},
-       {0x5e02, 0x75},
-       {0x5e03, 0x75},
-       {0x5e04, 0x75},
-       {0x5e05, 0x75},
-       {0x5e06, 0x75},
-       {0x5e07, 0x75},
-       {0x5e08, 0x75},
-       {0x5e09, 0x75},
-       {0x5e0a, 0x75},
-       {0x5e0b, 0x75},
-       {0x5e0c, 0x75},
-       {0x5e0d, 0x75},
-       {0x5e0e, 0x75},
-       {0x5e0f, 0x75},
-       {0x5e10, 0x75},
-       {0x5e11, 0x75},
-       {0x5e12, 0x75},
-       {0x5e13, 0x75},
-       {0x5e14, 0x75},
-       {0x5e15, 0x75},
-       {0x5e16, 0x75},
-       {0x5e17, 0x75},
-       {0x5e18, 0x75},
-       {0x5e19, 0x75},
-       {0x5e1a, 0x75},
-       {0x5e1b, 0x75},
-       {0x5e1c, 0x75},
-       {0x5e1d, 0x75},
-       {0x5e1e, 0x75},
-       {0x5e1f, 0x75},
-       {0x5e20, 0x75},
-       {0x5e21, 0x75},
-       {0x5e22, 0x75},
-       {0x5e23, 0x75},
-       {0x5e24, 0x75},
-       {0x5e25, 0x75},
-       {0x5e26, 0x75},
-       {0x5e27, 0x75},
-       {0x5e28, 0x75},
-       {0x5e29, 0x75},
-       {0x5e2a, 0x75},
-       {0x5e2b, 0x75},
-       {0x5e2c, 0x75},
-       {0x5e2d, 0x75},
-       {0x5e2e, 0x75},
-       {0x5e2f, 0x75},
-       {0x5e30, 0x75},
-       {0x5e31, 0x75},
-       {0x5e32, 0x75},
-       {0x5e33, 0x75},
-       {0x5e34, 0x75},
-       {0x5e35, 0x75},
-       {0x5e36, 0x75},
-       {0x5e37, 0x75},
-       {0x5e38, 0x75},
-       {0x5e39, 0x75},
-       {0x5e3a, 0x75},
-       {0x5e3b, 0x75},
-       {0x5e3c, 0x75},
-       {0x5e3d, 0x75},
-       {0x5e3e, 0x75},
-       {0x5e3f, 0x75},
-       {0x5e40, 0x75},
-       {0x5e41, 0x75},
-       {0x5e42, 0x75},
-       {0x5e43, 0x75},
-       {0x5e44, 0x75},
-       {0x5e45, 0x75},
-       {0x5e46, 0x75},
-       {0x5e47, 0x75},
-       {0x5e48, 0x75},
-       {0x5e49, 0x75},
-       {0x5e4a, 0x75},
-       {0x5e4b, 0x75},
-       {0x5e4c, 0x75},
-       {0x5e4d, 0x75},
-       {0x5e4e, 0x75},
-       {0x5e4f, 0x75},
-       {0x5e50, 0x75},
-       {0x5e51, 0x75},
-       {0x5e52, 0x75},
-       {0x5e53, 0x75},
-       {0x5e54, 0x75},
-       {0x5e55, 0x75},
-       {0x5e56, 0x75},
-       {0x5e57, 0x75},
-       {0x5e58, 0x75},
-       {0x5e59, 0x75},
-       {0x5e5a, 0x75},
-       {0x5e5b, 0x75},
-       {0x5e5c, 0x75},
-       {0x5e5d, 0x75},
-       {0x5e5e, 0x75},
-       {0x5e5f, 0x75},
-       {0x5e60, 0x75},
-       {0x5e61, 0x75},
-       {0x5e62, 0x75},
-       {0x5e63, 0x75},
-       {0x5e64, 0x75},
-       {0x5e65, 0x75},
-       {0x5e66, 0x75},
-       {0x5e67, 0x75},
-       {0x5e68, 0x75},
-       {0x5e69, 0x75},
-       {0x5e6a, 0x75},
-       {0x5e6b, 0x75},
-       {0x5e6c, 0x75},
-       {0x5e6d, 0x75},
-       {0x5e6e, 0x75},
-       {0x5e6f, 0x75},
-       {0x5e70, 0x75},
-       {0x5e71, 0x75},
-       {0x5e72, 0x75},
-       {0x5e73, 0x75},
-       {0x5e74, 0x75},
-       {0x5e75, 0x75},
-       {0x5e76, 0x75},
-       {0x5e77, 0x75},
-       {0x5e78, 0x75},
-       {0x5e79, 0x75},
-       {0x5e7a, 0x75},
-       {0x5e7b, 0x75},
-       {0x5e7c, 0x75},
-       {0x5e7d, 0x75},
-       {0x5e7e, 0x75},
-       {0x5e7f, 0x75},
-       {0x5e80, 0x75},
-       {0x5e81, 0x75},
-       {0x5e82, 0x75},
-       {0x5e83, 0x75},
-       {0x5e84, 0x75},
-       {0x5e85, 0x75},
-       {0x5e86, 0x75},
-       {0x5e87, 0x75},
-       {0x5e88, 0x75},
-       {0x5e89, 0x75},
-       {0x5e8a, 0x75},
-       {0x5e8b, 0x75},
-       {0x5e8c, 0x75},
-       {0x5e8d, 0x75},
-       {0x5e8e, 0x75},
-       {0x5e8f, 0x75},
-       {0x5e90, 0x75},
-       {0x5e91, 0x75},
-       {0x5e92, 0x75},
-       {0x5e93, 0x75},
-       {0x5e94, 0x75},
-       {0x5e95, 0x75},
-       {0x5e96, 0x75},
-       {0x5e97, 0x75},
-       {0x5e98, 0x75},
-       {0x5e99, 0x75},
-       {0x5e9a, 0x75},
-       {0x5e9b, 0x75},
-       {0x5e9c, 0x75},
-       {0x5e9d, 0x75},
-       {0x5e9e, 0x75},
-       {0x5e9f, 0x75},
-       {0x5ea0, 0x75},
-       {0x5ea1, 0x75},
-       {0x5ea2, 0x75},
-       {0x5ea3, 0x75},
-       {0x5ea4, 0x75},
-       {0x5ea5, 0x75},
-       {0x5ea6, 0x75},
-       {0x5ea7, 0x75},
-       {0x5ea8, 0x75},
-       {0x5ea9, 0x75},
-       {0x5eaa, 0x75},
-       {0x5eab, 0x75},
-       {0x5eac, 0x75},
-       {0x5ead, 0x75},
-       {0x5eae, 0x75},
-       {0x5eaf, 0x75},
-       {0x5eb0, 0x75},
-       {0x5eb1, 0x75},
-       {0x5eb2, 0x75},
-       {0x5eb3, 0x75},
-       {0x5eb4, 0x75},
-       {0x5eb5, 0x75},
-       {0x5eb6, 0x75},
-       {0x5eb7, 0x75},
-       {0x5eb8, 0x75},
-       {0x5eb9, 0x75},
-       {0x5eba, 0x75},
-       {0x5ebb, 0x75},
-       {0x5ebc, 0x75},
-       {0x5ebd, 0x75},
-       {0x5ebe, 0x75},
-       {0x5ebf, 0x75},
-       {0x5ec0, 0x75},
-       {0x5ec1, 0x75},
-       {0x5ec2, 0x75},
-       {0x5ec3, 0x75},
-       {0x5ec4, 0x75},
-       {0x5ec5, 0x75},
-       {0x5ec6, 0x75},
-       {0x5ec7, 0x75},
-       {0x5ec8, 0x75},
-       {0x5ec9, 0x75},
-       {0x5eca, 0x75},
-       {0x5ecb, 0x75},
-       {0x5ecc, 0x75},
-       {0x5ecd, 0x75},
-       {0x5ece, 0x75},
-       {0x5ecf, 0x75},
-       {0x5ed0, 0x75},
-       {0x5ed1, 0x75},
-       {0x5ed2, 0x75},
-       {0x5ed3, 0x75},
-       {0x5ed4, 0x75},
-       {0x5ed5, 0x75},
-       {0x5ed6, 0x75},
-       {0x5ed7, 0x75},
-       {0x5ed8, 0x75},
-       {0x5ed9, 0x75},
-       {0x5eda, 0x75},
-       {0x5edb, 0x75},
-       {0x5edc, 0x75},
-       {0x5edd, 0x75},
-       {0x5ede, 0x75},
-       {0x5edf, 0x75},
-       {0x5ee0, 0x75},
-       {0x5ee1, 0x75},
-       {0x5ee2, 0x75},
-       {0x5ee3, 0x75},
-       {0x5ee4, 0x75},
-       {0x5ee5, 0x75},
-       {0x5ee6, 0x75},
-       {0x5ee7, 0x75},
-       {0x5ee8, 0x75},
-       {0x5ee9, 0x75},
-       {0x5eea, 0x75},
-       {0x5eeb, 0x75},
-       {0x5eec, 0x75},
-       {0x5eed, 0x75},
-       {0x5eee, 0x75},
-       {0x5eef, 0x75},
-       {0x5ef0, 0x75},
-       {0x5ef1, 0x75},
-       {0x5ef2, 0x75},
-       {0x5ef3, 0x75},
-       {0x5ef4, 0x75},
-       {0x5ef5, 0x75},
-       {0x5ef6, 0x75},
-       {0x5ef7, 0x75},
-       {0x5ef8, 0x75},
-       {0x5ef9, 0x75},
-       {0x5efa, 0x75},
-       {0x5efb, 0x75},
-       {0x5efc, 0x75},
-       {0x5efd, 0x75},
-       {0x5efe, 0x75},
-       {0x5eff, 0x75},
-       {0x5f00, 0x75},
-       {0x5f01, 0x75},
-       {0x5f02, 0x75},
-       {0x5f03, 0x75},
-       {0x5f04, 0x75},
-       {0x5f05, 0x75},
-       {0x5f06, 0x75},
-       {0x5f07, 0x75},
-       {0x5f08, 0x75},
-       {0x5f09, 0x75},
-       {0x5f0a, 0x75},
-       {0x5f0b, 0x75},
-       {0x5f0c, 0x75},
-       {0x5f0d, 0x75},
-       {0x5f0e, 0x75},
-       {0x5f0f, 0x75},
-       {0x5f10, 0x75},
-       {0x5f11, 0x75},
-       {0x5f12, 0x75},
-       {0x5f13, 0x75},
-       {0x5f14, 0x75},
-       {0x5f15, 0x75},
-       {0x5f16, 0x75},
-       {0x5f17, 0x75},
-       {0x5f18, 0x75},
-       {0x5f19, 0x75},
-       {0x5f1a, 0x75},
-       {0x5f1b, 0x75},
-       {0x5f1c, 0x75},
-       {0x5f1d, 0x75},
-       {0x5f1e, 0x75},
-       {0x5f1f, 0x75},
 };
 
 static const struct ov08x40_reg mode_1928x1208_regs[] = {
@@ -2354,7 +1214,7 @@ static const char * const ov08x40_test_pattern_menu[] = {
 
 /* Configurations for supported link frequencies */
 #define OV08X40_LINK_FREQ_400MHZ       400000000ULL
-
+#define OV08X40_SCLK_96MHZ             96000000ULL
 #define OV08X40_EXT_CLK                        19200000
 #define OV08X40_DATA_LANES             4
 
@@ -2392,26 +1252,30 @@ static const struct ov08x40_mode supported_modes[] = {
                .height = 2416,
                .vts_def = OV08X40_VTS_30FPS,
                .vts_min = OV08X40_VTS_30FPS,
-               .hts = 640,
+               .llp = 0x10aa, /* in normal mode, tline time = 2 * HTS / SCLK */
                .lanes = 4,
                .reg_list = {
                        .num_of_regs = ARRAY_SIZE(mode_3856x2416_regs),
                        .regs = mode_3856x2416_regs,
                },
                .link_freq_index = OV08X40_LINK_FREQ_400MHZ_INDEX,
+               .exposure_shift = 1,
+               .exposure_margin = OV08X40_EXPOSURE_MAX_MARGIN,
        },
        {
                .width = 1928,
                .height = 1208,
                .vts_def = OV08X40_VTS_BIN_30FPS,
                .vts_min = OV08X40_VTS_BIN_30FPS,
-               .hts = 720,
+               .llp = 0x960,
                .lanes = 4,
                .reg_list = {
                        .num_of_regs = ARRAY_SIZE(mode_1928x1208_regs),
                        .regs = mode_1928x1208_regs,
                },
                .link_freq_index = OV08X40_LINK_FREQ_400MHZ_INDEX,
+               .exposure_shift = 0,
+               .exposure_margin = OV08X40_EXPOSURE_BIN_MAX_MARGIN,
        },
 };
 
@@ -2432,6 +1296,9 @@ struct ov08x40 {
 
        /* Mutex for serialized access */
        struct mutex mutex;
+
+       /* True if the device has been identified */
+       bool identified;
 };
 
 #define to_ov08x40(_sd)        container_of(_sd, struct ov08x40, sd)
@@ -2472,6 +1339,40 @@ static int ov08x40_read_reg(struct ov08x40 *ov08x,
        return 0;
 }
 
+static int ov08x40_burst_fill_regs(struct ov08x40 *ov08x, u16 first_reg,
+                                  u16 last_reg,  u8 val)
+{
+       struct i2c_client *client = v4l2_get_subdevdata(&ov08x->sd);
+       struct i2c_msg msgs;
+       size_t i, num_regs;
+       int ret;
+
+       num_regs = last_reg - first_reg + 1;
+       msgs.addr = client->addr;
+       msgs.flags = 0;
+       msgs.len = 2 + num_regs;
+       msgs.buf = kmalloc(msgs.len, GFP_KERNEL);
+
+       if (!msgs.buf)
+               return -ENOMEM;
+
+       put_unaligned_be16(first_reg, msgs.buf);
+
+       for (i = 0; i < num_regs; ++i)
+               msgs.buf[2 + i] = val;
+
+       ret = i2c_transfer(client->adapter, &msgs, 1);
+
+       kfree(msgs.buf);
+
+       if (ret != 1) {
+               dev_err(&client->dev, "Failed to transfer burst write: %d\n", ret);
+               return -EIO;
+       }
+
+       return 0;
+}
+
 /* Write registers up to 4 at a time */
 static int ov08x40_write_reg(struct ov08x40 *ov08x,
                             u16 reg, u32 len, u32 __val)
@@ -2664,13 +1565,23 @@ static int ov08x40_set_ctrl(struct v4l2_ctrl *ctrl)
                                             struct ov08x40, ctrl_handler);
        struct i2c_client *client = v4l2_get_subdevdata(&ov08x->sd);
        s64 max;
+       int exp;
+       int fll;
        int ret = 0;
 
        /* Propagate change of current control to all related controls */
        switch (ctrl->id) {
        case V4L2_CID_VBLANK:
                /* Update max exposure while meeting expected vblanking */
-               max = ov08x->cur_mode->height + ctrl->val - OV08X40_EXPOSURE_MAX_MARGIN;
+               /*
+                * In normal (full-size) mode 1 HTS = 0.5 tline and
+                * fps = sclk / hts / vts, so the VTS value needs to be
+                * doubled.
+                */
+               max = ((ov08x->cur_mode->height + ctrl->val) <<
+                       ov08x->cur_mode->exposure_shift) -
+                       ov08x->cur_mode->exposure_margin;
+
                __v4l2_ctrl_modify_range(ov08x->exposure,
                                         ov08x->exposure->minimum,
                                         max, ov08x->exposure->step, max);
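As a sanity check on the doubling logic, a minimal standalone sketch of the
new max-exposure computation, using the 3856x2416 mode (exposure_shift = 1)
and an assumed vblank; OV08X40_EXPOSURE_MAX_MARGIN is not visible in this
diff, so a placeholder value is used purely for illustration:

    #include <stdio.h>

    int main(void)
    {
            int height = 2416, vblank = 48, exposure_shift = 1;
            int margin = 8;  /* placeholder, not the real margin constant */
            int max = ((height + vblank) << exposure_shift) - margin;

            printf("max exposure = %d\n", max);  /* (2464 << 1) - 8 = 4920 */
            return 0;
    }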
@@ -2694,15 +1605,20 @@ static int ov08x40_set_ctrl(struct v4l2_ctrl *ctrl)
                ret = ov08x40_update_digital_gain(ov08x, ctrl->val);
                break;
        case V4L2_CID_EXPOSURE:
+               exp = (ctrl->val << ov08x->cur_mode->exposure_shift) -
+                       ov08x->cur_mode->exposure_margin;
+
                ret = ov08x40_write_reg(ov08x, OV08X40_REG_EXPOSURE,
                                        OV08X40_REG_VALUE_24BIT,
-                                       ctrl->val);
+                                       exp);
                break;
        case V4L2_CID_VBLANK:
+               fll = ((ov08x->cur_mode->height + ctrl->val) <<
+                          ov08x->cur_mode->exposure_shift);
+
                ret = ov08x40_write_reg(ov08x, OV08X40_REG_VTS,
                                        OV08X40_REG_VALUE_16BIT,
-                                       ov08x->cur_mode->height
-                                       + ctrl->val);
+                                       fll);
                break;
        case V4L2_CID_TEST_PATTERN:
                ret = ov08x40_enable_test_pattern(ov08x, ctrl->val);
@@ -2812,6 +1728,7 @@ ov08x40_set_pad_format(struct v4l2_subdev *sd,
        s64 h_blank;
        s64 pixel_rate;
        s64 link_freq;
+       u64 steps;
 
        mutex_lock(&ov08x->mutex);
 
@@ -2839,13 +1756,22 @@ ov08x40_set_pad_format(struct v4l2_subdev *sd,
                             ov08x->cur_mode->height;
                vblank_min = ov08x->cur_mode->vts_min -
                             ov08x->cur_mode->height;
+
+               /*
+                * In normal mode the frame length line must be aligned to
+                * a multiple of 4, per the sensor vendor.
+                */
+               steps = mode->exposure_shift == 1 ? 4 : 1;
+
                __v4l2_ctrl_modify_range(ov08x->vblank, vblank_min,
                                         OV08X40_VTS_MAX
                                         - ov08x->cur_mode->height,
-                                        1,
+                                        steps,
                                         vblank_def);
                __v4l2_ctrl_s_ctrl(ov08x->vblank, vblank_def);
-               h_blank = ov08x->cur_mode->hts;
+
+               h_blank = ov08x->cur_mode->llp - ov08x->cur_mode->width;
+
                __v4l2_ctrl_modify_range(ov08x->hblank, h_blank,
                                         h_blank, 1, h_blank);
        }
@@ -2887,6 +1813,22 @@ static int ov08x40_start_streaming(struct ov08x40 *ov08x)
                return ret;
        }
 
+       /* Use I2C burst writes to fill the cross-talk regs in full-size mode */
+       if (ov08x->cur_mode->exposure_shift == 1) {
+               ret = ov08x40_burst_fill_regs(ov08x, OV08X40_REG_XTALK_FIRST_A,
+                                             OV08X40_REG_XTALK_LAST_A, 0x75);
+               if (ret == 0)
+                       ret = ov08x40_burst_fill_regs(ov08x,
+                                                     OV08X40_REG_XTALK_FIRST_B,
+                                                     OV08X40_REG_XTALK_LAST_B,
+                                                     0x75);
+       }
+
+       if (ret) {
+               dev_err(&client->dev, "%s failed to set regs\n", __func__);
+               return ret;
+       }
+
        /* Apply customized values from user */
        ret =  __v4l2_ctrl_handler_setup(ov08x->sd.ctrl_handler);
        if (ret)
@@ -2948,6 +1890,9 @@ static int ov08x40_identify_module(struct ov08x40 *ov08x)
        int ret;
        u32 val;
 
+       if (ov08x->identified)
+               return 0;
+
        ret = ov08x40_read_reg(ov08x, OV08X40_REG_CHIP_ID,
                               OV08X40_REG_VALUE_24BIT, &val);
        if (ret)
@@ -2956,9 +1901,11 @@ static int ov08x40_identify_module(struct ov08x40 *ov08x)
        if (val != OV08X40_CHIP_ID) {
                dev_err(&client->dev, "chip id mismatch: %x!=%x\n",
                        OV08X40_CHIP_ID, val);
-               return -EIO;
+               return -ENXIO;
        }
 
+       ov08x->identified = true;
+
        return 0;
 }
 
@@ -3035,7 +1982,8 @@ static int ov08x40_init_controls(struct ov08x40 *ov08x)
                                          OV08X40_VTS_MAX - mode->height, 1,
                                          vblank_def);
 
-       hblank = ov08x->cur_mode->hts;
+       hblank = ov08x->cur_mode->llp - ov08x->cur_mode->width;
+
        ov08x->hblank = v4l2_ctrl_new_std(ctrl_hdlr, &ov08x40_ctrl_ops,
                                          V4L2_CID_HBLANK,
                                          hblank, hblank, 1, hblank);
@@ -3175,6 +2123,7 @@ static int ov08x40_probe(struct i2c_client *client)
 {
        struct ov08x40 *ov08x;
        int ret;
+       bool full_power;
 
        /* Check HW config */
        ret = ov08x40_check_hwcfg(&client->dev);
@@ -3190,11 +2139,14 @@ static int ov08x40_probe(struct i2c_client *client)
        /* Initialize subdev */
        v4l2_i2c_subdev_init(&ov08x->sd, client, &ov08x40_subdev_ops);
 
-       /* Check module identity */
-       ret = ov08x40_identify_module(ov08x);
-       if (ret) {
-               dev_err(&client->dev, "failed to find sensor: %d\n", ret);
-               return ret;
+       full_power = acpi_dev_state_d0(&client->dev);
+       if (full_power) {
+               /* Check module identity */
+               ret = ov08x40_identify_module(ov08x);
+               if (ret) {
+                       dev_err(&client->dev, "failed to find sensor: %d\n", ret);
+                       return ret;
+               }
        }
 
        /* Set default mode to max resolution */
@@ -3222,11 +2174,8 @@ static int ov08x40_probe(struct i2c_client *client)
        if (ret < 0)
                goto error_media_entity;
 
-       /*
-        * Device is already turned on by i2c-core with ACPI domain PM.
-        * Enable runtime PM and turn off the device.
-        */
-       pm_runtime_set_active(&client->dev);
+       if (full_power)
+               pm_runtime_set_active(&client->dev);
        pm_runtime_enable(&client->dev);
        pm_runtime_idle(&client->dev);
 
@@ -3270,11 +2219,13 @@ static struct i2c_driver ov08x40_i2c_driver = {
        },
        .probe = ov08x40_probe,
        .remove = ov08x40_remove,
+       .flags = I2C_DRV_ACPI_WAIVE_D0_PROBE,
 };
 
 module_i2c_driver(ov08x40_i2c_driver);
 
 MODULE_AUTHOR("Jason Chen <jason.z.chen@intel.com>");
+MODULE_AUTHOR("Qingwu Zhang <qingwu.zhang@intel.com>");
 MODULE_AUTHOR("Shawn Tu");
 MODULE_DESCRIPTION("OmniVision OV08X40 sensor driver");
 MODULE_LICENSE("GPL");
index 1d0ef72a64036b6b68c6cd71ce36e94a9aba219b..d1653d7431d0e9404c8e9c6f58414d5668e74af3 100644 (file)
@@ -1388,7 +1388,7 @@ ov2659_get_pdata(struct i2c_client *client)
        if (!IS_ENABLED(CONFIG_OF) || !client->dev.of_node)
                return client->dev.platform_data;
 
-       endpoint = of_graph_get_next_endpoint(client->dev.of_node, NULL);
+       endpoint = of_graph_get_endpoint_by_regs(client->dev.of_node, 0, -1);
        if (!endpoint)
                return NULL;
 
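This and several following hunks swap of_graph_get_next_endpoint() for
of_graph_get_endpoint_by_regs(). A rough sketch of what the new call sites
ask for, per <linux/of_graph.h> (the helper name below is hypothetical):

    #include <linux/of_graph.h>

    /* Explicit equivalent of the old "first endpoint" lookup: port reg 0,
     * any endpoint reg (-1 acts as a wildcard).
     */
    static struct device_node *first_port0_endpoint(struct device_node *np)
    {
            return of_graph_get_endpoint_by_regs(np, 0, -1);
    }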
index a26ac11c989d784b992485ccb85580f043da7eb0..3b22b9e127873d8b3d4d5ced833547b7b27eefe0 100644 (file)
@@ -118,7 +118,6 @@ static inline struct ov5645 *to_ov5645(struct v4l2_subdev *sd)
 
 static const struct reg_value ov5645_global_init_setting[] = {
        { 0x3103, 0x11 },
-       { 0x3008, 0x82 },
        { 0x3008, 0x42 },
        { 0x3103, 0x03 },
        { 0x3503, 0x07 },
@@ -627,6 +626,10 @@ static int ov5645_set_register_array(struct ov5645 *ov5645,
                ret = ov5645_write_reg(ov5645, settings->reg, settings->val);
                if (ret < 0)
                        return ret;
+
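+               /* Delay after a start command, presumably to let the sensor settle */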
+               if (settings->reg == OV5645_SYSTEM_CTRL0 &&
+                   settings->val == OV5645_SYSTEM_CTRL0_START)
+                       usleep_range(1000, 2000);
        }
 
        return 0;
@@ -1056,7 +1059,7 @@ static int ov5645_probe(struct i2c_client *client)
        ov5645->i2c_client = client;
        ov5645->dev = dev;
 
-       endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
+       endpoint = of_graph_get_endpoint_by_regs(dev->of_node, 0, -1);
        if (!endpoint) {
                dev_err(dev, "endpoint node not found\n");
                return -EINVAL;
index 96c0fd4ff5abfc35debdb2f43643f9f2b3f70909..7e1ecdf2485f7a6710380842bbbd861f3652790c 100644 (file)
@@ -1363,7 +1363,7 @@ static int ov5647_parse_dt(struct ov5647 *sensor, struct device_node *np)
        struct device_node *ep;
        int ret;
 
-       ep = of_graph_get_next_endpoint(np, NULL);
+       ep = of_graph_get_endpoint_by_regs(np, 0, -1);
        if (!ep)
                return -EINVAL;
 
index af8d01f78c32adea456e037aaa92564ad7dd8f22..cf6be509af33bd5df850a2da3ec1fbb7ff337fb9 100644 (file)
@@ -1568,7 +1568,7 @@ static int s5c73m3_get_dt_data(struct s5c73m3 *state)
                                     "failed to request gpio S5C73M3_RST\n");
        gpiod_set_consumer_name(state->reset, "S5C73M3_RST");
 
-       node_ep = of_graph_get_next_endpoint(node, NULL);
+       node_ep = of_graph_get_endpoint_by_regs(node, 0, -1);
        if (!node_ep) {
                dev_warn(dev, "no endpoint defined for node: %pOF\n", node);
                return 0;
index de079d2c9282b18220787bd48a3840f8631b2a57..6b11039c35798286021245122d07faa3b0f2992b 100644 (file)
@@ -1849,7 +1849,7 @@ static int s5k5baf_parse_device_node(struct s5k5baf *state, struct device *dev)
                         state->mclk_frequency);
        }
 
-       node_ep = of_graph_get_next_endpoint(node, NULL);
+       node_ep = of_graph_get_endpoint_by_regs(node, 0, -1);
        if (!node_ep) {
                dev_err(dev, "no endpoint defined at node %pOF\n", node);
                return -EINVAL;
index e4d37a19772406c72ac9536cecebbdf75fe9b024..b9e7c57027b1b9d31d86f9df5c8b836b94ece6a4 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
+#include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/units.h>
 
 
 #include <media/mipi-csi2.h>
 #include <media/v4l2-async.h>
+#include <media/v4l2-cci.h>
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-event.h>
 #include <media/v4l2-fwnode.h>
 #include <media/v4l2-subdev.h>
 
-#define VGXY61_REG_8BIT(n)                             ((1 << 16) | (n))
-#define VGXY61_REG_16BIT(n)                            ((2 << 16) | (n))
-#define VGXY61_REG_32BIT(n)                            ((4 << 16) | (n))
-#define VGXY61_REG_SIZE_SHIFT                          16
-#define VGXY61_REG_ADDR_MASK                           0xffff
-
-#define VGXY61_REG_MODEL_ID                            VGXY61_REG_16BIT(0x0000)
+#define VGXY61_REG_MODEL_ID                            CCI_REG16_LE(0x0000)
 #define VG5661_MODEL_ID                                        0x5661
 #define VG5761_MODEL_ID                                        0x5761
-#define VGXY61_REG_REVISION                            VGXY61_REG_16BIT(0x0002)
-#define VGXY61_REG_FWPATCH_REVISION                    VGXY61_REG_16BIT(0x0014)
-#define VGXY61_REG_FWPATCH_START_ADDR                  VGXY61_REG_8BIT(0x2000)
-#define VGXY61_REG_SYSTEM_FSM                          VGXY61_REG_8BIT(0x0020)
+#define VGXY61_REG_REVISION                            CCI_REG16_LE(0x0002)
+#define VGXY61_REG_FWPATCH_REVISION                    CCI_REG16_LE(0x0014)
+#define VGXY61_REG_FWPATCH_START_ADDR                  CCI_REG8(0x2000)
+#define VGXY61_REG_SYSTEM_FSM                          CCI_REG8(0x0020)
 #define VGXY61_SYSTEM_FSM_SW_STBY                      0x03
 #define VGXY61_SYSTEM_FSM_STREAMING                    0x04
-#define VGXY61_REG_NVM                                 VGXY61_REG_8BIT(0x0023)
+#define VGXY61_REG_NVM                                 CCI_REG8(0x0023)
 #define VGXY61_NVM_OK                                  0x04
-#define VGXY61_REG_STBY                                        VGXY61_REG_8BIT(0x0201)
+#define VGXY61_REG_STBY                                        CCI_REG8(0x0201)
 #define VGXY61_STBY_NO_REQ                             0
 #define VGXY61_STBY_REQ_TMP_READ                       BIT(2)
-#define VGXY61_REG_STREAMING                           VGXY61_REG_8BIT(0x0202)
+#define VGXY61_REG_STREAMING                           CCI_REG8(0x0202)
 #define VGXY61_STREAMING_NO_REQ                                0
 #define VGXY61_STREAMING_REQ_STOP                      BIT(0)
 #define VGXY61_STREAMING_REQ_START                     BIT(1)
-#define VGXY61_REG_EXT_CLOCK                           VGXY61_REG_32BIT(0x0220)
-#define VGXY61_REG_CLK_PLL_PREDIV                      VGXY61_REG_8BIT(0x0224)
-#define VGXY61_REG_CLK_SYS_PLL_MULT                    VGXY61_REG_8BIT(0x0225)
-#define VGXY61_REG_GPIO_0_CTRL                         VGXY61_REG_8BIT(0x0236)
-#define VGXY61_REG_GPIO_1_CTRL                         VGXY61_REG_8BIT(0x0237)
-#define VGXY61_REG_GPIO_2_CTRL                         VGXY61_REG_8BIT(0x0238)
-#define VGXY61_REG_GPIO_3_CTRL                         VGXY61_REG_8BIT(0x0239)
-#define VGXY61_REG_SIGNALS_POLARITY_CTRL               VGXY61_REG_8BIT(0x023b)
-#define VGXY61_REG_LINE_LENGTH                         VGXY61_REG_16BIT(0x0300)
-#define VGXY61_REG_ORIENTATION                         VGXY61_REG_8BIT(0x0302)
-#define VGXY61_REG_VT_CTRL                             VGXY61_REG_8BIT(0x0304)
-#define VGXY61_REG_FORMAT_CTRL                         VGXY61_REG_8BIT(0x0305)
-#define VGXY61_REG_OIF_CTRL                            VGXY61_REG_16BIT(0x0306)
-#define VGXY61_REG_OIF_ROI0_CTRL                       VGXY61_REG_8BIT(0x030a)
-#define VGXY61_REG_ROI0_START_H                                VGXY61_REG_16BIT(0x0400)
-#define VGXY61_REG_ROI0_START_V                                VGXY61_REG_16BIT(0x0402)
-#define VGXY61_REG_ROI0_END_H                          VGXY61_REG_16BIT(0x0404)
-#define VGXY61_REG_ROI0_END_V                          VGXY61_REG_16BIT(0x0406)
-#define VGXY61_REG_PATGEN_CTRL                         VGXY61_REG_32BIT(0x0440)
+#define VGXY61_REG_EXT_CLOCK                           CCI_REG32_LE(0x0220)
+#define VGXY61_REG_CLK_PLL_PREDIV                      CCI_REG8(0x0224)
+#define VGXY61_REG_CLK_SYS_PLL_MULT                    CCI_REG8(0x0225)
+#define VGXY61_REG_GPIO_0_CTRL                         CCI_REG8(0x0236)
+#define VGXY61_REG_GPIO_1_CTRL                         CCI_REG8(0x0237)
+#define VGXY61_REG_GPIO_2_CTRL                         CCI_REG8(0x0238)
+#define VGXY61_REG_GPIO_3_CTRL                         CCI_REG8(0x0239)
+#define VGXY61_REG_SIGNALS_POLARITY_CTRL               CCI_REG8(0x023b)
+#define VGXY61_REG_LINE_LENGTH                         CCI_REG16_LE(0x0300)
+#define VGXY61_REG_ORIENTATION                         CCI_REG8(0x0302)
+#define VGXY61_REG_VT_CTRL                             CCI_REG8(0x0304)
+#define VGXY61_REG_FORMAT_CTRL                         CCI_REG8(0x0305)
+#define VGXY61_REG_OIF_CTRL                            CCI_REG16_LE(0x0306)
+#define VGXY61_REG_OIF_ROI0_CTRL                       CCI_REG8(0x030a)
+#define VGXY61_REG_ROI0_START_H                                CCI_REG16_LE(0x0400)
+#define VGXY61_REG_ROI0_START_V                                CCI_REG16_LE(0x0402)
+#define VGXY61_REG_ROI0_END_H                          CCI_REG16_LE(0x0404)
+#define VGXY61_REG_ROI0_END_V                          CCI_REG16_LE(0x0406)
+#define VGXY61_REG_PATGEN_CTRL                         CCI_REG32_LE(0x0440)
 #define VGXY61_PATGEN_LONG_ENABLE                      BIT(16)
 #define VGXY61_PATGEN_SHORT_ENABLE                     BIT(0)
 #define VGXY61_PATGEN_LONG_TYPE_SHIFT                  18
 #define VGXY61_PATGEN_SHORT_TYPE_SHIFT                 4
-#define VGXY61_REG_FRAME_CONTENT_CTRL                  VGXY61_REG_8BIT(0x0478)
-#define VGXY61_REG_COARSE_EXPOSURE_LONG                        VGXY61_REG_16BIT(0x0500)
-#define VGXY61_REG_COARSE_EXPOSURE_SHORT               VGXY61_REG_16BIT(0x0504)
-#define VGXY61_REG_ANALOG_GAIN                         VGXY61_REG_8BIT(0x0508)
-#define VGXY61_REG_DIGITAL_GAIN_LONG                   VGXY61_REG_16BIT(0x050a)
-#define VGXY61_REG_DIGITAL_GAIN_SHORT                  VGXY61_REG_16BIT(0x0512)
-#define VGXY61_REG_FRAME_LENGTH                                VGXY61_REG_16BIT(0x051a)
-#define VGXY61_REG_SIGNALS_CTRL                                VGXY61_REG_16BIT(0x0522)
+#define VGXY61_REG_FRAME_CONTENT_CTRL                  CCI_REG8(0x0478)
+#define VGXY61_REG_COARSE_EXPOSURE_LONG                        CCI_REG16_LE(0x0500)
+#define VGXY61_REG_COARSE_EXPOSURE_SHORT               CCI_REG16_LE(0x0504)
+#define VGXY61_REG_ANALOG_GAIN                         CCI_REG8(0x0508)
+#define VGXY61_REG_DIGITAL_GAIN_LONG                   CCI_REG16_LE(0x050a)
+#define VGXY61_REG_DIGITAL_GAIN_SHORT                  CCI_REG16_LE(0x0512)
+#define VGXY61_REG_FRAME_LENGTH                                CCI_REG16_LE(0x051a)
+#define VGXY61_REG_SIGNALS_CTRL                                CCI_REG16_LE(0x0522)
 #define VGXY61_SIGNALS_GPIO_ID_SHIFT                   4
-#define VGXY61_REG_READOUT_CTRL                                VGXY61_REG_8BIT(0x0530)
-#define VGXY61_REG_HDR_CTRL                            VGXY61_REG_8BIT(0x0532)
-#define VGXY61_REG_PATGEN_LONG_DATA_GR                 VGXY61_REG_16BIT(0x092c)
-#define VGXY61_REG_PATGEN_LONG_DATA_R                  VGXY61_REG_16BIT(0x092e)
-#define VGXY61_REG_PATGEN_LONG_DATA_B                  VGXY61_REG_16BIT(0x0930)
-#define VGXY61_REG_PATGEN_LONG_DATA_GB                 VGXY61_REG_16BIT(0x0932)
-#define VGXY61_REG_PATGEN_SHORT_DATA_GR                        VGXY61_REG_16BIT(0x0950)
-#define VGXY61_REG_PATGEN_SHORT_DATA_R                 VGXY61_REG_16BIT(0x0952)
-#define VGXY61_REG_PATGEN_SHORT_DATA_B                 VGXY61_REG_16BIT(0x0954)
-#define VGXY61_REG_PATGEN_SHORT_DATA_GB                        VGXY61_REG_16BIT(0x0956)
-#define VGXY61_REG_BYPASS_CTRL                         VGXY61_REG_8BIT(0x0a60)
+#define VGXY61_REG_READOUT_CTRL                                CCI_REG8(0x0530)
+#define VGXY61_REG_HDR_CTRL                            CCI_REG8(0x0532)
+#define VGXY61_REG_PATGEN_LONG_DATA_GR                 CCI_REG16_LE(0x092c)
+#define VGXY61_REG_PATGEN_LONG_DATA_R                  CCI_REG16_LE(0x092e)
+#define VGXY61_REG_PATGEN_LONG_DATA_B                  CCI_REG16_LE(0x0930)
+#define VGXY61_REG_PATGEN_LONG_DATA_GB                 CCI_REG16_LE(0x0932)
+#define VGXY61_REG_PATGEN_SHORT_DATA_GR                        CCI_REG16_LE(0x0950)
+#define VGXY61_REG_PATGEN_SHORT_DATA_R                 CCI_REG16_LE(0x0952)
+#define VGXY61_REG_PATGEN_SHORT_DATA_B                 CCI_REG16_LE(0x0954)
+#define VGXY61_REG_PATGEN_SHORT_DATA_GB                        CCI_REG16_LE(0x0956)
+#define VGXY61_REG_BYPASS_CTRL                         CCI_REG8(0x0a60)
 
 #define VGX661_WIDTH                                   1464
 #define VGX661_HEIGHT                                  1104
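The deleted VGXY61_REG_*BIT() wrappers packed the transfer size into the
bits above the 16-bit register address; the v4l2-cci CCI_REG* constants
follow the same idea in shared code, so cci_read()/cci_write() can infer
the width (and, for CCI_REG*_LE, the endianness) from the constant alone.
A simplified sketch of the encoding idea, not the exact kernel macros:

    /* Illustration only -- the real encoding lives in media/v4l2-cci.h. */
    #define MY_REG_WIDTH_SHIFT      16
    #define MY_REG8(addr)           ((1U << MY_REG_WIDTH_SHIFT) | (addr))
    #define MY_REG16(addr)          ((2U << MY_REG_WIDTH_SHIFT) | (addr))

    static inline unsigned int my_reg_width(unsigned int reg)
    {
            return reg >> MY_REG_WIDTH_SHIFT;  /* bytes per transfer */
    }

    static inline unsigned int my_reg_addr(unsigned int reg)
    {
            return reg & 0xffff;               /* 16-bit register address */
    }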
@@ -384,6 +380,7 @@ static const struct vgxy61_mode_info vgx761_mode_data[] = {
 
 struct vgxy61_dev {
        struct i2c_client *i2c_client;
+       struct regmap *regmap;
        struct v4l2_subdev sd;
        struct media_pad pad;
        struct regulator_bulk_data supplies[ARRAY_SIZE(vgxy61_supply_name)];
@@ -510,82 +507,6 @@ static unsigned int get_chunk_size(struct vgxy61_dev *sensor)
        return max(max_write_len, 1);
 }
 
-static int vgxy61_read_multiple(struct vgxy61_dev *sensor, u32 reg,
-                               unsigned int len)
-{
-       struct i2c_client *client = sensor->i2c_client;
-       struct i2c_msg msg[2];
-       u8 buf[2];
-       u8 val[sizeof(u32)] = {0};
-       int ret;
-
-       if (len > sizeof(u32))
-               return -EINVAL;
-       buf[0] = reg >> 8;
-       buf[1] = reg & 0xff;
-
-       msg[0].addr = client->addr;
-       msg[0].flags = client->flags;
-       msg[0].buf = buf;
-       msg[0].len = sizeof(buf);
-
-       msg[1].addr = client->addr;
-       msg[1].flags = client->flags | I2C_M_RD;
-       msg[1].buf = val;
-       msg[1].len = len;
-
-       ret = i2c_transfer(client->adapter, msg, 2);
-       if (ret < 0) {
-               dev_dbg(&client->dev, "%s: %x i2c_transfer, reg: %x => %d\n",
-                       __func__, client->addr, reg, ret);
-               return ret;
-       }
-
-       return get_unaligned_le32(val);
-}
-
-static inline int vgxy61_read_reg(struct vgxy61_dev *sensor, u32 reg)
-{
-       return vgxy61_read_multiple(sensor, reg & VGXY61_REG_ADDR_MASK,
-                                    (reg >> VGXY61_REG_SIZE_SHIFT) & 7);
-}
-
-static int vgxy61_write_multiple(struct vgxy61_dev *sensor, u32 reg,
-                                const u8 *data, unsigned int len, int *err)
-{
-       struct i2c_client *client = sensor->i2c_client;
-       struct i2c_msg msg;
-       u8 buf[VGXY61_WRITE_MULTIPLE_CHUNK_MAX + 2];
-       unsigned int i;
-       int ret;
-
-       if (err && *err)
-               return *err;
-
-       if (len > VGXY61_WRITE_MULTIPLE_CHUNK_MAX)
-               return -EINVAL;
-       buf[0] = reg >> 8;
-       buf[1] = reg & 0xff;
-       for (i = 0; i < len; i++)
-               buf[i + 2] = data[i];
-
-       msg.addr = client->addr;
-       msg.flags = client->flags;
-       msg.buf = buf;
-       msg.len = len + 2;
-
-       ret = i2c_transfer(client->adapter, &msg, 1);
-       if (ret < 0) {
-               dev_dbg(&client->dev, "%s: i2c_transfer, reg: %x => %d\n",
-                       __func__, reg, ret);
-               if (err)
-                       *err = ret;
-               return ret;
-       }
-
-       return 0;
-}
-
 static int vgxy61_write_array(struct vgxy61_dev *sensor, u32 reg,
                              unsigned int nb, const u8 *array)
 {
@@ -595,7 +516,8 @@ static int vgxy61_write_array(struct vgxy61_dev *sensor, u32 reg,
 
        while (nb) {
                sz = min(nb, chunk_size);
-               ret = vgxy61_write_multiple(sensor, reg, array, sz, NULL);
+               ret = regmap_bulk_write(sensor->regmap, CCI_REG_ADDR(reg),
+                                       array, sz);
                if (ret < 0)
                        return ret;
                nb -= sz;
@@ -606,24 +528,17 @@ static int vgxy61_write_array(struct vgxy61_dev *sensor, u32 reg,
        return 0;
 }
 
-static inline int vgxy61_write_reg(struct vgxy61_dev *sensor, u32 reg, u32 val,
-                                  int *err)
-{
-       return vgxy61_write_multiple(sensor, reg & VGXY61_REG_ADDR_MASK,
-                                    (u8 *)&val,
-                                    (reg >> VGXY61_REG_SIZE_SHIFT) & 7, err);
-}
-
 static int vgxy61_poll_reg(struct vgxy61_dev *sensor, u32 reg, u8 poll_val,
                           unsigned int timeout_ms)
 {
        const unsigned int loop_delay_ms = 10;
+       u64 val;
        int ret;
 
-       return read_poll_timeout(vgxy61_read_reg, ret,
-                                ((ret < 0) || (ret == poll_val)),
+       return read_poll_timeout(cci_read, ret,
+                                ((ret < 0) || (val == poll_val)),
                                 loop_delay_ms * 1000, timeout_ms * 1000,
-                                false, sensor, reg);
+                                false, sensor->regmap, reg, &val, NULL);
 }
 
 static int vgxy61_wait_state(struct vgxy61_dev *sensor, int state,
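Since cci_read() reports I2C errors through its return value rather than
through val, the poll condition above has to bail out on both. Roughly, the
read_poll_timeout() call behaves like the hand-rolled loop below (a sketch
only; the macro's exact return-value handling differs):

    static int vgxy61_poll_sketch(struct regmap *map, u32 reg, u8 poll_val,
                                  unsigned int timeout_ms)
    {
            unsigned int waited_ms = 0;
            u64 val;
            int ret;

            for (;;) {
                    ret = cci_read(map, reg, &val, NULL);
                    if (ret < 0 || val == poll_val)
                            return ret;
                    if (waited_ms >= timeout_ms)
                            return -ETIMEDOUT;
                    msleep(10);  /* loop_delay_ms */
                    waited_ms += 10;
            }
    }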
@@ -662,11 +577,11 @@ static int vgxy61_apply_exposure(struct vgxy61_dev *sensor)
        int ret = 0;
 
         /* Set the exposure to zero first to avoid a forbidden parameter pair */
-       vgxy61_write_reg(sensor, VGXY61_REG_COARSE_EXPOSURE_SHORT, 0, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_COARSE_EXPOSURE_LONG,
-                        sensor->expo_long, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_COARSE_EXPOSURE_SHORT,
-                        sensor->expo_short, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_COARSE_EXPOSURE_SHORT, 0, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_COARSE_EXPOSURE_LONG,
+                 sensor->expo_long, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_COARSE_EXPOSURE_SHORT,
+                 sensor->expo_short, &ret);
 
        return ret;
 }
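cci_write() and cci_read() take an optional int *err so that a sequence of
accesses can be chained and checked once: when *err is already nonzero the
access is skipped and the first error sticks. That is why the converted
call sites above pass &ret and only test it at the end. A sketch of the
contract:

    static int vgxy61_chained_write_sketch(struct regmap *map)
    {
            int ret = 0;

            cci_write(map, VGXY61_REG_STBY, 0x10, &ret);
            /* Skipped entirely if the first write already failed. */
            cci_write(map, VGXY61_REG_STREAMING,
                      VGXY61_STREAMING_REQ_START, &ret);

            return ret;  /* first error, or 0 */
    }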
@@ -714,7 +629,7 @@ static int vgxy61_try_fmt_internal(struct v4l2_subdev *sd,
                                   const struct vgxy61_mode_info **new_mode)
 {
        struct vgxy61_dev *sensor = to_vgxy61_dev(sd);
-       const struct vgxy61_mode_info *mode = sensor->sensor_modes;
+       const struct vgxy61_mode_info *mode;
        unsigned int index;
 
        for (index = 0; index < ARRAY_SIZE(vgxy61_supported_codes); index++) {
@@ -827,8 +742,8 @@ static int vgxy61_update_analog_gain(struct vgxy61_dev *sensor, u32 target)
        sensor->analog_gain = target;
 
        if (sensor->streaming)
-               return vgxy61_write_reg(sensor, VGXY61_REG_ANALOG_GAIN, target,
-                                       NULL);
+               return cci_write(sensor->regmap, VGXY61_REG_ANALOG_GAIN, target,
+                                NULL);
        return 0;
 }
 
@@ -842,10 +757,10 @@ static int vgxy61_apply_digital_gain(struct vgxy61_dev *sensor,
         * DIGITAL_GAIN_SHORT_CH0 is enough to configure the gain of all
         * four sub pixels.
         */
-       vgxy61_write_reg(sensor, VGXY61_REG_DIGITAL_GAIN_LONG, digital_gain,
-                        &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_DIGITAL_GAIN_SHORT, digital_gain,
-                        &ret);
+       cci_write(sensor->regmap, VGXY61_REG_DIGITAL_GAIN_LONG, digital_gain,
+                 &ret);
+       cci_write(sensor->regmap, VGXY61_REG_DIGITAL_GAIN_SHORT, digital_gain,
+                 &ret);
 
        return ret;
 }
@@ -870,7 +785,7 @@ static int vgxy61_apply_patgen(struct vgxy61_dev *sensor, u32 index)
 
        if (pattern)
                reg |= VGXY61_PATGEN_LONG_ENABLE | VGXY61_PATGEN_SHORT_ENABLE;
-       return vgxy61_write_reg(sensor, VGXY61_REG_PATGEN_CTRL, reg, NULL);
+       return cci_write(sensor->regmap, VGXY61_REG_PATGEN_CTRL, reg, NULL);
 }
 
 static int vgxy61_update_patgen(struct vgxy61_dev *sensor, u32 pattern)
@@ -887,15 +802,13 @@ static int vgxy61_apply_gpiox_strobe_mode(struct vgxy61_dev *sensor,
                                          unsigned int idx)
 {
        static const u8 index2val[] = {0x0, 0x1, 0x3};
-       int reg;
+       u16 mask, val;
 
-       reg = vgxy61_read_reg(sensor, VGXY61_REG_SIGNALS_CTRL);
-       if (reg < 0)
-               return reg;
-       reg &= ~(0xf << (idx * VGXY61_SIGNALS_GPIO_ID_SHIFT));
-       reg |= index2val[mode] << (idx * VGXY61_SIGNALS_GPIO_ID_SHIFT);
+       mask = 0xf << (idx * VGXY61_SIGNALS_GPIO_ID_SHIFT);
+       val = index2val[mode] << (idx * VGXY61_SIGNALS_GPIO_ID_SHIFT);
 
-       return vgxy61_write_reg(sensor, VGXY61_REG_SIGNALS_CTRL, reg, NULL);
+       return cci_update_bits(sensor->regmap, VGXY61_REG_SIGNALS_CTRL,
+                              mask, val, NULL);
 }
 
 static int vgxy61_update_gpios_strobe_mode(struct vgxy61_dev *sensor,
@@ -940,12 +853,12 @@ static int vgxy61_update_gpios_strobe_polarity(struct vgxy61_dev *sensor,
        if (sensor->streaming)
                return -EBUSY;
 
-       vgxy61_write_reg(sensor, VGXY61_REG_GPIO_0_CTRL, polarity << 1, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_GPIO_1_CTRL, polarity << 1, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_GPIO_2_CTRL, polarity << 1, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_GPIO_3_CTRL, polarity << 1, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_SIGNALS_POLARITY_CTRL, polarity,
-                        &ret);
+       cci_write(sensor->regmap, VGXY61_REG_GPIO_0_CTRL, polarity << 1, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_GPIO_1_CTRL, polarity << 1, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_GPIO_2_CTRL, polarity << 1, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_GPIO_3_CTRL, polarity << 1, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_SIGNALS_POLARITY_CTRL, polarity,
+                 &ret);
 
        return ret;
 }
@@ -1057,8 +970,8 @@ static int vgxy61_update_exposure(struct vgxy61_dev *sensor, u16 new_expo_long,
 
 static int vgxy61_apply_framelength(struct vgxy61_dev *sensor)
 {
-       return vgxy61_write_reg(sensor, VGXY61_REG_FRAME_LENGTH,
-                               sensor->frame_length, NULL);
+       return cci_write(sensor->regmap, VGXY61_REG_FRAME_LENGTH,
+                        sensor->frame_length, NULL);
 }
 
 static int vgxy61_update_vblank(struct vgxy61_dev *sensor, u16 vblank,
@@ -1086,8 +999,8 @@ static int vgxy61_apply_hdr(struct vgxy61_dev *sensor,
 {
        static const u8 index2val[] = {0x1, 0x4, 0xa};
 
-       return vgxy61_write_reg(sensor, VGXY61_REG_HDR_CTRL, index2val[index],
-                               NULL);
+       return cci_write(sensor->regmap, VGXY61_REG_HDR_CTRL, index2val[index],
+                        NULL);
 }
 
 static int vgxy61_update_hdr(struct vgxy61_dev *sensor,
@@ -1133,16 +1046,16 @@ static int vgxy61_apply_settings(struct vgxy61_dev *sensor)
        if (ret)
                return ret;
 
-       ret = vgxy61_write_reg(sensor, VGXY61_REG_ANALOG_GAIN,
-                              sensor->analog_gain, NULL);
+       ret = cci_write(sensor->regmap, VGXY61_REG_ANALOG_GAIN,
+                       sensor->analog_gain, NULL);
        if (ret)
                return ret;
        ret = vgxy61_apply_digital_gain(sensor, sensor->digital_gain);
        if (ret)
                return ret;
 
-       ret = vgxy61_write_reg(sensor, VGXY61_REG_ORIENTATION,
-                              sensor->hflip | (sensor->vflip << 1), NULL);
+       ret = cci_write(sensor->regmap, VGXY61_REG_ORIENTATION,
+                       sensor->hflip | (sensor->vflip << 1), NULL);
        if (ret)
                return ret;
 
@@ -1174,19 +1087,19 @@ static int vgxy61_stream_enable(struct vgxy61_dev *sensor)
        if (ret)
                return ret;
 
-       vgxy61_write_reg(sensor, VGXY61_REG_FORMAT_CTRL,
-                        get_bpp_by_code(sensor->fmt.code), &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_OIF_ROI0_CTRL,
-                        get_data_type_by_code(sensor->fmt.code), &ret);
-
-       vgxy61_write_reg(sensor, VGXY61_REG_READOUT_CTRL,
-                        sensor->current_mode->bin_mode, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_ROI0_START_H, crop->left, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_ROI0_END_H,
-                        crop->left + crop->width - 1, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_ROI0_START_V, crop->top, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_ROI0_END_V,
-                        crop->top + crop->height - 1, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_FORMAT_CTRL,
+                 get_bpp_by_code(sensor->fmt.code), &ret);
+       cci_write(sensor->regmap, VGXY61_REG_OIF_ROI0_CTRL,
+                 get_data_type_by_code(sensor->fmt.code), &ret);
+
+       cci_write(sensor->regmap, VGXY61_REG_READOUT_CTRL,
+                 sensor->current_mode->bin_mode, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_ROI0_START_H, crop->left, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_ROI0_END_H,
+                 crop->left + crop->width - 1, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_ROI0_START_V, crop->top, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_ROI0_END_V,
+                 crop->top + crop->height - 1, &ret);
        if (ret)
                goto err_rpm_put;
 
@@ -1194,8 +1107,8 @@ static int vgxy61_stream_enable(struct vgxy61_dev *sensor)
        if (ret)
                goto err_rpm_put;
 
-       ret = vgxy61_write_reg(sensor, VGXY61_REG_STREAMING,
-                              VGXY61_STREAMING_REQ_START, NULL);
+       ret = cci_write(sensor->regmap, VGXY61_REG_STREAMING,
+                       VGXY61_STREAMING_REQ_START, NULL);
        if (ret)
                goto err_rpm_put;
 
@@ -1225,8 +1138,8 @@ static int vgxy61_stream_disable(struct vgxy61_dev *sensor)
        struct i2c_client *client = v4l2_get_subdevdata(&sensor->sd);
        int ret;
 
-       ret = vgxy61_write_reg(sensor, VGXY61_REG_STREAMING,
-                              VGXY61_STREAMING_REQ_STOP, NULL);
+       ret = cci_write(sensor->regmap, VGXY61_REG_STREAMING,
+                       VGXY61_STREAMING_REQ_STOP, NULL);
        if (ret)
                goto err_str_dis;
 
@@ -1582,7 +1495,7 @@ static int vgxy61_configure(struct vgxy61_dev *sensor)
 {
        u32 sensor_freq;
        u8 prediv, mult;
-       int line_length;
+       u64 line_length;
        int ret = 0;
 
        compute_pll_parameters_by_freq(sensor->clk_freq, &prediv, &mult);
@@ -1592,28 +1505,28 @@ static int vgxy61_configure(struct vgxy61_dev *sensor)
        /* Video timing ISP path (pixel clock) requires 804/5 MHz ~= 160 MHz */
        sensor->pclk = sensor_freq / 5;
 
-       line_length = vgxy61_read_reg(sensor, VGXY61_REG_LINE_LENGTH);
-       if (line_length < 0)
-               return line_length;
-       sensor->line_length = line_length;
-       vgxy61_write_reg(sensor, VGXY61_REG_EXT_CLOCK, sensor->clk_freq, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_CLK_PLL_PREDIV, prediv, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_CLK_SYS_PLL_MULT, mult, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_OIF_CTRL, sensor->oif_ctrl, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_FRAME_CONTENT_CTRL, 0, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_BYPASS_CTRL, 4, &ret);
+       cci_read(sensor->regmap, VGXY61_REG_LINE_LENGTH, &line_length, &ret);
+       if (ret < 0)
+               return ret;
+       sensor->line_length = (u16)line_length;
+       cci_write(sensor->regmap, VGXY61_REG_EXT_CLOCK, sensor->clk_freq, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_CLK_PLL_PREDIV, prediv, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_CLK_SYS_PLL_MULT, mult, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_OIF_CTRL, sensor->oif_ctrl, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_FRAME_CONTENT_CTRL, 0, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_BYPASS_CTRL, 4, &ret);
        if (ret)
                return ret;
        vgxy61_update_gpios_strobe_polarity(sensor, sensor->gpios_polarity);
        /* Set pattern generator solid to middle value */
-       vgxy61_write_reg(sensor, VGXY61_REG_PATGEN_LONG_DATA_GR, 0x800, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_PATGEN_LONG_DATA_R, 0x800, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_PATGEN_LONG_DATA_B, 0x800, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_PATGEN_LONG_DATA_GB, 0x800, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_PATGEN_SHORT_DATA_GR, 0x800, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_PATGEN_SHORT_DATA_R, 0x800, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_PATGEN_SHORT_DATA_B, 0x800, &ret);
-       vgxy61_write_reg(sensor, VGXY61_REG_PATGEN_SHORT_DATA_GB, 0x800, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_PATGEN_LONG_DATA_GR, 0x800, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_PATGEN_LONG_DATA_R, 0x800, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_PATGEN_LONG_DATA_B, 0x800, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_PATGEN_LONG_DATA_GB, 0x800, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_PATGEN_SHORT_DATA_GR, 0x800, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_PATGEN_SHORT_DATA_R, 0x800, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_PATGEN_SHORT_DATA_B, 0x800, &ret);
+       cci_write(sensor->regmap, VGXY61_REG_PATGEN_SHORT_DATA_GB, 0x800, &ret);
        if (ret)
                return ret;
 
@@ -1623,37 +1536,33 @@ static int vgxy61_configure(struct vgxy61_dev *sensor)
 static int vgxy61_patch(struct vgxy61_dev *sensor)
 {
        struct i2c_client *client = sensor->i2c_client;
-       int patch, ret;
+       u64 patch;
+       int ret;
 
        ret = vgxy61_write_array(sensor, VGXY61_REG_FWPATCH_START_ADDR,
                                 sizeof(patch_array), patch_array);
-       if (ret)
-               return ret;
-
-       ret = vgxy61_write_reg(sensor, VGXY61_REG_STBY, 0x10, NULL);
+       cci_write(sensor->regmap, VGXY61_REG_STBY, 0x10, &ret);
        if (ret)
                return ret;
 
        ret = vgxy61_poll_reg(sensor, VGXY61_REG_STBY, 0, VGXY61_TIMEOUT_MS);
-       if (ret)
+       cci_read(sensor->regmap, VGXY61_REG_FWPATCH_REVISION, &patch, &ret);
+       if (ret < 0)
                return ret;
 
-       patch = vgxy61_read_reg(sensor, VGXY61_REG_FWPATCH_REVISION);
-       if (patch < 0)
-               return patch;
-
        if (patch != (VGXY61_FWPATCH_REVISION_MAJOR << 12) +
                     (VGXY61_FWPATCH_REVISION_MINOR << 8) +
                     VGXY61_FWPATCH_REVISION_MICRO) {
-               dev_err(&client->dev, "bad patch version expected %d.%d.%d got %d.%d.%d\n",
+               dev_err(&client->dev,
+                       "bad patch version expected %d.%d.%d got %u.%u.%u\n",
                        VGXY61_FWPATCH_REVISION_MAJOR,
                        VGXY61_FWPATCH_REVISION_MINOR,
                        VGXY61_FWPATCH_REVISION_MICRO,
-                       patch >> 12, (patch >> 8) & 0x0f, patch & 0xff);
+                       (u16)patch >> 12, ((u16)patch >> 8) & 0x0f, (u16)patch & 0xff);
                return -ENODEV;
        }
-       dev_dbg(&client->dev, "patch %d.%d.%d applied\n",
-               patch >> 12, (patch >> 8) & 0x0f, patch & 0xff);
+       dev_dbg(&client->dev, "patch %u.%u.%u applied\n",
+               (u16)patch >> 12, ((u16)patch >> 8) & 0x0f, (u16)patch & 0xff);
 
        return 0;
 }
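For reference, the firmware patch revision is packed as 4-bit major, 4-bit
minor and 8-bit micro fields, which is what the shifts and masks above
decode. A standalone check with a made-up revision:

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical revision 2.0.5 packed as major.minor.micro */
            unsigned int patch = (2 << 12) + (0 << 8) + 5;

            printf("%u.%u.%u\n", patch >> 12, (patch >> 8) & 0x0f,
                   patch & 0xff);  /* prints 2.0.5 */
            return 0;
    }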
@@ -1661,11 +1570,12 @@ static int vgxy61_patch(struct vgxy61_dev *sensor)
 static int vgxy61_detect_cut_version(struct vgxy61_dev *sensor)
 {
        struct i2c_client *client = sensor->i2c_client;
-       int device_rev;
+       u64 device_rev;
+       int ret;
 
-       device_rev = vgxy61_read_reg(sensor, VGXY61_REG_REVISION);
-       if (device_rev < 0)
-               return device_rev;
+       ret = cci_read(sensor->regmap, VGXY61_REG_REVISION, &device_rev, NULL);
+       if (ret < 0)
+               return ret;
 
        switch (device_rev >> 8) {
        case 0xA:
@@ -1687,17 +1597,17 @@ static int vgxy61_detect_cut_version(struct vgxy61_dev *sensor)
 static int vgxy61_detect(struct vgxy61_dev *sensor)
 {
        struct i2c_client *client = sensor->i2c_client;
-       int id = 0;
-       int ret, st;
+       u64 st, id = 0;
+       int ret;
 
-       id = vgxy61_read_reg(sensor, VGXY61_REG_MODEL_ID);
-       if (id < 0)
-               return id;
+       ret = cci_read(sensor->regmap, VGXY61_REG_MODEL_ID, &id, NULL);
+       if (ret < 0)
+               return ret;
        if (id != VG5661_MODEL_ID && id != VG5761_MODEL_ID) {
-               dev_warn(&client->dev, "Unsupported sensor id %x\n", id);
+               dev_warn(&client->dev, "Unsupported sensor id %x\n", (u16)id);
                return -ENODEV;
        }
-       dev_dbg(&client->dev, "detected sensor id = 0x%04x\n", id);
+       dev_dbg(&client->dev, "detected sensor id = 0x%04x\n", (u16)id);
        sensor->id = id;
 
        ret = vgxy61_wait_state(sensor, VGXY61_SYSTEM_FSM_SW_STBY,
@@ -1705,11 +1615,11 @@ static int vgxy61_detect(struct vgxy61_dev *sensor)
        if (ret)
                return ret;
 
-       st = vgxy61_read_reg(sensor, VGXY61_REG_NVM);
-       if (st < 0)
-               return st;
+       ret = cci_read(sensor->regmap, VGXY61_REG_NVM, &st, NULL);
+       if (ret < 0)
+               return ret;
        if (st != VGXY61_NVM_OK)
-               dev_warn(&client->dev, "Bad nvm state got %d\n", st);
+               dev_warn(&client->dev, "Bad nvm state got %u\n", (u8)st);
 
        ret = vgxy61_detect_cut_version(sensor);
        if (ret)
@@ -1832,6 +1742,12 @@ static int vgxy61_probe(struct i2c_client *client)
        sensor->analog_gain = 0;
        sensor->digital_gain = 256;
 
+       sensor->regmap = devm_cci_regmap_init_i2c(client, 16);
+       if (IS_ERR(sensor->regmap)) {
+               ret = PTR_ERR(sensor->regmap);
+               return dev_err_probe(dev, ret, "Failed to init regmap\n");
+       }
+
        handle = fwnode_graph_get_endpoint_by_id(dev_fwnode(dev), 0, 0, 0);
        if (!handle) {
                dev_err(dev, "handle node not found\n");
index 558152575d1022df74deed355eeee5c6f80b3cd1..3192a334aaab5fedfc27bf503646cec142e80029 100644 (file)
@@ -1895,7 +1895,7 @@ static int tc358743_probe_of(struct tc358743_state *state)
                return dev_err_probe(dev, PTR_ERR(refclk),
                                     "failed to get refclk\n");
 
-       ep = of_graph_get_next_endpoint(dev->of_node, NULL);
+       ep = of_graph_get_endpoint_by_regs(dev->of_node, 0, -1);
        if (!ep) {
                dev_err(dev, "missing endpoint node\n");
                return -EINVAL;
index 106de4271d2ef94533e5797ac30bd4dd6aa1df93..d676adc4401bb5094be17bec25e54dfc7113d251 100644 (file)
@@ -843,14 +843,14 @@ static unsigned long tc358746_find_pll_settings(struct tc358746 *tc358746,
                if (fin < 4 * HZ_PER_MHZ || fin > 40 * HZ_PER_MHZ)
                        continue;
 
-               tmp = fout * p * postdiv;
+               tmp = fout * postdiv;
                do_div(tmp, fin);
                mul = tmp;
                if (mul > 511)
                        continue;
 
                tmp = mul * fin;
-               do_div(tmp, p * postdiv);
+               do_div(tmp, postdiv);
 
                delta = abs(fout - tmp);
                if (delta < min_delta) {
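Judging from the surrounding loop (fin is range-checked on every
iteration), fin here is already the post-prediv frequency, so folding p
into the products a second time skewed both the multiplier and the error
estimate. A standalone sketch of the corrected step with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical: 24 MHz refclk, prediv p = 3, postdiv = 4 */
            unsigned long long fin = 24000000ULL / 3;  /* post-prediv */
            unsigned long long fout = 126000000ULL, postdiv = 4;

            unsigned long long mul = fout * postdiv / fin;  /* 63 */
            unsigned long long real = mul * fin / postdiv;  /* 126000000 */

            printf("mul=%llu real=%llu\n", mul, real);
            return 0;
    }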
index 1ea703a9909f540382e857522c1670df3f2950b9..8e4a0718c4b6bded84a18067309486759dd37cfd 100644 (file)
@@ -2310,7 +2310,7 @@ static int tda1997x_parse_dt(struct tda1997x_state *state)
        pdata->vidout_sel_de = DE_FREF_SEL_DE_VHREF;
 
        np = state->client->dev.of_node;
-       ep = of_graph_get_next_endpoint(np, NULL);
+       ep = of_graph_get_endpoint_by_regs(np, 0, -1);
        if (!ep)
                return -EINVAL;
 
index 5a561e5bf65981effcb73ad5a98af1ae4a587c86..f9c9c80c33ac14b5acf69f07cffd867819eb5476 100644 (file)
@@ -987,7 +987,7 @@ tvp514x_get_pdata(struct i2c_client *client)
        if (!IS_ENABLED(CONFIG_OF) || !client->dev.of_node)
                return client->dev.platform_data;
 
-       endpoint = of_graph_get_next_endpoint(client->dev.of_node, NULL);
+       endpoint = of_graph_get_endpoint_by_regs(client->dev.of_node, 0, -1);
        if (!endpoint)
                return NULL;
 
index 9fc586cfdcd874f5f165db37af51d7b197a81d85..64b91aa3c82a89e3a93b31c3c717951133ccfd08 100644 (file)
@@ -1817,7 +1817,7 @@ static struct regmap_config tvp5150_config = {
        .val_bits = 8,
        .max_register = 0xff,
 
-       .cache_type = REGCACHE_RBTREE,
+       .cache_type = REGCACHE_MAPLE,
 
        .rd_table = &tvp5150_readable_table,
        .volatile_reg = tvp5150_volatile_reg,
index 30831b4b56d6b18b8c344d2d1ff6005f40947679..6a04ffae534325d83052775c2619227fbd530678 100644 (file)
@@ -893,7 +893,7 @@ tvp7002_get_pdata(struct i2c_client *client)
        if (!IS_ENABLED(CONFIG_OF) || !client->dev.of_node)
                return client->dev.platform_data;
 
-       endpoint = of_graph_get_next_endpoint(client->dev.of_node, NULL);
+       endpoint = of_graph_get_endpoint_by_regs(client->dev.of_node, 0, -1);
        if (!endpoint)
                return NULL;
 
index 680fbb3a9340224b6315a5b401c7489002a837ba..7f67825c8757ff6add88a5d1e5c267b658e97068 100644 (file)
@@ -63,7 +63,7 @@ static void media_devnode_release(struct device *cd)
        pr_debug("%s: Media Devnode Deallocated\n", __func__);
 }
 
-static struct bus_type media_bus_type = {
+static const struct bus_type media_bus_type = {
        .name = MEDIA_NAME,
 };
 
@@ -190,7 +190,6 @@ static int media_release(struct inode *inode, struct file *filp)
           return value is ignored. */
        put_device(&devnode->dev);
 
-       pr_debug("%s: Media Release\n", __func__);
        return 0;
 }
 
index 543a392f863571a89a8d41de813c9b4454244360..0e28b9a7936ef40287e5a08377d22e5275396642 100644 (file)
@@ -535,14 +535,15 @@ static int media_pipeline_walk_push(struct media_pipeline_walk *walk,
 
 /*
  * Move the top entry link cursor to the next link. If all links of the entry
- * have been visited, pop the entry itself.
+ * have been visited, pop the entry itself. Return true if the entry has been
+ * popped.
  */
-static void media_pipeline_walk_pop(struct media_pipeline_walk *walk)
+static bool media_pipeline_walk_pop(struct media_pipeline_walk *walk)
 {
        struct media_pipeline_walk_entry *entry;
 
        if (WARN_ON(walk->stack.top < 0))
-               return;
+               return false;
 
        entry = media_pipeline_walk_top(walk);
 
@@ -552,7 +553,7 @@ static void media_pipeline_walk_pop(struct media_pipeline_walk *walk)
                        walk->stack.top);
 
                walk->stack.top--;
-               return;
+               return true;
        }
 
        entry->links = entry->links->next;
@@ -560,6 +561,8 @@ static void media_pipeline_walk_pop(struct media_pipeline_walk *walk)
        dev_dbg(walk->mdev->dev,
                "media pipeline: moved entry %u to next link\n",
                walk->stack.top);
+
+       return false;
 }
 
 /* Free all memory allocated while walking the pipeline. */
@@ -605,30 +608,24 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe,
                                            struct media_pipeline_walk *walk)
 {
        struct media_pipeline_walk_entry *entry = media_pipeline_walk_top(walk);
-       struct media_pad *pad;
+       struct media_pad *origin;
        struct media_link *link;
        struct media_pad *local;
        struct media_pad *remote;
+       bool last_link;
        int ret;
 
-       pad = entry->pad;
+       origin = entry->pad;
        link = list_entry(entry->links, typeof(*link), list);
-       media_pipeline_walk_pop(walk);
+       last_link = media_pipeline_walk_pop(walk);
 
        dev_dbg(walk->mdev->dev,
                "media pipeline: exploring link '%s':%u -> '%s':%u\n",
                link->source->entity->name, link->source->index,
                link->sink->entity->name, link->sink->index);
 
-       /* Skip links that are not enabled. */
-       if (!(link->flags & MEDIA_LNK_FL_ENABLED)) {
-               dev_dbg(walk->mdev->dev,
-                       "media pipeline: skipping link (disabled)\n");
-               return 0;
-       }
-
        /* Get the local pad and remote pad. */
-       if (link->source->entity == pad->entity) {
+       if (link->source->entity == origin->entity) {
                local = link->source;
                remote = link->sink;
        } else {
@@ -640,25 +637,64 @@ static int media_pipeline_explore_next_link(struct media_pipeline *pipe,
         * Skip links that originate from a pad other than the incoming pad,
         * unless that pad is connected internally in the entity to the
         * incoming pad.
         */
-       if (pad != local &&
-           !media_entity_has_pad_interdep(pad->entity, pad->index, local->index)) {
+       if (origin != local &&
+           !media_entity_has_pad_interdep(origin->entity, origin->index,
+                                          local->index)) {
                dev_dbg(walk->mdev->dev,
                        "media pipeline: skipping link (no route)\n");
-               return 0;
+               goto done;
        }
 
        /*
-        * Add the local and remote pads of the link to the pipeline and push
-        * them to the stack, if they're not already present.
+        * Add the local pad of the link to the pipeline and push it to the
+        * stack, if not already present.
         */
        ret = media_pipeline_add_pad(pipe, walk, local);
        if (ret)
                return ret;
 
+       /* Similarly, add the remote pad, but only if the link is enabled. */
+       if (!(link->flags & MEDIA_LNK_FL_ENABLED)) {
+               dev_dbg(walk->mdev->dev,
+                       "media pipeline: skipping link (disabled)\n");
+               goto done;
+       }
+
        ret = media_pipeline_add_pad(pipe, walk, remote);
        if (ret)
                return ret;
 
+done:
+       /*
+        * If we're done iterating over links, iterate over pads of the entity.
+        * This is necessary to discover pads that are not connected with any
+        * link. Those are dead ends from a pipeline exploration point of view,
+        * but are still part of the pipeline and need to be added to enable
+        * proper validation.
+        */
+       if (!last_link)
+               return 0;
+
+       dev_dbg(walk->mdev->dev,
+               "media pipeline: adding unconnected pads of '%s'\n",
+               local->entity->name);
+
+       media_entity_for_each_pad(origin->entity, local) {
+               /*
+                * Skip the origin pad (already handled), pads that have links
+                * (already discovered through iterating over links) and pads
+                * not internally connected.
+                */
+               if (origin == local || local->num_links ||
+                   !media_entity_has_pad_interdep(origin->entity, origin->index,
+                                                  local->index))
+                       continue;
+
+               ret = media_pipeline_add_pad(pipe, walk, local);
+               if (ret)
+                       return ret;
+       }
+
        return 0;
 }
 
@@ -770,7 +806,6 @@ __must_check int __media_pipeline_start(struct media_pad *pad,
                struct media_pad *pad = ppad->pad;
                struct media_entity *entity = pad->entity;
                bool has_enabled_link = false;
-               bool has_link = false;
                struct media_link *link;
 
                dev_dbg(mdev->dev, "Validating pad '%s':%u\n", pad->entity->name,
@@ -800,7 +835,6 @@ __must_check int __media_pipeline_start(struct media_pad *pad,
                        /* Record if the pad has links and enabled links. */
                        if (link->flags & MEDIA_LNK_FL_ENABLED)
                                has_enabled_link = true;
-                       has_link = true;
 
                        /*
                         * Validate the link if it's enabled and has the
@@ -838,7 +872,7 @@ __must_check int __media_pipeline_start(struct media_pad *pad,
                 * 3. If the pad has the MEDIA_PAD_FL_MUST_CONNECT flag set,
-                * ensure that it has either no link or an enabled link.
+                * ensure that it has an enabled link.
                 */
-               if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) && has_link &&
+               if ((pad->flags & MEDIA_PAD_FL_MUST_CONNECT) &&
                    !has_enabled_link) {
                        dev_dbg(mdev->dev,
                                "Pad '%s':%u must be connected by an enabled link\n",
@@ -1038,6 +1072,9 @@ static void __media_entity_remove_link(struct media_entity *entity,
 
        /* Remove the reverse links for a data link. */
        if ((link->flags & MEDIA_LNK_FL_LINK_TYPE) == MEDIA_LNK_FL_DATA_LINK) {
+               link->source->num_links--;
+               link->sink->num_links--;
+
                if (link->source->entity == entity)
                        remote = link->sink->entity;
                else
@@ -1092,6 +1129,11 @@ media_create_pad_link(struct media_entity *source, u16 source_pad,
        struct media_link *link;
        struct media_link *backlink;
 
+       if (flags & MEDIA_LNK_FL_LINK_TYPE)
+               return -EINVAL;
+
+       flags |= MEDIA_LNK_FL_DATA_LINK;
+
        if (WARN_ON(!source || !sink) ||
            WARN_ON(source_pad >= source->num_pads) ||
            WARN_ON(sink_pad >= sink->num_pads))
@@ -1107,7 +1149,7 @@ media_create_pad_link(struct media_entity *source, u16 source_pad,
 
        link->source = &source->pads[source_pad];
        link->sink = &sink->pads[sink_pad];
-       link->flags = flags & ~MEDIA_LNK_FL_INTERFACE_LINK;
+       link->flags = flags;
 
        /* Initialize graph object embedded at the new link */
        media_gobj_create(source->graph_obj.mdev, MEDIA_GRAPH_LINK,
@@ -1138,6 +1180,9 @@ media_create_pad_link(struct media_entity *source, u16 source_pad,
        sink->num_links++;
        source->num_links++;
 
+       link->source->num_links++;
+       link->sink->num_links++;
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(media_create_pad_link);
index a2b18e2bed1b0bc00669a75936a0756953fb9a4d..6b7fea50328c2f8b0bb6b6e4b64238c209d5e8cd 100644 (file)
@@ -55,7 +55,7 @@ static void bttv_sub_remove(struct device *dev)
                sub->remove(sdev);
 }
 
-struct bus_type bttv_sub_bus_type = {
+const struct bus_type bttv_sub_bus_type = {
        .name   = "bttv-sub",
        .match  = &bttv_sub_bus_match,
        .probe  = bttv_sub_probe,
index 0368a583cf077f3ff8fff441c9f2e056a197dad2..a534e63b9a37bc5fdb27cc5b558ae885c7186715 100644 (file)
@@ -234,7 +234,7 @@ int bttv_s_fmt_vbi_cap(struct file *file, void *fh, struct v4l2_format *f);
 /* ---------------------------------------------------------- */
 /* bttv-gpio.c */
 
-extern struct bus_type bttv_sub_bus_type;
+extern const struct bus_type bttv_sub_bus_type;
 int bttv_sub_add_device(struct bttv_core *core, char *name);
 int bttv_sub_del_devices(struct bttv_core *core);
 
index 42fdcf992e48b1044a9c0ff9683e81ad111786dd..7d4a409c433e2a8c909323542e6f917bd27cfb64 100644 (file)
@@ -1354,6 +1354,10 @@ int cx23885_video_register(struct cx23885_dev *dev)
        /* register Video device */
        dev->video_dev = cx23885_vdev_init(dev, dev->pci,
                &cx23885_video_template, "video");
+       if (!dev->video_dev) {
+               err = -ENOMEM;
+               goto fail_unreg;
+       }
        dev->video_dev->queue = &dev->vb2_vidq;
        dev->video_dev->device_caps = V4L2_CAP_READWRITE | V4L2_CAP_STREAMING |
                                      V4L2_CAP_AUDIO | V4L2_CAP_VIDEO_CAPTURE;
@@ -1382,6 +1386,10 @@ int cx23885_video_register(struct cx23885_dev *dev)
        /* register VBI device */
        dev->vbi_dev = cx23885_vdev_init(dev, dev->pci,
                &cx23885_vbi_template, "vbi");
+       if (!dev->vbi_dev) {
+               err = -ENOMEM;
+               goto fail_unreg;
+       }
        dev->vbi_dev->queue = &dev->vb2_vbiq;
        dev->vbi_dev->device_caps = V4L2_CAP_READWRITE | V4L2_CAP_STREAMING |
                                    V4L2_CAP_AUDIO | V4L2_CAP_VBI_CAPTURE;
index c9ce79cb55668102c938958d84d15046aeb21a13..ce1835d9691ec724d8e33a03220010a9321e578b 100644 (file)
  * @height:            frame height
  * @input:             current input
  * @sequence:          frame counter
- * @stats:             statistics structure
  * @regs:              local copy of mmio base register
  * @csr2:              local copy of csr2 register
  * @config:            local copy of config register
index f980e3125a7b9d0dbe91d07db7254576bfdfd2fc..e994db4f4d91443e3a3717802143774cb33fa33a 100644 (file)
@@ -2,6 +2,7 @@
 /* Author: Dan Scally <djrscally@gmail.com> */
 
 #include <linux/acpi.h>
+#include <linux/cleanup.h>
 #include <linux/device.h>
 #include <linux/i2c.h>
 #include <linux/mei_cl_bus.h>
@@ -60,6 +61,8 @@ static const struct ipu_sensor_config ipu_supported_sensors[] = {
        IPU_SENSOR_CONFIG("OVTIDB10", 1, 560000000),
        /* GalaxyCore GC0310 */
        IPU_SENSOR_CONFIG("INT0310", 0),
+       /* Omnivision ov01a10 */
+       IPU_SENSOR_CONFIG("OVTI01A0", 1, 400000000),
 };
 
 static const struct ipu_property_names prop_names = {
@@ -747,6 +750,24 @@ static int ipu_bridge_ivsc_is_ready(void)
        return ready;
 }
 
+static int ipu_bridge_check_fwnode_graph(struct fwnode_handle *fwnode)
+{
+       struct fwnode_handle *endpoint;
+
+       if (IS_ERR_OR_NULL(fwnode))
+               return -EINVAL;
+
+       endpoint = fwnode_graph_get_next_endpoint(fwnode, NULL);
+       if (endpoint) {
+               fwnode_handle_put(endpoint);
+               return 0;
+       }
+
+       return ipu_bridge_check_fwnode_graph(fwnode->secondary);
+}
+
+static DEFINE_MUTEX(ipu_bridge_mutex);
+
 int ipu_bridge_init(struct device *dev,
                    ipu_parse_sensor_fwnode_t parse_sensor_fwnode)
 {
@@ -755,6 +776,11 @@ int ipu_bridge_init(struct device *dev,
        unsigned int i;
        int ret;
 
+       guard(mutex)(&ipu_bridge_mutex);
+
+       if (!ipu_bridge_check_fwnode_graph(dev_fwnode(dev)))
+               return 0;
+
        if (!ipu_bridge_ivsc_is_ready())
                return -EPROBE_DEFER;
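
guard(mutex) comes from the scope-based cleanup helpers in <linux/cleanup.h> included above: it acquires the mutex and releases it automatically when the enclosing scope exits, so every return path here, including the early returns, drops ipu_bridge_mutex without an explicit unlock. A minimal sketch with hypothetical names:

	static DEFINE_MUTEX(example_mutex);

	static int example_init(struct device *dev)
	{
		guard(mutex)(&example_mutex);

		if (!example_ready())		/* hypothetical check */
			return -EPROBE_DEFER;	/* mutex dropped here */

		return 0;			/* and here */
	}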
 
index ed08bf4178f08fe19954ef461e49cf49d4fadcc2..c42adc5a408db5470d45ca02900e3e8f4d34d297 100644 (file)
@@ -28,6 +28,7 @@
 #include <media/v4l2-device.h>
 #include <media/v4l2-event.h>
 #include <media/v4l2-fwnode.h>
+#include <media/v4l2-mc.h>
 #include <media/v4l2-ioctl.h>
 #include <media/videobuf2-dma-sg.h>
 
@@ -1407,7 +1408,6 @@ static void cio2_notifier_unbind(struct v4l2_async_notifier *notifier,
 static int cio2_notifier_complete(struct v4l2_async_notifier *notifier)
 {
        struct cio2_device *cio2 = to_cio2_device(notifier);
-       struct device *dev = &cio2->pci_dev->dev;
        struct sensor_async_subdev *s_asd;
        struct v4l2_async_connection *asd;
        struct cio2_queue *q;
@@ -1417,23 +1417,10 @@ static int cio2_notifier_complete(struct v4l2_async_notifier *notifier)
                s_asd = to_sensor_asd(asd);
                q = &cio2->queue[s_asd->csi2.port];
 
-               ret = media_entity_get_fwnode_pad(&q->sensor->entity,
-                                                 s_asd->asd.match.fwnode,
-                                                 MEDIA_PAD_FL_SOURCE);
-               if (ret < 0) {
-                       dev_err(dev, "no pad for endpoint %pfw (%d)\n",
-                               s_asd->asd.match.fwnode, ret);
-                       return ret;
-               }
-
-               ret = media_create_pad_link(&q->sensor->entity, ret,
-                                           &q->subdev.entity, CIO2_PAD_SINK,
-                                           0);
-               if (ret) {
-                       dev_err(dev, "failed to create link for %s (endpoint %pfw, error %d)\n",
-                               q->sensor->name, s_asd->asd.match.fwnode, ret);
+               ret = v4l2_create_fwnode_links_to_pad(asd->sd,
+                                                     &q->subdev_pads[CIO2_PAD_SINK], 0);
+               if (ret)
                        return ret;
-               }
        }
 
        return v4l2_device_register_subdev_nodes(&cio2->v4l2_dev);
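
v4l2_create_fwnode_links_to_pad() (from <media/v4l2-mc.h>, newly included above) walks the bound subdev's fwnode endpoints and creates the media links to the given sink pad itself, which is what allows the open-coded media_entity_get_fwnode_pad() plus media_create_pad_link() sequence to be dropped. Hedged usage sketch:

	/* Link matching source endpoints of the bound subdev to the
	 * bridge's sink pad; 0 means no extra link flags. */
	ret = v4l2_create_fwnode_links_to_pad(bound_sd, &bridge_sink_pad, 0);
	if (ret)
		return ret;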
@@ -1572,6 +1559,7 @@ static int cio2_queue_init(struct cio2_device *cio2, struct cio2_queue *q)
        v4l2_subdev_init(subdev, &cio2_subdev_ops);
        subdev->flags = V4L2_SUBDEV_FL_HAS_DEVNODE | V4L2_SUBDEV_FL_HAS_EVENTS;
        subdev->owner = THIS_MODULE;
+       subdev->dev = dev;
        snprintf(subdev->name, sizeof(subdev->name),
                 CIO2_ENTITY_NAME " %td", q - cio2->queue);
        subdev->entity.function = MEDIA_ENT_F_VID_IF_BRIDGE;
@@ -1679,29 +1667,12 @@ static void cio2_queues_exit(struct cio2_device *cio2)
                cio2_queue_exit(cio2, &cio2->queue[i]);
 }
 
-static int cio2_check_fwnode_graph(struct fwnode_handle *fwnode)
-{
-       struct fwnode_handle *endpoint;
-
-       if (IS_ERR_OR_NULL(fwnode))
-               return -EINVAL;
-
-       endpoint = fwnode_graph_get_next_endpoint(fwnode, NULL);
-       if (endpoint) {
-               fwnode_handle_put(endpoint);
-               return 0;
-       }
-
-       return cio2_check_fwnode_graph(fwnode->secondary);
-}
-
 /**************** PCI interface ****************/
 
 static int cio2_pci_probe(struct pci_dev *pci_dev,
                          const struct pci_device_id *id)
 {
        struct device *dev = &pci_dev->dev;
-       struct fwnode_handle *fwnode = dev_fwnode(dev);
        struct cio2_device *cio2;
        int r;
 
@@ -1710,17 +1681,9 @@ static int cio2_pci_probe(struct pci_dev *pci_dev,
         * if the device has no endpoints then we can try to build those as
         * software_nodes parsed from SSDB.
         */
-       r = cio2_check_fwnode_graph(fwnode);
-       if (r) {
-               if (fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) {
-                       dev_err(dev, "fwnode graph has no endpoints connected\n");
-                       return -EINVAL;
-               }
-
-               r = ipu_bridge_init(dev, ipu_bridge_parse_ssdb);
-               if (r)
-                       return r;
-       }
+       r = ipu_bridge_init(dev, ipu_bridge_parse_ssdb);
+       if (r)
+               return r;
 
        cio2 = devm_kzalloc(dev, sizeof(*cio2), GFP_KERNEL);
        if (!cio2)
index 15b905f66ab7298e5d0bd8ae2f0536f0eda2b112..55e0c60c420cd575c71ff909d7f82a2bf7550c0b 100644 (file)
@@ -71,8 +71,8 @@ enum ivsc_privacy_status {
 };
 
 enum csi_pads {
-       CSI_PAD_SOURCE,
        CSI_PAD_SINK,
+       CSI_PAD_SOURCE,
        CSI_NUM_PADS
 };
 
@@ -128,7 +128,6 @@ struct mei_csi {
        int streaming;
 
        struct media_pad pads[CSI_NUM_PADS];
-       struct v4l2_mbus_framefmt format_mbus[CSI_NUM_PADS];
 
        /* number of data lanes used on the CSI-2 link */
        u32 nr_of_lanes;
@@ -329,58 +328,17 @@ err:
        return ret;
 }
 
-static struct v4l2_mbus_framefmt *
-mei_csi_get_pad_format(struct v4l2_subdev *sd,
-                      struct v4l2_subdev_state *sd_state,
-                      unsigned int pad, u32 which)
-{
-       struct mei_csi *csi = sd_to_csi(sd);
-
-       switch (which) {
-       case V4L2_SUBDEV_FORMAT_TRY:
-               return v4l2_subdev_state_get_format(sd_state, pad);
-       case V4L2_SUBDEV_FORMAT_ACTIVE:
-               return &csi->format_mbus[pad];
-       default:
-               return NULL;
-       }
-}
-
 static int mei_csi_init_state(struct v4l2_subdev *sd,
                              struct v4l2_subdev_state *sd_state)
 {
        struct v4l2_mbus_framefmt *mbusformat;
-       struct mei_csi *csi = sd_to_csi(sd);
        unsigned int i;
 
-       mutex_lock(&csi->lock);
-
        for (i = 0; i < sd->entity.num_pads; i++) {
                mbusformat = v4l2_subdev_state_get_format(sd_state, i);
                *mbusformat = mei_csi_format_mbus_default;
        }
 
-       mutex_unlock(&csi->lock);
-
-       return 0;
-}
-
-static int mei_csi_get_fmt(struct v4l2_subdev *sd,
-                          struct v4l2_subdev_state *sd_state,
-                          struct v4l2_subdev_format *format)
-{
-       struct v4l2_mbus_framefmt *mbusformat;
-       struct mei_csi *csi = sd_to_csi(sd);
-
-       mutex_lock(&csi->lock);
-
-       mbusformat = mei_csi_get_pad_format(sd, sd_state, format->pad,
-                                           format->which);
-       if (mbusformat)
-               format->format = *mbusformat;
-
-       mutex_unlock(&csi->lock);
-
        return 0;
 }
 
@@ -388,20 +346,17 @@ static int mei_csi_set_fmt(struct v4l2_subdev *sd,
                           struct v4l2_subdev_state *sd_state,
                           struct v4l2_subdev_format *format)
 {
-       struct v4l2_mbus_framefmt *source_mbusformat;
-       struct v4l2_mbus_framefmt *mbusformat;
-       struct mei_csi *csi = sd_to_csi(sd);
-       struct media_pad *pad;
+       struct v4l2_mbus_framefmt *source_fmt;
+       struct v4l2_mbus_framefmt *sink_fmt;
 
-       mbusformat = mei_csi_get_pad_format(sd, sd_state, format->pad,
-                                           format->which);
-       if (!mbusformat)
-               return -EINVAL;
+       sink_fmt = v4l2_subdev_state_get_format(sd_state, CSI_PAD_SINK);
+       source_fmt = v4l2_subdev_state_get_format(sd_state, CSI_PAD_SOURCE);
 
-       source_mbusformat = mei_csi_get_pad_format(sd, sd_state, CSI_PAD_SOURCE,
-                                                  format->which);
-       if (!source_mbusformat)
-               return -EINVAL;
+       if (format->pad) {
+               *source_fmt = *sink_fmt;
+
+               return 0;
+       }
 
        v4l_bound_align_image(&format->format.width, 1, 65536, 0,
                              &format->format.height, 1, 65536, 0, 0);
@@ -504,18 +459,8 @@ static int mei_csi_set_fmt(struct v4l2_subdev *sd,
        if (format->format.field == V4L2_FIELD_ANY)
                format->format.field = V4L2_FIELD_NONE;
 
-       mutex_lock(&csi->lock);
-
-       pad = &csi->pads[format->pad];
-       if (pad->flags & MEDIA_PAD_FL_SOURCE)
-               format->format = csi->format_mbus[CSI_PAD_SINK];
-
-       *mbusformat = format->format;
-
-       if (pad->flags & MEDIA_PAD_FL_SINK)
-               *source_mbusformat = format->format;
-
-       mutex_unlock(&csi->lock);
+       *sink_fmt = format->format;
+       *source_fmt = *sink_fmt;
 
        return 0;
 }
@@ -554,7 +499,7 @@ static const struct v4l2_subdev_video_ops mei_csi_video_ops = {
 };
 
 static const struct v4l2_subdev_pad_ops mei_csi_pad_ops = {
-       .get_fmt = mei_csi_get_fmt,
+       .get_fmt = v4l2_subdev_get_fmt,
        .set_fmt = mei_csi_set_fmt,
 };
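
The driver now keeps formats in the subdev active state (note the csi->subdev.state_lock assignment further down), which is what lets the generic v4l2_subdev_get_fmt() helper replace the hand-rolled get_fmt and the per-pad format array. A sketch of the pattern, with hypothetical names:

	static int example_set_fmt(struct v4l2_subdev *sd,
				   struct v4l2_subdev_state *state,
				   struct v4l2_subdev_format *fmt)
	{
		struct v4l2_mbus_framefmt *f =
			v4l2_subdev_state_get_format(state, fmt->pad);

		*f = fmt->format;		/* validation elided */
		return 0;
	}

	static const struct v4l2_subdev_pad_ops example_pad_ops = {
		.get_fmt = v4l2_subdev_get_fmt,	/* generic helper */
		.set_fmt = example_set_fmt,
	};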
 
@@ -587,7 +532,7 @@ static int mei_csi_notify_bound(struct v4l2_async_notifier *notifier,
        csi->remote_pad = pad;
 
        return media_create_pad_link(&subdev->entity, pad,
-                                    &csi->subdev.entity, 1,
+                                    &csi->subdev.entity, CSI_PAD_SINK,
                                     MEDIA_LNK_FL_ENABLED |
                                     MEDIA_LNK_FL_IMMUTABLE);
 }
@@ -749,6 +694,7 @@ static int mei_csi_probe(struct mei_cl_device *cldev,
                goto err_disable;
 
        csi->subdev.dev = &cldev->dev;
+       csi->subdev.state_lock = &csi->lock;
        v4l2_subdev_init(&csi->subdev, &mei_csi_subdev_ops);
        csi->subdev.internal_ops = &mei_csi_internal_ops;
        v4l2_set_subdevdata(&csi->subdev, csi);
@@ -764,9 +710,6 @@ static int mei_csi_probe(struct mei_cl_device *cldev,
        if (ret)
                goto err_ctrl_handler;
 
-       csi->format_mbus[CSI_PAD_SOURCE] = mei_csi_format_mbus_default;
-       csi->format_mbus[CSI_PAD_SINK] = mei_csi_format_mbus_default;
-
        csi->pads[CSI_PAD_SOURCE].flags = MEDIA_PAD_FL_SOURCE;
        csi->pads[CSI_PAD_SINK].flags = MEDIA_PAD_FL_SINK;
        ret = media_entity_pads_init(&csi->subdev.entity, CSI_NUM_PADS,
index e4cf9d63e926df4d703a428d735281d4280ab520..364ce9e5701827de4d7b92cac104fa9d0fcb8869 100644 (file)
@@ -757,7 +757,7 @@ static const struct video_device video_dev_template = {
 /**
  * vip_irq - interrupt routine
  * @irq: Number of interrupt (not used; the correct number is assumed)
- * @vip: local data structure containing all information
+ * @data: local data structure containing all information
  *
  * check for both frame interrupts set (top and bottom).
  * check FIFO overflow, but limit the number of log messages after open.
@@ -767,8 +767,9 @@ static const struct video_device video_dev_template = {
  *
  * IRQ_HANDLED, interrupt done.
  */
-static irqreturn_t vip_irq(int irq, struct sta2x11_vip *vip)
+static irqreturn_t vip_irq(int irq, void *data)
 {
+       struct sta2x11_vip *vip = data;
        unsigned int status;
 
        status = reg_read(vip, DVP_ITS);
@@ -1053,9 +1054,7 @@ static int sta2x11_vip_init_one(struct pci_dev *pdev,
 
        spin_lock_init(&vip->slock);
 
-       ret = request_irq(pdev->irq,
-                         (irq_handler_t) vip_irq,
-                         IRQF_SHARED, KBUILD_MODNAME, vip);
+       ret = request_irq(pdev->irq, vip_irq, IRQF_SHARED, KBUILD_MODNAME, vip);
        if (ret) {
                dev_err(&pdev->dev, "request_irq failed\n");
                ret = -ENODEV;
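
The removed (irq_handler_t) cast had been papering over a prototype mismatch; with the handler taking void * and recovering its context from the cookie, the call matches request_irq() exactly (function-pointer casts like the old one can also trip kernel CFI checks). The canonical shape, as a sketch:

	static irqreturn_t example_irq(int irq, void *data)
	{
		struct example_dev *dev = data;	/* cookie from request_irq() */

		/* ... acknowledge and handle the interrupt ... */
		return IRQ_HANDLED;
	}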
index 230b104a7cdf07172e41000cca4fff777a510a96..a47c5850ef87582526b9798d1d2292d8716e05fe 100644 (file)
@@ -1463,7 +1463,8 @@ static int budget_av_attach(struct saa7146_dev *dev, struct saa7146_pci_extensio
                budget_av->has_saa7113 = 1;
                err = saa7146_vv_init(dev, &vv_data);
                if (err != 0) {
-                       /* fixme: proper cleanup here */
+                       ttpci_budget_deinit(&budget_av->budget);
+                       kfree(budget_av);
                        ERR("cannot init vv subsystem\n");
                        return err;
                }
@@ -1472,9 +1473,10 @@ static int budget_av_attach(struct saa7146_dev *dev, struct saa7146_pci_extensio
                vv_data.vid_ops.vidioc_s_input = vidioc_s_input;
 
                if ((err = saa7146_register_device(&budget_av->vd, dev, "knc1", VFL_TYPE_VIDEO))) {
-                       /* fixme: proper cleanup here */
-                       ERR("cannot register capture v4l2 device\n");
                        saa7146_vv_release(dev);
+                       ttpci_budget_deinit(&budget_av->budget);
+                       kfree(budget_av);
+                       ERR("cannot register capture v4l2 device\n");
                        return err;
                }
 
index 133d77d1ea0c30ac389fa402826b4c6470ff8e5b..a57f9f4f3b87658caebda407053dcda2e6949fff 100644 (file)
@@ -1595,9 +1595,11 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type)
        if (V4L2_TYPE_IS_OUTPUT(type)) {
                vdec_update_state(inst, VPU_CODEC_STATE_SEEK, 0);
                vdec->drain = 0;
+               vdec_abort(inst);
        } else {
                if (inst->state != VPU_CODEC_STATE_DYAMIC_RESOLUTION_CHANGE) {
-                       vdec_abort(inst);
+                       if (vb2_is_streaming(v4l2_m2m_get_src_vq(inst->fh.m2m_ctx)))
+                               vdec_abort(inst);
                        vdec->eos_received = 0;
                }
                vdec_clear_slots(inst);
index f8450a8ccda625050af41b84e905975d3295e6ff..c1108df72dd516fad2e0de5567afc08e0aabc64d 100644 (file)
@@ -834,7 +834,7 @@ static int atmel_isi_parse_dt(struct atmel_isi *isi,
        isi->pdata.full_mode = 1;
        isi->pdata.frate = ISI_CFG1_FRATE_CAPTURE_ALL;
 
-       np = of_graph_get_next_endpoint(np, NULL);
+       np = of_graph_get_endpoint_by_regs(np, 0, -1);
        if (!np) {
                dev_err(&pdev->dev, "Could not find the endpoint\n");
                return -EINVAL;
@@ -1158,7 +1158,7 @@ static int isi_graph_init(struct atmel_isi *isi)
        struct device_node *ep;
        int ret;
 
-       ep = of_graph_get_next_endpoint(isi->dev->of_node, NULL);
+       ep = of_graph_get_endpoint_by_regs(isi->dev->of_node, 0, -1);
        if (!ep)
                return -EINVAL;
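
of_graph_get_endpoint_by_regs(np, port, reg) selects an endpoint by its port/endpoint reg values, with -1 as a wildcard, so the lookup no longer depends on which endpoint happens to be listed first in the device tree. Hedged sketch:

	struct device_node *ep;

	/* First endpoint under port@0, any endpoint number. */
	ep = of_graph_get_endpoint_by_regs(dev->of_node, 0, -1);
	if (!ep)
		return -EINVAL;
	/* ... parse the endpoint ... */
	of_node_put(ep);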
 
index fead5426830e822d25867c636be6bda7a4ac78aa..2d7b0508cc9afa77ebdaaeb45784e862e3bcfe16 100644 (file)
@@ -114,10 +114,14 @@ static const struct csi2rx_fmt formats[] = {
        { .code = MEDIA_BUS_FMT_SGBRG8_1X8, .bpp = 8, },
        { .code = MEDIA_BUS_FMT_SGRBG8_1X8, .bpp = 8, },
        { .code = MEDIA_BUS_FMT_SRGGB8_1X8, .bpp = 8, },
+       { .code = MEDIA_BUS_FMT_Y8_1X8,     .bpp = 8, },
        { .code = MEDIA_BUS_FMT_SBGGR10_1X10, .bpp = 10, },
        { .code = MEDIA_BUS_FMT_SGBRG10_1X10, .bpp = 10, },
        { .code = MEDIA_BUS_FMT_SGRBG10_1X10, .bpp = 10, },
        { .code = MEDIA_BUS_FMT_SRGGB10_1X10, .bpp = 10, },
+       { .code = MEDIA_BUS_FMT_RGB565_1X16,  .bpp = 16, },
+       { .code = MEDIA_BUS_FMT_RGB888_1X24,  .bpp = 24, },
+       { .code = MEDIA_BUS_FMT_BGR888_1X24,  .bpp = 24, },
 };
 
 static const struct csi2rx_fmt *csi2rx_get_fmt_by_code(u32 code)
@@ -389,6 +393,18 @@ out:
        return ret;
 }
 
+static int csi2rx_enum_mbus_code(struct v4l2_subdev *subdev,
+                                struct v4l2_subdev_state *state,
+                                struct v4l2_subdev_mbus_code_enum *code_enum)
+{
+       if (code_enum->index >= ARRAY_SIZE(formats))
+               return -EINVAL;
+
+       code_enum->code = formats[code_enum->index].code;
+
+       return 0;
+}
+
 static int csi2rx_set_fmt(struct v4l2_subdev *subdev,
                          struct v4l2_subdev_state *state,
                          struct v4l2_subdev_format *format)
@@ -439,6 +455,7 @@ static int csi2rx_init_state(struct v4l2_subdev *subdev,
 }
 
 static const struct v4l2_subdev_pad_ops csi2rx_pad_ops = {
+       .enum_mbus_code = csi2rx_enum_mbus_code,
        .get_fmt        = v4l2_subdev_get_fmt,
        .set_fmt        = csi2rx_set_fmt,
 };
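
With .enum_mbus_code wired up, userspace can walk the format table above via VIDIOC_SUBDEV_ENUM_MBUS_CODE; a hedged user-space sketch:

	struct v4l2_subdev_mbus_code_enum code = {
		.pad = 0,
		.which = V4L2_SUBDEV_FORMAT_ACTIVE,
	};

	for (code.index = 0;
	     !ioctl(fd, VIDIOC_SUBDEV_ENUM_MBUS_CODE, &code);
	     code.index++)
		printf("supported code 0x%04x\n", code.code);
	/* the loop ends when the driver returns -EINVAL past the last entry */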
@@ -468,7 +485,7 @@ static int csi2rx_async_bound(struct v4l2_async_notifier *notifier,
        struct csi2rx_priv *csi2rx = v4l2_subdev_to_csi2rx(subdev);
 
        csi2rx->source_pad = media_entity_get_fwnode_pad(&s_subdev->entity,
-                                                        s_subdev->fwnode,
+                                                        asd->match.fwnode,
                                                         MEDIA_PAD_FL_SOURCE);
        if (csi2rx->source_pad < 0) {
                dev_err(csi2rx->dev, "Couldn't find output pad for subdev %s\n",
index f1e022fb148eafe81fac889f2badbdc0ef54a590..2d82791f575e0173601a950fc08be4da21f67c55 100644 (file)
@@ -2315,7 +2315,7 @@ static bool wave5_vpu_enc_check_common_param_valid(struct vpu_instance *inst,
                                param->intra_refresh_mode);
                        return false;
                }
-       };
+       }
        return true;
 
 invalid_refresh_argument:
index f29cfa3af94ac26678b33a4489889e78030f64a7..8bbf9d10b4677425af50bd90af8347d0a3029744 100644 (file)
@@ -92,7 +92,7 @@ static int switch_state(struct vpu_instance *inst, enum vpu_instance_state state
                break;
        case VPU_INST_STATE_STOP:
                break;
-       };
+       }
 
        dev_dbg(inst->dev->dev, "Switch state from %s to %s.\n",
                state_to_str(inst->state), state_to_str(state));
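
Both hunks drop a stray semicolon after a switch's closing brace, a null statement that W=1 builds warn about:

	switch (state) {
	default:
		break;
	};	/* this trailing ';' is a null statement; drop it */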
index 0d90b5820bef7286694129ec0c4ed4f436d399b2..1b3df5b04249ae0e7ac606b9da0f40df1c8ea90d 100644 (file)
@@ -250,7 +250,7 @@ err_clk_dis:
        return ret;
 }
 
-static int wave5_vpu_remove(struct platform_device *pdev)
+static void wave5_vpu_remove(struct platform_device *pdev)
 {
        struct vpu_device *dev = dev_get_drvdata(&pdev->dev);
 
@@ -262,8 +262,6 @@ static int wave5_vpu_remove(struct platform_device *pdev)
        v4l2_device_unregister(&dev->v4l2_dev);
        wave5_vdi_release(&pdev->dev);
        ida_destroy(&dev->inst_ida);
-
-       return 0;
 }
 
 static const struct wave5_match_data ti_wave521c_data = {
@@ -283,7 +281,7 @@ static struct platform_driver wave5_vpu_driver = {
                .of_match_table = of_match_ptr(wave5_dt_ids),
                },
        .probe = wave5_vpu_probe,
-       .remove = wave5_vpu_remove,
+       .remove_new = wave5_vpu_remove,
 };
 
 module_platform_driver(wave5_vpu_driver);
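
wave5_vpu_remove() now returns void and is bound through .remove_new, the transitional platform_driver field for remove callbacks whose int return value the core always ignored. The shape of the conversion, as a sketch:

	static void example_remove(struct platform_device *pdev)
	{
		/* teardown only; there is nothing meaningful to return */
	}

	static struct platform_driver example_driver = {
		.probe      = example_probe,	/* hypothetical */
		.remove_new = example_remove,
	};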
index 59b89e421dc2892c0e02f88b8119af8898be86ed..d904952bf00e33216b61659769953aa879bcd49e 100644 (file)
@@ -2207,7 +2207,7 @@ static int pxa_camera_pdata_from_dt(struct device *dev,
                pcdev->mclk = mclk_rate;
        }
 
-       np = of_graph_get_next_endpoint(np, NULL);
+       np = of_graph_get_endpoint_by_regs(np, 0, -1);
        if (!np) {
                dev_err(dev, "could not find endpoint\n");
                return -EINVAL;
index d6499ffe30e8b6a3f46ec3266de4c8d598cfdf82..d31f4730f2a38b14a30885340bd807976d2f1116 100644 (file)
@@ -7,6 +7,7 @@ config VIDEO_CAFE_CCIC
        depends on V4L_PLATFORM_DRIVERS
        depends on PCI && I2C && VIDEO_DEV
        depends on COMMON_CLK
+       select V4L2_ASYNC
        select VIDEO_OV7670 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR
        select VIDEOBUF2_VMALLOC
        select VIDEOBUF2_DMA_CONTIG
@@ -24,6 +25,7 @@ config VIDEO_MMP_CAMERA
        depends on COMMON_CLK
        select VIDEO_OV7670 if MEDIA_SUBDRV_AUTOSELECT && VIDEO_CAMERA_SENSOR
        select I2C_GPIO
+       select V4L2_ASYNC
        select VIDEOBUF2_VMALLOC
        select VIDEOBUF2_DMA_CONTIG
        select VIDEOBUF2_DMA_SG
index 8ba6e757e11aa93e154c2fa9c7965cd494753765..8877eb39e8071e56f61dc061bc0773e06efd56d6 100644 (file)
@@ -144,7 +144,6 @@ struct mtk_jpegdec_clk {
  * @jpegenc_irq:       jpeg encode irq num
  * @job_timeout_work:  encode timeout workqueue
  * @hw_param:          jpeg encode hw parameters
- * @hw_rdy:            record hw ready
  * @hw_state:          record hw state
  * @hw_lock:           spinlock protecting the hw device resource
  */
index b065ccd0691404e73a4fdb1ec4952d5ef1e20d3a..378a1cba0144fa771eddbb021f22569c133c3221 100644 (file)
@@ -26,7 +26,7 @@ static void mtk_mdp_vpu_handle_init_ack(const struct mdp_ipi_comm_ack *msg)
        vpu->inst_addr = msg->vpu_inst_addr;
 }
 
-static void mtk_mdp_vpu_ipi_handler(const void *data, unsigned int len,
+static void mtk_mdp_vpu_ipi_handler(void *data, unsigned int len,
                                    void *priv)
 {
        const struct mdp_ipi_comm_ack *msg = data;
index 502eeae0bfdc44590036511532c67a381d3144e6..ecca52b45307c91a30eb9e336488dbf6c6e1c871 100644 (file)
@@ -46,18 +46,114 @@ enum mt8183_mdp_comp_id {
        MT8183_MDP_COMP_WROT1,          /* 25 */
 };
 
+enum mt8195_mdp_comp_id {
+       /* MT8195 Comp id */
+       /* ISP */
+       MT8195_MDP_COMP_WPEI = 0,
+       MT8195_MDP_COMP_WPEO,           /* 1 */
+       MT8195_MDP_COMP_WPEI2,          /* 2 */
+       MT8195_MDP_COMP_WPEO2,          /* 3 */
+
+       /* MDP */
+       MT8195_MDP_COMP_CAMIN,          /* 4 */
+       MT8195_MDP_COMP_CAMIN2,         /* 5 */
+       MT8195_MDP_COMP_SPLIT,          /* 6 */
+       MT8195_MDP_COMP_SPLIT2,         /* 7 */
+       MT8195_MDP_COMP_RDMA0,          /* 8 */
+       MT8195_MDP_COMP_RDMA1,          /* 9 */
+       MT8195_MDP_COMP_RDMA2,          /* 10 */
+       MT8195_MDP_COMP_RDMA3,          /* 11 */
+       MT8195_MDP_COMP_STITCH,         /* 12 */
+       MT8195_MDP_COMP_FG0,            /* 13 */
+       MT8195_MDP_COMP_FG1,            /* 14 */
+       MT8195_MDP_COMP_FG2,            /* 15 */
+       MT8195_MDP_COMP_FG3,            /* 16 */
+       MT8195_MDP_COMP_TO_SVPP2MOUT,   /* 17 */
+       MT8195_MDP_COMP_TO_SVPP3MOUT,   /* 18 */
+       MT8195_MDP_COMP_TO_WARP0MOUT,   /* 19 */
+       MT8195_MDP_COMP_TO_WARP1MOUT,   /* 20 */
+       MT8195_MDP_COMP_VPP0_SOUT,      /* 21 */
+       MT8195_MDP_COMP_VPP1_SOUT,      /* 22 */
+       MT8195_MDP_COMP_PQ0_SOUT,       /* 23 */
+       MT8195_MDP_COMP_PQ1_SOUT,       /* 24 */
+       MT8195_MDP_COMP_HDR0,           /* 25 */
+       MT8195_MDP_COMP_HDR1,           /* 26 */
+       MT8195_MDP_COMP_HDR2,           /* 27 */
+       MT8195_MDP_COMP_HDR3,           /* 28 */
+       MT8195_MDP_COMP_AAL0,           /* 29 */
+       MT8195_MDP_COMP_AAL1,           /* 30 */
+       MT8195_MDP_COMP_AAL2,           /* 31 */
+       MT8195_MDP_COMP_AAL3,           /* 32 */
+       MT8195_MDP_COMP_RSZ0,           /* 33 */
+       MT8195_MDP_COMP_RSZ1,           /* 34 */
+       MT8195_MDP_COMP_RSZ2,           /* 35 */
+       MT8195_MDP_COMP_RSZ3,           /* 36 */
+       MT8195_MDP_COMP_TDSHP0,         /* 37 */
+       MT8195_MDP_COMP_TDSHP1,         /* 38 */
+       MT8195_MDP_COMP_TDSHP2,         /* 39 */
+       MT8195_MDP_COMP_TDSHP3,         /* 40 */
+       MT8195_MDP_COMP_COLOR0,         /* 41 */
+       MT8195_MDP_COMP_COLOR1,         /* 42 */
+       MT8195_MDP_COMP_COLOR2,         /* 43 */
+       MT8195_MDP_COMP_COLOR3,         /* 44 */
+       MT8195_MDP_COMP_OVL0,           /* 45 */
+       MT8195_MDP_COMP_OVL1,           /* 46 */
+       MT8195_MDP_COMP_PAD0,           /* 47 */
+       MT8195_MDP_COMP_PAD1,           /* 48 */
+       MT8195_MDP_COMP_PAD2,           /* 49 */
+       MT8195_MDP_COMP_PAD3,           /* 50 */
+       MT8195_MDP_COMP_TCC0,           /* 51 */
+       MT8195_MDP_COMP_TCC1,           /* 52 */
+       MT8195_MDP_COMP_WROT0,          /* 53 */
+       MT8195_MDP_COMP_WROT1,          /* 54 */
+       MT8195_MDP_COMP_WROT2,          /* 55 */
+       MT8195_MDP_COMP_WROT3,          /* 56 */
+       MT8195_MDP_COMP_MERGE2,         /* 57 */
+       MT8195_MDP_COMP_MERGE3,         /* 58 */
+
+       MT8195_MDP_COMP_VDO0DL0,        /* 59 */
+       MT8195_MDP_COMP_VDO1DL0,        /* 60 */
+       MT8195_MDP_COMP_VDO0DL1,        /* 61 */
+       MT8195_MDP_COMP_VDO1DL1,        /* 62 */
+};
+
 static const struct of_device_id mt8183_mdp_probe_infra[MDP_INFRA_MAX] = {
        [MDP_INFRA_MMSYS] = { .compatible = "mediatek,mt8183-mmsys" },
        [MDP_INFRA_MUTEX] = { .compatible = "mediatek,mt8183-disp-mutex" },
        [MDP_INFRA_SCP] = { .compatible = "mediatek,mt8183-scp" }
 };
 
+static const struct of_device_id mt8195_mdp_probe_infra[MDP_INFRA_MAX] = {
+       [MDP_INFRA_MMSYS] = { .compatible = "mediatek,mt8195-vppsys0" },
+       [MDP_INFRA_MMSYS2] = { .compatible = "mediatek,mt8195-vppsys1" },
+       [MDP_INFRA_MUTEX] = { .compatible = "mediatek,mt8195-vpp-mutex" },
+       [MDP_INFRA_MUTEX2] = { .compatible = "mediatek,mt8195-vpp-mutex" },
+       [MDP_INFRA_SCP] = { .compatible = "mediatek,mt8195-scp" }
+};
+
 static const struct mdp_platform_config mt8183_plat_cfg = {
        .rdma_support_10bit             = true,
        .rdma_rsz1_sram_sharing         = true,
        .rdma_upsample_repeat_only      = true,
+       .rdma_event_num                 = 1,
        .rsz_disable_dcm_small_sample   = false,
        .wrot_filter_constraint         = false,
+       .wrot_event_num                 = 1,
+};
+
+static const struct mdp_platform_config mt8195_plat_cfg = {
+       .rdma_support_10bit             = true,
+       .rdma_rsz1_sram_sharing         = false,
+       .rdma_upsample_repeat_only      = false,
+       .rdma_esl_setting               = true,
+       .rdma_event_num                 = 4,
+       .rsz_disable_dcm_small_sample   = false,
+       .rsz_etc_control                = true,
+       .wrot_filter_constraint         = false,
+       .wrot_event_num                 = 4,
+       .tdshp_hist_num                 = 17,
+       .tdshp_constrain                = true,
+       .tdshp_contour                  = true,
 };
 
 static const u32 mt8183_mutex_idx[MDP_MAX_COMP_COUNT] = {
@@ -71,81 +167,384 @@ static const u32 mt8183_mutex_idx[MDP_MAX_COMP_COUNT] = {
        [MDP_COMP_CCORR0] = MUTEX_MOD_IDX_MDP_CCORR0,
 };
 
+static const u32 mt8195_mutex_idx[MDP_MAX_COMP_COUNT] = {
+       [MDP_COMP_RDMA0] = MUTEX_MOD_IDX_MDP_RDMA0,
+       [MDP_COMP_RDMA1] = MUTEX_MOD_IDX_MDP_RDMA1,
+       [MDP_COMP_RDMA2] = MUTEX_MOD_IDX_MDP_RDMA2,
+       [MDP_COMP_RDMA3] = MUTEX_MOD_IDX_MDP_RDMA3,
+       [MDP_COMP_STITCH] = MUTEX_MOD_IDX_MDP_STITCH0,
+       [MDP_COMP_FG0] = MUTEX_MOD_IDX_MDP_FG0,
+       [MDP_COMP_FG1] = MUTEX_MOD_IDX_MDP_FG1,
+       [MDP_COMP_FG2] = MUTEX_MOD_IDX_MDP_FG2,
+       [MDP_COMP_FG3] = MUTEX_MOD_IDX_MDP_FG3,
+       [MDP_COMP_HDR0] = MUTEX_MOD_IDX_MDP_HDR0,
+       [MDP_COMP_HDR1] = MUTEX_MOD_IDX_MDP_HDR1,
+       [MDP_COMP_HDR2] = MUTEX_MOD_IDX_MDP_HDR2,
+       [MDP_COMP_HDR3] = MUTEX_MOD_IDX_MDP_HDR3,
+       [MDP_COMP_AAL0] = MUTEX_MOD_IDX_MDP_AAL0,
+       [MDP_COMP_AAL1] = MUTEX_MOD_IDX_MDP_AAL1,
+       [MDP_COMP_AAL2] = MUTEX_MOD_IDX_MDP_AAL2,
+       [MDP_COMP_AAL3] = MUTEX_MOD_IDX_MDP_AAL3,
+       [MDP_COMP_RSZ0] = MUTEX_MOD_IDX_MDP_RSZ0,
+       [MDP_COMP_RSZ1] = MUTEX_MOD_IDX_MDP_RSZ1,
+       [MDP_COMP_RSZ2] = MUTEX_MOD_IDX_MDP_RSZ2,
+       [MDP_COMP_RSZ3] = MUTEX_MOD_IDX_MDP_RSZ3,
+       [MDP_COMP_MERGE2] = MUTEX_MOD_IDX_MDP_MERGE2,
+       [MDP_COMP_MERGE3] = MUTEX_MOD_IDX_MDP_MERGE3,
+       [MDP_COMP_TDSHP0] = MUTEX_MOD_IDX_MDP_TDSHP0,
+       [MDP_COMP_TDSHP1] = MUTEX_MOD_IDX_MDP_TDSHP1,
+       [MDP_COMP_TDSHP2] = MUTEX_MOD_IDX_MDP_TDSHP2,
+       [MDP_COMP_TDSHP3] = MUTEX_MOD_IDX_MDP_TDSHP3,
+       [MDP_COMP_COLOR0] = MUTEX_MOD_IDX_MDP_COLOR0,
+       [MDP_COMP_COLOR1] = MUTEX_MOD_IDX_MDP_COLOR1,
+       [MDP_COMP_COLOR2] = MUTEX_MOD_IDX_MDP_COLOR2,
+       [MDP_COMP_COLOR3] = MUTEX_MOD_IDX_MDP_COLOR3,
+       [MDP_COMP_OVL0] = MUTEX_MOD_IDX_MDP_OVL0,
+       [MDP_COMP_OVL1] = MUTEX_MOD_IDX_MDP_OVL1,
+       [MDP_COMP_PAD0] = MUTEX_MOD_IDX_MDP_PAD0,
+       [MDP_COMP_PAD1] = MUTEX_MOD_IDX_MDP_PAD1,
+       [MDP_COMP_PAD2] = MUTEX_MOD_IDX_MDP_PAD2,
+       [MDP_COMP_PAD3] = MUTEX_MOD_IDX_MDP_PAD3,
+       [MDP_COMP_TCC0] = MUTEX_MOD_IDX_MDP_TCC0,
+       [MDP_COMP_TCC1] = MUTEX_MOD_IDX_MDP_TCC1,
+       [MDP_COMP_WROT0] = MUTEX_MOD_IDX_MDP_WROT0,
+       [MDP_COMP_WROT1] = MUTEX_MOD_IDX_MDP_WROT1,
+       [MDP_COMP_WROT2] = MUTEX_MOD_IDX_MDP_WROT2,
+       [MDP_COMP_WROT3] = MUTEX_MOD_IDX_MDP_WROT3,
+};
+
 static const struct mdp_comp_data mt8183_mdp_comp_data[MDP_MAX_COMP_COUNT] = {
        [MDP_COMP_WPEI] = {
-               {MDP_COMP_TYPE_WPEI, 0, MT8183_MDP_COMP_WPEI},
+               {MDP_COMP_TYPE_WPEI, 0, MT8183_MDP_COMP_WPEI, MDP_MM_SUBSYS_0},
                {0, 0, 0}
        },
        [MDP_COMP_WPEO] = {
-               {MDP_COMP_TYPE_EXTO, 2, MT8183_MDP_COMP_WPEO},
+               {MDP_COMP_TYPE_EXTO, 2, MT8183_MDP_COMP_WPEO, MDP_MM_SUBSYS_0},
                {0, 0, 0}
        },
        [MDP_COMP_WPEI2] = {
-               {MDP_COMP_TYPE_WPEI, 1, MT8183_MDP_COMP_WPEI2},
+               {MDP_COMP_TYPE_WPEI, 1, MT8183_MDP_COMP_WPEI2, MDP_MM_SUBSYS_0},
                {0, 0, 0}
        },
        [MDP_COMP_WPEO2] = {
-               {MDP_COMP_TYPE_EXTO, 3, MT8183_MDP_COMP_WPEO2},
+               {MDP_COMP_TYPE_EXTO, 3, MT8183_MDP_COMP_WPEO2, MDP_MM_SUBSYS_0},
                {0, 0, 0}
        },
        [MDP_COMP_ISP_IMGI] = {
-               {MDP_COMP_TYPE_IMGI, 0, MT8183_MDP_COMP_ISP_IMGI},
+               {MDP_COMP_TYPE_IMGI, 0, MT8183_MDP_COMP_ISP_IMGI, MDP_MM_SUBSYS_0},
                {0, 0, 4}
        },
        [MDP_COMP_ISP_IMGO] = {
-               {MDP_COMP_TYPE_EXTO, 0, MT8183_MDP_COMP_ISP_IMGO},
+               {MDP_COMP_TYPE_EXTO, 0, MT8183_MDP_COMP_ISP_IMGO, MDP_MM_SUBSYS_0},
                {0, 0, 4}
        },
        [MDP_COMP_ISP_IMG2O] = {
-               {MDP_COMP_TYPE_EXTO, 1, MT8183_MDP_COMP_ISP_IMG2O},
+               {MDP_COMP_TYPE_EXTO, 1, MT8183_MDP_COMP_ISP_IMG2O, MDP_MM_SUBSYS_0},
                {0, 0, 0}
        },
        [MDP_COMP_CAMIN] = {
-               {MDP_COMP_TYPE_DL_PATH, 0, MT8183_MDP_COMP_CAMIN},
+               {MDP_COMP_TYPE_DL_PATH, 0, MT8183_MDP_COMP_CAMIN, MDP_MM_SUBSYS_0},
                {2, 2, 1}
        },
        [MDP_COMP_CAMIN2] = {
-               {MDP_COMP_TYPE_DL_PATH, 1, MT8183_MDP_COMP_CAMIN2},
+               {MDP_COMP_TYPE_DL_PATH, 1, MT8183_MDP_COMP_CAMIN2, MDP_MM_SUBSYS_0},
                {2, 4, 1}
        },
        [MDP_COMP_RDMA0] = {
-               {MDP_COMP_TYPE_RDMA, 0, MT8183_MDP_COMP_RDMA0},
+               {MDP_COMP_TYPE_RDMA, 0, MT8183_MDP_COMP_RDMA0, MDP_MM_SUBSYS_0},
                {2, 0, 0}
        },
        [MDP_COMP_CCORR0] = {
-               {MDP_COMP_TYPE_CCORR, 0, MT8183_MDP_COMP_CCORR0},
+               {MDP_COMP_TYPE_CCORR, 0, MT8183_MDP_COMP_CCORR0, MDP_MM_SUBSYS_0},
                {1, 0, 0}
        },
        [MDP_COMP_RSZ0] = {
-               {MDP_COMP_TYPE_RSZ, 0, MT8183_MDP_COMP_RSZ0},
+               {MDP_COMP_TYPE_RSZ, 0, MT8183_MDP_COMP_RSZ0, MDP_MM_SUBSYS_0},
                {1, 0, 0}
        },
        [MDP_COMP_RSZ1] = {
-               {MDP_COMP_TYPE_RSZ, 1, MT8183_MDP_COMP_RSZ1},
+               {MDP_COMP_TYPE_RSZ, 1, MT8183_MDP_COMP_RSZ1, MDP_MM_SUBSYS_0},
                {1, 0, 0}
        },
        [MDP_COMP_TDSHP0] = {
-               {MDP_COMP_TYPE_TDSHP, 0, MT8183_MDP_COMP_TDSHP0},
+               {MDP_COMP_TYPE_TDSHP, 0, MT8183_MDP_COMP_TDSHP0, MDP_MM_SUBSYS_0},
                {0, 0, 0}
        },
        [MDP_COMP_PATH0_SOUT] = {
-               {MDP_COMP_TYPE_PATH, 0, MT8183_MDP_COMP_PATH0_SOUT},
+               {MDP_COMP_TYPE_PATH, 0, MT8183_MDP_COMP_PATH0_SOUT, MDP_MM_SUBSYS_0},
                {0, 0, 0}
        },
        [MDP_COMP_PATH1_SOUT] = {
-               {MDP_COMP_TYPE_PATH, 1, MT8183_MDP_COMP_PATH1_SOUT},
+               {MDP_COMP_TYPE_PATH, 1, MT8183_MDP_COMP_PATH1_SOUT, MDP_MM_SUBSYS_0},
                {0, 0, 0}
        },
        [MDP_COMP_WROT0] = {
-               {MDP_COMP_TYPE_WROT, 0, MT8183_MDP_COMP_WROT0},
+               {MDP_COMP_TYPE_WROT, 0, MT8183_MDP_COMP_WROT0, MDP_MM_SUBSYS_0},
                {1, 0, 0}
        },
        [MDP_COMP_WDMA] = {
-               {MDP_COMP_TYPE_WDMA, 0, MT8183_MDP_COMP_WDMA},
+               {MDP_COMP_TYPE_WDMA, 0, MT8183_MDP_COMP_WDMA, MDP_MM_SUBSYS_0},
                {1, 0, 0}
        },
 };
 
+static const struct mdp_comp_data mt8195_mdp_comp_data[MDP_MAX_COMP_COUNT] = {
+       [MDP_COMP_WPEI] = {
+               {MDP_COMP_TYPE_WPEI, 0, MT8195_MDP_COMP_WPEI, MDP_MM_SUBSYS_0},
+               {0, 0, 0}
+       },
+       [MDP_COMP_WPEO] = {
+               {MDP_COMP_TYPE_EXTO, 2, MT8195_MDP_COMP_WPEO, MDP_MM_SUBSYS_0},
+               {0, 0, 0}
+       },
+       [MDP_COMP_WPEI2] = {
+               {MDP_COMP_TYPE_WPEI, 1, MT8195_MDP_COMP_WPEI2, MDP_MM_SUBSYS_0},
+               {0, 0, 0}
+       },
+       [MDP_COMP_WPEO2] = {
+               {MDP_COMP_TYPE_EXTO, 3, MT8195_MDP_COMP_WPEO2, MDP_MM_SUBSYS_0},
+               {0, 0, 0}
+       },
+       [MDP_COMP_CAMIN] = {
+               {MDP_COMP_TYPE_DL_PATH, 0, MT8195_MDP_COMP_CAMIN, MDP_MM_SUBSYS_0},
+               {3, 3, 0}
+       },
+       [MDP_COMP_CAMIN2] = {
+               {MDP_COMP_TYPE_DL_PATH, 1, MT8195_MDP_COMP_CAMIN2, MDP_MM_SUBSYS_0},
+               {3, 6, 0}
+       },
+       [MDP_COMP_SPLIT] = {
+               {MDP_COMP_TYPE_SPLIT, 0, MT8195_MDP_COMP_SPLIT, MDP_MM_SUBSYS_1},
+               {7, 0, 0}
+       },
+       [MDP_COMP_SPLIT2] = {
+               {MDP_COMP_TYPE_SPLIT, 1, MT8195_MDP_COMP_SPLIT2, MDP_MM_SUBSYS_1},
+               {7, 0, 0}
+       },
+       [MDP_COMP_RDMA0] = {
+               {MDP_COMP_TYPE_RDMA, 0, MT8195_MDP_COMP_RDMA0, MDP_MM_SUBSYS_0},
+               {3, 0, 0}
+       },
+       [MDP_COMP_RDMA1] = {
+               {MDP_COMP_TYPE_RDMA, 1, MT8195_MDP_COMP_RDMA1, MDP_MM_SUBSYS_1},
+               {3, 0, 0}
+       },
+       [MDP_COMP_RDMA2] = {
+               {MDP_COMP_TYPE_RDMA, 2, MT8195_MDP_COMP_RDMA2, MDP_MM_SUBSYS_1},
+               {3, 0, 0}
+       },
+       [MDP_COMP_RDMA3] = {
+               {MDP_COMP_TYPE_RDMA, 3, MT8195_MDP_COMP_RDMA3, MDP_MM_SUBSYS_1},
+               {3, 0, 0}
+       },
+       [MDP_COMP_STITCH] = {
+               {MDP_COMP_TYPE_STITCH, 0, MT8195_MDP_COMP_STITCH, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_FG0] = {
+               {MDP_COMP_TYPE_FG, 0, MT8195_MDP_COMP_FG0, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_FG1] = {
+               {MDP_COMP_TYPE_FG, 1, MT8195_MDP_COMP_FG1, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_FG2] = {
+               {MDP_COMP_TYPE_FG, 2, MT8195_MDP_COMP_FG2, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_FG3] = {
+               {MDP_COMP_TYPE_FG, 3, MT8195_MDP_COMP_FG3, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_HDR0] = {
+               {MDP_COMP_TYPE_HDR, 0, MT8195_MDP_COMP_HDR0, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_HDR1] = {
+               {MDP_COMP_TYPE_HDR, 1, MT8195_MDP_COMP_HDR1, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_HDR2] = {
+               {MDP_COMP_TYPE_HDR, 2, MT8195_MDP_COMP_HDR2, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_HDR3] = {
+               {MDP_COMP_TYPE_HDR, 3, MT8195_MDP_COMP_HDR3, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_AAL0] = {
+               {MDP_COMP_TYPE_AAL, 0, MT8195_MDP_COMP_AAL0, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_AAL1] = {
+               {MDP_COMP_TYPE_AAL, 1, MT8195_MDP_COMP_AAL1, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_AAL2] = {
+               {MDP_COMP_TYPE_AAL, 2, MT8195_MDP_COMP_AAL2, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_AAL3] = {
+               {MDP_COMP_TYPE_AAL, 3, MT8195_MDP_COMP_AAL3, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_RSZ0] = {
+               {MDP_COMP_TYPE_RSZ, 0, MT8195_MDP_COMP_RSZ0, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_RSZ1] = {
+               {MDP_COMP_TYPE_RSZ, 1, MT8195_MDP_COMP_RSZ1, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_RSZ2] = {
+               {MDP_COMP_TYPE_RSZ, 2, MT8195_MDP_COMP_RSZ2, MDP_MM_SUBSYS_1},
+               {2, 0, 0},
+               {MDP_COMP_MERGE2, true, true}
+       },
+       [MDP_COMP_RSZ3] = {
+               {MDP_COMP_TYPE_RSZ, 3, MT8195_MDP_COMP_RSZ3, MDP_MM_SUBSYS_1},
+               {2, 0, 0},
+               {MDP_COMP_MERGE3, true, true}
+       },
+       [MDP_COMP_TDSHP0] = {
+               {MDP_COMP_TYPE_TDSHP, 0, MT8195_MDP_COMP_TDSHP0, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_TDSHP1] = {
+               {MDP_COMP_TYPE_TDSHP, 1, MT8195_MDP_COMP_TDSHP1, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_TDSHP2] = {
+               {MDP_COMP_TYPE_TDSHP, 2, MT8195_MDP_COMP_TDSHP2, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_TDSHP3] = {
+               {MDP_COMP_TYPE_TDSHP, 3, MT8195_MDP_COMP_TDSHP3, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_COLOR0] = {
+               {MDP_COMP_TYPE_COLOR, 0, MT8195_MDP_COMP_COLOR0, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_COLOR1] = {
+               {MDP_COMP_TYPE_COLOR, 1, MT8195_MDP_COMP_COLOR1, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_COLOR2] = {
+               {MDP_COMP_TYPE_COLOR, 2, MT8195_MDP_COMP_COLOR2, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_COLOR3] = {
+               {MDP_COMP_TYPE_COLOR, 3, MT8195_MDP_COMP_COLOR3, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_OVL0] = {
+               {MDP_COMP_TYPE_OVL, 0, MT8195_MDP_COMP_OVL0, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_OVL1] = {
+               {MDP_COMP_TYPE_OVL, 1, MT8195_MDP_COMP_OVL1, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_PAD0] = {
+               {MDP_COMP_TYPE_PAD, 0, MT8195_MDP_COMP_PAD0, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_PAD1] = {
+               {MDP_COMP_TYPE_PAD, 1, MT8195_MDP_COMP_PAD1, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_PAD2] = {
+               {MDP_COMP_TYPE_PAD, 2, MT8195_MDP_COMP_PAD2, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_PAD3] = {
+               {MDP_COMP_TYPE_PAD, 3, MT8195_MDP_COMP_PAD3, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_TCC0] = {
+               {MDP_COMP_TYPE_TCC, 0, MT8195_MDP_COMP_TCC0, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_TCC1] = {
+               {MDP_COMP_TYPE_TCC, 1, MT8195_MDP_COMP_TCC1, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_WROT0] = {
+               {MDP_COMP_TYPE_WROT, 0, MT8195_MDP_COMP_WROT0, MDP_MM_SUBSYS_0},
+               {1, 0, 0}
+       },
+       [MDP_COMP_WROT1] = {
+               {MDP_COMP_TYPE_WROT, 1, MT8195_MDP_COMP_WROT1, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_WROT2] = {
+               {MDP_COMP_TYPE_WROT, 2, MT8195_MDP_COMP_WROT2, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_WROT3] = {
+               {MDP_COMP_TYPE_WROT, 3, MT8195_MDP_COMP_WROT3, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_MERGE2] = {
+               {MDP_COMP_TYPE_MERGE, 0, MT8195_MDP_COMP_MERGE2, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_MERGE3] = {
+               {MDP_COMP_TYPE_MERGE, 1, MT8195_MDP_COMP_MERGE3, MDP_MM_SUBSYS_1},
+               {1, 0, 0}
+       },
+       [MDP_COMP_PQ0_SOUT] = {
+               {MDP_COMP_TYPE_DUMMY, 0, MT8195_MDP_COMP_PQ0_SOUT, MDP_MM_SUBSYS_0},
+               {0, 0, 0}
+       },
+       [MDP_COMP_PQ1_SOUT] = {
+               {MDP_COMP_TYPE_DUMMY, 1, MT8195_MDP_COMP_PQ1_SOUT, MDP_MM_SUBSYS_1},
+               {0, 0, 0}
+       },
+       [MDP_COMP_TO_WARP0MOUT] = {
+               {MDP_COMP_TYPE_DUMMY, 2, MT8195_MDP_COMP_TO_WARP0MOUT, MDP_MM_SUBSYS_0},
+               {0, 0, 0}
+       },
+       [MDP_COMP_TO_WARP1MOUT] = {
+               {MDP_COMP_TYPE_DUMMY, 3, MT8195_MDP_COMP_TO_WARP1MOUT, MDP_MM_SUBSYS_0},
+               {0, 0, 0}
+       },
+       [MDP_COMP_TO_SVPP2MOUT] = {
+               {MDP_COMP_TYPE_DUMMY, 4, MT8195_MDP_COMP_TO_SVPP2MOUT, MDP_MM_SUBSYS_1},
+               {0, 0, 0}
+       },
+       [MDP_COMP_TO_SVPP3MOUT] = {
+               {MDP_COMP_TYPE_DUMMY, 5, MT8195_MDP_COMP_TO_SVPP3MOUT, MDP_MM_SUBSYS_1},
+               {0, 0, 0}
+       },
+       [MDP_COMP_VPP0_SOUT] = {
+               {MDP_COMP_TYPE_PATH, 0, MT8195_MDP_COMP_VPP0_SOUT, MDP_MM_SUBSYS_1},
+               {4, 9, 0}
+       },
+       [MDP_COMP_VPP1_SOUT] = {
+               {MDP_COMP_TYPE_PATH, 1, MT8195_MDP_COMP_VPP1_SOUT, MDP_MM_SUBSYS_0},
+               {2, 13, 0}
+       },
+       [MDP_COMP_VDO0DL0] = {
+               {MDP_COMP_TYPE_DL_PATH, 0, MT8195_MDP_COMP_VDO0DL0, MDP_MM_SUBSYS_1},
+               {1, 15, 0}
+       },
+       [MDP_COMP_VDO1DL0] = {
+               {MDP_COMP_TYPE_DL_PATH, 0, MT8195_MDP_COMP_VDO1DL0, MDP_MM_SUBSYS_1},
+               {1, 17, 0}
+       },
+       [MDP_COMP_VDO0DL1] = {
+               {MDP_COMP_TYPE_DL_PATH, 0, MT8195_MDP_COMP_VDO0DL1, MDP_MM_SUBSYS_1},
+               {1, 18, 0}
+       },
+       [MDP_COMP_VDO1DL1] = {
+               {MDP_COMP_TYPE_DL_PATH, 0, MT8195_MDP_COMP_VDO1DL1, MDP_MM_SUBSYS_1},
+               {1, 16, 0}
+       },
+};
+
 static const struct of_device_id mt8183_sub_comp_dt_ids[] = {
        {
                .compatible = "mediatek,mt8183-mdp3-wdma",
@@ -157,6 +556,10 @@ static const struct of_device_id mt8183_sub_comp_dt_ids[] = {
        {}
 };
 
+static const struct of_device_id mt8195_sub_comp_dt_ids[] = {
+       {}
+};
+
 /*
  * 10-bit formats are not included in the basic format list;
  * add the corresponding format settings before using them.
@@ -382,6 +785,222 @@ static const struct mdp_format mt8183_formats[] = {
        }
 };
 
+static const struct mdp_format mt8195_formats[] = {
+       {
+               .pixelformat    = V4L2_PIX_FMT_GREY,
+               .mdp_color      = MDP_COLOR_GREY,
+               .depth          = { 8 },
+               .row_depth      = { 8 },
+               .num_planes     = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_RGB565X,
+               .mdp_color      = MDP_COLOR_BGR565,
+               .depth          = { 16 },
+               .row_depth      = { 16 },
+               .num_planes     = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_RGB565,
+               .mdp_color      = MDP_COLOR_RGB565,
+               .depth          = { 16 },
+               .row_depth      = { 16 },
+               .num_planes     = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_RGB24,
+               .mdp_color      = MDP_COLOR_RGB888,
+               .depth          = { 24 },
+               .row_depth      = { 24 },
+               .num_planes     = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_BGR24,
+               .mdp_color      = MDP_COLOR_BGR888,
+               .depth          = { 24 },
+               .row_depth      = { 24 },
+               .num_planes     = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_ABGR32,
+               .mdp_color      = MDP_COLOR_BGRA8888,
+               .depth          = { 32 },
+               .row_depth      = { 32 },
+               .num_planes     = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_ARGB32,
+               .mdp_color      = MDP_COLOR_ARGB8888,
+               .depth          = { 32 },
+               .row_depth      = { 32 },
+               .num_planes     = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_UYVY,
+               .mdp_color      = MDP_COLOR_UYVY,
+               .depth          = { 16 },
+               .row_depth      = { 16 },
+               .num_planes     = 1,
+               .walign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_VYUY,
+               .mdp_color      = MDP_COLOR_VYUY,
+               .depth          = { 16 },
+               .row_depth      = { 16 },
+               .num_planes     = 1,
+               .walign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_YUYV,
+               .mdp_color      = MDP_COLOR_YUYV,
+               .depth          = { 16 },
+               .row_depth      = { 16 },
+               .num_planes     = 1,
+               .walign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_YVYU,
+               .mdp_color      = MDP_COLOR_YVYU,
+               .depth          = { 16 },
+               .row_depth      = { 16 },
+               .num_planes     = 1,
+               .walign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_YUV420,
+               .mdp_color      = MDP_COLOR_I420,
+               .depth          = { 12 },
+               .row_depth      = { 8 },
+               .num_planes     = 1,
+               .walign         = 1,
+               .halign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_YVU420,
+               .mdp_color      = MDP_COLOR_YV12,
+               .depth          = { 12 },
+               .row_depth      = { 8 },
+               .num_planes     = 1,
+               .walign         = 1,
+               .halign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_NV12,
+               .mdp_color      = MDP_COLOR_NV12,
+               .depth          = { 12 },
+               .row_depth      = { 8 },
+               .num_planes     = 1,
+               .walign         = 1,
+               .halign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_NV21,
+               .mdp_color      = MDP_COLOR_NV21,
+               .depth          = { 12 },
+               .row_depth      = { 8 },
+               .num_planes     = 1,
+               .walign         = 1,
+               .halign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_NV16,
+               .mdp_color      = MDP_COLOR_NV16,
+               .depth          = { 16 },
+               .row_depth      = { 8 },
+               .num_planes     = 1,
+               .walign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_NV61,
+               .mdp_color      = MDP_COLOR_NV61,
+               .depth          = { 16 },
+               .row_depth      = { 8 },
+               .num_planes     = 1,
+               .walign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_NV12M,
+               .mdp_color      = MDP_COLOR_NV12,
+               .depth          = { 8, 4 },
+               .row_depth      = { 8, 8 },
+               .num_planes     = 2,
+               .walign         = 1,
+               .halign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_MM21,
+               .mdp_color      = MDP_COLOR_420_BLK,
+               .depth          = { 8, 4 },
+               .row_depth      = { 8, 8 },
+               .num_planes     = 2,
+               .walign         = 6,
+               .halign         = 6,
+               .flags          = MDP_FMT_FLAG_OUTPUT,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_NV21M,
+               .mdp_color      = MDP_COLOR_NV21,
+               .depth          = { 8, 4 },
+               .row_depth      = { 8, 8 },
+               .num_planes     = 2,
+               .walign         = 1,
+               .halign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_NV16M,
+               .mdp_color      = MDP_COLOR_NV16,
+               .depth          = { 8, 8 },
+               .row_depth      = { 8, 8 },
+               .num_planes     = 2,
+               .walign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_NV61M,
+               .mdp_color      = MDP_COLOR_NV61,
+               .depth          = { 8, 8 },
+               .row_depth      = { 8, 8 },
+               .num_planes     = 2,
+               .walign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_YUV420M,
+               .mdp_color      = MDP_COLOR_I420,
+               .depth          = { 8, 2, 2 },
+               .row_depth      = { 8, 4, 4 },
+               .num_planes     = 3,
+               .walign         = 1,
+               .halign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_YVU420M,
+               .mdp_color      = MDP_COLOR_YV12,
+               .depth          = { 8, 2, 2 },
+               .row_depth      = { 8, 4, 4 },
+               .num_planes     = 3,
+               .walign         = 1,
+               .halign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_YUV422M,
+               .mdp_color      = MDP_COLOR_I422,
+               .depth          = { 8, 4, 4 },
+               .row_depth      = { 8, 4, 4 },
+               .num_planes     = 3,
+               .walign         = 1,
+               .halign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }, {
+               .pixelformat    = V4L2_PIX_FMT_YVU422M,
+               .mdp_color      = MDP_COLOR_YV16,
+               .depth          = { 8, 4, 4 },
+               .row_depth      = { 8, 4, 4 },
+               .num_planes     = 3,
+               .walign         = 1,
+               .halign         = 1,
+               .flags          = MDP_FMT_FLAG_OUTPUT | MDP_FMT_FLAG_CAPTURE,
+       }
+};
+
 static const struct mdp_limit mt8183_mdp_def_limit = {
        .out_limit = {
                .wmin   = 16,
@@ -401,15 +1020,54 @@ static const struct mdp_limit mt8183_mdp_def_limit = {
        .v_scale_down_max = 128,
 };
 
+static const struct mdp_limit mt8195_mdp_def_limit = {
+       .out_limit = {
+               .wmin   = 64,
+               .hmin   = 64,
+               .wmax   = 8192,
+               .hmax   = 8192,
+       },
+       .cap_limit = {
+               .wmin   = 64,
+               .hmin   = 64,
+               .wmax   = 8192,
+               .hmax   = 8192,
+       },
+       .h_scale_up_max = 64,
+       .v_scale_up_max = 64,
+       .h_scale_down_max = 128,
+       .v_scale_down_max = 128,
+};
+
 static const struct mdp_pipe_info mt8183_pipe_info[] = {
-       [MDP_PIPE_WPEI] = {MDP_PIPE_WPEI, 0},
-       [MDP_PIPE_WPEI2] = {MDP_PIPE_WPEI2, 1},
-       [MDP_PIPE_IMGI] = {MDP_PIPE_IMGI, 2},
-       [MDP_PIPE_RDMA0] = {MDP_PIPE_RDMA0, 3}
+       [MDP_PIPE_WPEI] = {MDP_PIPE_WPEI, MDP_MM_SUBSYS_0, 0},
+       [MDP_PIPE_WPEI2] = {MDP_PIPE_WPEI2, MDP_MM_SUBSYS_0, 1},
+       [MDP_PIPE_IMGI] = {MDP_PIPE_IMGI, MDP_MM_SUBSYS_0, 2},
+       [MDP_PIPE_RDMA0] = {MDP_PIPE_RDMA0, MDP_MM_SUBSYS_0, 3}
+};
+
+static const struct mdp_pipe_info mt8195_pipe_info[] = {
+       [MDP_PIPE_WPEI] = {MDP_PIPE_WPEI, MDP_MM_SUBSYS_0, 0},
+       [MDP_PIPE_WPEI2] = {MDP_PIPE_WPEI2, MDP_MM_SUBSYS_0, 1},
+       [MDP_PIPE_IMGI] = {MDP_PIPE_IMGI, MDP_MM_SUBSYS_0, 2},
+       [MDP_PIPE_RDMA0] = {MDP_PIPE_RDMA0, MDP_MM_SUBSYS_0, 3},
+       [MDP_PIPE_RDMA1] = {MDP_PIPE_RDMA1, MDP_MM_SUBSYS_1, 0},
+       [MDP_PIPE_RDMA2] = {MDP_PIPE_RDMA2, MDP_MM_SUBSYS_1, 1},
+       [MDP_PIPE_RDMA3] = {MDP_PIPE_RDMA3, MDP_MM_SUBSYS_1, 2},
+       [MDP_PIPE_SPLIT] = {MDP_PIPE_SPLIT, MDP_MM_SUBSYS_1, 3},
+       [MDP_PIPE_SPLIT2] = {MDP_PIPE_SPLIT2, MDP_MM_SUBSYS_1, 4},
+       [MDP_PIPE_VPP1_SOUT] = {MDP_PIPE_VPP1_SOUT, MDP_MM_SUBSYS_0, 4},
+       [MDP_PIPE_VPP0_SOUT] = {MDP_PIPE_VPP0_SOUT, MDP_MM_SUBSYS_1, 5},
+};
+
+static const struct v4l2_rect mt8195_mdp_pp_criteria = {
+       .width = 1920,
+       .height = 1080,
 };
 
 const struct mtk_mdp_driver_data mt8183_mdp_driver_data = {
        .mdp_plat_id = MT8183,
+       .mdp_con_res = 0x14001000,
        .mdp_probe_infra = mt8183_mdp_probe_infra,
        .mdp_cfg = &mt8183_plat_cfg,
        .mdp_mutex_table_idx = mt8183_mutex_idx,
@@ -421,6 +1079,25 @@ const struct mtk_mdp_driver_data mt8183_mdp_driver_data = {
        .def_limit = &mt8183_mdp_def_limit,
        .pipe_info = mt8183_pipe_info,
        .pipe_info_len = ARRAY_SIZE(mt8183_pipe_info),
+       .pp_used = MDP_PP_USED_1,
+};
+
+const struct mtk_mdp_driver_data mt8195_mdp_driver_data = {
+       .mdp_plat_id = MT8195,
+       .mdp_con_res = 0x14001000,
+       .mdp_probe_infra = mt8195_mdp_probe_infra,
+       .mdp_sub_comp_dt_ids = mt8195_sub_comp_dt_ids,
+       .mdp_cfg = &mt8195_plat_cfg,
+       .mdp_mutex_table_idx = mt8195_mutex_idx,
+       .comp_data = mt8195_mdp_comp_data,
+       .comp_data_len = ARRAY_SIZE(mt8195_mdp_comp_data),
+       .format = mt8195_formats,
+       .format_len = ARRAY_SIZE(mt8195_formats),
+       .def_limit = &mt8195_mdp_def_limit,
+       .pipe_info = mt8195_pipe_info,
+       .pipe_info_len = ARRAY_SIZE(mt8195_pipe_info),
+       .pp_criteria = &mt8195_mdp_pp_criteria,
+       .pp_used = MDP_PP_USED_2,
 };
 
 s32 mdp_cfg_get_id_inner(struct mdp_dev *mdp_dev, enum mtk_mdp_comp_id id)
@@ -451,3 +1128,11 @@ enum mtk_mdp_comp_id mdp_cfg_get_id_public(struct mdp_dev *mdp_dev, s32 inner_id
 err_public_id:
        return public_id;
 }
+
+bool mdp_cfg_comp_is_dummy(struct mdp_dev *mdp_dev, s32 inner_id)
+{
+       enum mtk_mdp_comp_id id = mdp_cfg_get_id_public(mdp_dev, inner_id);
+       enum mdp_comp_type type = mdp_dev->mdp_data->comp_data[id].match.type;
+
+       return (type == MDP_COMP_TYPE_DUMMY);
+}
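
mdp_cfg_comp_is_dummy() gives callers a cheap way to skip components that only route data, such as the MT8195 SOUT/MOUT selectors marked MDP_COMP_TYPE_DUMMY above. A hedged usage sketch:

	for (i = 0; i < num_comps; i++) {
		if (mdp_cfg_comp_is_dummy(mdp, comps[i]))
			continue;	/* routing-only, nothing to program */
		example_program_comp(mdp, comps[i]);	/* hypothetical */
	}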
diff --git a/drivers/media/platform/mediatek/mdp3/mdp_reg_aal.h b/drivers/media/platform/mediatek/mdp3/mdp_reg_aal.h
new file mode 100644 (file)
index 0000000..4b9513e
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Ping-Hsun Wu <ping-hsun.wu@mediatek.com>
+ */
+
+#ifndef __MDP_REG_AAL_H__
+#define __MDP_REG_AAL_H__
+
+#define MDP_AAL_EN                     (0x000)
+#define MDP_AAL_CFG                    (0x020)
+#define MDP_AAL_SIZE                   (0x030)
+#define MDP_AAL_OUTPUT_SIZE            (0x034)
+#define MDP_AAL_OUTPUT_OFFSET          (0x038)
+#define MDP_AAL_CFG_MAIN               (0x200)
+
+/* MASK */
+#define MDP_AAL_EN_MASK                        (0x01)
+#define MDP_AAL_CFG_MASK               (0x70FF00B3)
+#define MDP_AAL_SIZE_MASK              (0x1FFF1FFF)
+#define MDP_AAL_OUTPUT_SIZE_MASK       (0x1FFF1FFF)
+#define MDP_AAL_OUTPUT_OFFSET_MASK     (0x0FF00FF)
+#define MDP_AAL_CFG_MAIN_MASK          (0x0FE)
+
+#endif  // __MDP_REG_AAL_H__
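
Each register offset in these new headers is paired with a *_MASK constant
that bounds the writable bits. A hedged example of how such a pair feeds the
driver's MM_REG_WRITE helper (the same pattern is visible in mtk-mdp3-comp.c
further down; the register value here is purely illustrative):

	u32 reg = 0x1;	/* illustrative enable value */

	MM_REG_WRITE(cmd, subsys_id, base, MDP_AAL_EN, reg,
		     MDP_AAL_EN_MASK);
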
diff --git a/drivers/media/platform/mediatek/mdp3/mdp_reg_color.h b/drivers/media/platform/mediatek/mdp3/mdp_reg_color.h
new file mode 100644
index 0000000..f725039
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Ping-Hsun Wu <ping-hsun.wu@mediatek.com>
+ */
+
+#ifndef __MDP_REG_COLOR_H__
+#define __MDP_REG_COLOR_H__
+
+#define MDP_COLOR_WIN_X_MAIN                   (0x40C)
+#define MDP_COLOR_WIN_Y_MAIN                   (0x410)
+#define MDP_COLOR_START                                (0xC00)
+#define MDP_COLOR_INTEN                                (0xC04)
+#define MDP_COLOR_OUT_SEL                      (0xC0C)
+#define MDP_COLOR_INTERNAL_IP_WIDTH            (0xC50)
+#define MDP_COLOR_INTERNAL_IP_HEIGHT           (0xC54)
+#define MDP_COLOR_CM1_EN                       (0xC60)
+#define MDP_COLOR_CM2_EN                       (0xCA0)
+
+/* MASK */
+#define MDP_COLOR_WIN_X_MAIN_MASK              (0xFFFFFFFF)
+#define MDP_COLOR_WIN_Y_MAIN_MASK              (0xFFFFFFFF)
+#define MDP_COLOR_START_MASK                   (0x0FF013F)
+#define MDP_COLOR_INTEN_MASK                   (0x07)
+#define MDP_COLOR_OUT_SEL_MASK                 (0x0777)
+#define MDP_COLOR_INTERNAL_IP_WIDTH_MASK       (0x03FFF)
+#define MDP_COLOR_INTERNAL_IP_HEIGHT_MASK      (0x03FFF)
+#define MDP_COLOR_CM1_EN_MASK                  (0x03)
+#define MDP_COLOR_CM2_EN_MASK                  (0x017)
+
+#endif  // __MDP_REG_COLOR_H__
diff --git a/drivers/media/platform/mediatek/mdp3/mdp_reg_fg.h b/drivers/media/platform/mediatek/mdp3/mdp_reg_fg.h
new file mode 100644
index 0000000..d90bcad
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Ping-Hsun Wu <ping-hsun.wu@mediatek.com>
+ */
+
+#ifndef __MDP_REG_FG_H__
+#define __MDP_REG_FG_H__
+
+#define MDP_FG_TRIGGER                 (0x0)
+#define MDP_FG_FG_CTRL_0               (0x20)
+#define MDP_FG_FG_CK_EN                        (0x24)
+#define MDP_FG_TILE_INFO_0             (0x418)
+#define MDP_FG_TILE_INFO_1             (0x41c)
+
+/* MASK */
+#define MDP_FG_TRIGGER_MASK            (0x00000007)
+#define MDP_FG_FG_CTRL_0_MASK          (0x00000033)
+#define MDP_FG_FG_CK_EN_MASK           (0x0000000F)
+#define MDP_FG_TILE_INFO_0_MASK                (0xFFFFFFFF)
+#define MDP_FG_TILE_INFO_1_MASK                (0xFFFFFFFF)
+
+#endif  // __MDP_REG_FG_H__
diff --git a/drivers/media/platform/mediatek/mdp3/mdp_reg_hdr.h b/drivers/media/platform/mediatek/mdp3/mdp_reg_hdr.h
new file mode 100644
index 0000000..c19fbba
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Ping-Hsun Wu <ping-hsun.wu@mediatek.com>
+ */
+
+#ifndef __MDP_REG_HDR_H__
+#define __MDP_REG_HDR_H__
+
+#define MDP_HDR_TOP                    (0x000)
+#define MDP_HDR_RELAY                  (0x004)
+#define MDP_HDR_SIZE_0                 (0x014)
+#define MDP_HDR_SIZE_1                 (0x018)
+#define MDP_HDR_SIZE_2                 (0x01C)
+#define MDP_HDR_HIST_CTRL_0            (0x020)
+#define MDP_HDR_HIST_CTRL_1            (0x024)
+#define MDP_HDR_HIST_ADDR              (0x0DC)
+#define MDP_HDR_TILE_POS               (0x118)
+
+/* MASK */
+#define MDP_HDR_RELAY_MASK             (0x01)
+#define MDP_HDR_TOP_MASK               (0xFF0FEB6D)
+#define MDP_HDR_SIZE_0_MASK            (0x1FFF1FFF)
+#define MDP_HDR_SIZE_1_MASK            (0x1FFF1FFF)
+#define MDP_HDR_SIZE_2_MASK            (0x1FFF1FFF)
+#define MDP_HDR_HIST_CTRL_0_MASK       (0x1FFF1FFF)
+#define MDP_HDR_HIST_CTRL_1_MASK       (0x1FFF1FFF)
+#define MDP_HDR_HIST_ADDR_MASK         (0xBF3F2F3F)
+#define MDP_HDR_TILE_POS_MASK          (0x1FFF1FFF)
+
+#endif // __MDP_REG_HDR_H__
diff --git a/drivers/media/platform/mediatek/mdp3/mdp_reg_merge.h b/drivers/media/platform/mediatek/mdp3/mdp_reg_merge.h
new file mode 100644
index 0000000..46be27e
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Ping-Hsun Wu <ping-hsun.wu@mediatek.com>
+ */
+
+#ifndef __MDP_REG_MERGE_H__
+#define __MDP_REG_MERGE_H__
+
+#define MDP_MERGE_ENABLE       (0x000)
+#define MDP_MERGE_CFG_0                (0x010)
+#define MDP_MERGE_CFG_4                (0x020)
+#define MDP_MERGE_CFG_12       (0x040)
+#define MDP_MERGE_CFG_24       (0x070)
+#define MDP_MERGE_CFG_25       (0x074)
+
+/* MASK */
+#define MDP_MERGE_ENABLE_MASK  (0xFFFFFFFF)
+#define MDP_MERGE_CFG_0_MASK   (0xFFFFFFFF)
+#define MDP_MERGE_CFG_4_MASK   (0xFFFFFFFF)
+#define MDP_MERGE_CFG_12_MASK  (0xFFFFFFFF)
+#define MDP_MERGE_CFG_24_MASK  (0xFFFFFFFF)
+#define MDP_MERGE_CFG_25_MASK  (0xFFFFFFFF)
+
+#endif // __MDP_REG_MERGE_H__
diff --git a/drivers/media/platform/mediatek/mdp3/mdp_reg_ovl.h b/drivers/media/platform/mediatek/mdp3/mdp_reg_ovl.h
new file mode 100644
index 0000000..21d2d03
--- /dev/null
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Ping-Hsun Wu <ping-hsun.wu@mediatek.com>
+ */
+
+#ifndef __MDP_REG_OVL_H__
+#define __MDP_REG_OVL_H__
+
+#define MDP_OVL_EN                     (0x00c)
+#define MDP_OVL_ROI_SIZE               (0x020)
+#define MDP_OVL_DP_CON                 (0x024)
+#define MDP_OVL_SRC_CON                        (0x02c)
+#define MDP_OVL_L0_CON                 (0x030)
+#define MDP_OVL_L0_SRC_SIZE            (0x038)
+
+/* MASK */
+#define MDP_OVL_DP_CON_MASK            (0x0FFFFFFF)
+#define MDP_OVL_EN_MASK                        (0xB07D07B1)
+#define MDP_OVL_L0_CON_MASK            (0xFFFFFFFF)
+#define MDP_OVL_L0_SRC_SIZE_MASK       (0x1FFF1FFF)
+#define MDP_OVL_ROI_SIZE_MASK          (0x1FFF1FFF)
+#define MDP_OVL_SRC_CON_MASK           (0x0000031F)
+
+#endif  // __MDP_REG_OVL_H__
diff --git a/drivers/media/platform/mediatek/mdp3/mdp_reg_pad.h b/drivers/media/platform/mediatek/mdp3/mdp_reg_pad.h
new file mode 100644
index 0000000..0e89f1d
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Ping-Hsun Wu <ping-hsun.wu@mediatek.com>
+ */
+
+#ifndef __MDP_REG_PAD_H__
+#define __MDP_REG_PAD_H__
+
+#define MDP_PAD_CON                    (0x000)
+#define MDP_PAD_PIC_SIZE               (0x004)
+#define MDP_PAD_W_SIZE                 (0x008)
+#define MDP_PAD_H_SIZE                 (0x00c)
+
+/* MASK */
+#define MDP_PAD_CON_MASK               (0x00000007)
+#define MDP_PAD_PIC_SIZE_MASK          (0xFFFFFFFF)
+#define MDP_PAD_W_SIZE_MASK            (0x1FFF1FFF)
+#define MDP_PAD_H_SIZE_MASK            (0x1FFF1FFF)
+
+#endif  // __MDP_REG_PAD_H__
index be4065e252d3d19a9fe1cacfc262fc27df276026..0affb2a3b958a4d08aa8364a295b14e10f51bb15 100644
 #define MDP_RDMA_SRC_OFFSET_2           0x128
 #define MDP_RDMA_SRC_OFFSET_0_P         0x148
 #define MDP_RDMA_TRANSFORM_0            0x200
+#define MDP_RDMA_DMABUF_CON_0           0x240
+#define MDP_RDMA_ULTRA_TH_HIGH_CON_0    0x248
+#define MDP_RDMA_ULTRA_TH_LOW_CON_0     0x250
+#define MDP_RDMA_DMABUF_CON_1           0x258
+#define MDP_RDMA_ULTRA_TH_HIGH_CON_1    0x260
+#define MDP_RDMA_ULTRA_TH_LOW_CON_1     0x268
+#define MDP_RDMA_DMABUF_CON_2           0x270
+#define MDP_RDMA_ULTRA_TH_HIGH_CON_2    0x278
+#define MDP_RDMA_ULTRA_TH_LOW_CON_2     0x280
+#define MDP_RDMA_DMABUF_CON_3           0x288
+#define MDP_RDMA_ULTRA_TH_HIGH_CON_3    0x290
+#define MDP_RDMA_ULTRA_TH_LOW_CON_3     0x298
 #define MDP_RDMA_RESV_DUMMY_0           0x2a0
 #define MDP_RDMA_MON_STA_1              0x408
 #define MDP_RDMA_SRC_BASE_0             0xf00
 #define MDP_RDMA_SRC_OFFSET_2_MASK          0xffffffff
 #define MDP_RDMA_SRC_OFFSET_0_P_MASK        0xffffffff
 #define MDP_RDMA_TRANSFORM_0_MASK           0xff110777
+#define MDP_RDMA_DMABUF_CON_0_MASK          0x0fff00ff
+#define MDP_RDMA_ULTRA_TH_HIGH_CON_0_MASK   0x3fffffff
+#define MDP_RDMA_ULTRA_TH_LOW_CON_0_MASK    0x3fffffff
+#define MDP_RDMA_DMABUF_CON_1_MASK          0x0f7f007f
+#define MDP_RDMA_ULTRA_TH_HIGH_CON_1_MASK   0x3fffffff
+#define MDP_RDMA_ULTRA_TH_LOW_CON_1_MASK    0x3fffffff
+#define MDP_RDMA_DMABUF_CON_2_MASK          0x0f3f003f
+#define MDP_RDMA_ULTRA_TH_HIGH_CON_2_MASK   0x3fffffff
+#define MDP_RDMA_ULTRA_TH_LOW_CON_2_MASK    0x3fffffff
+#define MDP_RDMA_DMABUF_CON_3_MASK          0x0f3f003f
+#define MDP_RDMA_ULTRA_TH_HIGH_CON_3_MASK   0x3fffffff
+#define MDP_RDMA_ULTRA_TH_LOW_CON_3_MASK    0x3fffffff
 #define MDP_RDMA_RESV_DUMMY_0_MASK          0xffffffff
 #define MDP_RDMA_MON_STA_1_MASK             0xffffffff
 #define MDP_RDMA_SRC_BASE_0_MASK            0xffffffff
index 484f6d60641ff5953df5eacbbc6f22d1adfbd614..187531db8e3bd49fd8d62773576a9c58b4ed86d9 100644
@@ -20,6 +20,7 @@
 #define PRZ_LUMA_VERTICAL_SUBPIXEL_OFFSET                 0x02c
 #define PRZ_CHROMA_HORIZONTAL_INTEGER_OFFSET              0x030
 #define PRZ_CHROMA_HORIZONTAL_SUBPIXEL_OFFSET             0x034
+#define RSZ_ETC_CONTROL                                   0x22c
 
 /* MASK */
 #define PRZ_ENABLE_MASK                                   0x00010001
@@ -35,5 +36,6 @@
 #define PRZ_LUMA_VERTICAL_SUBPIXEL_OFFSET_MASK            0x001fffff
 #define PRZ_CHROMA_HORIZONTAL_INTEGER_OFFSET_MASK         0x0000ffff
 #define PRZ_CHROMA_HORIZONTAL_SUBPIXEL_OFFSET_MASK        0x001fffff
+#define RSZ_ETC_CONTROL_MASK                              0xff770000
 
 #endif // __MDP_REG_RSZ_H__
diff --git a/drivers/media/platform/mediatek/mdp3/mdp_reg_tdshp.h b/drivers/media/platform/mediatek/mdp3/mdp_reg_tdshp.h
new file mode 100644
index 0000000..83b5f9b
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Ping-Hsun Wu <ping-hsun.wu@mediatek.com>
+ */
+
+#ifndef __MDP_REG_TDSHP_H__
+#define __MDP_REG_TDSHP_H__
+
+#define MDP_HIST_CFG_00                                (0x064)
+#define MDP_HIST_CFG_01                                (0x068)
+#define MDP_TDSHP_CTRL                         (0x100)
+#define MDP_TDSHP_CFG                          (0x110)
+#define MDP_TDSHP_INPUT_SIZE                   (0x120)
+#define MDP_TDSHP_OUTPUT_OFFSET                        (0x124)
+#define MDP_TDSHP_OUTPUT_SIZE                  (0x128)
+#define MDP_LUMA_HIST_INIT                     (0x200)
+#define MDP_DC_TWO_D_W1_RESULT_INIT            (0x260)
+#define MDP_CONTOUR_HIST_INIT                  (0x398)
+
+/* MASK */
+#define MDP_HIST_CFG_00_MASK                   (0xFFFFFFFF)
+#define MDP_HIST_CFG_01_MASK                   (0xFFFFFFFF)
+#define MDP_LUMA_HIST_MASK                     (0xFFFFFFFF)
+#define MDP_TDSHP_CTRL_MASK                    (0x07)
+#define MDP_TDSHP_CFG_MASK                     (0x03F7)
+#define MDP_TDSHP_INPUT_SIZE_MASK              (0x1FFF1FFF)
+#define MDP_TDSHP_OUTPUT_OFFSET_MASK           (0x0FF00FF)
+#define MDP_TDSHP_OUTPUT_SIZE_MASK             (0x1FFF1FFF)
+#define MDP_LUMA_HIST_INIT_MASK                        (0xFFFFFFFF)
+#define MDP_DC_TWO_D_W1_RESULT_INIT_MASK       (0x007FFFFF)
+#define MDP_CONTOUR_HIST_INIT_MASK             (0xFFFFFFFF)
+
+#endif  // __MDP_REG_TDSHP_H__
index 6d3ff0e2b6720d3761a449cec3577fcc2c5cb629..b6f016d2c29da6d72fb5f61b2002fe3d57321dff 100644
 #define VIDO_STRIDE                 0x030
 #define VIDO_OFST_ADDR_C            0x038
 #define VIDO_STRIDE_C               0x03c
+#define VIDO_CTRL_2                 0x048
 #define VIDO_DITHER                 0x054
 #define VIDO_STRIDE_V               0x06c
 #define VIDO_OFST_ADDR_V            0x068
 #define VIDO_RSV_1                  0x070
+#define VIDO_DMA_PREULTRA           0x074
 #define VIDO_IN_SIZE                0x078
 #define VIDO_ROT_EN                 0x07c
 #define VIDO_FIFO_TEST              0x080
 #define VIDO_MAT_CTRL               0x084
+#define VIDO_SCAN_10BIT             0x0dc
+#define VIDO_PENDING_ZERO           0x0e0
 #define VIDO_BASE_ADDR              0xf00
 #define VIDO_BASE_ADDR_C            0xf04
 #define VIDO_BASE_ADDR_V            0xf08
 #define VIDO_STRIDE_MASK                0x0000ffff
 #define VIDO_OFST_ADDR_C_MASK           0x0fffffff
 #define VIDO_STRIDE_C_MASK              0x0000ffff
+#define VIDO_CTRL_2_MASK                0x0000000f
 #define VIDO_DITHER_MASK                0xff000001
 #define VIDO_STRIDE_V_MASK              0x0000ffff
 #define VIDO_OFST_ADDR_V_MASK           0x0fffffff
 #define VIDO_RSV_1_MASK                 0xffffffff
+#define VIDO_DMA_PREULTRA_MASK         0x00ffffff
 #define VIDO_IN_SIZE_MASK               0x1fff1fff
 #define VIDO_ROT_EN_MASK                0x00000001
 #define VIDO_FIFO_TEST_MASK             0x00000fff
 #define VIDO_MAT_CTRL_MASK              0x000000f3
+#define VIDO_SCAN_10BIT_MASK            0x0000000f
+#define VIDO_PENDING_ZERO_MASK          0x07ffffff
 #define VIDO_BASE_ADDR_MASK             0xffffffff
 #define VIDO_BASE_ADDR_C_MASK           0xffffffff
 #define VIDO_BASE_ADDR_V_MASK           0xffffffff
diff --git a/drivers/media/platform/mediatek/mdp3/mdp_sm_mt8195.h b/drivers/media/platform/mediatek/mdp3/mdp_sm_mt8195.h
new file mode 100644
index 0000000..b09f482
--- /dev/null
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2023 MediaTek Inc.
+ * Author: Ping-Hsun Wu <ping-hsun.wu@mediatek.com>
+ */
+
+#ifndef __MDP_SM_MT8195_H__
+#define __MDP_SM_MT8195_H__
+
+#include "mtk-mdp3-type.h"
+
+/*
+ * ISP-MDP generic output information
+ * MD5 of the target SCP prebuild:
+ *     a49ec487e458b5971880f1b63dc2a9d5
+ */
+
+#define IMG_MAX_SUBFRAMES_8195 20
+
+struct img_comp_frame_8195 {
+       u32 output_disable;
+       u32 bypass;
+       u32 in_width;
+       u32 in_height;
+       u32 out_width;
+       u32 out_height;
+       struct img_crop crop;
+       u32 in_total_width;
+       u32 out_total_width;
+} __packed;
+
+struct img_comp_subfrm_8195 {
+       u32 tile_disable;
+       struct img_region in;
+       struct img_region out;
+       struct img_offset luma;
+       struct img_offset chroma;
+       s32 out_vertical; /* Output vertical index */
+       s32 out_horizontal; /* Output horizontal index */
+} __packed;
+
+struct mdp_rdma_subfrm_8195 {
+       u32 offset[IMG_MAX_PLANES];
+       u32 offset_0_p;
+       u32 src;
+       u32 clip;
+       u32 clip_ofst;
+       u32 in_tile_xleft;
+       u32 in_tile_ytop;
+} __packed;
+
+struct mdp_rdma_data_8195 {
+       u32 src_ctrl;
+       u32 comp_ctrl;
+       u32 control;
+       u32 iova[IMG_MAX_PLANES];
+       u32 iova_end[IMG_MAX_PLANES];
+       u32 mf_bkgd;
+       u32 mf_bkgd_in_pxl;
+       u32 sf_bkgd;
+       u32 ufo_dec_y;
+       u32 ufo_dec_c;
+       u32 transform;
+       u32 dmabuf_con0;
+       u32 ultra_th_high_con0;
+       u32 ultra_th_low_con0;
+       u32 dmabuf_con1;
+       u32 ultra_th_high_con1;
+       u32 ultra_th_low_con1;
+       u32 dmabuf_con2;
+       u32 ultra_th_high_con2;
+       u32 ultra_th_low_con2;
+       u32 dmabuf_con3;
+       struct mdp_rdma_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_fg_subfrm_8195 {
+       u32 info_0;
+       u32 info_1;
+} __packed;
+
+struct mdp_fg_data_8195 {
+       u32 ctrl_0;
+       u32 ck_en;
+       struct mdp_fg_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_hdr_subfrm_8195 {
+       u32 win_size;
+       u32 src;
+       u32 clip_ofst0;
+       u32 clip_ofst1;
+       u32 hist_ctrl_0;
+       u32 hist_ctrl_1;
+       u32 hdr_top;
+       u32 hist_addr;
+} __packed;
+
+struct mdp_hdr_data_8195 {
+       u32 top;
+       u32 relay;
+       struct mdp_hdr_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_aal_subfrm_8195 {
+       u32 src;
+       u32 clip;
+       u32 clip_ofst;
+} __packed;
+
+struct mdp_aal_data_8195 {
+       u32 cfg_main;
+       u32 cfg;
+       struct mdp_aal_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_rsz_subfrm_8195 {
+       u32 control2;
+       u32 src;
+       u32 clip;
+       u32 hdmirx_en;
+       u32 luma_h_int_ofst;
+       u32 luma_h_sub_ofst;
+       u32 luma_v_int_ofst;
+       u32 luma_v_sub_ofst;
+       u32 chroma_h_int_ofst;
+       u32 chroma_h_sub_ofst;
+       u32 rsz_switch;
+       u32 merge_cfg;
+} __packed;
+
+struct mdp_rsz_data_8195 {
+       u32 coeff_step_x;
+       u32 coeff_step_y;
+       u32 control1;
+       u32 control2;
+       u32 etc_control;
+       u32 prz_enable;
+       u32 ibse_softclip;
+       u32 tap_adapt;
+       u32 ibse_gaincontrol1;
+       u32 ibse_gaincontrol2;
+       u32 ibse_ylevel_1;
+       u32 ibse_ylevel_2;
+       u32 ibse_ylevel_3;
+       u32 ibse_ylevel_4;
+       u32 ibse_ylevel_5;
+       struct mdp_rsz_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_tdshp_subfrm_8195 {
+       u32 src;
+       u32 clip;
+       u32 clip_ofst;
+       u32 hist_cfg_0;
+       u32 hist_cfg_1;
+} __packed;
+
+struct mdp_tdshp_data_8195 {
+       u32 cfg;
+       struct mdp_tdshp_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_color_subfrm_8195 {
+       u32 in_hsize;
+       u32 in_vsize;
+} __packed;
+
+struct mdp_color_data_8195 {
+       u32 start;
+       struct mdp_color_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_ovl_subfrm_8195 {
+       u32 L0_src_size;
+       u32 roi_size;
+} __packed;
+
+struct mdp_ovl_data_8195 {
+       u32 L0_con;
+       u32 src_con;
+       struct mdp_ovl_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_pad_subfrm_8195 {
+       u32 pic_size;
+} __packed;
+
+struct mdp_pad_data_8195 {
+       struct mdp_pad_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_tcc_subfrm_8195 {
+       u32 pic_size;
+} __packed;
+
+struct mdp_tcc_data_8195 {
+       struct mdp_tcc_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_wrot_subfrm_8195 {
+       u32 offset[IMG_MAX_PLANES];
+       u32 src;
+       u32 clip;
+       u32 clip_ofst;
+       u32 main_buf;
+} __packed;
+
+struct mdp_wrot_data_8195 {
+       u32 iova[IMG_MAX_PLANES];
+       u32 control;
+       u32 stride[IMG_MAX_PLANES];
+       u32 mat_ctrl;
+       u32 fifo_test;
+       u32 filter;
+       u32 pre_ultra;
+       u32 framesize;
+       u32 afbc_yuvtrans;
+       u32 scan_10bit;
+       u32 pending_zero;
+       u32 bit_number;
+       u32 pvric;
+       u32 vpp02vpp1;
+       struct mdp_wrot_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct mdp_wdma_subfrm_8195 {
+       u32 offset[IMG_MAX_PLANES];
+       u32 src;
+       u32 clip;
+       u32 clip_ofst;
+} __packed;
+
+struct mdp_wdma_data_8195 {
+       u32 wdma_cfg;
+       u32 iova[IMG_MAX_PLANES];
+       u32 w_in_byte;
+       u32 uv_stride;
+       struct mdp_wdma_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct isp_data_8195 {
+       u64 dl_flags; /* 1 << (enum mdp_comp_type) */
+       u32 smxi_iova[4];
+       u32 cq_idx;
+       u32 cq_iova;
+       u32 tpipe_iova[IMG_MAX_SUBFRAMES_8195];
+} __packed;
+
+struct img_compparam_8195 {
+       u32 type; /* enum mdp_comp_id */
+       u32 id; /* engine alias_id */
+       u32 input;
+       u32 outputs[IMG_MAX_HW_OUTPUTS];
+       u32 num_outputs;
+       struct img_comp_frame_8195 frame;
+       struct img_comp_subfrm_8195 subfrms[IMG_MAX_SUBFRAMES_8195];
+       u32 num_subfrms;
+       union {
+               struct mdp_rdma_data_8195 rdma;
+               struct mdp_fg_data_8195 fg;
+               struct mdp_hdr_data_8195 hdr;
+               struct mdp_aal_data_8195 aal;
+               struct mdp_rsz_data_8195 rsz;
+               struct mdp_tdshp_data_8195 tdshp;
+               struct mdp_color_data_8195 color;
+               struct mdp_ovl_data_8195 ovl;
+               struct mdp_pad_data_8195 pad;
+               struct mdp_tcc_data_8195 tcc;
+               struct mdp_wrot_data_8195 wrot;
+               struct mdp_wdma_data_8195 wdma;
+               struct isp_data_8195 isp;
+       };
+} __packed;
+
+struct img_config_8195 {
+       struct img_compparam_8195 components[IMG_MAX_COMPONENTS];
+       u32 num_components;
+       struct img_mmsys_ctrl ctrls[IMG_MAX_SUBFRAMES_8195];
+       u32 num_subfrms;
+} __packed;
+
+#endif  /* __MDP_SM_MT8195_H__ */
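
Since these __packed layouts mirror the SCP firmware's message format (note
the prebuild MD5 in the header comment), common driver code never
dereferences them directly; every access goes through the plat-id dispatch,
as in:

	u32 num_comp = 0;

	if (CFG_CHECK(MT8183, p_id))
		num_comp = CFG_GET(MT8183, path->config, num_components);
	else if (CFG_CHECK(MT8195, p_id))
		num_comp = CFG_GET(MT8195, path->config, num_components);
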
index 22b8b9a10ef7f5f327484d174fde11e0fb59b4c7..f83ac408306ee98ad055e8b9345d9d5402bcc665 100644
@@ -10,6 +10,7 @@
 
 #include <linux/err.h>
 #include "mdp_sm_mt8183.h"
+#include "mdp_sm_mt8195.h"
 #include "mtk-mdp3-type.h"
 
 /* ISP-MDP generic input information */
@@ -115,6 +116,7 @@ struct img_frameparam {
 
 /* Platform config indicator */
 #define MT8183 8183
+#define MT8195 8195
 
 #define CFG_CHECK(plat, p_id) ((plat) == (p_id))
 
@@ -137,12 +139,14 @@ struct img_frameparam {
 struct img_config {
        union {
                struct img_config_8183 config_8183;
+               struct img_config_8195 config_8195;
        };
 } __packed;
 
 struct img_compparam {
        union {
                struct img_compparam_8183 comp_8183;
+               struct img_compparam_8195 comp_8195;
        };
 } __packed;
 
index dee57cc4a954fcc5fd2c0154c34d17583174e8ec..49cdf45f6e59ef5d1955290c6e92aae739c10029 100644
 #include <linux/types.h>
 
 extern const struct mtk_mdp_driver_data mt8183_mdp_driver_data;
+extern const struct mtk_mdp_driver_data mt8195_mdp_driver_data;
 
 struct mdp_dev;
 enum mtk_mdp_comp_id;
 
 s32 mdp_cfg_get_id_inner(struct mdp_dev *mdp_dev, enum mtk_mdp_comp_id id);
 enum mtk_mdp_comp_id mdp_cfg_get_id_public(struct mdp_dev *mdp_dev, s32 id);
+bool mdp_cfg_comp_is_dummy(struct mdp_dev *mdp_dev, s32 inner_id);
 
 #endif  /* __MTK_MDP3_CFG_H__ */
index 6adac857a4779dbc0974109e1057cd583875562e..1d64bac34b90aaeca77a589d4d7eaa503ba66e34 100644
@@ -6,6 +6,7 @@
 
 #include <linux/mailbox_controller.h>
 #include <linux/platform_device.h>
+#include "mtk-mdp3-cfg.h"
 #include "mtk-mdp3-cmdq.h"
 #include "mtk-mdp3-comp.h"
 #include "mtk-mdp3-core.h"
@@ -39,85 +40,192 @@ static bool is_output_disabled(int p_id, const struct img_compparam *param, u32
                num = CFG_COMP(MT8183, param, num_subfrms);
                dis_output = CFG_COMP(MT8183, param, frame.output_disable);
                dis_tile = CFG_COMP(MT8183, param, frame.output_disable);
+       } else if (CFG_CHECK(MT8195, p_id)) {
+               num = CFG_COMP(MT8195, param, num_subfrms);
+               dis_output = CFG_COMP(MT8195, param, frame.output_disable);
+               dis_tile = CFG_COMP(MT8195, param, frame.output_disable);
        }
 
        return (count < num) ? (dis_output || dis_tile) : true;
 }
 
-static int mdp_path_subfrm_require(const struct mdp_path *path,
-                                  struct mdp_cmdq_cmd *cmd,
-                                  s32 *mutex_id, u32 count)
+static struct mtk_mutex *__get_mutex(const struct mdp_dev *mdp_dev,
+                                    const struct mdp_pipe_info *p)
 {
-       const int p_id = path->mdp_dev->mdp_data->mdp_plat_id;
-       const struct mdp_comp_ctx *ctx;
-       const struct mtk_mdp_driver_data *data = path->mdp_dev->mdp_data;
-       struct device *dev = &path->mdp_dev->pdev->dev;
-       struct mtk_mutex **mutex = path->mdp_dev->mdp_mutex;
-       int id, index;
-       u32 num_comp = 0;
+       return mdp_dev->mm_subsys[p->sub_id].mdp_mutex[p->mutex_id];
+}
 
-       if (CFG_CHECK(MT8183, p_id))
-               num_comp = CFG_GET(MT8183, path->config, num_components);
+static u8 __get_pp_num(enum mdp_stream_type type)
+{
+       switch (type) {
+       case MDP_STREAM_TYPE_DUAL_BITBLT:
+               return MDP_PP_USED_2;
+       default:
+               return MDP_PP_USED_1;
+       }
+}
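
__get_pp_num() translates the stream type into the number of parallel pipes a
job occupies; mdp_cmdq_send() below seeds the device job count with it before
fanning out one command packet per pipe:

	u8 pp_used = __get_pp_num(param->param->type);

	atomic_set(&mdp->job_count, pp_used);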
 
-       /* Decide which mutex to use based on the current pipeline */
-       switch (path->comps[0].comp->public_id) {
+static enum mdp_pipe_id __get_pipe(const struct mdp_dev *mdp_dev,
+                                  enum mtk_mdp_comp_id id)
+{
+       enum mdp_pipe_id pipe_id;
+
+       switch (id) {
        case MDP_COMP_RDMA0:
-               index = MDP_PIPE_RDMA0;
+               pipe_id = MDP_PIPE_RDMA0;
                break;
        case MDP_COMP_ISP_IMGI:
-               index = MDP_PIPE_IMGI;
+               pipe_id = MDP_PIPE_IMGI;
                break;
        case MDP_COMP_WPEI:
-               index = MDP_PIPE_WPEI;
+               pipe_id = MDP_PIPE_WPEI;
                break;
        case MDP_COMP_WPEI2:
-               index = MDP_PIPE_WPEI2;
+               pipe_id = MDP_PIPE_WPEI2;
+               break;
+       case MDP_COMP_RDMA1:
+               pipe_id = MDP_PIPE_RDMA1;
+               break;
+       case MDP_COMP_RDMA2:
+               pipe_id = MDP_PIPE_RDMA2;
+               break;
+       case MDP_COMP_RDMA3:
+               pipe_id = MDP_PIPE_RDMA3;
                break;
        default:
-               dev_err(dev, "Unknown pipeline and no mutex is assigned");
-               return -EINVAL;
+               /* Fall back to RDMA0 so later mutex operations stay valid */
+               pipe_id = MDP_PIPE_RDMA0;
+               dev_err(&mdp_dev->pdev->dev, "Unknown pipeline id %d\n", id);
+               break;
+       }
+
+       return pipe_id;
+}
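
__get_pipe() and __get_mutex() replace the old fixed MDP_PIPE_RDMA0 lookup:
the first component of a path selects the pipe, and the pipe's subsystem
selects the mutex. The two helpers compose as in the callback and release
paths below:

	pipe_id = __get_pipe(mdp, cmd->comps[0].public_id);
	mutex = __get_mutex(mdp, &mdp->mdp_data->pipe_info[pipe_id]);
	mtk_mutex_unprepare(mutex);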
+
+static struct img_config *__get_config_offset(struct mdp_dev *mdp,
+                                             struct mdp_cmdq_param *param,
+                                             u8 pp_idx)
+{
+       const int p_id = mdp->mdp_data->mdp_plat_id;
+       struct device *dev = &mdp->pdev->dev;
+       void *cfg_c, *cfg_n;
+       long bound = mdp->vpu.config_size;
+
+       if (pp_idx >= mdp->mdp_data->pp_used)
+               goto err_param;
+
+       if (CFG_CHECK(MT8183, p_id))
+               cfg_c = CFG_OFST(MT8183, param->config, pp_idx);
+       else if (CFG_CHECK(MT8195, p_id))
+               cfg_c = CFG_OFST(MT8195, param->config, pp_idx);
+       else
+               goto err_param;
+
+       if (CFG_CHECK(MT8183, p_id))
+               cfg_n = CFG_OFST(MT8183, param->config, pp_idx + 1);
+       else if (CFG_CHECK(MT8195, p_id))
+               cfg_n = CFG_OFST(MT8195, param->config, pp_idx + 1);
+       else
+               goto err_param;
+
+       if ((long)cfg_n - (long)mdp->vpu.config > bound) {
+               dev_err(dev, "config offset %ld OOB %ld\n", (long)cfg_n, bound);
+               cfg_c = ERR_PTR(-EFAULT);
        }
-       *mutex_id = data->pipe_info[index].mutex_id;
+
+       return (struct img_config *)cfg_c;
+
+err_param:
+       cfg_c = ERR_PTR(-EINVAL);
+       return (struct img_config *)cfg_c;
+}
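
The bound check works because per-pipe configs are assumed to sit back to
back in the VPU-shared buffer, so the pp_idx + 1 offset doubles as the end of
the current entry. A sketch of the layout the (externally defined) CFG_OFST
macro is assumed to implement:

	/* Assumed layout only; the real CFG_OFST lives outside this patch. */
	static inline void *cfg_ofst_8195(void *base, u8 pp_idx)
	{
		return (u8 *)base + pp_idx * sizeof(struct img_config_8195);
	}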
+
+static int mdp_path_subfrm_require(const struct mdp_path *path,
+                                  struct mdp_cmdq_cmd *cmd,
+                                  struct mdp_pipe_info *p, u32 count)
+{
+       const int p_id = path->mdp_dev->mdp_data->mdp_plat_id;
+       const struct mdp_comp_ctx *ctx;
+       const struct mtk_mdp_driver_data *data = path->mdp_dev->mdp_data;
+       struct mtk_mutex *mutex;
+       int id, index;
+       u32 num_comp = 0;
+
+       if (CFG_CHECK(MT8183, p_id))
+               num_comp = CFG_GET(MT8183, path->config, num_components);
+       else if (CFG_CHECK(MT8195, p_id))
+               num_comp = CFG_GET(MT8195, path->config, num_components);
+
+       /* Decide which mutex to use based on the current pipeline */
+       index = __get_pipe(path->mdp_dev, path->comps[0].comp->public_id);
+       memcpy(p, &data->pipe_info[index], sizeof(struct mdp_pipe_info));
+       mutex = __get_mutex(path->mdp_dev, p);
 
        /* Set mutex mod */
        for (index = 0; index < num_comp; index++) {
+               s32 inner_id = MDP_COMP_NONE;
+               const u32 *mutex_idx;
+               const struct mdp_comp_blend *b;
+
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[index].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[index].type);
+
+               if (mdp_cfg_comp_is_dummy(path->mdp_dev, inner_id))
+                       continue;
+
                ctx = &path->comps[index];
                if (is_output_disabled(p_id, ctx->param, count))
                        continue;
+
+               mutex_idx = data->mdp_mutex_table_idx;
                id = ctx->comp->public_id;
-               mtk_mutex_write_mod(mutex[*mutex_id],
-                                   data->mdp_mutex_table_idx[id], false);
+               mtk_mutex_write_mod(mutex, mutex_idx[id], false);
+
+               b = &data->comp_data[id].blend;
+               if (b && b->aid_mod)
+                       mtk_mutex_write_mod(mutex, mutex_idx[b->b_id], false);
        }
 
-       mtk_mutex_write_sof(mutex[*mutex_id],
-                           MUTEX_SOF_IDX_SINGLE_MODE);
+       mtk_mutex_write_sof(mutex, MUTEX_SOF_IDX_SINGLE_MODE);
 
        return 0;
 }
 
 static int mdp_path_subfrm_run(const struct mdp_path *path,
                               struct mdp_cmdq_cmd *cmd,
-                              s32 *mutex_id, u32 count)
+                              struct mdp_pipe_info *p, u32 count)
 {
        const int p_id = path->mdp_dev->mdp_data->mdp_plat_id;
        const struct mdp_comp_ctx *ctx;
        struct device *dev = &path->mdp_dev->pdev->dev;
-       struct mtk_mutex **mutex = path->mdp_dev->mdp_mutex;
+       struct mtk_mutex *mutex;
        int index;
        u32 num_comp = 0;
        s32 event;
+       s32 inner_id = MDP_COMP_NONE;
 
-       if (-1 == *mutex_id) {
+       if (-1 == p->mutex_id) {
                dev_err(dev, "Incorrect mutex id");
                return -EINVAL;
        }
 
        if (CFG_CHECK(MT8183, p_id))
                num_comp = CFG_GET(MT8183, path->config, num_components);
+       else if (CFG_CHECK(MT8195, p_id))
+               num_comp = CFG_GET(MT8195, path->config, num_components);
 
        /* Wait WROT SRAM shared to DISP RDMA */
        /* Clear SOF event for each engine */
        for (index = 0; index < num_comp; index++) {
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[index].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[index].type);
+
+               if (mdp_cfg_comp_is_dummy(path->mdp_dev, inner_id))
+                       continue;
                ctx = &path->comps[index];
                if (is_output_disabled(p_id, ctx->param, count))
                        continue;
@@ -127,10 +235,18 @@ static int mdp_path_subfrm_run(const struct mdp_path *path,
        }
 
        /* Enable the mutex */
-       mtk_mutex_enable_by_cmdq(mutex[*mutex_id], (void *)&cmd->pkt);
+       mutex = __get_mutex(path->mdp_dev, p);
+       mtk_mutex_enable_by_cmdq(mutex, (void *)&cmd->pkt);
 
        /* Wait SOF events and clear mutex modules (optional) */
        for (index = 0; index < num_comp; index++) {
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[index].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[index].type);
+
+               if (mdp_cfg_comp_is_dummy(path->mdp_dev, inner_id))
+                       continue;
                ctx = &path->comps[index];
                if (is_output_disabled(p_id, ctx->param, count))
                        continue;
@@ -151,13 +267,26 @@ static int mdp_path_ctx_init(struct mdp_dev *mdp, struct mdp_path *path)
 
        if (CFG_CHECK(MT8183, p_id))
                num_comp = CFG_GET(MT8183, path->config, num_components);
+       else if (CFG_CHECK(MT8195, p_id))
+               num_comp = CFG_GET(MT8195, path->config, num_components);
 
        if (num_comp < 1)
                return -EINVAL;
 
        for (index = 0; index < num_comp; index++) {
+               s32 inner_id = MDP_COMP_NONE;
+
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[index].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[index].type);
+
+               if (mdp_cfg_comp_is_dummy(path->mdp_dev, inner_id))
+                       continue;
                if (CFG_CHECK(MT8183, p_id))
                        param = (void *)CFG_ADDR(MT8183, path->config, components[index]);
+               else if (CFG_CHECK(MT8195, p_id))
+                       param = (void *)CFG_ADDR(MT8195, path->config, components[index]);
                ret = mdp_comp_ctx_config(mdp, &path->comps[index],
                                          param, path->param);
                if (ret)
@@ -174,18 +303,23 @@ static int mdp_path_config_subfrm(struct mdp_cmdq_cmd *cmd,
        const struct img_mmsys_ctrl *ctrl = NULL;
        const struct img_mux *set;
        struct mdp_comp_ctx *ctx;
-       s32 mutex_id;
+       struct mdp_pipe_info pipe;
        int index, ret;
        u32 num_comp = 0;
+       s32 inner_id = MDP_COMP_NONE;
 
        if (CFG_CHECK(MT8183, p_id))
                num_comp = CFG_GET(MT8183, path->config, num_components);
+       else if (CFG_CHECK(MT8195, p_id))
+               num_comp = CFG_GET(MT8195, path->config, num_components);
 
        if (CFG_CHECK(MT8183, p_id))
                ctrl = CFG_ADDR(MT8183, path->config, ctrls[count]);
+       else if (CFG_CHECK(MT8195, p_id))
+               ctrl = CFG_ADDR(MT8195, path->config, ctrls[count]);
 
        /* Acquire components */
-       ret = mdp_path_subfrm_require(path, cmd, &mutex_id, count);
+       ret = mdp_path_subfrm_require(path, cmd, &pipe, count);
        if (ret)
                return ret;
        /* Enable mux settings */
@@ -196,6 +330,13 @@ static int mdp_path_config_subfrm(struct mdp_cmdq_cmd *cmd,
        }
        /* Config sub-frame information */
        for (index = (num_comp - 1); index >= 0; index--) {
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[index].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[index].type);
+
+               if (mdp_cfg_comp_is_dummy(path->mdp_dev, inner_id))
+                       continue;
                ctx = &path->comps[index];
                if (is_output_disabled(p_id, ctx->param, count))
                        continue;
@@ -204,11 +345,18 @@ static int mdp_path_config_subfrm(struct mdp_cmdq_cmd *cmd,
                        return ret;
        }
        /* Run components */
-       ret = mdp_path_subfrm_run(path, cmd, &mutex_id, count);
+       ret = mdp_path_subfrm_run(path, cmd, &pipe, count);
        if (ret)
                return ret;
        /* Wait components done */
        for (index = 0; index < num_comp; index++) {
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[index].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[index].type);
+
+               if (mdp_cfg_comp_is_dummy(path->mdp_dev, inner_id))
+                       continue;
                ctx = &path->comps[index];
                if (is_output_disabled(p_id, ctx->param, count))
                        continue;
@@ -218,6 +366,13 @@ static int mdp_path_config_subfrm(struct mdp_cmdq_cmd *cmd,
        }
        /* Advance to the next sub-frame */
        for (index = 0; index < num_comp; index++) {
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[index].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[index].type);
+
+               if (mdp_cfg_comp_is_dummy(path->mdp_dev, inner_id))
+                       continue;
                ctx = &path->comps[index];
                ret = call_op(ctx, advance_subfrm, cmd, count);
                if (ret)
@@ -241,16 +396,28 @@ static int mdp_path_config(struct mdp_dev *mdp, struct mdp_cmdq_cmd *cmd,
        int index, count, ret;
        u32 num_comp = 0;
        u32 num_sub = 0;
+       s32 inner_id = MDP_COMP_NONE;
 
        if (CFG_CHECK(MT8183, p_id))
                num_comp = CFG_GET(MT8183, path->config, num_components);
+       else if (CFG_CHECK(MT8195, p_id))
+               num_comp = CFG_GET(MT8195, path->config, num_components);
 
        if (CFG_CHECK(MT8183, p_id))
                num_sub = CFG_GET(MT8183, path->config, num_subfrms);
+       else if (CFG_CHECK(MT8195, p_id))
+               num_sub = CFG_GET(MT8195, path->config, num_subfrms);
 
        /* Config path frame */
        /* Reset components */
        for (index = 0; index < num_comp; index++) {
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[index].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[index].type);
+
+               if (mdp_cfg_comp_is_dummy(path->mdp_dev, inner_id))
+                       continue;
                ctx = &path->comps[index];
                ret = call_op(ctx, init_comp, cmd);
                if (ret)
@@ -262,8 +429,18 @@ static int mdp_path_config(struct mdp_dev *mdp, struct mdp_cmdq_cmd *cmd,
                u32 out = 0;
 
                ctx = &path->comps[index];
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[index].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[index].type);
+
+               if (mdp_cfg_comp_is_dummy(path->mdp_dev, inner_id))
+                       continue;
+
                if (CFG_CHECK(MT8183, p_id))
                        out = CFG_COMP(MT8183, ctx->param, outputs[0]);
+               else if (CFG_CHECK(MT8195, p_id))
+                       out = CFG_COMP(MT8195, ctx->param, outputs[0]);
 
                compose = path->composes[out];
                ret = call_op(ctx, config_frame, cmd, compose);
@@ -279,6 +456,13 @@ static int mdp_path_config(struct mdp_dev *mdp, struct mdp_cmdq_cmd *cmd,
        }
        /* Post processing information */
        for (index = 0; index < num_comp; index++) {
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[index].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[index].type);
+
+               if (mdp_cfg_comp_is_dummy(path->mdp_dev, inner_id))
+                       continue;
                ctx = &path->comps[index];
                ret = call_op(ctx, post_process, cmd);
                if (ret)
@@ -328,18 +512,31 @@ static void mdp_auto_release_work(struct work_struct *work)
 {
        struct mdp_cmdq_cmd *cmd;
        struct mdp_dev *mdp;
-       int id;
+       struct mtk_mutex *mutex;
+       enum mdp_pipe_id pipe_id;
 
        cmd = container_of(work, struct mdp_cmdq_cmd, auto_release_work);
        mdp = cmd->mdp;
 
-       id = mdp->mdp_data->pipe_info[MDP_PIPE_RDMA0].mutex_id;
-       mtk_mutex_unprepare(mdp->mdp_mutex[id]);
+       pipe_id = __get_pipe(mdp, cmd->comps[0].public_id);
+       mutex = __get_mutex(mdp, &mdp->mdp_data->pipe_info[pipe_id]);
+       mtk_mutex_unprepare(mutex);
        mdp_comp_clocks_off(&mdp->pdev->dev, cmd->comps,
                            cmd->num_comps);
 
-       atomic_dec(&mdp->job_count);
-       wake_up(&mdp->callback_wq);
+       if (atomic_dec_and_test(&mdp->job_count)) {
+               if (cmd->mdp_ctx)
+                       mdp_m2m_job_finish(cmd->mdp_ctx);
+
+               if (cmd->user_cmdq_cb) {
+                       struct cmdq_cb_data user_cb_data;
+
+                       user_cb_data.sta = cmd->data->sta;
+                       user_cb_data.pkt = cmd->data->pkt;
+                       cmd->user_cmdq_cb(user_cb_data);
+               }
+               wake_up(&mdp->callback_wq);
+       }
 
        mdp_cmdq_pkt_destroy(&cmd->pkt);
        kfree(cmd->comps);
@@ -354,7 +551,7 @@ static void mdp_handle_cmdq_callback(struct mbox_client *cl, void *mssg)
        struct cmdq_cb_data *data;
        struct mdp_dev *mdp;
        struct device *dev;
-       int id;
+       enum mdp_pipe_id pipe_id;
 
        if (!mssg) {
                pr_info("%s:no callback data\n", __func__);
@@ -363,30 +560,23 @@ static void mdp_handle_cmdq_callback(struct mbox_client *cl, void *mssg)
 
        data = (struct cmdq_cb_data *)mssg;
        cmd = container_of(data->pkt, struct mdp_cmdq_cmd, pkt);
+       cmd->data = data;
        mdp = cmd->mdp;
        dev = &mdp->pdev->dev;
 
-       if (cmd->mdp_ctx)
-               mdp_m2m_job_finish(cmd->mdp_ctx);
-
-       if (cmd->user_cmdq_cb) {
-               struct cmdq_cb_data user_cb_data;
-
-               user_cb_data.sta = data->sta;
-               user_cb_data.pkt = data->pkt;
-               cmd->user_cmdq_cb(user_cb_data);
-       }
-
        INIT_WORK(&cmd->auto_release_work, mdp_auto_release_work);
        if (!queue_work(mdp->clock_wq, &cmd->auto_release_work)) {
+               struct mtk_mutex *mutex;
+
                dev_err(dev, "%s:queue_work fail!\n", __func__);
-               id = mdp->mdp_data->pipe_info[MDP_PIPE_RDMA0].mutex_id;
-               mtk_mutex_unprepare(mdp->mdp_mutex[id]);
+               pipe_id = __get_pipe(mdp, cmd->comps[0].public_id);
+               mutex = __get_mutex(mdp, &mdp->mdp_data->pipe_info[pipe_id]);
+               mtk_mutex_unprepare(mutex);
                mdp_comp_clocks_off(&mdp->pdev->dev, cmd->comps,
                                    cmd->num_comps);
 
-               atomic_dec(&mdp->job_count);
-               wake_up(&mdp->callback_wq);
+               if (atomic_dec_and_test(&mdp->job_count))
+                       wake_up(&mdp->callback_wq);
 
                mdp_cmdq_pkt_destroy(&cmd->pkt);
                kfree(cmd->comps);
@@ -396,34 +586,48 @@ static void mdp_handle_cmdq_callback(struct mbox_client *cl, void *mssg)
        }
 }
 
-int mdp_cmdq_send(struct mdp_dev *mdp, struct mdp_cmdq_param *param)
+static struct mdp_cmdq_cmd *mdp_cmdq_prepare(struct mdp_dev *mdp,
+                                            struct mdp_cmdq_param *param,
+                                            u8 pp_idx)
 {
        struct mdp_path *path = NULL;
        struct mdp_cmdq_cmd *cmd = NULL;
        struct mdp_comp *comps = NULL;
        struct device *dev = &mdp->pdev->dev;
        const int p_id = mdp->mdp_data->mdp_plat_id;
-       int i, ret;
-       u32 num_comp = 0;
-
-       atomic_inc(&mdp->job_count);
-       if (atomic_read(&mdp->suspended)) {
-               atomic_dec(&mdp->job_count);
-               return -ECANCELED;
+       struct img_config *config;
+       struct mtk_mutex *mutex = NULL;
+       enum mdp_pipe_id pipe_id;
+       int i, ret = -ECANCELED;
+       u32 num_comp;
+
+       config = __get_config_offset(mdp, param, pp_idx);
+       if (IS_ERR(config)) {
+               ret = PTR_ERR(config);
+               goto err_uninit;
        }
 
+       if (CFG_CHECK(MT8183, p_id))
+               num_comp = CFG_GET(MT8183, config, num_components);
+       else if (CFG_CHECK(MT8195, p_id))
+               num_comp = CFG_GET(MT8195, config, num_components);
+       else
+               goto err_uninit;
+
        cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
        if (!cmd) {
                ret = -ENOMEM;
-               goto err_cancel_job;
+               goto err_uninit;
        }
 
-       ret = mdp_cmdq_pkt_create(mdp->cmdq_clt, &cmd->pkt, SZ_16K);
+       ret = mdp_cmdq_pkt_create(mdp->cmdq_clt[pp_idx], &cmd->pkt, SZ_16K);
        if (ret)
                goto err_free_cmd;
 
        if (CFG_CHECK(MT8183, p_id)) {
                num_comp = CFG_GET(MT8183, param->config, num_components);
+       } else if (CFG_CHECK(MT8195, p_id)) {
+               num_comp = CFG_GET(MT8195, param->config, num_components);
        } else {
                ret = -EINVAL;
                goto err_destroy_pkt;
@@ -440,15 +644,8 @@ int mdp_cmdq_send(struct mdp_dev *mdp, struct mdp_cmdq_param *param)
                goto err_free_comps;
        }
 
-       i = mdp->mdp_data->pipe_info[MDP_PIPE_RDMA0].mutex_id;
-       ret = mtk_mutex_prepare(mdp->mdp_mutex[i]);
-       if (ret) {
-               dev_err(dev, "Fail to enable mutex clk\n");
-               goto err_free_path;
-       }
-
        path->mdp_dev = mdp;
-       path->config = param->config;
+       path->config = config;
        path->param = param->param;
        for (i = 0; i < param->param->num_outputs; i++) {
                path->bounds[i].left = 0;
@@ -462,22 +659,40 @@ int mdp_cmdq_send(struct mdp_dev *mdp, struct mdp_cmdq_param *param)
        }
        ret = mdp_path_ctx_init(mdp, path);
        if (ret) {
-               dev_err(dev, "mdp_path_ctx_init error\n");
+               dev_err(dev, "mdp_path_ctx_init error %d\n", pp_idx);
+               goto err_free_path;
+       }
+
+       pipe_id = __get_pipe(mdp, path->comps[0].comp->public_id);
+       mutex = __get_mutex(mdp, &mdp->mdp_data->pipe_info[pipe_id]);
+       ret = mtk_mutex_prepare(mutex);
+       if (ret) {
+               dev_err(dev, "Fail to enable mutex %d clk\n", pp_idx);
                goto err_free_path;
        }
 
        ret = mdp_path_config(mdp, cmd, path);
        if (ret) {
-               dev_err(dev, "mdp_path_config error\n");
+               dev_err(dev, "mdp_path_config error %d\n", pp_idx);
                goto err_free_path;
        }
        cmdq_pkt_finalize(&cmd->pkt);
 
-       for (i = 0; i < num_comp; i++)
+       for (i = 0; i < num_comp; i++) {
+               s32 inner_id = MDP_COMP_NONE;
+
+               if (CFG_CHECK(MT8183, p_id))
+                       inner_id = CFG_GET(MT8183, path->config, components[i].type);
+               else if (CFG_CHECK(MT8195, p_id))
+                       inner_id = CFG_GET(MT8195, path->config, components[i].type);
+
+               if (mdp_cfg_comp_is_dummy(mdp, inner_id))
+                       continue;
                memcpy(&comps[i], path->comps[i].comp,
                       sizeof(struct mdp_comp));
+       }
 
-       mdp->cmdq_clt->client.rx_callback = mdp_handle_cmdq_callback;
+       mdp->cmdq_clt[pp_idx]->client.rx_callback = mdp_handle_cmdq_callback;
        cmd->mdp = mdp;
        cmd->user_cmdq_cb = param->cmdq_cb;
        cmd->user_cb_data = param->cb_data;
@@ -485,29 +700,12 @@ int mdp_cmdq_send(struct mdp_dev *mdp, struct mdp_cmdq_param *param)
        cmd->num_comps = num_comp;
        cmd->mdp_ctx = param->mdp_ctx;
 
-       ret = mdp_comp_clocks_on(&mdp->pdev->dev, cmd->comps, cmd->num_comps);
-       if (ret)
-               goto err_free_path;
-
-       dma_sync_single_for_device(mdp->cmdq_clt->chan->mbox->dev,
-                                  cmd->pkt.pa_base, cmd->pkt.cmd_buf_size,
-                                  DMA_TO_DEVICE);
-       ret = mbox_send_message(mdp->cmdq_clt->chan, &cmd->pkt);
-       if (ret < 0) {
-               dev_err(dev, "mbox send message fail %d!\n", ret);
-               goto err_clock_off;
-       }
-       mbox_client_txdone(mdp->cmdq_clt->chan, 0);
-
        kfree(path);
-       return 0;
+       return cmd;
 
-err_clock_off:
-       mdp_comp_clocks_off(&mdp->pdev->dev, cmd->comps,
-                           cmd->num_comps);
 err_free_path:
-       i = mdp->mdp_data->pipe_info[MDP_PIPE_RDMA0].mutex_id;
-       mtk_mutex_unprepare(mdp->mdp_mutex[i]);
+       if (mutex)
+               mtk_mutex_unprepare(mutex);
        kfree(path);
 err_free_comps:
        kfree(comps);
@@ -515,8 +713,58 @@ err_destroy_pkt:
        mdp_cmdq_pkt_destroy(&cmd->pkt);
 err_free_cmd:
        kfree(cmd);
+err_uninit:
+       return ERR_PTR(ret);
+}
+
+int mdp_cmdq_send(struct mdp_dev *mdp, struct mdp_cmdq_param *param)
+{
+       struct mdp_cmdq_cmd *cmd[MDP_PP_MAX] = {NULL};
+       struct device *dev = &mdp->pdev->dev;
+       int i, ret;
+       u8 pp_used = __get_pp_num(param->param->type);
+
+       atomic_set(&mdp->job_count, pp_used);
+       if (atomic_read(&mdp->suspended)) {
+               atomic_set(&mdp->job_count, 0);
+               return -ECANCELED;
+       }
+
+       for (i = 0; i < pp_used; i++) {
+               cmd[i] = mdp_cmdq_prepare(mdp, param, i);
+               if (IS_ERR_OR_NULL(cmd[i])) {
+                       ret = PTR_ERR(cmd[i]);
+                       goto err_cancel_job;
+               }
+       }
+
+       for (i = 0; i < pp_used; i++) {
+               ret = mdp_comp_clocks_on(&mdp->pdev->dev, cmd[i]->comps, cmd[i]->num_comps);
+               if (ret)
+                       goto err_clock_off;
+       }
+
+       for (i = 0; i < pp_used; i++) {
+               dma_sync_single_for_device(mdp->cmdq_clt[i]->chan->mbox->dev,
+                                          cmd[i]->pkt.pa_base, cmd[i]->pkt.cmd_buf_size,
+                                          DMA_TO_DEVICE);
+
+               ret = mbox_send_message(mdp->cmdq_clt[i]->chan, &cmd[i]->pkt);
+               if (ret < 0) {
+                       dev_err(dev, "mbox send message fail %d!\n", ret);
+                       i = pp_used;
+                       goto err_clock_off;
+               }
+               mbox_client_txdone(mdp->cmdq_clt[i]->chan, 0);
+       }
+       return 0;
+
+err_clock_off:
+       while (--i >= 0)
+               mdp_comp_clocks_off(&mdp->pdev->dev, cmd[i]->comps,
+                                   cmd[i]->num_comps);
 err_cancel_job:
-       atomic_dec(&mdp->job_count);
+       atomic_set(&mdp->job_count, 0);
 
        return ret;
 }
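
With the prepare/send split, a caller still issues a single mdp_cmdq_send();
the fan-out to pp_used pipes and the last-one-out completion (the
atomic_dec_and_test() in mdp_auto_release_work()) stay internal. A
hypothetical caller sketch, where every name not shown in this patch is an
assumption:

	struct mdp_cmdq_param param = {
		.config = vpu_shared_config,	/* per-pipe configs packed inside */
		.param = frame_param,	/* ->type may select MDP_STREAM_TYPE_DUAL_BITBLT */
		.cmdq_cb = my_done_cb,
		.cb_data = my_ctx,
	};

	ret = mdp_cmdq_send(mdp, &param);
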
index 43475b862ddb919d5f7afb937a2da351bd663858..53a30ad7e0b09e9ddacfd8f80e634b1ed1fb671d 100644
@@ -29,6 +29,7 @@ struct mdp_cmdq_cmd {
        struct cmdq_pkt pkt;
        s32 *event;
        struct mdp_dev *mdp;
+       struct cmdq_cb_data *data;
        void (*user_cmdq_cb)(struct cmdq_cb_data data);
        void *user_cb_data;
        struct mdp_comp *comps;
index 575c8d52acd1913d489c7e3b6ae649448eb9bc59..8f62fb167156ddcc472ee82d554ae8d54f24b0b3 100644
 #include "mtk-mdp3-core.h"
 #include "mtk-mdp3-regs.h"
 
-#include "mdp_reg_rdma.h"
+#include "mdp_reg_aal.h"
 #include "mdp_reg_ccorr.h"
+#include "mdp_reg_color.h"
+#include "mdp_reg_fg.h"
+#include "mdp_reg_hdr.h"
+#include "mdp_reg_merge.h"
+#include "mdp_reg_ovl.h"
+#include "mdp_reg_pad.h"
+#include "mdp_reg_rdma.h"
 #include "mdp_reg_rsz.h"
-#include "mdp_reg_wrot.h"
+#include "mdp_reg_tdshp.h"
 #include "mdp_reg_wdma.h"
+#include "mdp_reg_wrot.h"
 
 static u32 mdp_comp_alias_id[MDP_COMP_TYPE_COUNT];
 static int p_id;
@@ -85,6 +93,7 @@ static int config_rdma_frame(struct mdp_comp_ctx *ctx,
        bool en_ufo = MDP_COLOR_IS_UFP(colorformat);
        phys_addr_t base = ctx->comp->reg_base;
        u8 subsys_id = ctx->comp->subsys_id;
+       u32 rdma_con_mask = 0;
        u32 reg = 0;
 
        if (mdp_cfg && mdp_cfg->rdma_support_10bit) {
@@ -105,6 +114,8 @@ static int config_rdma_frame(struct mdp_comp_ctx *ctx,
        /* Setup source frame info */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.src_ctrl);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.src_ctrl);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SRC_CON, reg,
                     0x03C8FE0F);
 
@@ -113,69 +124,163 @@ static int config_rdma_frame(struct mdp_comp_ctx *ctx,
                        /* Setup source buffer base */
                        if (CFG_CHECK(MT8183, p_id))
                                reg = CFG_COMP(MT8183, ctx->param, rdma.ufo_dec_y);
+                       else if (CFG_CHECK(MT8195, p_id))
+                               reg = CFG_COMP(MT8195, ctx->param, rdma.ufo_dec_y);
                        MM_REG_WRITE(cmd, subsys_id,
                                     base, MDP_RDMA_UFO_DEC_LENGTH_BASE_Y,
                                     reg, 0xFFFFFFFF);
+
                        if (CFG_CHECK(MT8183, p_id))
                                reg = CFG_COMP(MT8183, ctx->param, rdma.ufo_dec_c);
+                       else if (CFG_CHECK(MT8195, p_id))
+                               reg = CFG_COMP(MT8195, ctx->param, rdma.ufo_dec_c);
                        MM_REG_WRITE(cmd, subsys_id,
                                     base, MDP_RDMA_UFO_DEC_LENGTH_BASE_C,
                                     reg, 0xFFFFFFFF);
+
                        /* Set 10bit source frame pitch */
                        if (block10bit) {
                                if (CFG_CHECK(MT8183, p_id))
                                        reg = CFG_COMP(MT8183, ctx->param, rdma.mf_bkgd_in_pxl);
+                               else if (CFG_CHECK(MT8195, p_id))
+                                       reg = CFG_COMP(MT8195, ctx->param, rdma.mf_bkgd_in_pxl);
                                MM_REG_WRITE(cmd, subsys_id,
                                             base, MDP_RDMA_MF_BKGD_SIZE_IN_PXL,
                                             reg, 0x001FFFFF);
                        }
                }
 
-       if (CFG_CHECK(MT8183, p_id))
+       if (CFG_CHECK(MT8183, p_id)) {
                reg = CFG_COMP(MT8183, ctx->param, rdma.control);
+               rdma_con_mask = 0x1110;
+       } else if (CFG_CHECK(MT8195, p_id)) {
+               reg = CFG_COMP(MT8195, ctx->param, rdma.control);
+               rdma_con_mask = 0x1130;
+       }
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_CON, reg,
-                    0x1110);
+                    rdma_con_mask);
+
        /* Setup source buffer base */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.iova[0]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.iova[0]);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SRC_BASE_0, reg,
                     0xFFFFFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.iova[1]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.iova[1]);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SRC_BASE_1, reg,
                     0xFFFFFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.iova[2]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.iova[2]);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SRC_BASE_2, reg,
                     0xFFFFFFFF);
+
        /* Setup source buffer end */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.iova_end[0]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.iova_end[0]);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SRC_END_0,
                     reg, 0xFFFFFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.iova_end[1]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.iova_end[1]);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SRC_END_1,
                     reg, 0xFFFFFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.iova_end[2]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.iova_end[2]);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SRC_END_2,
                     reg, 0xFFFFFFFF);
+
        /* Setup source frame pitch */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.mf_bkgd);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.mf_bkgd);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_MF_BKGD_SIZE_IN_BYTE,
                     reg, 0x001FFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.sf_bkgd);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.sf_bkgd);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SF_BKGD_SIZE_IN_BYTE,
                     reg, 0x001FFFFF);
+
        /* Setup color transform */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.transform);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.transform);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_TRANSFORM_0,
                     reg, 0x0F110000);
 
+       if (!mdp_cfg || !mdp_cfg->rdma_esl_setting)
+               goto rdma_config_done;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.dmabuf_con0);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_DMABUF_CON_0,
+                    reg, 0x0FFF00FF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.ultra_th_high_con0);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_ULTRA_TH_HIGH_CON_0,
+                    reg, 0x3FFFFFFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.ultra_th_low_con0);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_ULTRA_TH_LOW_CON_0,
+                    reg, 0x3FFFFFFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.dmabuf_con1);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_DMABUF_CON_1,
+                    reg, 0x0F7F007F);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.ultra_th_high_con1);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_ULTRA_TH_HIGH_CON_1,
+                    reg, 0x3FFFFFFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.ultra_th_low_con1);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_ULTRA_TH_LOW_CON_1,
+                    reg, 0x3FFFFFFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.dmabuf_con2);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_DMABUF_CON_2,
+                    reg, 0x0F3F003F);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.ultra_th_high_con2);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_ULTRA_TH_HIGH_CON_2,
+                    reg, 0x3FFFFFFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.ultra_th_low_con2);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_ULTRA_TH_LOW_CON_2,
+                    reg, 0x3FFFFFFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.dmabuf_con3);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_DMABUF_CON_3,
+                    reg, 0x0F3F003F);
+
+rdma_config_done:
        return 0;
 }
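
The MT8195 branches above all follow the driver's CFG_CHECK()/CFG_COMP()
dispatch: the same logical field is fetched from whichever per-SoC
parameter layout is in use. A minimal standalone sketch of that pattern,
with illustrative enum, struct and field names rather than the driver's
real definitions:

    /* Model of the CFG_CHECK()/CFG_COMP() dispatch; all names here are
     * illustrative stand-ins, not the driver's actual definitions.
     */
    #include <stdint.h>
    #include <stdio.h>

    enum plat_id { PLAT_MT8183, PLAT_MT8195 };

    struct cfg_MT8183 { uint32_t control; };
    struct cfg_MT8195 { uint32_t control; };

    #define CFG_CHECK(plat, p_id)  ((p_id) == PLAT_##plat)
    #define CFG_COMP(plat, param, field) \
            (((const struct cfg_##plat *)(param))->field)

    static uint32_t rdma_control(enum plat_id p_id, const void *param)
    {
            uint32_t reg = 0;

            if (CFG_CHECK(MT8183, p_id))
                    reg = CFG_COMP(MT8183, param, control);
            else if (CFG_CHECK(MT8195, p_id))
                    reg = CFG_COMP(MT8195, param, control);
            return reg;
    }

    int main(void)
    {
            struct cfg_MT8195 c = { .control = 0x1130 };

            printf("0x%x\n", (unsigned int)rdma_control(PLAT_MT8195, &c));
            return 0;
    }

The rdma_con_mask split (0x1110 for MT8183, 0x1130 for MT8195) applies the
same idea to write masks: each SoC exposes a different set of writable
MDP_RDMA_CON bits.
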
 
@@ -197,6 +302,8 @@ static int config_rdma_subfrm(struct mdp_comp_ctx *ctx,
        /* Set Y pixel offset */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.subfrms[index].offset[0]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.subfrms[index].offset[0]);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SRC_OFFSET_0,
                     reg, 0xFFFFFFFF);
 
@@ -205,6 +312,8 @@ static int config_rdma_subfrm(struct mdp_comp_ctx *ctx,
                if (mdp_cfg->rdma_support_10bit && block10bit && en_ufo) {
                        if (CFG_CHECK(MT8183, p_id))
                                reg = CFG_COMP(MT8183, ctx->param, rdma.subfrms[index].offset_0_p);
+                       else if (CFG_CHECK(MT8195, p_id))
+                               reg = CFG_COMP(MT8195, ctx->param, rdma.subfrms[index].offset_0_p);
                        MM_REG_WRITE(cmd, subsys_id, base,
                                     MDP_RDMA_SRC_OFFSET_0_P,
                                     reg, 0xFFFFFFFF);
@@ -214,32 +323,49 @@ static int config_rdma_subfrm(struct mdp_comp_ctx *ctx,
        /* Set U pixel offset */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.subfrms[index].offset[1]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.subfrms[index].offset[1]);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SRC_OFFSET_1,
                     reg, 0xFFFFFFFF);
+
        /* Set V pixel offset */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.subfrms[index].offset[2]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.subfrms[index].offset[2]);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_SRC_OFFSET_2,
                     reg, 0xFFFFFFFF);
+
        /* Set source size */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.subfrms[index].src);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.subfrms[index].src);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_MF_SRC_SIZE, reg,
                     0x1FFF1FFF);
+
        /* Set target size */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.subfrms[index].clip);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.subfrms[index].clip);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_MF_CLIP_SIZE,
                     reg, 0x1FFF1FFF);
+
        /* Set crop offset */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rdma.subfrms[index].clip_ofst);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rdma.subfrms[index].clip_ofst);
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_MF_OFFSET_1,
                     reg, 0x003F001F);
 
        if (CFG_CHECK(MT8183, p_id)) {
                csf_l = CFG_COMP(MT8183, ctx->param, subfrms[index].in.left);
                csf_r = CFG_COMP(MT8183, ctx->param, subfrms[index].in.right);
+       } else if (CFG_CHECK(MT8195, p_id)) {
+               csf_l = CFG_COMP(MT8195, ctx->param, subfrms[index].in.left);
+               csf_r = CFG_COMP(MT8195, ctx->param, subfrms[index].in.right);
        }
        if (mdp_cfg && mdp_cfg->rdma_upsample_repeat_only)
                if ((csf_r - csf_l + 1) > 320)
@@ -251,14 +377,20 @@ static int config_rdma_subfrm(struct mdp_comp_ctx *ctx,
 
 static int wait_rdma_event(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
 {
+       const struct mdp_platform_config *mdp_cfg = __get_plat_cfg(ctx);
        struct device *dev = &ctx->comp->mdp_dev->pdev->dev;
        phys_addr_t base = ctx->comp->reg_base;
        u8 subsys_id = ctx->comp->subsys_id;
 
-       if (ctx->comp->alias_id == 0)
-               MM_REG_WAIT(cmd, ctx->comp->gce_event[MDP_GCE_EVENT_EOF]);
-       else
-               dev_err(dev, "Do not support RDMA1_DONE event\n");
+       if (!mdp_cfg)
+               return -EINVAL;
+
+       if (ctx->comp->alias_id >= mdp_cfg->rdma_event_num) {
+               dev_err(dev, "Invalid RDMA event %d\n", ctx->comp->alias_id);
+               return -EINVAL;
+       }
+
+       MM_REG_WAIT(cmd, ctx->comp->gce_event[MDP_GCE_EVENT_EOF]);
 
        /* Disable RDMA */
        MM_REG_WRITE(cmd, subsys_id, base, MDP_RDMA_EN, 0x0, BIT(0));
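
wait_rdma_event() stops hard-coding alias 0 and instead validates
alias_id against the platform's rdma_event_num before waiting on the GCE
end-of-frame event; wait_wrot_event() below gets the same treatment via
wrot_event_num. The validate-then-wait shape in plain C, with stand-in
types for the driver's mdp_comp / mdp_platform_config:

    #include <errno.h>
    #include <stdio.h>

    struct plat_cfg { unsigned int event_num; };
    struct comp     { unsigned int alias_id; int eof_event; };

    static int wait_eof(const struct plat_cfg *cfg, const struct comp *c)
    {
            if (!cfg)
                    return -EINVAL;
            if (c->alias_id >= cfg->event_num) {
                    fprintf(stderr, "Invalid event %u\n", c->alias_id);
                    return -EINVAL;
            }
            /* real driver: MM_REG_WAIT(cmd, gce_event[MDP_GCE_EVENT_EOF]) */
            printf("wait on GCE event %d\n", c->eof_event);
            return 0;
    }

    int main(void)
    {
            struct plat_cfg cfg = { .event_num = 1 };
            struct comp rdma1 = { .alias_id = 1, .eof_event = 42 };

            return wait_eof(&cfg, &rdma1) ? 1 : 0;  /* rejected: out of range */
    }
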
@@ -283,6 +415,14 @@ static int init_rsz(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_ENABLE, 0x0, BIT(16));
        /* Enable RSZ */
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_ENABLE, BIT(0), BIT(0));
+
+       if (CFG_CHECK(MT8195, p_id)) {
+               struct device *dev;
+
+               dev = ctx->comp->mdp_dev->mm_subsys[MDP_MM_SUBSYS_1].mmsys;
+               mtk_mmsys_vpp_rsz_dcm_config(dev, true, NULL);
+       }
+
        return 0;
 }
 
@@ -290,13 +430,19 @@ static int config_rsz_frame(struct mdp_comp_ctx *ctx,
                            struct mdp_cmdq_cmd *cmd,
                            const struct v4l2_rect *compose)
 {
+       const struct mdp_platform_config *mdp_cfg = __get_plat_cfg(ctx);
        phys_addr_t base = ctx->comp->reg_base;
        u8 subsys_id = ctx->comp->subsys_id;
        bool bypass = FALSE;
        u32 reg = 0;
 
+       if (mdp_cfg && mdp_cfg->rsz_etc_control)
+               MM_REG_WRITE(cmd, subsys_id, base, RSZ_ETC_CONTROL, 0x0, 0xFFFFFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                bypass = CFG_COMP(MT8183, ctx->param, frame.bypass);
+       else if (CFG_CHECK(MT8195, p_id))
+               bypass = CFG_COMP(MT8195, ctx->param, frame.bypass);
 
        if (bypass) {
                /* Disable RSZ */
@@ -306,20 +452,32 @@ static int config_rsz_frame(struct mdp_comp_ctx *ctx,
 
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rsz.control1);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rsz.control1);
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_CONTROL_1, reg,
                     0x03FFFDF3);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rsz.control2);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rsz.control2);
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_CONTROL_2, reg,
                     0x0FFFC290);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rsz.coeff_step_x);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rsz.coeff_step_x);
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_HORIZONTAL_COEFF_STEP,
                     reg, 0x007FFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rsz.coeff_step_y);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rsz.coeff_step_y);
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_VERTICAL_COEFF_STEP,
                     reg, 0x007FFFFF);
+
        return 0;
 }
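
Every MM_REG_WRITE() above pairs a value with a mask (0x007FFFFF for the
coefficient-step registers, for instance), so only the named field of the
register is touched; the driver's macro queues the write for the GCE
rather than performing it on the CPU. A rough model of the mask
semantics:

    #include <stdint.h>
    #include <stdio.h>

    /* Update only the bits selected by mask, preserving the rest. */
    static uint32_t masked_write(uint32_t old, uint32_t val, uint32_t mask)
    {
            return (old & ~mask) | (val & mask);
    }

    int main(void)
    {
            /* PRZ_HORIZONTAL_COEFF_STEP-style field: low 23 bits only */
            uint32_t r = masked_write(0xFFFFFFFFu, 0x1234u, 0x007FFFFFu);

            printf("0x%08x\n", (unsigned int)r);   /* high bits preserved */
            return 0;
    }
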
 
@@ -331,19 +489,28 @@ static int config_rsz_subfrm(struct mdp_comp_ctx *ctx,
        u8 subsys_id = ctx->comp->subsys_id;
        u32 csf_l = 0, csf_r = 0;
        u32 reg = 0;
+       u32 id;
 
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rsz.subfrms[index].control2);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rsz.subfrms[index].control2);
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_CONTROL_2, reg,
                     0x00003800);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rsz.subfrms[index].src);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rsz.subfrms[index].src);
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_INPUT_IMAGE, reg,
                     0xFFFFFFFF);
 
        if (CFG_CHECK(MT8183, p_id)) {
                csf_l = CFG_COMP(MT8183, ctx->param, subfrms[index].in.left);
                csf_r = CFG_COMP(MT8183, ctx->param, subfrms[index].in.right);
+       } else if (CFG_CHECK(MT8195, p_id)) {
+               csf_l = CFG_COMP(MT8195, ctx->param, subfrms[index].in.left);
+               csf_r = CFG_COMP(MT8195, ctx->param, subfrms[index].in.right);
        }
        if (mdp_cfg && mdp_cfg->rsz_disable_dcm_small_sample)
                if ((csf_r - csf_l + 1) <= 16)
@@ -352,37 +519,99 @@ static int config_rsz_subfrm(struct mdp_comp_ctx *ctx,
 
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, subfrms[index].luma.left);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, subfrms[index].luma.left);
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_LUMA_HORIZONTAL_INTEGER_OFFSET,
                     reg, 0xFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, subfrms[index].luma.left_subpix);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, subfrms[index].luma.left_subpix);
        MM_REG_WRITE(cmd, subsys_id,
                     base, PRZ_LUMA_HORIZONTAL_SUBPIXEL_OFFSET,
                     reg, 0x1FFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, subfrms[index].luma.top);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, subfrms[index].luma.top);
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_LUMA_VERTICAL_INTEGER_OFFSET,
                     reg, 0xFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, subfrms[index].luma.top_subpix);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, subfrms[index].luma.top_subpix);
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_LUMA_VERTICAL_SUBPIXEL_OFFSET,
                     reg, 0x1FFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, subfrms[index].chroma.left);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, subfrms[index].chroma.left);
        MM_REG_WRITE(cmd, subsys_id,
                     base, PRZ_CHROMA_HORIZONTAL_INTEGER_OFFSET,
                     reg, 0xFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, subfrms[index].chroma.left_subpix);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, subfrms[index].chroma.left_subpix);
        MM_REG_WRITE(cmd, subsys_id,
                     base, PRZ_CHROMA_HORIZONTAL_SUBPIXEL_OFFSET,
                     reg, 0x1FFFFF);
 
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, rsz.subfrms[index].clip);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, rsz.subfrms[index].clip);
        MM_REG_WRITE(cmd, subsys_id, base, PRZ_OUTPUT_IMAGE, reg,
                     0xFFFFFFFF);
 
+       if (CFG_CHECK(MT8195, p_id)) {
+               struct device *dev;
+               struct mdp_comp *merge;
+               const struct mtk_mdp_driver_data *data = ctx->comp->mdp_dev->mdp_data;
+               enum mtk_mdp_comp_id public_id = ctx->comp->public_id;
+
+               switch (public_id) {
+               case MDP_COMP_RSZ2:
+                       merge = ctx->comp->mdp_dev->comp[MDP_COMP_MERGE2];
+                       break;
+               case MDP_COMP_RSZ3:
+                       merge = ctx->comp->mdp_dev->comp[MDP_COMP_MERGE3];
+                       break;
+               default:
+                       goto rsz_subfrm_done;
+               }
+
+               if (CFG_CHECK(MT8195, p_id))
+                       reg = CFG_COMP(MT8195, ctx->param, rsz.subfrms[index].rsz_switch);
+
+               id = data->comp_data[public_id].match.alias_id;
+               dev = ctx->comp->mdp_dev->mm_subsys[MDP_MM_SUBSYS_1].mmsys;
+               mtk_mmsys_vpp_rsz_merge_config(dev, id, reg, NULL);
+
+               if (CFG_CHECK(MT8195, p_id))
+                       reg = CFG_COMP(MT8195, ctx->param, rsz.subfrms[index].merge_cfg);
+               MM_REG_WRITE(cmd, merge->subsys_id, merge->reg_base,
+                            MDP_MERGE_CFG_0, reg, 0xFFFFFFFF);
+               MM_REG_WRITE(cmd, merge->subsys_id, merge->reg_base,
+                            MDP_MERGE_CFG_4, reg, 0xFFFFFFFF);
+               MM_REG_WRITE(cmd, merge->subsys_id, merge->reg_base,
+                            MDP_MERGE_CFG_24, reg, 0xFFFFFFFF);
+               MM_REG_WRITE(cmd, merge->subsys_id, merge->reg_base,
+                            MDP_MERGE_CFG_25, reg, 0xFFFFFFFF);
+
+               /* Bypass mode */
+               MM_REG_WRITE(cmd, merge->subsys_id, merge->reg_base,
+                            MDP_MERGE_CFG_12, BIT(0), 0xFFFFFFFF);
+               MM_REG_WRITE(cmd, merge->subsys_id, merge->reg_base,
+                            MDP_MERGE_ENABLE, BIT(0), 0xFFFFFFFF);
+       }
+
+rsz_subfrm_done:
        return 0;
 }
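
On MT8195, only the second and third resizers feed a companion MERGE
engine on the dual-pipe path, so config_rsz_subfrm() maps RSZ2 to MERGE2
and RSZ3 to MERGE3, routes the pair through the MMSYS, and then enables
the MERGE block in bypass mode; every other resizer jumps straight to
rsz_subfrm_done. The pairing reduces to a small lookup (enum values are
illustrative):

    #include <stdio.h>

    enum comp_id { RSZ0, RSZ1, RSZ2, RSZ3, MERGE2, MERGE3, NONE };

    /* Only RSZ2/RSZ3 own a MERGE companion on this layout. */
    static enum comp_id paired_merge(enum comp_id rsz)
    {
            switch (rsz) {
            case RSZ2: return MERGE2;
            case RSZ3: return MERGE3;
            default:   return NONE;   /* no merge programming needed */
            }
    }

    int main(void)
    {
            printf("%d %d\n", paired_merge(RSZ2) == MERGE2,
                              paired_merge(RSZ0) == NONE);
            return 0;
    }
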
 
@@ -399,6 +628,9 @@ static int advance_rsz_subfrm(struct mdp_comp_ctx *ctx,
                if (CFG_CHECK(MT8183, p_id)) {
                        csf_l = CFG_COMP(MT8183, ctx->param, subfrms[index].in.left);
                        csf_r = CFG_COMP(MT8183, ctx->param, subfrms[index].in.right);
+               } else if (CFG_CHECK(MT8195, p_id)) {
+                       csf_l = CFG_COMP(MT8195, ctx->param, subfrms[index].in.left);
+                       csf_r = CFG_COMP(MT8195, ctx->param, subfrms[index].in.right);
                }
 
                if ((csf_r - csf_l + 1) <= 16)
@@ -425,6 +657,11 @@ static int init_wrot(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
        /* Reset WROT */
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_SOFT_RST, BIT(0), BIT(0));
        MM_REG_POLL(cmd, subsys_id, base, VIDO_SOFT_RST_STAT, BIT(0), BIT(0));
+
+       /* Reset setting */
+       if (CFG_CHECK(MT8195, p_id))
+               MM_REG_WRITE(cmd, subsys_id, base, VIDO_CTRL, 0x0, 0xFFFFFFFF);
+
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_SOFT_RST, 0x0, BIT(0));
        MM_REG_POLL(cmd, subsys_id, base, VIDO_SOFT_RST_STAT, 0x0, BIT(0));
        return 0;
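
init_wrot() keeps the usual assert/poll then release/poll reset
handshake, with one MT8195-only step in the middle: VIDO_CTRL is cleared
while the block is still held in reset. A compact model of the sequence,
with a toy status register standing in for MM_REG_WRITE()/MM_REG_POLL():

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t soft_rst, rst_stat;     /* toy hardware model */

    static void write_rst(uint32_t val, uint32_t mask)
    {
            soft_rst = (soft_rst & ~mask) | (val & mask);
            rst_stat = soft_rst;            /* status follows immediately */
    }

    static void poll_stat(uint32_t want, uint32_t mask)
    {
            while ((rst_stat & mask) != want)
                    ;                       /* real code: a queued GCE poll */
    }

    int main(void)
    {
            write_rst(1, 1);                /* assert soft reset  */
            poll_stat(1, 1);                /* wait for assertion */
            /* MT8195 only: clear VIDO_CTRL here, while held in reset */
            write_rst(0, 1);                /* release soft reset */
            poll_stat(0, 1);                /* wait for idle      */
            puts("reset done");
            return 0;
    }
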
@@ -442,57 +679,118 @@ static int config_wrot_frame(struct mdp_comp_ctx *ctx,
        /* Write frame base address */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.iova[0]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.iova[0]);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_BASE_ADDR, reg,
                     0xFFFFFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.iova[1]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.iova[1]);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_BASE_ADDR_C, reg,
                     0xFFFFFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.iova[2]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.iova[2]);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_BASE_ADDR_V, reg,
                     0xFFFFFFFF);
+
+       if (mdp_cfg && mdp_cfg->wrot_support_10bit) {
+               if (CFG_CHECK(MT8195, p_id))
+                       reg = CFG_COMP(MT8195, ctx->param, wrot.scan_10bit);
+               MM_REG_WRITE(cmd, subsys_id, base, VIDO_SCAN_10BIT,
+                            reg, 0x0000000F);
+
+               if (CFG_CHECK(MT8195, p_id))
+                       reg = CFG_COMP(MT8195, ctx->param, wrot.pending_zero);
+               MM_REG_WRITE(cmd, subsys_id, base, VIDO_PENDING_ZERO,
+                            reg, 0x04000000);
+       }
+
+       if (CFG_CHECK(MT8195, p_id)) {
+               reg = CFG_COMP(MT8195, ctx->param, wrot.bit_number);
+               MM_REG_WRITE(cmd, subsys_id, base, VIDO_CTRL_2,
+                            reg, 0x00000007);
+       }
+
        /* Write frame related registers */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.control);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.control);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_CTRL, reg,
                     0xF131510F);
+
+       /* Write pre-ultra threshold */
+       if (CFG_CHECK(MT8195, p_id)) {
+               reg = CFG_COMP(MT8195, ctx->param, wrot.pre_ultra);
+               MM_REG_WRITE(cmd, subsys_id, base, VIDO_DMA_PREULTRA, reg,
+                            0x00FFFFFF);
+       }
+
        /* Write frame Y pitch */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.stride[0]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.stride[0]);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_STRIDE, reg,
                     0x0000FFFF);
+
        /* Write frame UV pitch */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.stride[1]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.stride[1]);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_STRIDE_C, reg,
                     0xFFFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.stride[2]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.stride[2]);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_STRIDE_V, reg,
                     0xFFFF);
+
        /* Write matrix control */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.mat_ctrl);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.mat_ctrl);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_MAT_CTRL, reg, 0xF3);
 
        /* Set the fixed ALPHA as 0xFF */
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_DITHER, 0xFF000000,
                     0xFF000000);
+
        /* Set VIDO_EOL_SEL */
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_RSV_1, BIT(31), BIT(31));
+
        /* Set VIDO_FIFO_TEST */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.fifo_test);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.fifo_test);
+
        if (reg != 0)
                MM_REG_WRITE(cmd, subsys_id, base, VIDO_FIFO_TEST,
                             reg, 0xFFF);
+
        /* Filter enable */
        if (mdp_cfg && mdp_cfg->wrot_filter_constraint) {
                if (CFG_CHECK(MT8183, p_id))
                        reg = CFG_COMP(MT8183, ctx->param, wrot.filter);
+               else if (CFG_CHECK(MT8195, p_id))
+                       reg = CFG_COMP(MT8195, ctx->param, wrot.filter);
                MM_REG_WRITE(cmd, subsys_id, base, VIDO_MAIN_BUF_SIZE,
                             reg, 0x77);
+
+               /* Turn off WROT DMA DCM */
+               if (CFG_CHECK(MT8195, p_id))
+                       MM_REG_WRITE(cmd, subsys_id, base, VIDO_ROT_EN,
+                                    BIT(23) | BIT(20), 0x900000);
        }
 
        return 0;
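
The frame-level WROT programming is gated by per-SoC capability flags
read from the platform config (wrot_support_10bit,
wrot_filter_constraint), so MT8183 simply skips registers its hardware
does not have. The gating pattern in isolation; the flag names are the
driver's, everything else is illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    struct plat_cfg {
            bool wrot_support_10bit;
            bool wrot_filter_constraint;
    };

    static void config_wrot(const struct plat_cfg *cfg)
    {
            if (cfg && cfg->wrot_support_10bit)
                    puts("program VIDO_SCAN_10BIT / VIDO_PENDING_ZERO");
            if (cfg && cfg->wrot_filter_constraint)
                    puts("program VIDO_MAIN_BUF_SIZE filter bits");
    }

    int main(void)
    {
            struct plat_cfg mt8195 = { true, true };

            config_wrot(&mt8195);   /* both optional paths taken */
            return 0;
    }
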
@@ -508,35 +806,54 @@ static int config_wrot_subfrm(struct mdp_comp_ctx *ctx,
        /* Write Y pixel offset */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.subfrms[index].offset[0]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.subfrms[index].offset[0]);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_OFST_ADDR,
                     reg, 0x0FFFFFFF);
+
        /* Write U pixel offset */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.subfrms[index].offset[1]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.subfrms[index].offset[1]);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_OFST_ADDR_C,
                     reg, 0x0FFFFFFF);
+
        /* Write V pixel offset */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.subfrms[index].offset[2]);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.subfrms[index].offset[2]);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_OFST_ADDR_V,
                     reg, 0x0FFFFFFF);
+
        /* Write source size */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.subfrms[index].src);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.subfrms[index].src);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_IN_SIZE, reg,
                     0x1FFF1FFF);
+
        /* Write target size */
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.subfrms[index].clip);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.subfrms[index].clip);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_TAR_SIZE, reg,
                     0x1FFF1FFF);
+
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.subfrms[index].clip_ofst);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.subfrms[index].clip_ofst);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_CROP_OFST, reg,
                     0x1FFF1FFF);
 
        if (CFG_CHECK(MT8183, p_id))
                reg = CFG_COMP(MT8183, ctx->param, wrot.subfrms[index].main_buf);
+       else if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, wrot.subfrms[index].main_buf);
        MM_REG_WRITE(cmd, subsys_id, base, VIDO_MAIN_BUF_SIZE,
                     reg, 0x1FFF7F00);
 
@@ -553,10 +870,15 @@ static int wait_wrot_event(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
        phys_addr_t base = ctx->comp->reg_base;
        u8 subsys_id = ctx->comp->subsys_id;
 
-       if (ctx->comp->alias_id == 0)
-               MM_REG_WAIT(cmd, ctx->comp->gce_event[MDP_GCE_EVENT_EOF]);
-       else
-               dev_err(dev, "Do not support WROT1_DONE event\n");
+       if (!mdp_cfg)
+               return -EINVAL;
+
+       if (ctx->comp->alias_id >= mdp_cfg->wrot_event_num) {
+               dev_err(dev, "Invalid WROT event %d\n", ctx->comp->alias_id);
+               return -EINVAL;
+       }
+
+       MM_REG_WAIT(cmd, ctx->comp->gce_event[MDP_GCE_EVENT_EOF]);
 
        if (mdp_cfg && mdp_cfg->wrot_filter_constraint)
                MM_REG_WRITE(cmd, subsys_id, base, VIDO_MAIN_BUF_SIZE, 0x0,
@@ -697,6 +1019,171 @@ static const struct mdp_comp_ops wdma_ops = {
        .wait_comp_event = wait_wdma_event,
 };
 
+static int reset_luma_hist(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
+{
+       const struct mdp_platform_config *mdp_cfg = __get_plat_cfg(ctx);
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 hist_num, i;
+
+       if (!mdp_cfg)
+               return -EINVAL;
+
+       hist_num = mdp_cfg->tdshp_hist_num;
+
+       /* Reset histogram */
+       for (i = 0; i <= hist_num; i++)
+               MM_REG_WRITE_MASK(cmd, subsys_id, base,
+                                 (MDP_LUMA_HIST_INIT + (i << 2)),
+                                 0, 0xFFFFFFFF);
+
+       if (mdp_cfg->tdshp_constrain)
+               MM_REG_WRITE(cmd, subsys_id, base,
+                            MDP_DC_TWO_D_W1_RESULT_INIT, 0, 0xFFFFFFFF);
+
+       if (mdp_cfg->tdshp_contour)
+               for (i = 0; i < hist_num; i++)
+                       MM_REG_WRITE_MASK(cmd, subsys_id, base,
+                                         (MDP_CONTOUR_HIST_INIT + (i << 2)),
+                                         0, 0xFFFFFFFF);
+
+       return 0;
+}
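
reset_luma_hist() walks a bank of consecutive 32-bit histogram registers,
so bin i lives at MDP_LUMA_HIST_INIT + (i << 2), a 4-byte stride. The
addressing on its own (the base offset below is made up for the example):

    #include <stdio.h>

    #define LUMA_HIST_INIT 0x0200u   /* hypothetical register offset */

    int main(void)
    {
            unsigned int i;

            /* one 32-bit register per histogram bin: 4-byte stride */
            for (i = 0; i < 4; i++)
                    printf("bin %u -> 0x%04x\n", i, LUMA_HIST_INIT + (i << 2));
            return 0;
    }
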
+
+static int init_tdshp(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_TDSHP_CTRL, BIT(0), BIT(0));
+       /* Enable FIFO */
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_TDSHP_CFG, BIT(1), BIT(1));
+
+       return reset_luma_hist(ctx, cmd);
+}
+
+static int config_tdshp_frame(struct mdp_comp_ctx *ctx,
+                             struct mdp_cmdq_cmd *cmd,
+                             const struct v4l2_rect *compose)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, tdshp.cfg);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_TDSHP_CFG, reg, BIT(0));
+
+       return 0;
+}
+
+static int config_tdshp_subfrm(struct mdp_comp_ctx *ctx,
+                              struct mdp_cmdq_cmd *cmd, u32 index)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, tdshp.subfrms[index].src);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_TDSHP_INPUT_SIZE,
+                    reg, MDP_TDSHP_INPUT_SIZE_MASK);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, tdshp.subfrms[index].clip_ofst);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_TDSHP_OUTPUT_OFFSET,
+                    reg, 0x00FF00FF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, tdshp.subfrms[index].clip);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_TDSHP_OUTPUT_SIZE,
+                    reg, MDP_TDSHP_OUTPUT_SIZE_MASK);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, tdshp.subfrms[index].hist_cfg_0);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HIST_CFG_00, reg, 0xFFFFFFFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, tdshp.subfrms[index].hist_cfg_1);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HIST_CFG_01, reg, 0xFFFFFFFF);
+
+       return 0;
+}
+
+static const struct mdp_comp_ops tdshp_ops = {
+       .get_comp_flag = get_comp_flag,
+       .init_comp = init_tdshp,
+       .config_frame = config_tdshp_frame,
+       .config_subfrm = config_tdshp_subfrm,
+};
+
+static int init_color(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+
+       MM_REG_WRITE(cmd, subsys_id, base,
+                    MDP_COLOR_START, 0x1, BIT(1) | BIT(0));
+       MM_REG_WRITE(cmd, subsys_id, base,
+                    MDP_COLOR_WIN_X_MAIN, 0xFFFF0000, 0xFFFFFFFF);
+       MM_REG_WRITE(cmd, subsys_id, base,
+                    MDP_COLOR_WIN_Y_MAIN, 0xFFFF0000, 0xFFFFFFFF);
+
+       /* Reset color matrix */
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_COLOR_CM1_EN, 0x0, BIT(0));
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_COLOR_CM2_EN, 0x0, BIT(0));
+
+       /* Enable interrupt */
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_COLOR_INTEN, 0x7, 0x7);
+
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_COLOR_OUT_SEL, 0x333, 0x333);
+
+       return 0;
+}
+
+static int config_color_frame(struct mdp_comp_ctx *ctx,
+                             struct mdp_cmdq_cmd *cmd,
+                             const struct v4l2_rect *compose)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, color.start);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_COLOR_START,
+                    reg, MDP_COLOR_START_MASK);
+
+       return 0;
+}
+
+static int config_color_subfrm(struct mdp_comp_ctx *ctx,
+                              struct mdp_cmdq_cmd *cmd, u32 index)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, color.subfrms[index].in_hsize);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_COLOR_INTERNAL_IP_WIDTH,
+                    reg, 0x00003FFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, color.subfrms[index].in_vsize);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_COLOR_INTERNAL_IP_HEIGHT,
+                    reg, 0x00003FFF);
+
+       return 0;
+}
+
+static const struct mdp_comp_ops color_ops = {
+       .get_comp_flag = get_comp_flag,
+       .init_comp = init_color,
+       .config_frame = config_color_frame,
+       .config_subfrm = config_color_subfrm,
+};
+
 static int init_ccorr(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
 {
        phys_addr_t base = ctx->comp->reg_base;
@@ -738,12 +1225,318 @@ static const struct mdp_comp_ops ccorr_ops = {
        .config_subfrm = config_ccorr_subfrm,
 };
 
+static int init_aal(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+
+       /* Always set MDP_AAL enable to 1 */
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_AAL_EN, BIT(0), BIT(0));
+
+       return 0;
+}
+
+static int config_aal_frame(struct mdp_comp_ctx *ctx,
+                           struct mdp_cmdq_cmd *cmd,
+                           const struct v4l2_rect *compose)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, aal.cfg_main);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_AAL_CFG_MAIN, reg, BIT(7));
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, aal.cfg);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_AAL_CFG, reg, BIT(0));
+
+       return 0;
+}
+
+static int config_aal_subfrm(struct mdp_comp_ctx *ctx,
+                            struct mdp_cmdq_cmd *cmd, u32 index)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, aal.subfrms[index].src);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_AAL_SIZE,
+                    reg, MDP_AAL_SIZE_MASK);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, aal.subfrms[index].clip_ofst);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_AAL_OUTPUT_OFFSET,
+                    reg, 0x00FF00FF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, aal.subfrms[index].clip);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_AAL_OUTPUT_SIZE,
+                    reg, MDP_AAL_OUTPUT_SIZE_MASK);
+
+       return 0;
+}
+
+static const struct mdp_comp_ops aal_ops = {
+       .get_comp_flag = get_comp_flag,
+       .init_comp = init_aal,
+       .config_frame = config_aal_frame,
+       .config_subfrm = config_aal_subfrm,
+};
+
+static int init_hdr(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+
+       /* Always set MDP_HDR enable to 1 */
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_TOP, BIT(0), BIT(0));
+
+       return 0;
+}
+
+static int config_hdr_frame(struct mdp_comp_ctx *ctx,
+                           struct mdp_cmdq_cmd *cmd,
+                           const struct v4l2_rect *compose)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, hdr.top);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_TOP, reg, BIT(29) | BIT(28));
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, hdr.relay);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_RELAY, reg, BIT(0));
+
+       return 0;
+}
+
+static int config_hdr_subfrm(struct mdp_comp_ctx *ctx,
+                            struct mdp_cmdq_cmd *cmd, u32 index)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, hdr.subfrms[index].win_size);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_TILE_POS,
+                    reg, MDP_HDR_TILE_POS_MASK);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, hdr.subfrms[index].src);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_SIZE_0, reg, 0x1FFF1FFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, hdr.subfrms[index].clip_ofst0);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_SIZE_1, reg, 0x1FFF1FFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, hdr.subfrms[index].clip_ofst1);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_SIZE_2, reg, 0x1FFF1FFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, hdr.subfrms[index].hist_ctrl_0);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_HIST_CTRL_0, reg, 0x00003FFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, hdr.subfrms[index].hist_ctrl_1);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_HIST_CTRL_1, reg, 0x00003FFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, hdr.subfrms[index].hdr_top);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_TOP, reg, BIT(6) | BIT(5));
+
+       /* Enable histogram */
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, hdr.subfrms[index].hist_addr);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_HDR_HIST_ADDR, reg, BIT(9));
+
+       return 0;
+}
+
+static const struct mdp_comp_ops hdr_ops = {
+       .get_comp_flag = get_comp_flag,
+       .init_comp = init_hdr,
+       .config_frame = config_hdr_frame,
+       .config_subfrm = config_hdr_subfrm,
+};
+
+static int init_fg(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_FG_TRIGGER, BIT(2), BIT(2));
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_FG_TRIGGER, 0x0, BIT(2));
+
+       return 0;
+}
+
+static int config_fg_frame(struct mdp_comp_ctx *ctx,
+                          struct mdp_cmdq_cmd *cmd,
+                          const struct v4l2_rect *compose)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, fg.ctrl_0);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_FG_FG_CTRL_0, reg, BIT(0));
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, fg.ck_en);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_FG_FG_CK_EN, reg, 0x7);
+
+       return 0;
+}
+
+static int config_fg_subfrm(struct mdp_comp_ctx *ctx,
+                           struct mdp_cmdq_cmd *cmd, u32 index)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, fg.subfrms[index].info_0);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_FG_TILE_INFO_0, reg, 0xFFFFFFFF);
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, fg.subfrms[index].info_1);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_FG_TILE_INFO_1, reg, 0xFFFFFFFF);
+
+       return 0;
+}
+
+static const struct mdp_comp_ops fg_ops = {
+       .get_comp_flag = get_comp_flag,
+       .init_comp = init_fg,
+       .config_frame = config_fg_frame,
+       .config_subfrm = config_fg_subfrm,
+};
+
+static int init_ovl(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_OVL_EN,
+                    BIT(0), MDP_OVL_EN_MASK);
+
+       /* Set to relay mode */
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_OVL_SRC_CON,
+                    BIT(9), MDP_OVL_SRC_CON_MASK);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_OVL_DP_CON,
+                    BIT(0), MDP_OVL_DP_CON_MASK);
+
+       return 0;
+}
+
+static int config_ovl_frame(struct mdp_comp_ctx *ctx,
+                           struct mdp_cmdq_cmd *cmd,
+                           const struct v4l2_rect *compose)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, ovl.L0_con);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_OVL_L0_CON, reg, BIT(29) | BIT(28));
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, ovl.src_con);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_OVL_SRC_CON, reg, BIT(0));
+
+       return 0;
+}
+
+static int config_ovl_subfrm(struct mdp_comp_ctx *ctx,
+                            struct mdp_cmdq_cmd *cmd, u32 index)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, ovl.subfrms[index].L0_src_size);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_OVL_L0_SRC_SIZE,
+                    reg, MDP_OVL_L0_SRC_SIZE_MASK);
+
+       /* Setup output size */
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, ovl.subfrms[index].roi_size);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_OVL_ROI_SIZE,
+                    reg, MDP_OVL_ROI_SIZE_MASK);
+
+       return 0;
+}
+
+static const struct mdp_comp_ops ovl_ops = {
+       .get_comp_flag = get_comp_flag,
+       .init_comp = init_ovl,
+       .config_frame = config_ovl_frame,
+       .config_subfrm = config_ovl_subfrm,
+};
+
+static int init_pad(struct mdp_comp_ctx *ctx, struct mdp_cmdq_cmd *cmd)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_PAD_CON,
+                    BIT(1), MDP_PAD_CON_MASK);
+       /* Reset */
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_PAD_W_SIZE,
+                    0, MDP_PAD_W_SIZE_MASK);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_PAD_H_SIZE,
+                    0, MDP_PAD_H_SIZE_MASK);
+
+       return 0;
+}
+
+static int config_pad_subfrm(struct mdp_comp_ctx *ctx,
+                            struct mdp_cmdq_cmd *cmd, u32 index)
+{
+       phys_addr_t base = ctx->comp->reg_base;
+       u16 subsys_id = ctx->comp->subsys_id;
+       u32 reg = 0;
+
+       if (CFG_CHECK(MT8195, p_id))
+               reg = CFG_COMP(MT8195, ctx->param, pad.subfrms[index].pic_size);
+       MM_REG_WRITE(cmd, subsys_id, base, MDP_PAD_PIC_SIZE,
+                    reg, MDP_PAD_PIC_SIZE_MASK);
+
+       return 0;
+}
+
+static const struct mdp_comp_ops pad_ops = {
+       .get_comp_flag = get_comp_flag,
+       .init_comp = init_pad,
+       .config_subfrm = config_pad_subfrm,
+};
+
 static const struct mdp_comp_ops *mdp_comp_ops[MDP_COMP_TYPE_COUNT] = {
        [MDP_COMP_TYPE_RDMA] =          &rdma_ops,
        [MDP_COMP_TYPE_RSZ] =           &rsz_ops,
        [MDP_COMP_TYPE_WROT] =          &wrot_ops,
        [MDP_COMP_TYPE_WDMA] =          &wdma_ops,
+       [MDP_COMP_TYPE_TDSHP] =         &tdshp_ops,
+       [MDP_COMP_TYPE_COLOR] =         &color_ops,
        [MDP_COMP_TYPE_CCORR] =         &ccorr_ops,
+       [MDP_COMP_TYPE_AAL] =           &aal_ops,
+       [MDP_COMP_TYPE_HDR] =           &hdr_ops,
+       [MDP_COMP_TYPE_FG] =            &fg_ops,
+       [MDP_COMP_TYPE_OVL] =           &ovl_ops,
+       [MDP_COMP_TYPE_PAD] =           &pad_ops,
 };
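
Each new engine plugs into the driver through the same mdp_comp_ops
vtable, and the table above dispatches on the component type. Some
entries leave hooks unset (pad_ops has no .config_frame), which presumes
the calling code tests each hook before invoking it. A reduced model:

    #include <stdio.h>

    enum comp_type { TYPE_RDMA, TYPE_PAD, TYPE_COUNT };

    struct comp_ops {
            int (*init_comp)(void);
            int (*config_frame)(void);      /* may be NULL */
    };

    static int pad_init(void) { puts("pad init"); return 0; }

    static const struct comp_ops pad_ops = { .init_comp = pad_init };

    static const struct comp_ops *ops_tbl[TYPE_COUNT] = {
            [TYPE_PAD] = &pad_ops,
    };

    int main(void)
    {
            const struct comp_ops *ops = ops_tbl[TYPE_PAD];

            if (ops && ops->init_comp)
                    ops->init_comp();
            if (ops && ops->config_frame)   /* skipped: hook not set */
                    ops->config_frame();
            return 0;
    }
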
 
 static const struct of_device_id mdp_comp_dt_ids[] __maybe_unused = {
@@ -762,6 +1555,42 @@ static const struct of_device_id mdp_comp_dt_ids[] __maybe_unused = {
        }, {
                .compatible = "mediatek,mt8183-mdp3-wdma",
                .data = (void *)MDP_COMP_TYPE_WDMA,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-rdma",
+               .data = (void *)MDP_COMP_TYPE_RDMA,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-split",
+               .data = (void *)MDP_COMP_TYPE_SPLIT,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-stitch",
+               .data = (void *)MDP_COMP_TYPE_STITCH,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-fg",
+               .data = (void *)MDP_COMP_TYPE_FG,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-hdr",
+               .data = (void *)MDP_COMP_TYPE_HDR,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-aal",
+               .data = (void *)MDP_COMP_TYPE_AAL,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-merge",
+               .data = (void *)MDP_COMP_TYPE_MERGE,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-tdshp",
+               .data = (void *)MDP_COMP_TYPE_TDSHP,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-color",
+               .data = (void *)MDP_COMP_TYPE_COLOR,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-ovl",
+               .data = (void *)MDP_COMP_TYPE_OVL,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-padding",
+               .data = (void *)MDP_COMP_TYPE_PAD,
+       }, {
+               .compatible = "mediatek,mt8195-mdp3-tcc",
+               .data = (void *)MDP_COMP_TYPE_TCC,
        },
        {}
 };
@@ -853,9 +1682,26 @@ int mdp_comp_clocks_on(struct device *dev, struct mdp_comp *comps, int num)
        int i, ret;
 
        for (i = 0; i < num; i++) {
+               struct mdp_dev *m = comps[i].mdp_dev;
+               enum mtk_mdp_comp_id id;
+               const struct mdp_comp_blend *b;
+
+               /* Bypass the dummy component */
+               if (!m)
+                       continue;
+
                ret = mdp_comp_clock_on(dev, &comps[i]);
                if (ret)
                        return ret;
+
+               id = comps[i].public_id;
+               b = &m->mdp_data->comp_data[id].blend;
+
+               if (b && b->aid_clk) {
+                       ret = mdp_comp_clock_on(dev, m->comp[b->b_id]);
+                       if (ret)
+                               return ret;
+               }
        }
 
        return 0;
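
mdp_comp_clocks_on() now skips dummy components (those without an
mdp_dev back-pointer) and, when a component's blend descriptor sets
aid_clk, also powers the paired engine's clock; mdp_comp_clocks_off()
below mirrors the same walk. The pairing in miniature, with stand-in
types:

    #include <stdbool.h>
    #include <stdio.h>

    struct blend { bool aid_clk; int b_id; };
    struct comp  { bool dummy; struct blend blend; };

    static int clock_on(int id) { printf("clk on %d\n", id); return 0; }

    static int comp_clocks_on(const struct comp *c, int id)
    {
            int ret;

            if (c->dummy)                   /* bypass the dummy component */
                    return 0;
            ret = clock_on(id);
            if (ret)
                    return ret;
            if (c->blend.aid_clk)           /* paired blend engine too */
                    ret = clock_on(c->blend.b_id);
            return ret;
    }

    int main(void)
    {
            struct comp c = { .dummy = false,
                              .blend = { .aid_clk = true, .b_id = 7 } };

            return comp_clocks_on(&c, 1);
    }
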
@@ -865,8 +1711,23 @@ void mdp_comp_clocks_off(struct device *dev, struct mdp_comp *comps, int num)
 {
        int i;
 
-       for (i = 0; i < num; i++)
+       for (i = 0; i < num; i++) {
+               struct mdp_dev *m = comps[i].mdp_dev;
+               enum mtk_mdp_comp_id id;
+               const struct mdp_comp_blend *b;
+
+               /* Bypass the dummy component */
+               if (!m)
+                       continue;
+
                mdp_comp_clock_off(dev, &comps[i]);
+
+               id = comps[i].public_id;
+               b = &m->mdp_data->comp_data[id].blend;
+
+               if (b && b->aid_clk)
+                       mdp_comp_clock_off(dev, m->comp[b->b_id]);
+       }
 }
 
 static int mdp_get_subsys_id(struct mdp_dev *mdp, struct device *dev,
@@ -1174,6 +2035,8 @@ int mdp_comp_ctx_config(struct mdp_dev *mdp, struct mdp_comp_ctx *ctx,
 
        if (CFG_CHECK(MT8183, p_id))
                arg = CFG_COMP(MT8183, param, type);
+       else if (CFG_CHECK(MT8195, p_id))
+               arg = CFG_COMP(MT8195, param, type);
        else
                return -EINVAL;
        public_id = mdp_cfg_get_id_public(mdp, arg);
@@ -1191,16 +2054,22 @@ int mdp_comp_ctx_config(struct mdp_dev *mdp, struct mdp_comp_ctx *ctx,
        ctx->param = param;
        if (CFG_CHECK(MT8183, p_id))
                arg = CFG_COMP(MT8183, param, input);
+       else if (CFG_CHECK(MT8195, p_id))
+               arg = CFG_COMP(MT8195, param, input);
        else
                return -EINVAL;
        ctx->input = &frame->inputs[arg];
        if (CFG_CHECK(MT8183, p_id))
                idx = CFG_COMP(MT8183, param, num_outputs);
+       else if (CFG_CHECK(MT8195, p_id))
+               idx = CFG_COMP(MT8195, param, num_outputs);
        else
                return -EINVAL;
        for (i = 0; i < idx; i++) {
                if (CFG_CHECK(MT8183, p_id))
                        arg = CFG_COMP(MT8183, param, outputs[i]);
+               else if (CFG_CHECK(MT8195, p_id))
+                       arg = CFG_COMP(MT8195, param, outputs[i]);
                else
                        return -EINVAL;
                ctx->outputs[i] = &frame->outputs[arg];
index 20d2bcb77ef93c12762e45dc5fd1949dc2f36e72..3e5d2da1c807645f0b60ea2eb83b44c571590808 100644
@@ -84,22 +84,66 @@ enum mtk_mdp_comp_id {
        MDP_COMP_CAMIN,         /* 9 */
        MDP_COMP_CAMIN2,        /* 10 */
        MDP_COMP_RDMA0,         /* 11 */
-       MDP_COMP_AAL0,          /* 12 */
-       MDP_COMP_CCORR0,        /* 13 */
-       MDP_COMP_RSZ0,          /* 14 */
-       MDP_COMP_RSZ1,          /* 15 */
-       MDP_COMP_TDSHP0,        /* 16 */
-       MDP_COMP_COLOR0,        /* 17 */
-       MDP_COMP_PATH0_SOUT,    /* 18 */
-       MDP_COMP_PATH1_SOUT,    /* 19 */
-       MDP_COMP_WROT0,         /* 20 */
-       MDP_COMP_WDMA,          /* 21 */
-
-       /* Dummy Engine */
-       MDP_COMP_RDMA1,         /* 22 */
-       MDP_COMP_RSZ2,          /* 23 */
-       MDP_COMP_TDSHP1,        /* 24 */
-       MDP_COMP_WROT1,         /* 25 */
+       MDP_COMP_RDMA1,         /* 12 */
+       MDP_COMP_RDMA2,         /* 13 */
+       MDP_COMP_RDMA3,         /* 14 */
+       MDP_COMP_AAL0,          /* 15 */
+       MDP_COMP_AAL1,          /* 16 */
+       MDP_COMP_AAL2,          /* 17 */
+       MDP_COMP_AAL3,          /* 18 */
+       MDP_COMP_CCORR0,        /* 19 */
+       MDP_COMP_RSZ0,          /* 20 */
+       MDP_COMP_RSZ1,          /* 21 */
+       MDP_COMP_RSZ2,          /* 22 */
+       MDP_COMP_RSZ3,          /* 23 */
+       MDP_COMP_TDSHP0,        /* 24 */
+       MDP_COMP_TDSHP1,        /* 25 */
+       MDP_COMP_TDSHP2,        /* 26 */
+       MDP_COMP_TDSHP3,        /* 27 */
+       MDP_COMP_COLOR0,        /* 28 */
+       MDP_COMP_COLOR1,        /* 29 */
+       MDP_COMP_COLOR2,        /* 30 */
+       MDP_COMP_COLOR3,        /* 31 */
+       MDP_COMP_PATH0_SOUT,    /* 32 */
+       MDP_COMP_PATH1_SOUT,    /* 33 */
+       MDP_COMP_WROT0,         /* 34 */
+       MDP_COMP_WROT1,         /* 35 */
+       MDP_COMP_WROT2,         /* 36 */
+       MDP_COMP_WROT3,         /* 37 */
+       MDP_COMP_WDMA,          /* 38 */
+       MDP_COMP_SPLIT,         /* 39 */
+       MDP_COMP_SPLIT2,        /* 40 */
+       MDP_COMP_STITCH,        /* 41 */
+       MDP_COMP_FG0,           /* 42 */
+       MDP_COMP_FG1,           /* 43 */
+       MDP_COMP_FG2,           /* 44 */
+       MDP_COMP_FG3,           /* 45 */
+       MDP_COMP_TO_SVPP2MOUT,  /* 46 */
+       MDP_COMP_TO_SVPP3MOUT,  /* 47 */
+       MDP_COMP_TO_WARP0MOUT,  /* 48 */
+       MDP_COMP_TO_WARP1MOUT,  /* 49 */
+       MDP_COMP_VPP0_SOUT,     /* 50 */
+       MDP_COMP_VPP1_SOUT,     /* 51 */
+       MDP_COMP_PQ0_SOUT,      /* 52 */
+       MDP_COMP_PQ1_SOUT,      /* 53 */
+       MDP_COMP_HDR0,          /* 54 */
+       MDP_COMP_HDR1,          /* 55 */
+       MDP_COMP_HDR2,          /* 56 */
+       MDP_COMP_HDR3,          /* 57 */
+       MDP_COMP_OVL0,          /* 58 */
+       MDP_COMP_OVL1,          /* 59 */
+       MDP_COMP_PAD0,          /* 60 */
+       MDP_COMP_PAD1,          /* 61 */
+       MDP_COMP_PAD2,          /* 62 */
+       MDP_COMP_PAD3,          /* 63 */
+       MDP_COMP_TCC0,          /* 64 */
+       MDP_COMP_TCC1,          /* 65 */
+       MDP_COMP_MERGE2,        /* 66 */
+       MDP_COMP_MERGE3,        /* 67 */
+       MDP_COMP_VDO0DL0,       /* 68 */
+       MDP_COMP_VDO1DL0,       /* 69 */
+       MDP_COMP_VDO0DL1,       /* 70 */
+       MDP_COMP_VDO1DL1,       /* 71 */
 
        MDP_MAX_COMP_COUNT      /* ALWAYS keep at the end */
 };
@@ -117,12 +161,21 @@ enum mdp_comp_type {
        MDP_COMP_TYPE_COLOR,
        MDP_COMP_TYPE_DRE,
        MDP_COMP_TYPE_CCORR,
+       MDP_COMP_TYPE_AAL,
+       MDP_COMP_TYPE_TCC,
        MDP_COMP_TYPE_HDR,
+       MDP_COMP_TYPE_SPLIT,
+       MDP_COMP_TYPE_STITCH,
+       MDP_COMP_TYPE_FG,
+       MDP_COMP_TYPE_OVL,
+       MDP_COMP_TYPE_PAD,
+       MDP_COMP_TYPE_MERGE,
 
        MDP_COMP_TYPE_IMGI,
        MDP_COMP_TYPE_WPEI,
        MDP_COMP_TYPE_EXTO,     /* External path */
        MDP_COMP_TYPE_DL_PATH,  /* Direct-link path */
+       MDP_COMP_TYPE_DUMMY,
 
        MDP_COMP_TYPE_COUNT     /* ALWAYS keep at the end */
 };
@@ -138,6 +191,7 @@ struct mdp_comp_match {
        enum mdp_comp_type type;
        u32 alias_id;
        s32 inner_id;
+       s32 subsys_id;
 };
 
 /* Used to describe the item order in MDP property */
@@ -147,9 +201,16 @@ struct mdp_comp_info {
        u32 dts_reg_ofst;
 };
 
+struct mdp_comp_blend {
+       enum mtk_mdp_comp_id b_id;
+       bool aid_mod;
+       bool aid_clk;
+};
+
 struct mdp_comp_data {
        struct mdp_comp_match match;
        struct mdp_comp_info info;
+       struct mdp_comp_blend blend;
 };
 
 struct mdp_comp_ops;
index 94f4ed78523bb8becb4d7775aa4e7e1e79c649e6..5209f531ef8d00decbdc5666e3db4dcebe15897b 100644
@@ -21,14 +21,21 @@ static const struct of_device_id mdp_of_ids[] = {
        { .compatible = "mediatek,mt8183-mdp3-rdma",
          .data = &mt8183_mdp_driver_data,
        },
+       { .compatible = "mediatek,mt8195-mdp3-rdma",
+         .data = &mt8195_mdp_driver_data,
+       },
+       { .compatible = "mediatek,mt8195-mdp3-wrot",
+         .data = &mt8195_mdp_driver_data,
+       },
        {},
 };
 MODULE_DEVICE_TABLE(of, mdp_of_ids);
 
 static struct platform_device *__get_pdev_by_id(struct platform_device *pdev,
+                                               struct platform_device *from,
                                                enum mdp_infra_id id)
 {
-       struct device_node *node;
+       struct device_node *node, *f = NULL;
        struct platform_device *mdp_pdev = NULL;
        const struct mtk_mdp_driver_data *mdp_data;
        const char *compat;
@@ -46,9 +53,14 @@ static struct platform_device *__get_pdev_by_id(struct platform_device *pdev,
                dev_err(&pdev->dev, "have no driver data to find node\n");
                return NULL;
        }
+
        compat = mdp_data->mdp_probe_infra[id].compatible;
+       if (strlen(compat) == 0)
+               return NULL;
 
-       node = of_find_compatible_node(NULL, NULL, compat);
+       if (from)
+               f = from->dev.of_node;
+       node = of_find_compatible_node(f, NULL, compat);
        if (WARN_ON(!node)) {
                dev_err(&pdev->dev, "find node from id %d failed\n", id);
                return NULL;
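
__get_pdev_by_id() now takes a `from` device so that repeated calls can
continue the of_find_compatible_node() walk past the previous match
instead of always returning the first node; an empty compatible string
short-circuits the lookup entirely. The resume-from semantics, modelled
over a plain array:

    #include <stdio.h>
    #include <string.h>

    static const char *nodes[] = { "mmsys0", "mutex0", "mmsys1", "mutex1" };

    /* First index after `from` whose name starts with prefix, or -1;
     * pass from = -1 to start from the beginning.
     */
    static int find_next(const char *prefix, int from)
    {
            int i, n = sizeof(nodes) / sizeof(nodes[0]);

            for (i = from + 1; i < n; i++)
                    if (!strncmp(nodes[i], prefix, strlen(prefix)))
                            return i;
            return -1;
    }

    int main(void)
    {
            int first = find_next("mmsys", -1);
            int second = find_next("mmsys", first);   /* resumes past it */

            printf("%s %s\n", nodes[first], nodes[second]);
            return 0;
    }
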
@@ -130,6 +142,10 @@ void mdp_video_device_release(struct video_device *vdev)
        struct mdp_dev *mdp = (struct mdp_dev *)video_get_drvdata(vdev);
        int i;
 
+       for (i = 0; i < mdp->mdp_data->pp_used; i++)
+               if (mdp->cmdq_clt[i])
+                       cmdq_mbox_destroy(mdp->cmdq_clt[i]);
+
        scp_put(mdp->scp);
 
        destroy_workqueue(mdp->job_wq);
@@ -140,19 +156,72 @@ void mdp_video_device_release(struct video_device *vdev)
        vb2_dma_contig_clear_max_seg_size(&mdp->pdev->dev);
 
        mdp_comp_destroy(mdp);
-       for (i = 0; i < MDP_PIPE_MAX; i++)
-               mtk_mutex_put(mdp->mdp_mutex[i]);
+       for (i = 0; i < mdp->mdp_data->pipe_info_len; i++) {
+               enum mdp_mm_subsys_id idx;
+               struct mtk_mutex *m;
+               u32 m_id;
+
+               idx = mdp->mdp_data->pipe_info[i].sub_id;
+               m_id = mdp->mdp_data->pipe_info[i].mutex_id;
+               m = mdp->mm_subsys[idx].mdp_mutex[m_id];
+               if (!IS_ERR_OR_NULL(m))
+                       mtk_mutex_put(m);
+       }
 
        mdp_vpu_shared_mem_free(&mdp->vpu);
        v4l2_m2m_release(mdp->m2m_dev);
        kfree(mdp);
 }
 
+static int mdp_mm_subsys_deploy(struct mdp_dev *mdp, enum mdp_infra_id id)
+{
+       struct platform_device *mm_pdev = NULL;
+       struct device **dev;
+       int i;
+
+       if (!mdp)
+               return -EINVAL;
+
+       for (i = 0; i < MDP_MM_SUBSYS_MAX; i++) {
+               const char *compat;
+               enum mdp_infra_id sub_id = id + i;
+
+               switch (id) {
+               case MDP_INFRA_MMSYS:
+                       dev = &mdp->mm_subsys[i].mmsys;
+                       break;
+               case MDP_INFRA_MUTEX:
+                       dev = &mdp->mm_subsys[i].mutex;
+                       break;
+               default:
+                       dev_err(&mdp->pdev->dev, "Unknown infra id %d", id);
+                       return -EINVAL;
+               }
+
+               /*
+                * Not every chip has multiple multimedia subsystems, so
+        * the config may be an empty string.
+                */
+               compat = mdp->mdp_data->mdp_probe_infra[sub_id].compatible;
+               if (strlen(compat) == 0)
+                       continue;
+
+               mm_pdev = __get_pdev_by_id(mdp->pdev, mm_pdev, sub_id);
+               if (WARN_ON(!mm_pdev))
+                       return -ENODEV;
+
+               *dev = &mm_pdev->dev;
+       }
+
+       return 0;
+}
+
 static int mdp_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct mdp_dev *mdp;
        struct platform_device *mm_pdev;
+       struct resource *res;
        int ret, i, mutex_id;
 
        mdp = kzalloc(sizeof(*mdp), GFP_KERNEL);
@@ -164,25 +233,34 @@ static int mdp_probe(struct platform_device *pdev)
        mdp->pdev = pdev;
        mdp->mdp_data = of_device_get_match_data(&pdev->dev);
 
-       mm_pdev = __get_pdev_by_id(pdev, MDP_INFRA_MMSYS);
-       if (!mm_pdev) {
-               ret = -ENODEV;
-               goto err_destroy_device;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (res->start != mdp->mdp_data->mdp_con_res) {
+               platform_set_drvdata(pdev, mdp);
+               goto success_return;
        }
-       mdp->mdp_mmsys = &mm_pdev->dev;
 
-       mm_pdev = __get_pdev_by_id(pdev, MDP_INFRA_MUTEX);
-       if (WARN_ON(!mm_pdev)) {
-               ret = -ENODEV;
+       ret = mdp_mm_subsys_deploy(mdp, MDP_INFRA_MMSYS);
+       if (ret)
                goto err_destroy_device;
-       }
+
+       ret = mdp_mm_subsys_deploy(mdp, MDP_INFRA_MUTEX);
+       if (ret)
+               goto err_destroy_device;
+
        for (i = 0; i < mdp->mdp_data->pipe_info_len; i++) {
+               enum mdp_mm_subsys_id idx;
+               struct mtk_mutex **m;
+
+               idx = mdp->mdp_data->pipe_info[i].sub_id;
                mutex_id = mdp->mdp_data->pipe_info[i].mutex_id;
-               if (!IS_ERR_OR_NULL(mdp->mdp_mutex[mutex_id]))
+               m = &mdp->mm_subsys[idx].mdp_mutex[mutex_id];
+
+               if (!IS_ERR_OR_NULL(*m))
                        continue;
-               mdp->mdp_mutex[mutex_id] = mtk_mutex_get(&mm_pdev->dev);
-               if (IS_ERR(mdp->mdp_mutex[mutex_id])) {
-                       ret = PTR_ERR(mdp->mdp_mutex[mutex_id]);
+
+               *m = mtk_mutex_get(mdp->mm_subsys[idx].mutex);
+               if (IS_ERR(*m)) {
+                       ret = PTR_ERR(*m);
                        goto err_free_mutex;
                }
        }
@@ -210,7 +288,7 @@ static int mdp_probe(struct platform_device *pdev)
 
        mdp->scp = scp_get(pdev);
        if (!mdp->scp) {
-               mm_pdev = __get_pdev_by_id(pdev, MDP_INFRA_SCP);
+               mm_pdev = __get_pdev_by_id(pdev, NULL, MDP_INFRA_SCP);
                if (WARN_ON(!mm_pdev)) {
                        dev_err(&pdev->dev, "Could not get scp device\n");
                        ret = -ENODEV;
@@ -225,10 +303,12 @@ static int mdp_probe(struct platform_device *pdev)
        mutex_init(&mdp->vpu_lock);
        mutex_init(&mdp->m2m_lock);
 
-       mdp->cmdq_clt = cmdq_mbox_create(dev, 0);
-       if (IS_ERR(mdp->cmdq_clt)) {
-               ret = PTR_ERR(mdp->cmdq_clt);
-               goto err_put_scp;
+       for (i = 0; i < mdp->mdp_data->pp_used; i++) {
+               mdp->cmdq_clt[i] = cmdq_mbox_create(dev, i);
+               if (IS_ERR(mdp->cmdq_clt[i])) {
+                       ret = PTR_ERR(mdp->cmdq_clt[i]);
+                       goto err_mbox_destroy;
+               }
        }
 
        init_waitqueue_head(&mdp->callback_wq);
@@ -250,14 +330,15 @@ static int mdp_probe(struct platform_device *pdev)
                goto err_unregister_device;
        }
 
+success_return:
        dev_dbg(dev, "mdp-%d registered successfully\n", pdev->id);
        return 0;
 
 err_unregister_device:
        v4l2_device_unregister(&mdp->v4l2_dev);
 err_mbox_destroy:
-       cmdq_mbox_destroy(mdp->cmdq_clt);
-err_put_scp:
+       while (--i >= 0)
+               cmdq_mbox_destroy(mdp->cmdq_clt[i]);
        scp_put(mdp->scp);
 err_destroy_clock_wq:
        destroy_workqueue(mdp->clock_wq);
@@ -266,9 +347,16 @@ err_destroy_job_wq:
 err_deinit_comp:
        mdp_comp_destroy(mdp);
 err_free_mutex:
-       for (i = 0; i < mdp->mdp_data->pipe_info_len; i++)
-               if (!IS_ERR_OR_NULL(mdp->mdp_mutex[i]))
-                       mtk_mutex_put(mdp->mdp_mutex[i]);
+       for (i = 0; i < mdp->mdp_data->pipe_info_len; i++) {
+               enum mdp_mm_subsys_id idx;
+               struct mtk_mutex *m;
+
+               idx = mdp->mdp_data->pipe_info[i].sub_id;
+               mutex_id = mdp->mdp_data->pipe_info[i].mutex_id;
+               m = mdp->mm_subsys[idx].mdp_mutex[mutex_id];
+               if (!IS_ERR_OR_NULL(m))
+                       mtk_mutex_put(m);
+       }
 err_destroy_device:
        kfree(mdp);
 err_return:
index 7e21d226ceb81ee02db09a59205989d0971467ef..8c09e984fd0166f16e2ce5bb4bc0a944f67289e0 100644 (file)
 #define MDP_PHANDLE_NAME       "mediatek,mdp3"
 
 enum mdp_infra_id {
+       /*
+        * Due to the sequential indexing in function "mdp_mm_subsys_deploy",
+        * adding a new enum value here necessitates careful consideration.
+        */
        MDP_INFRA_MMSYS,
+       MDP_INFRA_MMSYS2,
        MDP_INFRA_MUTEX,
+       MDP_INFRA_MUTEX2,
        MDP_INFRA_SCP,
        MDP_INFRA_MAX
 };
 
+enum mdp_mm_subsys_id {
+       MDP_MM_SUBSYS_0,
+       MDP_MM_SUBSYS_1,
+       MDP_MM_SUBSYS_MAX,
+};
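
Because mdp_mm_subsys_deploy() derives each lookup id as "id + i" while walking the subsystem slots, the ordering warned about above is load-bearing. An illustration of the implied mapping, derived from that loop:

	/*
	 * i = 0: MDP_INFRA_MMSYS  (or MDP_INFRA_MUTEX)  -> mm_subsys[MDP_MM_SUBSYS_0]
	 * i = 1: MDP_INFRA_MMSYS2 (or MDP_INFRA_MUTEX2) -> mm_subsys[MDP_MM_SUBSYS_1]
	 *
	 * Inserting a new id between a base entry and its "2" variant
	 * would break the "id + i" arithmetic.
	 */
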
+
 enum mdp_buffer_usage {
        MDP_BUFFER_USAGE_HW_READ,
        MDP_BUFFER_USAGE_MDP,
@@ -37,8 +49,16 @@ struct mdp_platform_config {
        bool    rdma_support_10bit;
        bool    rdma_rsz1_sram_sharing;
        bool    rdma_upsample_repeat_only;
+       bool    rdma_esl_setting;
+       u32     rdma_event_num;
        bool    rsz_disable_dcm_small_sample;
+       bool    rsz_etc_control;
        bool    wrot_filter_constraint;
+       bool    wrot_support_10bit;
+       u32     wrot_event_num;
+       u32     tdshp_hist_num;
+       bool    tdshp_constrain;
+       bool    tdshp_contour;
 };
 
 /* indicate which mutex is used by each pipeline */
@@ -47,11 +67,27 @@ enum mdp_pipe_id {
        MDP_PIPE_WPEI2,
        MDP_PIPE_IMGI,
        MDP_PIPE_RDMA0,
+       MDP_PIPE_RDMA1,
+       MDP_PIPE_RDMA2,
+       MDP_PIPE_RDMA3,
+       MDP_PIPE_SPLIT,
+       MDP_PIPE_SPLIT2,
+       MDP_PIPE_VPP0_SOUT,
+       MDP_PIPE_VPP1_SOUT,
        MDP_PIPE_MAX
 };
 
+/* MDP parallel pipe control */
+enum {
+       MDP_PP_USED_1 = 1,
+       MDP_PP_USED_2 = 2,
+};
+
+#define MDP_PP_MAX MDP_PP_USED_2
+
 struct mtk_mdp_driver_data {
        const int mdp_plat_id;
+       const resource_size_t mdp_con_res;
        const struct of_device_id *mdp_probe_infra;
        const struct mdp_platform_config *mdp_cfg;
        const u32 *mdp_mutex_table_idx;
@@ -63,12 +99,19 @@ struct mtk_mdp_driver_data {
        const struct mdp_limit *def_limit;
        const struct mdp_pipe_info *pipe_info;
        unsigned int pipe_info_len;
+       const struct v4l2_rect *pp_criteria;
+       const u8 pp_used;
+};
+
+struct mdp_mm_subsys {
+       struct device *mmsys;
+       struct device *mutex;
+       struct mtk_mutex *mdp_mutex[MDP_PIPE_MAX];
 };
 
 struct mdp_dev {
        struct platform_device                  *pdev;
-       struct device                           *mdp_mmsys;
-       struct mtk_mutex                        *mdp_mutex[MDP_PIPE_MAX];
+       struct mdp_mm_subsys                    mm_subsys[MDP_MM_SUBSYS_MAX];
        struct mdp_comp                         *comp[MDP_MAX_COMP_COUNT];
        const struct mtk_mdp_driver_data        *mdp_data;
 
@@ -82,7 +125,7 @@ struct mdp_dev {
        s32                                     vpu_count;
        u32                                     id_count;
        struct ida                              mdp_ida;
-       struct cmdq_client                      *cmdq_clt;
+       struct cmdq_client                      *cmdq_clt[MDP_PP_MAX];
        wait_queue_head_t                       callback_wq;
 
        struct v4l2_device                      v4l2_dev;
@@ -96,6 +139,7 @@ struct mdp_dev {
 
 struct mdp_pipe_info {
        enum mdp_pipe_id pipe_id;
+       enum mdp_mm_subsys_id sub_id;
        u32 mutex_id;
 };
 
index a298c1b15b9ea578427acb4e8b3711552b8efc5f..35a8b059bde5d129240686cee5677351a54ab51c 100644 (file)
@@ -87,6 +87,9 @@ static void mdp_m2m_device_run(void *priv)
        dst_vb = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
        mdp_set_dst_config(&param.outputs[0], frame, &dst_vb->vb2_buf);
 
+       if (mdp_check_pp_enable(ctx->mdp_dev, frame))
+               param.type = MDP_STREAM_TYPE_DUAL_BITBLT;
+
        ret = mdp_vpu_process(&ctx->mdp_dev->vpu, &param);
        if (ret) {
                dev_err(&ctx->mdp_dev->pdev->dev,
@@ -101,6 +104,18 @@ static void mdp_m2m_device_run(void *priv)
        task.cb_data = NULL;
        task.mdp_ctx = ctx;
 
+       if (atomic_read(&ctx->mdp_dev->job_count)) {
+               ret = wait_event_timeout(ctx->mdp_dev->callback_wq,
+                                        !atomic_read(&ctx->mdp_dev->job_count),
+                                        2 * HZ);
+               if (ret == 0) {
+                       dev_err(&ctx->mdp_dev->pdev->dev,
+                               "%d jobs not yet done\n",
+                               atomic_read(&ctx->mdp_dev->job_count));
+                       goto worker_end;
+               }
+       }
+
        ret = mdp_cmdq_send(ctx->mdp_dev, &task);
        if (ret) {
                dev_err(&ctx->mdp_dev->pdev->dev,
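
The wait added above has a matching wake on the completion side; a hedged sketch of the pairing (mdp_job_done() is a hypothetical helper standing in for the CMDQ completion path, which is not shown in this hunk):

	/* Producer: block up to two seconds for in-flight jobs to drain. */
	wait_event_timeout(mdp->callback_wq,
			   !atomic_read(&mdp->job_count), 2 * HZ);

	/* Assumed consumer: drop the count and wake any waiter. */
	static void mdp_job_done(struct mdp_dev *mdp)
	{
		atomic_dec(&mdp->job_count);
		wake_up(&mdp->callback_wq);
	}
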
index 9b436b911d92d75aea563b4bc7c091f781b1b0a8..657356f87743e4797a208b092e5fc6b489e0420e 100644 (file)
@@ -304,6 +304,24 @@ int mdp_check_scaling_ratio(const struct v4l2_rect *crop,
        return 0;
 }
 
+bool mdp_check_pp_enable(struct mdp_dev *mdp, struct mdp_frame *frame)
+{
+       u32 s, r1, r2;
+
+       if (!mdp || !frame)
+               return false;
+
+       if (!mdp->mdp_data->pp_criteria)
+               return false;
+
+       s = mdp->mdp_data->pp_criteria->width *
+               mdp->mdp_data->pp_criteria->height;
+       r1 = frame->crop.c.width * frame->crop.c.height;
+       r2 = frame->compose.width * frame->compose.height;
+
+       return (r1 >= s || r2 >= s);
+}
+
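
A worked example, assuming a pp_criteria of 1920x1080 (the real per-SoC value lives in the driver data and is not shown in this hunk):

	s  = 1920 * 1080;	/* threshold: 2073600 pixels  */
	r1 = 3840 * 2160;	/* crop area: 8294400 >= s    */
	r2 = 1920 * 1080;	/* compose area: 2073600 >= s */
	/*
	 * r1 >= s already holds, so the function returns true and
	 * mdp_m2m_device_run() switches the job to
	 * MDP_STREAM_TYPE_DUAL_BITBLT.
	 */
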
 /* Stride that is accepted by MDP HW */
 static u32 mdp_fmt_get_stride(const struct mdp_format *fmt,
                              u32 bytesperline, unsigned int plane)
index e9ab8ac2c0e871eba89c5df5d68a68bcd69059f5..b0c8f9f008202da29f66f4d67aa757d4c6db4e56 100644 (file)
@@ -368,6 +368,7 @@ int mdp_try_crop(struct mdp_m2m_ctx *ctx, struct v4l2_rect *r,
 int mdp_check_scaling_ratio(const struct v4l2_rect *crop,
                            const struct v4l2_rect *compose, s32 rotation,
        const struct mdp_limit *limit);
+bool mdp_check_pp_enable(struct mdp_dev *mdp, struct mdp_frame *frame);
 void mdp_set_src_config(struct img_input *in,
                        struct mdp_frame *frame, struct vb2_buffer *vb);
 void mdp_set_dst_config(struct img_output *out,
index 49fc2e9d45dd51a81722b8b0c219380e3aa0524f..da3a892ad867eedb4898509da8790981475e9a62 100644 (file)
@@ -198,6 +198,7 @@ int mdp_vpu_dev_init(struct mdp_vpu_dev *vpu, struct mtk_scp *scp,
        };
        struct mdp_dev *mdp = vpu_to_mdp(vpu);
        int err;
+       u8 pp_num = mdp->mdp_data->pp_used;
 
        init_completion(&vpu->ipi_acked);
        vpu->scp = scp;
@@ -211,7 +212,7 @@ int mdp_vpu_dev_init(struct mdp_vpu_dev *vpu, struct mtk_scp *scp,
        mutex_lock(vpu->lock);
        vpu->work_size = ALIGN(vpu->work_size, 64);
        vpu->param_size = ALIGN(sizeof(struct img_ipi_frameparam), 64);
-       vpu->config_size = ALIGN(sizeof(struct img_config), 64);
+       vpu->config_size = ALIGN(sizeof(struct img_config) * pp_num, 64);
        err = mdp_vpu_shared_mem_alloc(vpu);
        mutex_unlock(vpu->lock);
        if (err) {
index 9f6e4b59455dab9fcd392633103e4a85a15c1364..4c34344dc7dcb876e29d66358bcfcc79e1e77705 100644 (file)
@@ -29,15 +29,7 @@ static int mtk_vcodec_vpu_set_ipi_register(struct mtk_vcodec_fw *fw, int id,
                                           mtk_vcodec_ipi_handler handler,
                                           const char *name, void *priv)
 {
-       /*
-        * The handler we receive takes a void * as its first argument. We
-        * cannot change this because it needs to be passed down to the rproc
-        * subsystem when SCP is used. VPU takes a const argument, which is
-        * more constrained, so the conversion below is safe.
-        */
-       ipi_handler_t handler_const = (ipi_handler_t)handler;
-
-       return vpu_ipi_register(fw->pdev, id, handler_const, name, priv);
+       return vpu_ipi_register(fw->pdev, id, handler, name, priv);
 }
 
 static int mtk_vcodec_vpu_ipi_send(struct mtk_vcodec_fw *fw, int id, void *buf,
index ece27c880e50c6603378e2232c8563f0a8cb17cb..1af075fc019400b73b1622af3bb3f9f3dde3153b 100644 (file)
@@ -39,7 +39,6 @@ struct vdec_fb {
 /**
  * struct mtk_video_dec_buf - Private data related to each VB2 buffer.
  * @m2m_buf:   M2M buffer
- * @list:      link list
  * @used:      Capture buffer contain decoded frame data and keep in
  *                     codec data structure
  * @queued_in_vb2:     Capture buffer is queue in vb2
index d54b3833790d125a2268f35cd9189da62a255d94..b903e39fee8928beab938fc5163d3114735ef645 100644 (file)
@@ -566,7 +566,7 @@ static void mtk_vcodec_dec_fill_h264_level(struct v4l2_ctrl_config *cfg,
        default:
                cfg->max = V4L2_MPEG_VIDEO_H264_LEVEL_4_1;
                break;
-       };
+       }
 }
 
 static void mtk_vcodec_dec_fill_h264_profile(struct v4l2_ctrl_config *cfg,
@@ -580,7 +580,7 @@ static void mtk_vcodec_dec_fill_h264_profile(struct v4l2_ctrl_config *cfg,
        default:
                cfg->max = V4L2_MPEG_VIDEO_H264_PROFILE_HIGH;
                break;
-       };
+       }
 }
 
 static void mtk_vcodec_dec_fill_h265_level(struct v4l2_ctrl_config *cfg,
@@ -596,7 +596,7 @@ static void mtk_vcodec_dec_fill_h265_level(struct v4l2_ctrl_config *cfg,
        default:
                cfg->max = V4L2_MPEG_VIDEO_HEVC_LEVEL_4;
                break;
-       };
+       }
 }
 
 static void mtk_vcodec_dec_fill_h265_profile(struct v4l2_ctrl_config *cfg,
@@ -610,7 +610,7 @@ static void mtk_vcodec_dec_fill_h265_profile(struct v4l2_ctrl_config *cfg,
        default:
                cfg->max = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_STILL_PICTURE;
                break;
-       };
+       }
 }
 
 static void mtk_vcodec_dec_fill_vp9_level(struct v4l2_ctrl_config *cfg,
@@ -630,7 +630,7 @@ static void mtk_vcodec_dec_fill_vp9_level(struct v4l2_ctrl_config *cfg,
        default:
                cfg->max = V4L2_MPEG_VIDEO_VP9_LEVEL_4_0;
                break;
-       };
+       }
 }
 
 static void mtk_vcodec_dec_fill_vp9_profile(struct v4l2_ctrl_config *cfg,
@@ -644,7 +644,7 @@ static void mtk_vcodec_dec_fill_vp9_profile(struct v4l2_ctrl_config *cfg,
        default:
                cfg->max = V4L2_MPEG_VIDEO_VP9_PROFILE_1;
                break;
-       };
+       }
 }
 
 static void mtk_vcodec_dec_reset_controls(struct v4l2_ctrl_config *cfg,
@@ -680,7 +680,7 @@ static void mtk_vcodec_dec_reset_controls(struct v4l2_ctrl_config *cfg,
                break;
        default:
                break;
-       };
+       }
 }
 
 static int mtk_vcodec_dec_ctrls_setup(struct mtk_vcodec_dec_ctx *ctx)
index f64b21c0716967ddee862e77e8a77f14e0edff3e..f677e499fefabcba6aad67d36bcd8823d45b1db7 100644 (file)
@@ -37,7 +37,6 @@
  * @bs_sz:             bitstream size
  * @resolution_changed: resolution change flag, 1 - changed, 0 - not changed
  * @frame_header_type: current frame header type
- * @wait_key_frame:    wait key frame coming
  * @crc:               used to check whether hardware's status is right
  * @reserved:          reserved, currently unused
  */
index 69d37b93bd3588644b7c791a537c341f85a039ef..cf48d09b78d7a156440e1343448af946342d26e9 100644 (file)
@@ -141,7 +141,6 @@ struct vdec_vp9_slice_frame_counts {
  * @skip:      skip counts.
  * @y_mode:    Y prediction mode counts.
  * @filter:    interpolation filter counts.
- * @mv_joint:  motion vector joint counts.
  * @sign:      motion vector sign counts.
  * @classes:   motion vector class counts.
  * @class0:    motion vector class0 bit counts.
index fbb3f34a73f05535ce339a7891ac669e6bf0f255..aa7d08afc2f4849887264309aa1a8e37edda9daa 100644 (file)
@@ -22,7 +22,6 @@ struct mtk_vcodec_dec_ctx;
  *                in place of inst_addr in messages.
  * @signaled    : 1 - Host has received ack message from VPU, 0 - not received
  * @ctx         : context for v4l2 layer integration
- * @dev                : platform device of VPU
  * @wq          : wait queue to wait VPU message ack
  * @handler     : ipi handler for each decoder
  * @codec_type     : use codec type to separate different codecs
index 82246401ed4a3952c35779115d3cc2e1c390bd8c..908d8179b2d2569333e965f293e5b1ba9af8389c 100644 (file)
@@ -26,7 +26,6 @@
 /**
  * struct mtk_video_enc_buf - Private data related to each VB2 buffer.
  * @m2m_buf:   M2M buffer
- * @list:      list that buffer link to
  * @param_change: Types of encode parameter change before encoding this
  *                             buffer
  * @enc_params: Encode parameters changed before encode this buffer
index 7243604a82a5bb26b625c0de94572b048f6d72b2..724ae7c2ab3ba2090727b3aa8410fe5dbf5ca10e 100644 (file)
@@ -635,7 +635,7 @@ OUT_LOAD_FW:
 }
 EXPORT_SYMBOL_GPL(vpu_load_firmware);
 
-static void vpu_init_ipi_handler(const void *data, unsigned int len, void *priv)
+static void vpu_init_ipi_handler(void *data, unsigned int len, void *priv)
 {
        struct mtk_vpu *vpu = priv;
        const struct vpu_run *run = data;
index a56053ff135af7b8e74c4463c7687b4a3a98f113..da05f3e74081084dd965284ecfe7e38af2fef99c 100644 (file)
@@ -17,7 +17,7 @@
  * VPU interfaces with other blocks by share memory and interrupt.
  */
 
-typedef void (*ipi_handler_t) (const void *data,
+typedef void (*ipi_handler_t) (void *data,
                               unsigned int len,
                               void *priv);
 
index a1fcb616b256a22c4cdda03437c5bb799ef48447..60fbb91400355c236b8f0b245cff509fc10516cf 100644 (file)
@@ -1785,7 +1785,7 @@ static int npcm_video_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int npcm_video_remove(struct platform_device *pdev)
+static void npcm_video_remove(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct v4l2_device *v4l2_dev = dev_get_drvdata(dev);
@@ -1798,8 +1798,6 @@ static int npcm_video_remove(struct platform_device *pdev)
        if (video->ece.enable)
                npcm_video_ece_stop(video);
        of_reserved_mem_device_release(dev);
-
-       return 0;
 }
 
 static const struct of_device_id npcm_video_match[] = {
@@ -1816,7 +1814,7 @@ static struct platform_driver npcm_video_driver = {
                .of_match_table = npcm_video_match,
        },
        .probe = npcm_video_probe,
-       .remove = npcm_video_remove,
+       .remove_new = npcm_video_remove,
 };
 
 module_platform_driver(npcm_video_driver);
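
For context, .remove_new is the transitional name for the void-returning remove callback: the device is going away regardless, so there is nothing useful a driver can do with an error code. The converted shape, sketched with hypothetical foo_* names:

	static void foo_remove(struct platform_device *pdev)
	{
		/* tear down; no return value, hence no half-removed state */
	}

	static struct platform_driver foo_driver = {
		.probe		= foo_probe,
		.remove_new	= foo_remove,
	};
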
index 64112b63298ca0a91364b723d7a3dd49b6a26a77..cc97790ed30f6a79bfdd3787dea0bb6f698a3d9e 100644 (file)
@@ -1373,6 +1373,8 @@ static bool mxc_jpeg_source_change(struct mxc_jpeg_ctx *ctx,
                q_data_cap->crop.top = 0;
                q_data_cap->crop.width = jpeg_src_buf->w;
                q_data_cap->crop.height = jpeg_src_buf->h;
+               q_data_cap->bytesperline[0] = 0;
+               q_data_cap->bytesperline[1] = 0;
 
                /*
                 * align up the resolution for CAST IP,
@@ -1752,6 +1754,14 @@ static u32 mxc_jpeg_get_image_format(struct device *dev,
 
 static void mxc_jpeg_bytesperline(struct mxc_jpeg_q_data *q, u32 precision)
 {
+       u32 bytesperline[2];
+
+       bytesperline[0] = q->bytesperline[0];
+       bytesperline[1] = q->bytesperline[0];   /* imx-jpeg only supports the same line pitch */
+       v4l_bound_align_image(&bytesperline[0], 0, MXC_JPEG_MAX_LINE, 2,
+                             &bytesperline[1], 0, MXC_JPEG_MAX_LINE, 2,
+                             0);
+
        /* Bytes distance between the leftmost pixels in two adjacent lines */
        if (q->fmt->fourcc == V4L2_PIX_FMT_JPEG) {
                /* bytesperline unused for compressed formats */
@@ -1775,6 +1785,12 @@ static void mxc_jpeg_bytesperline(struct mxc_jpeg_q_data *q, u32 precision)
                q->bytesperline[0] = q->w_adjusted * DIV_ROUND_UP(precision, 8);
                q->bytesperline[1] = 0;
        }
+
+       if (q->fmt->fourcc != V4L2_PIX_FMT_JPEG) {
+               q->bytesperline[0] = max(q->bytesperline[0], bytesperline[0]);
+               if (q->fmt->mem_planes > 1)
+                       q->bytesperline[1] = max(q->bytesperline[1], bytesperline[1]);
+       }
 }
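
The net effect is that a user-supplied line pitch can now only grow the computed minimum, never shrink it. A worked example with illustrative numbers (8-bit format, 1280-pixel-wide luma plane, caller requests 2048):

	bytesperline[0] = 2048;		/* bounded to MXC_JPEG_MAX_LINE and
					 * 4-byte aligned: stays 2048      */
	q->bytesperline[0] = 1280;	/* driver-computed minimum         */
	/* final value: max(1280, 2048) == 2048 */
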
 
 static void mxc_jpeg_sizeimage(struct mxc_jpeg_q_data *q)
index dc4afeeff5b65bb77c1674f9cc19ad3a32100378..86e324b21aed2a51b51b337e0985bf00adf20928 100644 (file)
@@ -22,6 +22,7 @@
 #define MXC_JPEG_MIN_HEIGHT            64
 #define MXC_JPEG_MAX_WIDTH             0x2000
 #define MXC_JPEG_MAX_HEIGHT            0x2000
+#define MXC_JPEG_MAX_LINE              0x8000
 #define MXC_JPEG_MAX_CFG_STREAM                0x1000
 #define MXC_JPEG_H_ALIGN               3
 #define MXC_JPEG_W_ALIGN               3
index f73facb97dc5036d111ceeb31897832140519ceb..c2013995049c603af2a57ddd326abc84450e5d95 100644 (file)
@@ -506,7 +506,7 @@ err_pm:
        return ret;
 }
 
-static int mxc_isi_remove(struct platform_device *pdev)
+static void mxc_isi_remove(struct platform_device *pdev)
 {
        struct mxc_isi_dev *isi = platform_get_drvdata(pdev);
        unsigned int i;
@@ -523,8 +523,6 @@ static int mxc_isi_remove(struct platform_device *pdev)
        mxc_isi_v4l2_cleanup(isi);
 
        pm_runtime_disable(isi->dev);
-
-       return 0;
 }
 
 static const struct of_device_id mxc_isi_of_match[] = {
@@ -537,7 +535,7 @@ MODULE_DEVICE_TABLE(of, mxc_isi_of_match);
 
 static struct platform_driver mxc_isi_driver = {
        .probe          = mxc_isi_probe,
-       .remove         = mxc_isi_remove,
+       .remove_new     = mxc_isi_remove,
        .driver = {
                .of_match_table = mxc_isi_of_match,
                .name           = MXC_ISI_DRIVER_NAME,
index 575f173373887543deb20b08841fc43212367706..93a55c97cd1738f72a34b11c2fee5cc2e3c6f6ca 100644 (file)
@@ -161,7 +161,6 @@ mxc_isi_crossbar_xlate_streams(struct mxc_isi_crossbar *xbar,
 
        pad = media_pad_remote_pad_first(&xbar->pads[sink_pad]);
        sd = media_entity_to_v4l2_subdev(pad->entity);
-
        if (!sd) {
                dev_dbg(xbar->isi->dev,
                        "no entity connected to crossbar input %u\n",
@@ -469,7 +468,8 @@ int mxc_isi_crossbar_init(struct mxc_isi_dev *isi)
        }
 
        for (i = 0; i < xbar->num_sinks; ++i)
-               xbar->pads[i].flags = MEDIA_PAD_FL_SINK;
+               xbar->pads[i].flags = MEDIA_PAD_FL_SINK
+                                   | MEDIA_PAD_FL_MUST_CONNECT;
        for (i = 0; i < xbar->num_sources; ++i)
                xbar->pads[i + xbar->num_sinks].flags = MEDIA_PAD_FL_SOURCE;
 
index 19e80b95ffeaa39ce7c02366b30c97b34f31d08c..5623914f95e649000a353f4eb8ecc3b02e9a0a95 100644 (file)
@@ -215,8 +215,7 @@ static void mxc_isi_channel_set_csc(struct mxc_isi_pipe *pipe,
                [MXC_ISI_ENC_RGB] = "RGB",
                [MXC_ISI_ENC_YUV] = "YUV",
        };
-       const u32 *coeffs;
-       bool cscen = true;
+       const u32 *coeffs = NULL;
        u32 val;
 
        val = mxc_isi_read(pipe, CHNL_IMG_CTRL);
@@ -235,14 +234,13 @@ static void mxc_isi_channel_set_csc(struct mxc_isi_pipe *pipe,
                val |= CHNL_IMG_CTRL_CSC_MODE(CHNL_IMG_CTRL_CSC_MODE_RGB2YCBCR);
        } else {
                /* Bypass CSC */
-               cscen = false;
                val |= CHNL_IMG_CTRL_CSC_BYPASS;
        }
 
        dev_dbg(pipe->isi->dev, "CSC: %s -> %s\n",
                encodings[in_encoding], encodings[out_encoding]);
 
-       if (cscen) {
+       if (coeffs) {
                mxc_isi_write(pipe, CHNL_CSC_COEFF0, coeffs[0]);
                mxc_isi_write(pipe, CHNL_CSC_COEFF1, coeffs[1]);
                mxc_isi_write(pipe, CHNL_CSC_COEFF2, coeffs[2]);
@@ -253,7 +251,7 @@ static void mxc_isi_channel_set_csc(struct mxc_isi_pipe *pipe,
 
        mxc_isi_write(pipe, CHNL_IMG_CTRL, val);
 
-       *bypass = !cscen;
+       *bypass = !coeffs;
 }
 
 void mxc_isi_channel_set_alpha(struct mxc_isi_pipe *pipe, u8 alpha)
index 7ef341bf21cc8b5a77c3bbd13ceb670e0201fdea..6a77de3744543132bfdb538b3c465cae6bd7e550 100644 (file)
@@ -427,7 +427,6 @@ enum venus_inst_modes {
  * @error:     an error returned during last HFI sync operation
  * @session_error:     a flag raised by HFI interface in case of session error
  * @ops:               HFI operations
- * @priv:      a private for HFI operations callbacks
  * @session_type:      the type of the session (decoder or encoder)
  * @hprop:     a union used as a holder by get property
  * @core_acquired:     the Core has been acquired
index ed788e991f74b386c1b164f276e1a56a9c37a6fd..c7fc718a30a5e2e358f26ea52ceed440d4076654 100644 (file)
@@ -14,6 +14,22 @@ config VIDEO_RENESAS_CEU
        help
          This is a v4l2 driver for the Renesas CEU Interface
 
+config VIDEO_RCAR_CSI2
+       tristate "R-Car MIPI CSI-2 Receiver"
+       depends on V4L_PLATFORM_DRIVERS
+       depends on VIDEO_DEV && OF
+       depends on ARCH_RENESAS || COMPILE_TEST
+       select MEDIA_CONTROLLER
+       select VIDEO_V4L2_SUBDEV_API
+       select RESET_CONTROLLER
+       select V4L2_FWNODE
+       help
+         Support for Renesas R-Car MIPI CSI-2 receiver.
+         Supports R-Car Gen3 and RZ/G2 SoCs.
+
+         To compile this driver as a module, choose M here: the
+         module will be called rcar-csi2.
+
 config VIDEO_RCAR_ISP
        tristate "R-Car Image Signal Processor (ISP)"
        depends on V4L_PLATFORM_DRIVERS
index 55854e8688870f553c0925fbde52d78fa958c0cd..50774a20330c91fe6285a2d515b7ad3fb4643fc3 100644 (file)
@@ -7,6 +7,7 @@ obj-y += rcar-vin/
 obj-y += rzg2l-cru/
 obj-y += vsp1/
 
+obj-$(CONFIG_VIDEO_RCAR_CSI2) += rcar-csi2.o
 obj-$(CONFIG_VIDEO_RCAR_DRIF) += rcar_drif.o
 obj-$(CONFIG_VIDEO_RCAR_ISP) += rcar-isp.o
 obj-$(CONFIG_VIDEO_RENESAS_CEU) += renesas-ceu.o
index 530d65fc546bc63f3082b076729d98a841cf5d2f..4512ac338ca53be26298b08b9f254aff17cf0209 100644 (file)
@@ -518,6 +518,7 @@ static void risp_remove(struct platform_device *pdev)
 static struct platform_driver rcar_isp_driver = {
        .driver = {
                .name = "rcar-isp",
+               .suppress_bind_attrs = true,
                .of_match_table = risp_of_id_table,
        },
        .probe = risp_probe,
index de55fe63d84cf8fd7c660c5f62004b8d93274af3..2ec857ab83cb4e7c4da40b3ca2e7e09571d3ea77 100644 (file)
@@ -1,20 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-config VIDEO_RCAR_CSI2
-       tristate "R-Car MIPI CSI-2 Receiver"
-       depends on V4L_PLATFORM_DRIVERS
-       depends on VIDEO_DEV && OF
-       depends on ARCH_RENESAS || COMPILE_TEST
-       select MEDIA_CONTROLLER
-       select VIDEO_V4L2_SUBDEV_API
-       select RESET_CONTROLLER
-       select V4L2_FWNODE
-       help
-         Support for Renesas R-Car MIPI CSI-2 receiver.
-         Supports R-Car Gen3 and RZ/G2 SoCs.
-
-         To compile this driver as a module, choose M here: the
-         module will be called rcar-csi2.
-
 config VIDEO_RCAR_VIN
        tristate "R-Car Video Input (VIN) Driver"
        depends on V4L_PLATFORM_DRIVERS
index 00d809f5d2c100ffe2f7d385db35f4c07995f90b..5938ad6290c84e2d0ee6e84aed93b0d10963c028 100644 (file)
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 rcar-vin-objs = rcar-core.o rcar-dma.o rcar-v4l2.o
 
-obj-$(CONFIG_VIDEO_RCAR_CSI2) += rcar-csi2.o
 obj-$(CONFIG_VIDEO_RCAR_VIN) += rcar-vin.o
index 811603f18af09359d76c120f2c7d0c7772302ad6..a5a99b004322bc29e65f2b032f8b06e6514bf83b 100644 (file)
@@ -133,9 +133,6 @@ struct rzg2l_cru_dev {
        struct v4l2_pix_format format;
 };
 
-void rzg2l_cru_vclk_unprepare(struct rzg2l_cru_dev *cru);
-int rzg2l_cru_vclk_prepare(struct rzg2l_cru_dev *cru);
-
 int rzg2l_cru_start_image_processing(struct rzg2l_cru_dev *cru);
 void rzg2l_cru_stop_image_processing(struct rzg2l_cru_dev *cru);
 
index d20f4eff93a423ed071467b1f9b9d8295d995595..e68fcdaea207aa3a2386940628deed53e5f2ee07 100644 (file)
@@ -108,6 +108,7 @@ struct rzg2l_csi2 {
        struct reset_control *presetn;
        struct reset_control *cmn_rstb;
        struct clk *sysclk;
+       struct clk *vclk;
        unsigned long vclk_rate;
 
        struct v4l2_subdev subdev;
@@ -361,7 +362,7 @@ static int rzg2l_csi2_dphy_setting(struct v4l2_subdev *sd, bool on)
        return rzg2l_csi2_dphy_disable(csi2);
 }
 
-static void rzg2l_csi2_mipi_link_enable(struct rzg2l_csi2 *csi2)
+static int rzg2l_csi2_mipi_link_enable(struct rzg2l_csi2 *csi2)
 {
        unsigned long vclk_rate = csi2->vclk_rate / HZ_PER_MHZ;
        u32 frrskw, frrclk, frrskw_coeff, frrclk_coeff;
@@ -386,11 +387,15 @@ static void rzg2l_csi2_mipi_link_enable(struct rzg2l_csi2 *csi2)
        rzg2l_csi2_write(csi2, CSI2nDTEL, 0xf778ff0f);
        rzg2l_csi2_write(csi2, CSI2nDTEH, 0x00ffff1f);
 
+       clk_disable_unprepare(csi2->vclk);
+
        /* Enable LINK reception */
        rzg2l_csi2_write(csi2, CSI2nMCT3, CSI2nMCT3_RXEN);
+
+       return clk_prepare_enable(csi2->vclk);
 }
 
-static void rzg2l_csi2_mipi_link_disable(struct rzg2l_csi2 *csi2)
+static int rzg2l_csi2_mipi_link_disable(struct rzg2l_csi2 *csi2)
 {
        unsigned int timeout = VSRSTS_RETRIES;
 
@@ -409,18 +414,21 @@ static void rzg2l_csi2_mipi_link_disable(struct rzg2l_csi2 *csi2)
 
        if (!timeout)
                dev_err(csi2->dev, "Clearing CSI2nRTST.VSRSTS timed out\n");
+
+       return 0;
 }
 
 static int rzg2l_csi2_mipi_link_setting(struct v4l2_subdev *sd, bool on)
 {
        struct rzg2l_csi2 *csi2 = sd_to_csi2(sd);
+       int ret;
 
        if (on)
-               rzg2l_csi2_mipi_link_enable(csi2);
+               ret = rzg2l_csi2_mipi_link_enable(csi2);
        else
-               rzg2l_csi2_mipi_link_disable(csi2);
+               ret = rzg2l_csi2_mipi_link_disable(csi2);
 
-       return 0;
+       return ret;
 }
 
 static int rzg2l_csi2_s_stream(struct v4l2_subdev *sd, int enable)
@@ -731,7 +739,6 @@ static const struct media_entity_operations rzg2l_csi2_entity_ops = {
 static int rzg2l_csi2_probe(struct platform_device *pdev)
 {
        struct rzg2l_csi2 *csi2;
-       struct clk *vclk;
        int ret;
 
        csi2 = devm_kzalloc(&pdev->dev, sizeof(*csi2), GFP_KERNEL);
@@ -757,12 +764,11 @@ static int rzg2l_csi2_probe(struct platform_device *pdev)
                return dev_err_probe(&pdev->dev, PTR_ERR(csi2->sysclk),
                                     "Failed to get system clk\n");
 
-       vclk = clk_get(&pdev->dev, "video");
-       if (IS_ERR(vclk))
-               return dev_err_probe(&pdev->dev, PTR_ERR(vclk),
+       csi2->vclk = devm_clk_get(&pdev->dev, "video");
+       if (IS_ERR(csi2->vclk))
+               return dev_err_probe(&pdev->dev, PTR_ERR(csi2->vclk),
                                     "Failed to get video clock\n");
-       csi2->vclk_rate = clk_get_rate(vclk);
-       clk_put(vclk);
+       csi2->vclk_rate = clk_get_rate(csi2->vclk);
 
        csi2->dev = &pdev->dev;
 
@@ -834,7 +840,7 @@ static void rzg2l_csi2_remove(struct platform_device *pdev)
        pm_runtime_disable(&pdev->dev);
 }
 
-static int __maybe_unused rzg2l_csi2_pm_runtime_suspend(struct device *dev)
+static int rzg2l_csi2_pm_runtime_suspend(struct device *dev)
 {
        struct rzg2l_csi2 *csi2 = dev_get_drvdata(dev);
 
@@ -843,7 +849,7 @@ static int __maybe_unused rzg2l_csi2_pm_runtime_suspend(struct device *dev)
        return 0;
 }
 
-static int __maybe_unused rzg2l_csi2_pm_runtime_resume(struct device *dev)
+static int rzg2l_csi2_pm_runtime_resume(struct device *dev)
 {
        struct rzg2l_csi2 *csi2 = dev_get_drvdata(dev);
 
@@ -851,7 +857,8 @@ static int __maybe_unused rzg2l_csi2_pm_runtime_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops rzg2l_csi2_pm_ops = {
-       SET_RUNTIME_PM_OPS(rzg2l_csi2_pm_runtime_suspend, rzg2l_csi2_pm_runtime_resume, NULL)
+       RUNTIME_PM_OPS(rzg2l_csi2_pm_runtime_suspend,
+                      rzg2l_csi2_pm_runtime_resume, NULL)
 };
 
 static const struct of_device_id rzg2l_csi2_of_table[] = {
@@ -865,7 +872,7 @@ static struct platform_driver rzg2l_csi2_pdrv = {
        .driver = {
                .name = "rzg2l-csi2",
                .of_match_table = rzg2l_csi2_of_table,
-               .pm = &rzg2l_csi2_pm_ops,
+               .pm = pm_ptr(&rzg2l_csi2_pm_ops),
        },
 };
 
index 9f351a05893e6c613550ddcd21d5e87d5f9990c5..ac8ebae4ed079bee1e0a28b59a7d189bfac1be18 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (C) 2022 Renesas Electronics Corp.
  */
 
+#include <linux/delay.h>
 #include "rzg2l-cru.h"
 
 struct rzg2l_cru_ip_format {
@@ -71,26 +72,17 @@ static int rzg2l_cru_ip_s_stream(struct v4l2_subdev *sd, int enable)
                if (ret)
                        return ret;
 
+               fsleep(1000);
+
                ret = rzg2l_cru_start_image_processing(cru);
                if (ret) {
                        v4l2_subdev_call(cru->ip.remote, video, post_streamoff);
                        return ret;
                }
 
-               rzg2l_cru_vclk_unprepare(cru);
-
                ret = v4l2_subdev_call(cru->ip.remote, video, s_stream, enable);
-               if (ret == -ENOIOCTLCMD)
-                       ret = 0;
-               if (!ret) {
-                       ret = rzg2l_cru_vclk_prepare(cru);
-                       if (!ret)
-                               return 0;
-               } else {
-                       /* enable back vclk so that s_stream in error path disables it */
-                       if (rzg2l_cru_vclk_prepare(cru))
-                               dev_err(cru->dev, "Failed to enable vclk\n");
-               }
+               if (!ret || ret == -ENOIOCTLCMD)
+                       return 0;
 
                s_stream_ret = ret;
 
index d0ffa90bc6567ccc8c46be4fb3a61fb534945f99..b16b8af6e8f8cb067fbdd71cf9b49cb4784eaaea 100644 (file)
@@ -430,13 +430,6 @@ int rzg2l_cru_start_image_processing(struct rzg2l_cru_dev *cru)
 
        spin_lock_irqsave(&cru->qlock, flags);
 
-       /* Initialize image convert */
-       ret = rzg2l_cru_initialize_image_conv(cru, fmt);
-       if (ret) {
-               spin_unlock_irqrestore(&cru->qlock, flags);
-               return ret;
-       }
-
        /* Select a video input */
        rzg2l_cru_write(cru, CRUnCTRL, CRUnCTRL_VINSEL(0));
 
@@ -450,6 +443,13 @@ int rzg2l_cru_start_image_processing(struct rzg2l_cru_dev *cru)
        /* Initialize the AXI master */
        rzg2l_cru_initialize_axi(cru);
 
+       /* Initialize image convert */
+       ret = rzg2l_cru_initialize_image_conv(cru, fmt);
+       if (ret) {
+               spin_unlock_irqrestore(&cru->qlock, flags);
+               return ret;
+       }
+
        /* Enable interrupt */
        rzg2l_cru_write(cru, CRUnIE, CRUnIE_EFE);
 
@@ -461,16 +461,6 @@ int rzg2l_cru_start_image_processing(struct rzg2l_cru_dev *cru)
        return 0;
 }
 
-void rzg2l_cru_vclk_unprepare(struct rzg2l_cru_dev *cru)
-{
-       clk_disable_unprepare(cru->vclk);
-}
-
-int rzg2l_cru_vclk_prepare(struct rzg2l_cru_dev *cru)
-{
-       return clk_prepare_enable(cru->vclk);
-}
-
 static int rzg2l_cru_set_stream(struct rzg2l_cru_dev *cru, int on)
 {
        struct media_pipeline *pipe;
@@ -499,39 +489,24 @@ static int rzg2l_cru_set_stream(struct rzg2l_cru_dev *cru, int on)
 
                video_device_pipeline_stop(&cru->vdev);
 
-               pm_runtime_put_sync(cru->dev);
-               clk_disable_unprepare(cru->vclk);
-
                return stream_off_ret;
        }
 
-       ret = pm_runtime_resume_and_get(cru->dev);
-       if (ret)
-               return ret;
-
-       ret = clk_prepare_enable(cru->vclk);
-       if (ret)
-               goto err_pm_put;
-
        ret = rzg2l_cru_mc_validate_format(cru, sd, pad);
        if (ret)
-               goto err_vclk_disable;
+               return ret;
 
        pipe = media_entity_pipeline(&sd->entity) ? : &cru->vdev.pipe;
        ret = video_device_pipeline_start(&cru->vdev, pipe);
        if (ret)
-               goto err_vclk_disable;
+               return ret;
 
        ret = v4l2_subdev_call(sd, video, pre_streamon, 0);
-       if (ret == -ENOIOCTLCMD)
-               ret = 0;
-       if (ret)
+       if (ret && ret != -ENOIOCTLCMD)
                goto pipe_line_stop;
 
        ret = v4l2_subdev_call(sd, video, s_stream, 1);
-       if (ret == -ENOIOCTLCMD)
-               ret = 0;
-       if (ret)
+       if (ret && ret != -ENOIOCTLCMD)
                goto err_s_stream;
 
        return 0;
@@ -542,12 +517,6 @@ err_s_stream:
 pipe_line_stop:
        video_device_pipeline_stop(&cru->vdev);
 
-err_vclk_disable:
-       clk_disable_unprepare(cru->vclk);
-
-err_pm_put:
-       pm_runtime_put_sync(cru->dev);
-
        return ret;
 }
 
@@ -646,25 +615,33 @@ static int rzg2l_cru_start_streaming_vq(struct vb2_queue *vq, unsigned int count
        struct rzg2l_cru_dev *cru = vb2_get_drv_priv(vq);
        int ret;
 
+       ret = pm_runtime_resume_and_get(cru->dev);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(cru->vclk);
+       if (ret)
+               goto err_pm_put;
+
        /* Release reset state */
        ret = reset_control_deassert(cru->aresetn);
        if (ret) {
                dev_err(cru->dev, "failed to deassert aresetn\n");
-               return ret;
+               goto err_vclk_disable;
        }
 
        ret = reset_control_deassert(cru->presetn);
        if (ret) {
                reset_control_assert(cru->aresetn);
                dev_err(cru->dev, "failed to deassert presetn\n");
-               return ret;
+               goto assert_aresetn;
        }
 
        ret = request_irq(cru->image_conv_irq, rzg2l_cru_irq,
                          IRQF_SHARED, KBUILD_MODNAME, cru);
        if (ret) {
                dev_err(cru->dev, "failed to request irq\n");
-               goto assert_resets;
+               goto assert_presetn;
        }
 
        /* Allocate scratch buffer. */
@@ -696,10 +673,18 @@ out:
 free_image_conv_irq:
        free_irq(cru->image_conv_irq, cru);
 
-assert_resets:
+assert_presetn:
        reset_control_assert(cru->presetn);
+
+assert_aresetn:
        reset_control_assert(cru->aresetn);
 
+err_vclk_disable:
+       clk_disable_unprepare(cru->vclk);
+
+err_pm_put:
+       pm_runtime_put_sync(cru->dev);
+
        return ret;
 }
 
@@ -714,9 +699,11 @@ static void rzg2l_cru_stop_streaming_vq(struct vb2_queue *vq)
                          cru->scratch, cru->scratch_phys);
 
        free_irq(cru->image_conv_irq, cru);
-       reset_control_assert(cru->presetn);
-
        return_unused_buffers(cru, VB2_BUF_STATE_ERROR);
+
+       reset_control_assert(cru->presetn);
+       clk_disable_unprepare(cru->vclk);
+       pm_runtime_put_sync(cru->dev);
 }
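
Net effect of moving the clock and runtime-PM handling out of rzg2l_cru_set_stream(): power, clock, resets and the IRQ are now acquired and released symmetrically around the vb2 queue lifetime. The resulting pairing, read off the two functions above:

	/*
	 * start_streaming_vq: pm_runtime_resume_and_get()
	 *   -> clk_prepare_enable(vclk) -> deassert aresetn/presetn
	 *   -> request_irq()
	 * stop_streaming_vq (reverse): free_irq() -> return buffers
	 *   -> assert presetn -> clk_disable_unprepare(vclk)
	 *   -> pm_runtime_put_sync()
	 */
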
 
 static const struct vb2_ops rzg2l_cru_qops = {
index c381c22135a217b71e82f8282699cfbe14749ded..2bddb4fa8a5cd163f383a9baa9fc3e2ca1ea87b8 100644 (file)
@@ -47,13 +47,18 @@ enum rkisp1_plane {
  * @fourcc: pixel format
  * @fmt_type: helper field for pixel format
  * @uv_swap: if cb cr swapped, for yuv
+ * @yc_swap: if y and cb/cr swapped, for yuv
+ * @byte_swap: if byte pairs are swapped, for raw
  * @write_format: defines how YCbCr self picture data is written to memory
- * @output_format: defines sp output format
+ * @output_format: defines the output format (RKISP1_CIF_MI_INIT_MP_OUTPUT_* for
+ *     the main path and RKISP1_MI_CTRL_SP_OUTPUT_* for the self path)
  * @mbus: the mbus code on the src resizer pad that matches the pixel format
  */
 struct rkisp1_capture_fmt_cfg {
        u32 fourcc;
-       u8 uv_swap;
+       u32 uv_swap : 1;
+       u32 yc_swap : 1;
+       u32 byte_swap : 1;
        u32 write_format;
        u32 output_format;
        u32 mbus;
@@ -94,36 +99,50 @@ static const struct rkisp1_capture_fmt_cfg rkisp1_mp_fmts[] = {
                .fourcc = V4L2_PIX_FMT_YUYV,
                .uv_swap = 0,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUVINT,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV422,
+               .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+       }, {
+               .fourcc = V4L2_PIX_FMT_UYVY,
+               .uv_swap = 0,
+               .yc_swap = 1,
+               .write_format = RKISP1_MI_CTRL_MP_WRITE_YUVINT,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV422,
                .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
        }, {
                .fourcc = V4L2_PIX_FMT_YUV422P,
                .uv_swap = 0,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV422,
                .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
        }, {
                .fourcc = V4L2_PIX_FMT_NV16,
                .uv_swap = 0,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV422,
                .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
        }, {
                .fourcc = V4L2_PIX_FMT_NV61,
                .uv_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV422,
                .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
        }, {
                .fourcc = V4L2_PIX_FMT_NV16M,
                .uv_swap = 0,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV422,
                .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
        }, {
                .fourcc = V4L2_PIX_FMT_NV61M,
                .uv_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV422,
                .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
        }, {
                .fourcc = V4L2_PIX_FMT_YVU422M,
                .uv_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV422,
                .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
        },
        /* yuv400 */
@@ -131,6 +150,7 @@ static const struct rkisp1_capture_fmt_cfg rkisp1_mp_fmts[] = {
                .fourcc = V4L2_PIX_FMT_GREY,
                .uv_swap = 0,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV400,
                .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
        },
        /* yuv420 */
@@ -138,81 +158,107 @@ static const struct rkisp1_capture_fmt_cfg rkisp1_mp_fmts[] = {
                .fourcc = V4L2_PIX_FMT_NV21,
                .uv_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV420,
                .mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
        }, {
                .fourcc = V4L2_PIX_FMT_NV12,
                .uv_swap = 0,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV420,
                .mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
        }, {
                .fourcc = V4L2_PIX_FMT_NV21M,
                .uv_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV420,
                .mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
        }, {
                .fourcc = V4L2_PIX_FMT_NV12M,
                .uv_swap = 0,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_SPLA,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV420,
                .mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
        }, {
                .fourcc = V4L2_PIX_FMT_YUV420,
                .uv_swap = 0,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV420,
                .mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
        }, {
                .fourcc = V4L2_PIX_FMT_YVU420,
                .uv_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV420,
                .mbus = MEDIA_BUS_FMT_YUYV8_1_5X8,
        },
        /* raw */
        {
                .fourcc = V4L2_PIX_FMT_SRGGB8,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW8,
                .mbus = MEDIA_BUS_FMT_SRGGB8_1X8,
        }, {
                .fourcc = V4L2_PIX_FMT_SGRBG8,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW8,
                .mbus = MEDIA_BUS_FMT_SGRBG8_1X8,
        }, {
                .fourcc = V4L2_PIX_FMT_SGBRG8,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW8,
                .mbus = MEDIA_BUS_FMT_SGBRG8_1X8,
        }, {
                .fourcc = V4L2_PIX_FMT_SBGGR8,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_YUV_PLA_OR_RAW8,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW8,
                .mbus = MEDIA_BUS_FMT_SBGGR8_1X8,
        }, {
                .fourcc = V4L2_PIX_FMT_SRGGB10,
+               .byte_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW10,
                .mbus = MEDIA_BUS_FMT_SRGGB10_1X10,
        }, {
                .fourcc = V4L2_PIX_FMT_SGRBG10,
+               .byte_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW10,
                .mbus = MEDIA_BUS_FMT_SGRBG10_1X10,
        }, {
                .fourcc = V4L2_PIX_FMT_SGBRG10,
+               .byte_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW10,
                .mbus = MEDIA_BUS_FMT_SGBRG10_1X10,
        }, {
                .fourcc = V4L2_PIX_FMT_SBGGR10,
+               .byte_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW10,
                .mbus = MEDIA_BUS_FMT_SBGGR10_1X10,
        }, {
                .fourcc = V4L2_PIX_FMT_SRGGB12,
+               .byte_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW12,
                .mbus = MEDIA_BUS_FMT_SRGGB12_1X12,
        }, {
                .fourcc = V4L2_PIX_FMT_SGRBG12,
+               .byte_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW12,
                .mbus = MEDIA_BUS_FMT_SGRBG12_1X12,
        }, {
                .fourcc = V4L2_PIX_FMT_SGBRG12,
+               .byte_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW12,
                .mbus = MEDIA_BUS_FMT_SGBRG12_1X12,
        }, {
                .fourcc = V4L2_PIX_FMT_SBGGR12,
+               .byte_swap = 1,
                .write_format = RKISP1_MI_CTRL_MP_WRITE_RAW12,
+               .output_format = RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW12,
                .mbus = MEDIA_BUS_FMT_SBGGR12_1X12,
        },
 };
@@ -229,6 +275,13 @@ static const struct rkisp1_capture_fmt_cfg rkisp1_sp_fmts[] = {
                .write_format = RKISP1_MI_CTRL_SP_WRITE_INT,
                .output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
                .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
+       }, {
+               .fourcc = V4L2_PIX_FMT_UYVY,
+               .uv_swap = 0,
+               .yc_swap = 1,
+               .write_format = RKISP1_MI_CTRL_SP_WRITE_INT,
+               .output_format = RKISP1_MI_CTRL_SP_OUTPUT_YUV422,
+               .mbus = MEDIA_BUS_FMT_YUYV8_2X8,
        }, {
                .fourcc = V4L2_PIX_FMT_YUV422P,
                .uv_swap = 0,
@@ -442,6 +495,14 @@ static void rkisp1_mp_config(struct rkisp1_capture *cap)
        rkisp1_write(rkisp1, cap->config->mi.cr_size_init,
                     rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CR));
 
+       if (rkisp1_has_feature(rkisp1, MAIN_STRIDE)) {
+               rkisp1_write(rkisp1, RKISP1_CIF_MI_MP_Y_LLENGTH, cap->stride);
+               rkisp1_write(rkisp1, RKISP1_CIF_MI_MP_Y_PIC_WIDTH, pixm->width);
+               rkisp1_write(rkisp1, RKISP1_CIF_MI_MP_Y_PIC_HEIGHT, pixm->height);
+               rkisp1_write(rkisp1, RKISP1_CIF_MI_MP_Y_PIC_SIZE,
+                            cap->stride * pixm->height);
+       }
+
        rkisp1_irq_frame_end_enable(cap);
 
        /* set uv swapping for semiplanar formats */
@@ -454,6 +515,25 @@ static void rkisp1_mp_config(struct rkisp1_capture *cap)
                rkisp1_write(rkisp1, RKISP1_CIF_MI_XTD_FORMAT_CTRL, reg);
        }
 
+       /*
+        * U/V swapping with the MI_XTD_FORMAT_CTRL register only works for
+        * NV12/NV21 and NV16/NV61, so instead use byte swap to support UYVY.
+        * YVYU and VYUY cannot be supported with this method.
+        */
+       if (rkisp1_has_feature(rkisp1, MAIN_STRIDE)) {
+               reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_OUTPUT_ALIGN_FORMAT);
+               if (cap->pix.cfg->yc_swap || cap->pix.cfg->byte_swap)
+                       reg |= RKISP1_CIF_OUTPUT_ALIGN_FORMAT_MP_BYTE_SWAP_BYTES;
+               else
+                       reg &= ~RKISP1_CIF_OUTPUT_ALIGN_FORMAT_MP_BYTE_SWAP_BYTES;
+
+               reg |= RKISP1_CIF_OUTPUT_ALIGN_FORMAT_MP_LSB_ALIGNMENT;
+               rkisp1_write(rkisp1, RKISP1_CIF_MI_OUTPUT_ALIGN_FORMAT, reg);
+
+               rkisp1_write(rkisp1, RKISP1_CIF_MI_INIT,
+                            cap->pix.cfg->output_format);
+       }
+
        rkisp1_mi_config_ctrl(cap);
 
        reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_CTRL);
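
The byte-swap trick works because UYVY is exactly YUYV with each byte pair exchanged. Laid out in memory:

	/*
	 * YUYV: Y0 U0 Y1 V0 | Y2 U1 Y3 V1 | ...
	 *   swap every byte pair ->
	 * UYVY: U0 Y0 V0 Y1 | U1 Y2 V1 Y3 | ...
	 *
	 * YVYU and VYUY would need a different permutation, which is
	 * why the comment above rules them out.
	 */
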
@@ -479,11 +559,11 @@ static void rkisp1_sp_config(struct rkisp1_capture *cap)
        rkisp1_write(rkisp1, cap->config->mi.cr_size_init,
                     rkisp1_pixfmt_comp_size(pixm, RKISP1_PLANE_CR));
 
-       rkisp1_write(rkisp1, RKISP1_CIF_MI_SP_Y_LLENGTH, cap->sp_y_stride);
+       rkisp1_write(rkisp1, RKISP1_CIF_MI_SP_Y_LLENGTH, cap->stride);
        rkisp1_write(rkisp1, RKISP1_CIF_MI_SP_Y_PIC_WIDTH, pixm->width);
        rkisp1_write(rkisp1, RKISP1_CIF_MI_SP_Y_PIC_HEIGHT, pixm->height);
        rkisp1_write(rkisp1, RKISP1_CIF_MI_SP_Y_PIC_SIZE,
-                    cap->sp_y_stride * pixm->height);
+                    cap->stride * pixm->height);
 
        rkisp1_irq_frame_end_enable(cap);
 
@@ -497,6 +577,20 @@ static void rkisp1_sp_config(struct rkisp1_capture *cap)
                rkisp1_write(rkisp1, RKISP1_CIF_MI_XTD_FORMAT_CTRL, reg);
        }
 
+       /*
+        * U/V swapping with the MI_XTD_FORMAT_CTRL register only works for
+        * NV12/NV21 and NV16/NV61, so instead use byte swap to support UYVY.
+        * YVYU and VYUY cannot be supported with this method.
+        */
+       if (rkisp1_has_feature(rkisp1, MAIN_STRIDE)) {
+               reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_OUTPUT_ALIGN_FORMAT);
+               if (cap->pix.cfg->yc_swap)
+                       reg |= RKISP1_CIF_OUTPUT_ALIGN_FORMAT_SP_BYTE_SWAP_BYTES;
+               else
+                       reg &= ~RKISP1_CIF_OUTPUT_ALIGN_FORMAT_SP_BYTE_SWAP_BYTES;
+               rkisp1_write(rkisp1, RKISP1_CIF_MI_OUTPUT_ALIGN_FORMAT, reg);
+       }
+
        rkisp1_mi_config_ctrl(cap);
 
        mi_ctrl = rkisp1_read(rkisp1, RKISP1_CIF_MI_CTRL);
@@ -640,11 +734,13 @@ static void rkisp1_dummy_buf_destroy(struct rkisp1_capture *cap)
 
 static void rkisp1_set_next_buf(struct rkisp1_capture *cap)
 {
+       u8 shift = rkisp1_has_feature(cap->rkisp1, DMA_34BIT) ? 2 : 0;
+
        cap->buf.curr = cap->buf.next;
        cap->buf.next = NULL;
 
        if (!list_empty(&cap->buf.queue)) {
-               u32 *buff_addr;
+               dma_addr_t *buff_addr;
 
                cap->buf.next = list_first_entry(&cap->buf.queue, struct rkisp1_buffer, queue);
                list_del(&cap->buf.next->queue);
@@ -652,7 +748,7 @@ static void rkisp1_set_next_buf(struct rkisp1_capture *cap)
                buff_addr = cap->buf.next->buff_addr;
 
                rkisp1_write(cap->rkisp1, cap->config->mi.y_base_ad_init,
-                            buff_addr[RKISP1_PLANE_Y]);
+                            buff_addr[RKISP1_PLANE_Y] >> shift);
                /*
                 * In order to support grey format we capture
                 * YUV422 planar format from the camera and
@@ -661,17 +757,17 @@ static void rkisp1_set_next_buf(struct rkisp1_capture *cap)
                if (cap->pix.cfg->fourcc == V4L2_PIX_FMT_GREY) {
                        rkisp1_write(cap->rkisp1,
                                     cap->config->mi.cb_base_ad_init,
-                                    cap->buf.dummy.dma_addr);
+                                    cap->buf.dummy.dma_addr >> shift);
                        rkisp1_write(cap->rkisp1,
                                     cap->config->mi.cr_base_ad_init,
-                                    cap->buf.dummy.dma_addr);
+                                    cap->buf.dummy.dma_addr >> shift);
                } else {
                        rkisp1_write(cap->rkisp1,
                                     cap->config->mi.cb_base_ad_init,
-                                    buff_addr[RKISP1_PLANE_CB]);
+                                    buff_addr[RKISP1_PLANE_CB] >> shift);
                        rkisp1_write(cap->rkisp1,
                                     cap->config->mi.cr_base_ad_init,
-                                    buff_addr[RKISP1_PLANE_CR]);
+                                    buff_addr[RKISP1_PLANE_CR] >> shift);
                }
        } else {
                /*
@@ -679,11 +775,11 @@ static void rkisp1_set_next_buf(struct rkisp1_capture *cap)
                 * throw data if there is no available buffer.
                 */
                rkisp1_write(cap->rkisp1, cap->config->mi.y_base_ad_init,
-                            cap->buf.dummy.dma_addr);
+                            cap->buf.dummy.dma_addr >> shift);
                rkisp1_write(cap->rkisp1, cap->config->mi.cb_base_ad_init,
-                            cap->buf.dummy.dma_addr);
+                            cap->buf.dummy.dma_addr >> shift);
                rkisp1_write(cap->rkisp1, cap->config->mi.cr_base_ad_init,
-                            cap->buf.dummy.dma_addr);
+                            cap->buf.dummy.dma_addr >> shift);
        }
 
        /* Set plane offsets */
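
A quick numeric check of the DMA_34BIT shift above, assuming (as the code implies) that the 32-bit register holds bits [33:2] of the bus address, i.e. buffers must be 4-byte aligned:

	dma_addr_t addr = 0x340000000ULL;	/* 34-bit bus address, needs
						 * a 64-bit dma_addr_t      */
	u32 reg = addr >> 2;			/* 0xd0000000, fits in 32 bits */
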
@@ -722,6 +818,7 @@ irqreturn_t rkisp1_capture_isr(int irq, void *ctx)
 {
        struct device *dev = ctx;
        struct rkisp1_device *rkisp1 = dev_get_drvdata(dev);
+       unsigned int dev_count = rkisp1_path_count(rkisp1);
        unsigned int i;
        u32 status;
 
@@ -734,7 +831,7 @@ irqreturn_t rkisp1_capture_isr(int irq, void *ctx)
 
        rkisp1_write(rkisp1, RKISP1_CIF_MI_ICR, status);
 
-       for (i = 0; i < ARRAY_SIZE(rkisp1->capture_devs); ++i) {
+       for (i = 0; i < dev_count; ++i) {
                struct rkisp1_capture *cap = &rkisp1->capture_devs[i];
 
                if (!(status & RKISP1_CIF_MI_FRAME(cap)))
@@ -891,6 +988,7 @@ static void rkisp1_cap_stream_enable(struct rkisp1_capture *cap)
 {
        struct rkisp1_device *rkisp1 = cap->rkisp1;
        struct rkisp1_capture *other = &rkisp1->capture_devs[cap->id ^ 1];
+       bool has_self_path = rkisp1_has_feature(rkisp1, SELF_PATH);
 
        cap->ops->set_data_path(cap);
        cap->ops->config(cap);
@@ -899,19 +997,40 @@ static void rkisp1_cap_stream_enable(struct rkisp1_capture *cap)
        spin_lock_irq(&cap->buf.lock);
        rkisp1_set_next_buf(cap);
        cap->ops->enable(cap);
-       /* It's safe to configure ACTIVE and SHADOW registers for the
-        * first stream. While when the second is starting, do NOT
-        * force update because it also updates the first one.
+
+       /*
+        * It's safe to configure ACTIVE and SHADOW registers for the first
+        * stream. However, when the second is starting, do NOT force an
+        * update, because it also updates the first one.
         *
-        * The latter case would drop one more buffer(that is 2) since
-        * there's no buffer in a shadow register when the second FE received.
-        * This's also required because the second FE maybe corrupt
-        * especially when run at 120fps.
+        * The latter case would drop one more buffer (that is, two), since
+        * there's no buffer in a shadow register when the second frame end
+        * (FE) is received. This is also required because the second FE may
+        * be corrupt, especially when running at 120fps.
         */
-       if (!other->is_streaming) {
-               /* force cfg update */
-               rkisp1_write(rkisp1, RKISP1_CIF_MI_INIT,
-                            RKISP1_CIF_MI_INIT_SOFT_UPD);
+       if (!has_self_path || !other->is_streaming) {
+               u32 reg;
+
+               /*
+                * Force cfg update.
+                *
+                * The ISP8000 (implementing the MAIN_STRIDE feature) has an
+                * mp_output_format field in the CIF_MI_INIT register that must
+                * be preserved. It can be read back, but it is not clear what
+                * the other register bits will return. Mask them out.
+                *
+                * On Rockchip platforms, the CIF_MI_INIT register is marked as
+                * write-only and reads as zeros. We can skip reading it.
+                */
+               if (rkisp1_has_feature(rkisp1, MAIN_STRIDE))
+                       reg = rkisp1_read(rkisp1, RKISP1_CIF_MI_INIT)
+                           & RKISP1_CIF_MI_INIT_MP_OUTPUT_MASK;
+               else
+                       reg = 0;
+
+               reg |= RKISP1_CIF_MI_INIT_SOFT_UPD;
+               rkisp1_write(rkisp1, RKISP1_CIF_MI_INIT, reg);
+
                rkisp1_set_next_buf(cap);
        }
        spin_unlock_irq(&cap->buf.lock);
@@ -1095,8 +1214,8 @@ static const struct vb2_ops rkisp1_vb2_ops = {
  */
 
 static const struct v4l2_format_info *
-rkisp1_fill_pixfmt(struct v4l2_pix_format_mplane *pixm,
-                  enum rkisp1_stream_id id)
+rkisp1_fill_pixfmt(const struct rkisp1_capture *cap,
+                  struct v4l2_pix_format_mplane *pixm)
 {
        struct v4l2_plane_pix_format *plane_y = &pixm->plane_fmt[0];
        const struct v4l2_format_info *info;
@@ -1109,10 +1228,13 @@ rkisp1_fill_pixfmt(struct v4l2_pix_format_mplane *pixm,
 
        /*
         * The SP supports custom strides, expressed as a number of pixels for
-        * the Y plane. Clamp the stride to a reasonable value to avoid integer
-        * overflows when calculating the bytesperline and sizeimage values.
+        * the Y plane, and so does the MP in ISP versions that have the
+        * MAIN_STRIDE feature. Clamp the stride to a reasonable value to avoid
+        * integer overflows when calculating the bytesperline and sizeimage
+        * values.
         */
-       if (id == RKISP1_SELFPATH)
+       if (cap->id == RKISP1_SELFPATH ||
+           rkisp1_has_feature(cap->rkisp1, MAIN_STRIDE))
                stride = clamp(DIV_ROUND_UP(plane_y->bytesperline, info->bpp[0]),
                               pixm->width, 65536U);
        else
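
A standalone arithmetic check of the clamp above, as a userspace sketch (DIV_ROUND_UP and clamp are reimplemented locally; the values are illustrative and assume a 2-bytes-per-pixel format):

    #include <stdio.h>

    static unsigned int div_round_up(unsigned int n, unsigned int d)
    {
            return (n + d - 1) / d;
    }

    static unsigned int clamp_uint(unsigned int v, unsigned int lo, unsigned int hi)
    {
            return v < lo ? lo : (v > hi ? hi : v);
    }

    int main(void)
    {
            /* 5000 bytes per line at 2 bytes per pixel -> 2500 pixels,
             * already within [1920, 65536], so the stride stays 2500. */
            printf("%u\n", clamp_uint(div_round_up(5000, 2), 1920, 65536U));

            /* An undersized 200-byte line rounds to 100 pixels and is
             * clamped up to the image width of 1920. */
            printf("%u\n", clamp_uint(div_round_up(200, 2), 1920, 65536U));
            return 0;
    }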
@@ -1147,10 +1269,14 @@ rkisp1_fill_pixfmt(struct v4l2_pix_format_mplane *pixm,
 static const struct rkisp1_capture_fmt_cfg *
 rkisp1_find_fmt_cfg(const struct rkisp1_capture *cap, const u32 pixelfmt)
 {
+       bool yc_swap_support = rkisp1_has_feature(cap->rkisp1, MAIN_STRIDE);
        unsigned int i;
 
        for (i = 0; i < cap->config->fmt_size; i++) {
-               if (cap->config->fmts[i].fourcc == pixelfmt)
+               const struct rkisp1_capture_fmt_cfg *fmt = &cap->config->fmts[i];
+
+               if (fmt->fourcc == pixelfmt &&
+                   (!fmt->yc_swap || yc_swap_support))
                        return &cap->config->fmts[i];
        }
        return NULL;
@@ -1187,7 +1313,7 @@ static void rkisp1_try_fmt(const struct rkisp1_capture *cap,
        pixm->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
        pixm->quantization = V4L2_QUANTIZATION_DEFAULT;
 
-       info = rkisp1_fill_pixfmt(pixm, cap->id);
+       info = rkisp1_fill_pixfmt(cap, pixm);
 
        if (fmt_cfg)
                *fmt_cfg = fmt;
@@ -1199,12 +1325,9 @@ static void rkisp1_set_fmt(struct rkisp1_capture *cap,
                           struct v4l2_pix_format_mplane *pixm)
 {
        rkisp1_try_fmt(cap, pixm, &cap->pix.cfg, &cap->pix.info);
-       cap->pix.fmt = *pixm;
 
-       /* SP supports custom stride in number of pixels of the Y plane */
-       if (cap->id == RKISP1_SELFPATH)
-               cap->sp_y_stride = pixm->plane_fmt[0].bytesperline /
-                                  cap->pix.info->bpp[0];
+       cap->pix.fmt = *pixm;
+       cap->stride = pixm->plane_fmt[0].bytesperline / cap->pix.info->bpp[0];
 }
 
 static int rkisp1_try_fmt_vid_cap_mplane(struct file *file, void *fh,
@@ -1222,23 +1345,29 @@ static int rkisp1_enum_fmt_vid_cap_mplane(struct file *file, void *priv,
 {
        struct rkisp1_capture *cap = video_drvdata(file);
        const struct rkisp1_capture_fmt_cfg *fmt = NULL;
+       bool yc_swap_support = rkisp1_has_feature(cap->rkisp1, MAIN_STRIDE);
        unsigned int i, n = 0;
 
-       if (!f->mbus_code) {
-               if (f->index >= cap->config->fmt_size)
-                       return -EINVAL;
+       if (f->index >= cap->config->fmt_size)
+               return -EINVAL;
 
+       if (!f->mbus_code && yc_swap_support) {
                fmt = &cap->config->fmts[f->index];
                f->pixelformat = fmt->fourcc;
                return 0;
        }
 
        for (i = 0; i < cap->config->fmt_size; i++) {
-               if (cap->config->fmts[i].mbus != f->mbus_code)
+               fmt = &cap->config->fmts[i];
+
+               if (f->mbus_code && fmt->mbus != f->mbus_code)
+                       continue;
+
+               if (!yc_swap_support && fmt->yc_swap)
                        continue;
 
                if (n++ == f->index) {
-                       f->pixelformat = cap->config->fmts[i].fourcc;
+                       f->pixelformat = fmt->fourcc;
                        return 0;
                }
        }
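
From the application side, the reworked enumeration is reached through the standard VIDIOC_ENUM_FMT ioctl; a hedged userspace sketch (assumes a little-endian fourcc layout, error handling omitted):

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/videodev2.h>

    static void list_formats(int fd)
    {
            struct v4l2_fmtdesc f = {
                    .type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE,
            };

            /* Each index maps to one fourcc; YC-swapped variants are
             * simply never enumerated on devices without MAIN_STRIDE. */
            for (f.index = 0; ioctl(fd, VIDIOC_ENUM_FMT, &f) == 0; f.index++)
                    printf("%.4s\n", (char *)&f.pixelformat);
    }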
@@ -1501,10 +1630,11 @@ rkisp1_capture_init(struct rkisp1_device *rkisp1, enum rkisp1_stream_id id)
 
 int rkisp1_capture_devs_register(struct rkisp1_device *rkisp1)
 {
+       unsigned int dev_count = rkisp1_path_count(rkisp1);
        unsigned int i;
        int ret;
 
-       for (i = 0; i < ARRAY_SIZE(rkisp1->capture_devs); i++) {
+       for (i = 0; i < dev_count; i++) {
                struct rkisp1_capture *cap = &rkisp1->capture_devs[i];
 
                rkisp1_capture_init(rkisp1, i);
index b757f75edecf75256525e378151fe217c88ef2c4..26573f6ae57550d9502c41ac7e1931868f68d4a6 100644 (file)
@@ -24,6 +24,7 @@
 #include "rkisp1-regs.h"
 
 struct dentry;
+struct regmap;
 
 /*
  * flags on the 'direction' field in struct rkisp1_mbus_info' that indicate
@@ -110,6 +111,10 @@ enum rkisp1_isp_pad {
  * enum rkisp1_feature - ISP features
  *
  * @RKISP1_FEATURE_MIPI_CSI2: The ISP has an internal MIPI CSI-2 receiver
+ * @RKISP1_FEATURE_MAIN_STRIDE: The ISP supports configurable stride on the main path
+ * @RKISP1_FEATURE_SELF_PATH: The ISP has a self path
+ * @RKISP1_FEATURE_DUAL_CROP: The ISP has the dual crop block at the resizer input
+ * @RKISP1_FEATURE_DMA_34BIT: The ISP uses 34-bit DMA addresses
  *
  * The ISP features are stored in a bitmask in &rkisp1_info.features and allow
  * the driver to implement support for features present in some ISP versions
@@ -117,8 +122,15 @@ enum rkisp1_isp_pad {
  */
 enum rkisp1_feature {
        RKISP1_FEATURE_MIPI_CSI2 = BIT(0),
+       RKISP1_FEATURE_MAIN_STRIDE = BIT(1),
+       RKISP1_FEATURE_SELF_PATH = BIT(2),
+       RKISP1_FEATURE_DUAL_CROP = BIT(3),
+       RKISP1_FEATURE_DMA_34BIT = BIT(4),
 };
 
+#define rkisp1_has_feature(rkisp1, feature) \
+       ((rkisp1)->info->features & RKISP1_FEATURE_##feature)
+
 /*
  * struct rkisp1_info - Model-specific ISP Information
  *
@@ -229,7 +241,7 @@ struct rkisp1_vdev_node {
 struct rkisp1_buffer {
        struct vb2_v4l2_buffer vb;
        struct list_head queue;
-       u32 buff_addr[VIDEO_MAX_PLANES];
+       dma_addr_t buff_addr[VIDEO_MAX_PLANES];
 };
 
 /*
@@ -263,7 +275,7 @@ struct rkisp1_device;
  *               handler to stop the streaming by waiting on the 'done' wait queue.
  *               If the irq handler is not called, the stream is stopped by the callback
  *               after timeout.
- * @sp_y_stride:  the selfpath allows to configure a y stride that is longer than the image width.
+ * @stride:       the line stride for the first plane, in pixel units
  * @buf.lock:    lock to protect buf.queue
  * @buf.queue:   queued buffer list
  * @buf.dummy:   dummy space to store dropped data
@@ -284,7 +296,7 @@ struct rkisp1_capture {
        bool is_streaming;
        bool is_stopping;
        wait_queue_head_t done;
-       unsigned int sp_y_stride;
+       unsigned int stride;
        struct {
                /* protects queue, curr and next */
                spinlock_t lock;
@@ -435,6 +447,8 @@ struct rkisp1_debug {
  * @dev:          a pointer to the struct device
  * @clk_size:     number of clocks
  * @clks:         array of clocks
+ * @gasket:       the gasket - i.MX8MP only
+ * @gasket_id:    the gasket ID (0 or 1) - i.MX8MP only
  * @v4l2_dev:     v4l2_device variable
  * @media_dev:    media_device variable
  * @notifier:     a notifier to register on the v4l2-async API to be notified on the sensor
@@ -457,6 +471,8 @@ struct rkisp1_device {
        struct device *dev;
        unsigned int clk_size;
        struct clk_bulk_data clks[RKISP1_MAX_BUS_CLK];
+       struct regmap *gasket;
+       unsigned int gasket_id;
        struct v4l2_device v4l2_dev;
        struct media_device media_dev;
        struct v4l2_async_notifier notifier;
@@ -526,6 +542,19 @@ int rkisp1_cap_enum_mbus_codes(struct rkisp1_capture *cap,
  */
 const struct rkisp1_mbus_info *rkisp1_mbus_info_get_by_index(unsigned int index);
 
+/*
+ * rkisp1_path_count - Return the number of paths supported by the device
+ *
+ * Some devices only have a main path, while other devices have both a main
+ * path and a self path. This function returns the number of paths that this
+ * device has, based on the feature flags. It should be used instead of
+ * checking ARRAY_SIZE of capture_devs/resizer_devs.
+ */
+static inline unsigned int rkisp1_path_count(struct rkisp1_device *rkisp1)
+{
+       return rkisp1_has_feature(rkisp1, SELF_PATH) ? 2 : 1;
+}
+
 /*
  * rkisp1_sd_adjust_crop_rect - adjust a rectangle to fit into another rectangle.
  *
index 73cf08a740118c05328fdd3f1a9d52a6e935c4e0..bb0202386c70173cdd59f296bff7c5a72421877d 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/clk.h>
 #include <linux/interrupt.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_graph.h>
@@ -207,7 +208,7 @@ static int rkisp1_subdev_notifier_register(struct rkisp1_device *rkisp1)
                switch (reg) {
                case 0:
                        /* MIPI CSI-2 port */
-                       if (!(rkisp1->info->features & RKISP1_FEATURE_MIPI_CSI2)) {
+                       if (!rkisp1_has_feature(rkisp1, MIPI_CSI2)) {
                                dev_err(rkisp1->dev,
                                        "internal CSI must be available for port 0\n");
                                ret = -EINVAL;
@@ -358,10 +359,11 @@ static const struct dev_pm_ops rkisp1_pm_ops = {
 
 static int rkisp1_create_links(struct rkisp1_device *rkisp1)
 {
+       unsigned int dev_count = rkisp1_path_count(rkisp1);
        unsigned int i;
        int ret;
 
-       if (rkisp1->info->features & RKISP1_FEATURE_MIPI_CSI2) {
+       if (rkisp1_has_feature(rkisp1, MIPI_CSI2)) {
                /* Link the CSI receiver to the ISP. */
                ret = media_create_pad_link(&rkisp1->csi.sd.entity,
                                            RKISP1_CSI_PAD_SRC,
@@ -373,7 +375,7 @@ static int rkisp1_create_links(struct rkisp1_device *rkisp1)
        }
 
        /* create ISP->RSZ->CAP links */
-       for (i = 0; i < 2; i++) {
+       for (i = 0; i < dev_count; i++) {
                struct media_entity *resizer =
                        &rkisp1->resizer_devs[i].sd.entity;
                struct media_entity *capture =
@@ -413,7 +415,7 @@ static int rkisp1_create_links(struct rkisp1_device *rkisp1)
 
 static void rkisp1_entities_unregister(struct rkisp1_device *rkisp1)
 {
-       if (rkisp1->info->features & RKISP1_FEATURE_MIPI_CSI2)
+       if (rkisp1_has_feature(rkisp1, MIPI_CSI2))
                rkisp1_csi_unregister(rkisp1);
        rkisp1_params_unregister(rkisp1);
        rkisp1_stats_unregister(rkisp1);
@@ -446,7 +448,7 @@ static int rkisp1_entities_register(struct rkisp1_device *rkisp1)
        if (ret)
                goto error;
 
-       if (rkisp1->info->features & RKISP1_FEATURE_MIPI_CSI2) {
+       if (rkisp1_has_feature(rkisp1, MIPI_CSI2)) {
                ret = rkisp1_csi_register(rkisp1);
                if (ret)
                        goto error;
@@ -505,7 +507,9 @@ static const struct rkisp1_info px30_isp_info = {
        .isrs = px30_isp_isrs,
        .isr_size = ARRAY_SIZE(px30_isp_isrs),
        .isp_ver = RKISP1_V12,
-       .features = RKISP1_FEATURE_MIPI_CSI2,
+       .features = RKISP1_FEATURE_MIPI_CSI2
+                 | RKISP1_FEATURE_SELF_PATH
+                 | RKISP1_FEATURE_DUAL_CROP,
 };
 
 static const char * const rk3399_isp_clks[] = {
@@ -524,7 +528,29 @@ static const struct rkisp1_info rk3399_isp_info = {
        .isrs = rk3399_isp_isrs,
        .isr_size = ARRAY_SIZE(rk3399_isp_isrs),
        .isp_ver = RKISP1_V10,
-       .features = RKISP1_FEATURE_MIPI_CSI2,
+       .features = RKISP1_FEATURE_MIPI_CSI2
+                 | RKISP1_FEATURE_SELF_PATH
+                 | RKISP1_FEATURE_DUAL_CROP,
+};
+
+static const char * const imx8mp_isp_clks[] = {
+       "isp",
+       "hclk",
+       "aclk",
+};
+
+static const struct rkisp1_isr_data imx8mp_isp_isrs[] = {
+       { NULL, rkisp1_isr, BIT(RKISP1_IRQ_ISP) | BIT(RKISP1_IRQ_MI) },
+};
+
+static const struct rkisp1_info imx8mp_isp_info = {
+       .clks = imx8mp_isp_clks,
+       .clk_size = ARRAY_SIZE(imx8mp_isp_clks),
+       .isrs = imx8mp_isp_isrs,
+       .isr_size = ARRAY_SIZE(imx8mp_isp_isrs),
+       .isp_ver = RKISP1_V_IMX8MP,
+       .features = RKISP1_FEATURE_MAIN_STRIDE
+                 | RKISP1_FEATURE_DMA_34BIT,
 };
 
 static const struct of_device_id rkisp1_of_match[] = {
@@ -536,6 +562,10 @@ static const struct of_device_id rkisp1_of_match[] = {
                .compatible = "rockchip,rk3399-cif-isp",
                .data = &rk3399_isp_info,
        },
+       {
+               .compatible = "fsl,imx8mp-isp",
+               .data = &imx8mp_isp_info,
+       },
        {},
 };
 MODULE_DEVICE_TABLE(of, rkisp1_of_match);
@@ -547,6 +577,7 @@ static int rkisp1_probe(struct platform_device *pdev)
        struct rkisp1_device *rkisp1;
        struct v4l2_device *v4l2_dev;
        unsigned int i;
+       u64 dma_mask;
        int ret, irq;
        u32 cif_id;
 
@@ -560,6 +591,13 @@ static int rkisp1_probe(struct platform_device *pdev)
        dev_set_drvdata(dev, rkisp1);
        rkisp1->dev = dev;
 
+       dma_mask = rkisp1_has_feature(rkisp1, DMA_34BIT) ? DMA_BIT_MASK(34) :
+                                                          DMA_BIT_MASK(32);
+
+       ret = dma_set_mask_and_coherent(dev, dma_mask);
+       if (ret)
+               return ret;
+
        mutex_init(&rkisp1->stream_lock);
 
        rkisp1->base_addr = devm_platform_ioremap_resource(pdev, 0);
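
For clarity, DMA_BIT_MASK() is the standard helper from linux/dma-mapping.h, so DMA_BIT_MASK(34) covers the low 16 GiB of address space:

    #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

    /* DMA_BIT_MASK(34) == 0x3ffffffffULL: with the DMA_34BIT feature the
     * i.MX8MP ISP can address buffers in the low 16 GiB rather than 4 GiB. */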
@@ -596,6 +634,21 @@ static int rkisp1_probe(struct platform_device *pdev)
                return ret;
        rkisp1->clk_size = info->clk_size;
 
+       if (info->isp_ver == RKISP1_V_IMX8MP) {
+               unsigned int id;
+
+               rkisp1->gasket = syscon_regmap_lookup_by_phandle_args(dev->of_node,
+                                                                     "fsl,blk-ctrl",
+                                                                     1, &id);
+               if (IS_ERR(rkisp1->gasket)) {
+                       ret = PTR_ERR(rkisp1->gasket);
+                       dev_err(dev, "failed to get gasket: %d\n", ret);
+                       return ret;
+               }
+
+               rkisp1->gasket_id = id;
+       }
+
        pm_runtime_enable(&pdev->dev);
 
        ret = pm_runtime_resume_and_get(&pdev->dev);
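
The syscon lookup above implies a device tree property of the following shape. This is a sketch: the node name and address are hypothetical, and only the fsl,blk-ctrl property with its single argument cell is taken from the code:

    /*
     * isp0: isp@32e10000 {                         // hypothetical node
     *         compatible = "fsl,imx8mp-isp";
     *         fsl,blk-ctrl = <&media_blk_ctrl 0>;  // regmap phandle + gasket id
     * };
     *
     * syscon_regmap_lookup_by_phandle_args() resolves the phandle to the
     * block-control regmap and returns the one argument cell (0 or 1) in 'id'.
     */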
@@ -650,7 +703,7 @@ static int rkisp1_probe(struct platform_device *pdev)
 err_unreg_entities:
        rkisp1_entities_unregister(rkisp1);
 err_cleanup_csi:
-       if (rkisp1->info->features & RKISP1_FEATURE_MIPI_CSI2)
+       if (rkisp1_has_feature(rkisp1, MIPI_CSI2))
                rkisp1_csi_cleanup(rkisp1);
 err_unreg_media_dev:
        media_device_unregister(&rkisp1->media_dev);
@@ -671,7 +724,7 @@ static void rkisp1_remove(struct platform_device *pdev)
        v4l2_async_nf_cleanup(&rkisp1->notifier);
 
        rkisp1_entities_unregister(rkisp1);
-       if (rkisp1->info->features & RKISP1_FEATURE_MIPI_CSI2)
+       if (rkisp1_has_feature(rkisp1, MIPI_CSI2))
                rkisp1_csi_cleanup(rkisp1);
        rkisp1_debug_cleanup(rkisp1);
 
index 78a1f7a1499be84f15b94d75b30266dfb8c720ce..e45a213baf497118c72ba42b1c6fbf1fcb76eafb 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/iopoll.h>
 #include <linux/pm_runtime.h>
+#include <linux/regmap.h>
 #include <linux/videodev2.h>
 #include <linux/vmalloc.h>
 
  * +---------------------------------------------------------+
  */
 
+/* -----------------------------------------------------------------------------
+ * Media block control (i.MX8MP only)
+ */
+
+#define ISP_DEWARP_CONTROL                             0x0138
+
+#define ISP_DEWARP_CONTROL_MIPI_CSI2_HS_POLARITY       BIT(22)
+#define ISP_DEWARP_CONTROL_MIPI_CSI2_VS_SEL_RISING     (0 << 20)
+#define ISP_DEWARP_CONTROL_MIPI_CSI2_VS_SEL_NEGATIVE   (1 << 20)
+#define ISP_DEWARP_CONTROL_MIPI_CSI2_VS_SEL_POSITIVE   (2 << 20)
+#define ISP_DEWARP_CONTROL_MIPI_CSI2_VS_SEL_FALLING    (3 << 20)
+#define ISP_DEWARP_CONTROL_MIPI_CSI2_VS_SEL_MASK       GENMASK(21, 20)
+#define ISP_DEWARP_CONTROL_MIPI_ISP2_LEFT_JUST_MODE    BIT(19)
+#define ISP_DEWARP_CONTROL_MIPI_ISP2_DATA_TYPE(dt)     ((dt) << 13)
+#define ISP_DEWARP_CONTROL_MIPI_ISP2_DATA_TYPE_MASK    GENMASK(18, 13)
+
+#define ISP_DEWARP_CONTROL_MIPI_CSI1_HS_POLARITY       BIT(12)
+#define ISP_DEWARP_CONTROL_MIPI_CSI1_VS_SEL_RISING     (0 << 10)
+#define ISP_DEWARP_CONTROL_MIPI_CSI1_VS_SEL_NEGATIVE   (1 << 10)
+#define ISP_DEWARP_CONTROL_MIPI_CSI1_VS_SEL_POSITIVE   (2 << 10)
+#define ISP_DEWARP_CONTROL_MIPI_CSI1_VS_SEL_FALLING    (3 << 10)
+#define ISP_DEWARP_CONTROL_MIPI_CSI1_VS_SEL_MASK       GENMASK(11, 10)
+#define ISP_DEWARP_CONTROL_MIPI_ISP1_LEFT_JUST_MODE    BIT(9)
+#define ISP_DEWARP_CONTROL_MIPI_ISP1_DATA_TYPE(dt)     ((dt) << 3)
+#define ISP_DEWARP_CONTROL_MIPI_ISP1_DATA_TYPE_MASK    GENMASK(8, 3)
+
+#define ISP_DEWARP_CONTROL_GPR_ISP_1_DISABLE           BIT(1)
+#define ISP_DEWARP_CONTROL_GPR_ISP_0_DISABLE           BIT(0)
+
+static int rkisp1_gasket_enable(struct rkisp1_device *rkisp1,
+                               struct media_pad *source)
+{
+       struct v4l2_subdev *source_sd;
+       struct v4l2_mbus_frame_desc fd;
+       unsigned int dt;
+       u32 mask;
+       u32 val;
+       int ret;
+
+       /*
+        * Configure and enable the gasket with the CSI-2 data type. Set the
+        * vsync polarity as active high, as that is what the ISP is configured
+        * to expect in ISP_ACQ_PROP. Enable left justification, as the i.MX8MP
+        * ISP has a 16-bit wide input and expects data to be left-aligned.
+        */
+
+       source_sd = media_entity_to_v4l2_subdev(source->entity);
+       ret = v4l2_subdev_call(source_sd, pad, get_frame_desc,
+                              source->index, &fd);
+       if (ret) {
+               dev_err(rkisp1->dev,
+                       "failed to get frame descriptor from '%s':%u: %d\n",
+                       source_sd->name, 0, ret);
+               return ret;
+       }
+
+       if (fd.num_entries != 1) {
+               dev_err(rkisp1->dev, "invalid frame descriptor for '%s':%u\n",
+                       source_sd->name, 0);
+               return -EINVAL;
+       }
+
+       dt = fd.entry[0].bus.csi2.dt;
+
+       if (rkisp1->gasket_id == 0) {
+               mask = ISP_DEWARP_CONTROL_MIPI_CSI1_HS_POLARITY
+                    | ISP_DEWARP_CONTROL_MIPI_CSI1_VS_SEL_MASK
+                    | ISP_DEWARP_CONTROL_MIPI_ISP1_LEFT_JUST_MODE
+                    | ISP_DEWARP_CONTROL_MIPI_ISP1_DATA_TYPE_MASK
+                    | ISP_DEWARP_CONTROL_GPR_ISP_0_DISABLE;
+               val = ISP_DEWARP_CONTROL_MIPI_CSI1_VS_SEL_POSITIVE
+                   | ISP_DEWARP_CONTROL_MIPI_ISP1_LEFT_JUST_MODE
+                   | ISP_DEWARP_CONTROL_MIPI_ISP1_DATA_TYPE(dt);
+       } else {
+               mask = ISP_DEWARP_CONTROL_MIPI_CSI2_HS_POLARITY
+                    | ISP_DEWARP_CONTROL_MIPI_CSI2_VS_SEL_MASK
+                    | ISP_DEWARP_CONTROL_MIPI_ISP2_LEFT_JUST_MODE
+                    | ISP_DEWARP_CONTROL_MIPI_ISP2_DATA_TYPE_MASK
+                    | ISP_DEWARP_CONTROL_GPR_ISP_1_DISABLE;
+               val = ISP_DEWARP_CONTROL_MIPI_CSI2_VS_SEL_POSITIVE
+                   | ISP_DEWARP_CONTROL_MIPI_ISP2_LEFT_JUST_MODE
+                   | ISP_DEWARP_CONTROL_MIPI_ISP2_DATA_TYPE(dt);
+       }
+
+       regmap_update_bits(rkisp1->gasket, ISP_DEWARP_CONTROL, mask, val);
+
+       return 0;
+}
+
+static void rkisp1_gasket_disable(struct rkisp1_device *rkisp1)
+{
+       u32 mask;
+       u32 val;
+
+       if (rkisp1->gasket_id == 1) {
+               mask = ISP_DEWARP_CONTROL_MIPI_ISP2_LEFT_JUST_MODE
+                    | ISP_DEWARP_CONTROL_MIPI_ISP2_DATA_TYPE_MASK
+                    | ISP_DEWARP_CONTROL_GPR_ISP_1_DISABLE;
+               val = ISP_DEWARP_CONTROL_GPR_ISP_1_DISABLE;
+       } else {
+               mask = ISP_DEWARP_CONTROL_MIPI_ISP1_LEFT_JUST_MODE
+                    | ISP_DEWARP_CONTROL_MIPI_ISP1_DATA_TYPE_MASK
+                    | ISP_DEWARP_CONTROL_GPR_ISP_0_DISABLE;
+               val = ISP_DEWARP_CONTROL_GPR_ISP_0_DISABLE;
+       }
+
+       regmap_update_bits(rkisp1->gasket, ISP_DEWARP_CONTROL, mask, val);
+}
+
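
Both gasket helpers rely on regmap_update_bits(), a locked read-modify-write. Roughly, ignoring the caching and locking of the real implementation, it behaves like this sketch:

    #include <linux/regmap.h>

    static int update_bits_sketch(struct regmap *map, unsigned int reg,
                                  unsigned int mask, unsigned int val)
    {
            unsigned int orig;
            int ret;

            ret = regmap_read(map, reg, &orig);
            if (ret)
                    return ret;

            /* Clear the masked bits, then set the requested ones. */
            return regmap_write(map, reg, (orig & ~mask) | (val & mask));
    }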
 /* ----------------------------------------------------------------------------
  * Camera Interface registers configurations
  */
@@ -291,6 +401,9 @@ static void rkisp1_isp_stop(struct rkisp1_isp *isp)
                     RKISP1_CIF_VI_IRCL_MIPI_SW_RST |
                     RKISP1_CIF_VI_IRCL_ISP_SW_RST);
        rkisp1_write(rkisp1, RKISP1_CIF_VI_IRCL, 0x0);
+
+       if (rkisp1->info->isp_ver == RKISP1_V_IMX8MP)
+               rkisp1_gasket_disable(rkisp1);
 }
 
 static void rkisp1_config_clk(struct rkisp1_isp *isp)
@@ -315,16 +428,24 @@ static void rkisp1_config_clk(struct rkisp1_isp *isp)
        }
 }
 
-static void rkisp1_isp_start(struct rkisp1_isp *isp,
-                            struct v4l2_subdev_state *sd_state)
+static int rkisp1_isp_start(struct rkisp1_isp *isp,
+                           struct v4l2_subdev_state *sd_state,
+                           struct media_pad *source)
 {
        struct rkisp1_device *rkisp1 = isp->rkisp1;
        const struct v4l2_mbus_framefmt *src_fmt;
        const struct rkisp1_mbus_info *src_info;
        u32 val;
+       int ret;
 
        rkisp1_config_clk(isp);
 
+       if (rkisp1->info->isp_ver == RKISP1_V_IMX8MP) {
+               ret = rkisp1_gasket_enable(rkisp1, source);
+               if (ret)
+                       return ret;
+       }
+
        /* Activate ISP */
        val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_CTRL);
        val |= RKISP1_CIF_ISP_CTRL_ISP_CFG_UPD |
@@ -338,6 +459,8 @@ static void rkisp1_isp_start(struct rkisp1_isp *isp,
 
        if (src_info->pixel_enc != V4L2_PIXEL_ENC_BAYER)
                rkisp1_params_post_configure(&rkisp1->params);
+
+       return 0;
 }
 
 /* ----------------------------------------------------------------------------
@@ -848,7 +971,9 @@ static int rkisp1_isp_s_stream(struct v4l2_subdev *sd, int enable)
        if (ret)
                goto out_unlock;
 
-       rkisp1_isp_start(isp, sd_state);
+       ret = rkisp1_isp_start(isp, sd_state, source_pad);
+       if (ret)
+               goto out_unlock;
 
        ret = v4l2_subdev_call(rkisp1->source, video, s_stream, true);
        if (ret) {
index bea69a0d766acc9af15108b45f42dd76dfab794a..fccf4c17ee8d0a6a2ad430ff67768dd76de8bee2 100644 (file)
 /* MI_INIT */
 #define RKISP1_CIF_MI_INIT_SKIP                                BIT(2)
 #define RKISP1_CIF_MI_INIT_SOFT_UPD                    BIT(4)
+#define RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV400            (0 << 5)
+#define RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV420            (1 << 5)
+#define RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV422            (2 << 5)
+#define RKISP1_CIF_MI_INIT_MP_OUTPUT_YUV444            (3 << 5)
+#define RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW12             (4 << 5)
+#define RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW8              (5 << 5)
+#define RKISP1_CIF_MI_INIT_MP_OUTPUT_JPEG              (6 << 5)
+#define RKISP1_CIF_MI_INIT_MP_OUTPUT_RAW10             (7 << 5)
+#define RKISP1_CIF_MI_INIT_MP_OUTPUT_MASK              (15 << 5)
 
 /* MI_CTRL_SHD */
 #define RKISP1_CIF_MI_CTRL_SHD_MP_IN_ENABLED           BIT(0)
 #define RKISP1_CIF_MI_XTD_FMT_CTRL_SP_CB_CR_SWAP       BIT(1)
 #define RKISP1_CIF_MI_XTD_FMT_CTRL_DMA_CB_CR_SWAP      BIT(2)
 
+/* MI_OUTPUT_ALIGN_FORMAT */
+#define RKISP1_CIF_OUTPUT_ALIGN_FORMAT_MP_LSB_ALIGNMENT                        BIT(0)
+#define RKISP1_CIF_OUTPUT_ALIGN_FORMAT_MP_BYTE_SWAP_BYTES              BIT(1)
+#define RKISP1_CIF_OUTPUT_ALIGN_FORMAT_MP_BYTE_SWAP_WORDS              BIT(2)
+#define RKISP1_CIF_OUTPUT_ALIGN_FORMAT_MP_BYTE_SWAP_DWORDS             BIT(3)
+#define RKISP1_CIF_OUTPUT_ALIGN_FORMAT_SP_BYTE_SWAP_BYTES              BIT(4)
+#define RKISP1_CIF_OUTPUT_ALIGN_FORMAT_SP_BYTE_SWAP_WORDS              BIT(5)
+#define RKISP1_CIF_OUTPUT_ALIGN_FORMAT_SP_BYTE_SWAP_DWORDS             BIT(6)
+#define RKISP1_CIF_OUTPUT_ALIGN_FORMAT_DMA_BYTE_SWAP_BYTES             BIT(7)
+#define RKISP1_CIF_OUTPUT_ALIGN_FORMAT_DMA_BYTE_SWAP_WORDS             BIT(8)
+#define RKISP1_CIF_OUTPUT_ALIGN_FORMAT_DMA_BYTE_SWAP_DWORDS            BIT(9)
+
+/* MI_MP_OUTPUT_FIFO_SIZE */
+#define RKISP1_CIF_MI_MP_OUTPUT_FIFO_SIZE_OUTPUT_FIFO_DEPTH_FULL       (0 << 0)
+#define RKISP1_CIF_MI_MP_OUTPUT_FIFO_SIZE_OUTPUT_FIFO_DEPTH_HALF       (1 << 0)
+#define RKISP1_CIF_MI_MP_OUTPUT_FIFO_SIZE_OUTPUT_FIFO_DEPTH_QUARTER    (2 << 0)
+#define RKISP1_CIF_MI_MP_OUTPUT_FIFO_SIZE_OUTPUT_FIFO_DEPTH_EIGHT      (3 << 0)
+
 /* VI_CCL */
 #define RKISP1_CIF_CCL_CIF_CLK_DIS                     BIT(2)
 /* VI_ISP_CLK_CTRL */
 #define RKISP1_CIF_MI_SP_CB_BASE_AD_INIT2      (RKISP1_CIF_MI_BASE + 0x00000140)
 #define RKISP1_CIF_MI_SP_CR_BASE_AD_INIT2      (RKISP1_CIF_MI_BASE + 0x00000144)
 #define RKISP1_CIF_MI_XTD_FORMAT_CTRL          (RKISP1_CIF_MI_BASE + 0x00000148)
+#define RKISP1_CIF_MI_MP_HANDSHAKE_0           (RKISP1_CIF_MI_BASE + 0x0000014C)
+#define RKISP1_CIF_MI_MP_Y_LLENGTH             (RKISP1_CIF_MI_BASE + 0x00000150)
+#define RKISP1_CIF_MI_MP_Y_SLICE_OFFSET                (RKISP1_CIF_MI_BASE + 0x00000154)
+#define RKISP1_CIF_MI_MP_C_SLICE_OFFSET                (RKISP1_CIF_MI_BASE + 0x00000158)
+#define RKISP1_CIF_MI_OUTPUT_ALIGN_FORMAT      (RKISP1_CIF_MI_BASE + 0x0000015C)
+#define RKISP1_CIF_MI_MP_OUTPUT_FIFO_SIZE      (RKISP1_CIF_MI_BASE + 0x00000160)
+#define RKISP1_CIF_MI_MP_Y_PIC_WIDTH           (RKISP1_CIF_MI_BASE + 0x00000164)
+#define RKISP1_CIF_MI_MP_Y_PIC_HEIGHT          (RKISP1_CIF_MI_BASE + 0x00000168)
+#define RKISP1_CIF_MI_MP_Y_PIC_SIZE            (RKISP1_CIF_MI_BASE + 0x0000016C)
 
 #define RKISP1_CIF_SMIA_BASE                   0x00001a00
 #define RKISP1_CIF_SMIA_CTRL                   (RKISP1_CIF_SMIA_BASE + 0x00000000)
index a8e3777013023ee4e79a6243b943d9079088474b..6f3931ca5b51aa34c5cf73c455f0e0c05ca8c569 100644 (file)
@@ -444,11 +444,12 @@ static void rkisp1_rsz_set_sink_crop(struct rkisp1_resizer *rsz,
        sink_fmt = v4l2_subdev_state_get_format(sd_state, RKISP1_RSZ_PAD_SINK);
        sink_crop = v4l2_subdev_state_get_crop(sd_state, RKISP1_RSZ_PAD_SINK);
 
-       /* Not crop for MP bayer raw data */
+       /* Do not crop for MP Bayer raw data, or for devices lacking dual crop. */
        mbus_info = rkisp1_mbus_info_get_by_code(sink_fmt->code);
 
-       if (rsz->id == RKISP1_MAINPATH &&
-           mbus_info->pixel_enc == V4L2_PIXEL_ENC_BAYER) {
+       if ((rsz->id == RKISP1_MAINPATH &&
+            mbus_info->pixel_enc == V4L2_PIXEL_ENC_BAYER) ||
+           !rkisp1_has_feature(rsz->rkisp1, DUAL_CROP)) {
                sink_crop->left = 0;
                sink_crop->top = 0;
                sink_crop->width = sink_fmt->width;
@@ -631,21 +632,24 @@ static int rkisp1_rsz_s_stream(struct v4l2_subdev *sd, int enable)
        struct rkisp1_device *rkisp1 = rsz->rkisp1;
        struct rkisp1_capture *other = &rkisp1->capture_devs[rsz->id ^ 1];
        enum rkisp1_shadow_regs_when when = RKISP1_SHADOW_REGS_SYNC;
+       bool has_self_path = rkisp1_has_feature(rkisp1, SELF_PATH);
        struct v4l2_subdev_state *sd_state;
 
        if (!enable) {
-               rkisp1_dcrop_disable(rsz, RKISP1_SHADOW_REGS_ASYNC);
+               if (rkisp1_has_feature(rkisp1, DUAL_CROP))
+                       rkisp1_dcrop_disable(rsz, RKISP1_SHADOW_REGS_ASYNC);
                rkisp1_rsz_disable(rsz, RKISP1_SHADOW_REGS_ASYNC);
                return 0;
        }
 
-       if (other->is_streaming)
+       if (has_self_path && other->is_streaming)
                when = RKISP1_SHADOW_REGS_ASYNC;
 
        sd_state = v4l2_subdev_lock_and_get_active_state(sd);
 
        rkisp1_rsz_config(rsz, sd_state, when);
-       rkisp1_dcrop_config(rsz, sd_state);
+       if (rkisp1_has_feature(rkisp1, DUAL_CROP))
+               rkisp1_dcrop_config(rsz, sd_state);
 
        v4l2_subdev_unlock_state(sd_state);
 
@@ -731,10 +735,11 @@ err_entity_cleanup:
 
 int rkisp1_resizer_devs_register(struct rkisp1_device *rkisp1)
 {
+       unsigned int dev_count = rkisp1_path_count(rkisp1);
        unsigned int i;
        int ret;
 
-       for (i = 0; i < ARRAY_SIZE(rkisp1->resizer_devs); i++) {
+       for (i = 0; i < dev_count; i++) {
                struct rkisp1_resizer *rsz = &rkisp1->resizer_devs[i];
 
                rsz->rkisp1 = rkisp1;
index 05cafba1c7287adbf65fda31d80330fc4009d23d..ffa4ea21387da39136d9dcfdb02a6a7cfe6edebc 100644 (file)
@@ -180,7 +180,7 @@ void fimc_capture_irq_handler(struct fimc_dev *fimc, int deq_buf)
        struct fimc_vid_cap *cap = &fimc->vid_cap;
        struct fimc_pipeline *p = to_fimc_pipeline(cap->ve.pipe);
        struct v4l2_subdev *csis = p->subdevs[IDX_CSIS];
-       struct fimc_frame *f = &cap->ctx->d_frame;
+       const struct fimc_frame *f = &cap->ctx->d_frame;
        struct fimc_vid_buffer *v_buf;
 
        if (test_and_clear_bit(ST_CAPT_SHUT, &fimc->state)) {
@@ -342,8 +342,8 @@ static int queue_setup(struct vb2_queue *vq,
                       unsigned int sizes[], struct device *alloc_devs[])
 {
        struct fimc_ctx *ctx = vq->drv_priv;
-       struct fimc_frame *frame = &ctx->d_frame;
-       struct fimc_fmt *fmt = frame->fmt;
+       const struct fimc_frame *frame = &ctx->d_frame;
+       const struct fimc_fmt *fmt = frame->fmt;
        unsigned long wh = frame->f_width * frame->f_height;
        int i;
 
@@ -559,18 +559,18 @@ static const struct v4l2_file_operations fimc_capture_fops = {
  * Format and crop negotiation helpers
  */
 
-static struct fimc_fmt *fimc_capture_try_format(struct fimc_ctx *ctx,
-                                               u32 *width, u32 *height,
-                                               u32 *code, u32 *fourcc, int pad)
+static const struct fimc_fmt *fimc_capture_try_format(struct fimc_ctx *ctx,
+                                                     u32 *width, u32 *height,
+                                                     u32 *code, u32 *fourcc, int pad)
 {
        bool rotation = ctx->rotation == 90 || ctx->rotation == 270;
        struct fimc_dev *fimc = ctx->fimc_dev;
        const struct fimc_variant *var = fimc->variant;
        const struct fimc_pix_limit *pl = var->pix_limit;
-       struct fimc_frame *dst = &ctx->d_frame;
+       const struct fimc_frame *dst = &ctx->d_frame;
        u32 depth, min_w, max_w, min_h, align_h = 3;
+       const struct fimc_fmt *ffmt;
        u32 mask = FMT_FLAGS_CAM;
-       struct fimc_fmt *ffmt;
 
        /* Conversion from/to JPEG or User Defined format is not supported */
        if (code && ctx->s_frame.fmt && pad == FIMC_SD_PAD_SOURCE &&
@@ -644,7 +644,7 @@ static void fimc_capture_try_selection(struct fimc_ctx *ctx,
        struct fimc_dev *fimc = ctx->fimc_dev;
        const struct fimc_variant *var = fimc->variant;
        const struct fimc_pix_limit *pl = var->pix_limit;
-       struct fimc_frame *sink = &ctx->s_frame;
+       const struct fimc_frame *sink = &ctx->s_frame;
        u32 max_w, max_h, min_w = 0, min_h = 0, min_sz;
        u32 align_sz = 0, align_h = 4;
        u32 max_sc_h, max_sc_v;
@@ -722,7 +722,7 @@ static int fimc_cap_querycap(struct file *file, void *priv,
 static int fimc_cap_enum_fmt(struct file *file, void *priv,
                             struct v4l2_fmtdesc *f)
 {
-       struct fimc_fmt *fmt;
+       const struct fimc_fmt *fmt;
 
        fmt = fimc_find_format(NULL, NULL, FMT_FLAGS_CAM | FMT_FLAGS_M2M,
                               f->index);
@@ -757,7 +757,7 @@ static struct media_entity *fimc_pipeline_get_head(struct media_entity *me)
  */
 static int fimc_pipeline_try_format(struct fimc_ctx *ctx,
                                    struct v4l2_mbus_framefmt *tfmt,
-                                   struct fimc_fmt **fmt_id,
+                                   const struct fimc_fmt **fmt_id,
                                    bool set)
 {
        struct fimc_dev *fimc = ctx->fimc_dev;
@@ -768,8 +768,8 @@ static int fimc_pipeline_try_format(struct fimc_ctx *ctx,
                       : V4L2_SUBDEV_FORMAT_TRY,
        };
        struct v4l2_mbus_framefmt *mf = &sfmt.format;
+       const struct fimc_fmt *ffmt;
        struct media_entity *me;
-       struct fimc_fmt *ffmt;
        struct media_pad *pad;
        int ret, i = 1;
        u32 fcc;
@@ -903,8 +903,8 @@ static int fimc_cap_g_fmt_mplane(struct file *file, void *fh,
  */
 static int __video_try_or_set_format(struct fimc_dev *fimc,
                                     struct v4l2_format *f, bool try,
-                                    struct fimc_fmt **inp_fmt,
-                                    struct fimc_fmt **out_fmt)
+                                    const struct fimc_fmt **inp_fmt,
+                                    const struct fimc_fmt **out_fmt)
 {
        struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
        struct fimc_vid_cap *vc = &fimc->vid_cap;
@@ -986,7 +986,7 @@ static int fimc_cap_try_fmt_mplane(struct file *file, void *fh,
                                   struct v4l2_format *f)
 {
        struct fimc_dev *fimc = video_drvdata(file);
-       struct fimc_fmt *out_fmt = NULL, *inp_fmt = NULL;
+       const struct fimc_fmt *out_fmt = NULL, *inp_fmt = NULL;
 
        return __video_try_or_set_format(fimc, f, true, &inp_fmt, &out_fmt);
 }
@@ -1010,9 +1010,9 @@ static int __fimc_capture_set_format(struct fimc_dev *fimc,
 {
        struct fimc_vid_cap *vc = &fimc->vid_cap;
        struct fimc_ctx *ctx = vc->ctx;
-       struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
+       const struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
        struct fimc_frame *ff = &ctx->d_frame;
-       struct fimc_fmt *inp_fmt = NULL;
+       const struct fimc_fmt *inp_fmt = NULL;
        int ret, i;
 
        if (vb2_is_busy(&fimc->vid_cap.vbq))
@@ -1132,7 +1132,7 @@ static int fimc_pipeline_validate(struct fimc_dev *fimc)
 
                /* Don't call FIMC subdev operation to avoid nested locking */
                if (sd == &vc->subdev) {
-                       struct fimc_frame *ff = &vc->ctx->s_frame;
+                       const struct fimc_frame *ff = &vc->ctx->s_frame;
                        sink_fmt.format.width = ff->f_width;
                        sink_fmt.format.height = ff->f_height;
                        sink_fmt.format.code = ff->fmt ? ff->fmt->mbus_code : 0;
@@ -1158,7 +1158,7 @@ static int fimc_pipeline_validate(struct fimc_dev *fimc)
                if (sd == p->subdevs[IDX_SENSOR] &&
                    fimc_user_defined_mbus_fmt(src_fmt.format.code)) {
                        struct v4l2_plane_pix_format plane_fmt[FIMC_MAX_PLANES];
-                       struct fimc_frame *frame = &vc->ctx->d_frame;
+                       const struct fimc_frame *frame = &vc->ctx->d_frame;
                        unsigned int i;
 
                        ret = fimc_get_sensor_frame_desc(sd, plane_fmt,
@@ -1263,7 +1263,7 @@ static int fimc_cap_g_selection(struct file *file, void *fh,
 {
        struct fimc_dev *fimc = video_drvdata(file);
        struct fimc_ctx *ctx = fimc->vid_cap.ctx;
-       struct fimc_frame *f = &ctx->s_frame;
+       const struct fimc_frame *f = &ctx->s_frame;
 
        if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
                return -EINVAL;
@@ -1460,7 +1460,7 @@ static int fimc_subdev_enum_mbus_code(struct v4l2_subdev *sd,
                                      struct v4l2_subdev_state *sd_state,
                                      struct v4l2_subdev_mbus_code_enum *code)
 {
-       struct fimc_fmt *fmt;
+       const struct fimc_fmt *fmt;
 
        fmt = fimc_find_format(NULL, NULL, FMT_FLAGS_CAM, code->index);
        if (!fmt)
@@ -1475,7 +1475,7 @@ static int fimc_subdev_get_fmt(struct v4l2_subdev *sd,
 {
        struct fimc_dev *fimc = v4l2_get_subdevdata(sd);
        struct fimc_ctx *ctx = fimc->vid_cap.ctx;
-       struct fimc_frame *ff = &ctx->s_frame;
+       const struct fimc_frame *ff = &ctx->s_frame;
        struct v4l2_mbus_framefmt *mf;
 
        if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
@@ -1519,7 +1519,7 @@ static int fimc_subdev_set_fmt(struct v4l2_subdev *sd,
        struct fimc_vid_cap *vc = &fimc->vid_cap;
        struct fimc_ctx *ctx = vc->ctx;
        struct fimc_frame *ff;
-       struct fimc_fmt *ffmt;
+       const struct fimc_fmt *ffmt;
 
        dbg("pad%d: code: 0x%x, %dx%d",
            fmt->pad, mf->code, mf->width, mf->height);
@@ -1582,7 +1582,7 @@ static int fimc_subdev_get_selection(struct v4l2_subdev *sd,
 {
        struct fimc_dev *fimc = v4l2_get_subdevdata(sd);
        struct fimc_ctx *ctx = fimc->vid_cap.ctx;
-       struct fimc_frame *f = &ctx->s_frame;
+       const struct fimc_frame *f = &ctx->s_frame;
        struct v4l2_rect *r = &sel->r;
        struct v4l2_rect *try_sel;
 
@@ -1715,9 +1715,9 @@ static int fimc_register_capture_device(struct fimc_dev *fimc,
 {
        struct video_device *vfd = &fimc->vid_cap.ve.vdev;
        struct vb2_queue *q = &fimc->vid_cap.vbq;
-       struct fimc_ctx *ctx;
        struct fimc_vid_cap *vid_cap;
-       struct fimc_fmt *fmt;
+       const struct fimc_fmt *fmt;
+       struct fimc_ctx *ctx;
        int ret = -ENOMEM;
 
        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
index 0be687b01ce5aebcfc5a4fe0efd882d20d7d263b..aae74b501a42d2d1591d65b1820f02696f409223 100644 (file)
 #include "fimc-reg.h"
 #include "media-dev.h"
 
-static char *fimc_clocks[MAX_FIMC_CLOCKS] = {
+static const char *fimc_clocks[MAX_FIMC_CLOCKS] = {
        "sclk_fimc", "fimc"
 };
 
-static struct fimc_fmt fimc_formats[] = {
+static const struct fimc_fmt fimc_formats[] = {
        {
                .fourcc         = V4L2_PIX_FMT_RGB565,
                .depth          = { 16 },
@@ -180,7 +180,7 @@ static struct fimc_fmt fimc_formats[] = {
        },
 };
 
-struct fimc_fmt *fimc_get_format(unsigned int index)
+const struct fimc_fmt *fimc_get_format(unsigned int index)
 {
        if (index >= ARRAY_SIZE(fimc_formats))
                return NULL;
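
This constification is mechanical but load-bearing: once fimc_formats[] lives in read-only data, every pointer that can reach it must be const-qualified, which is why the fimc_fmt pointers throughout the driver change type. A sketch of the compile-time effect:

    void fmt_const_example(void)
    {
            const struct fimc_fmt *ok = fimc_get_format(0);  /* fine */
            struct fimc_fmt *bad = fimc_get_format(0);       /* now rejected:
                                                                discards const */
            (void)ok;
            (void)bad;
    }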
@@ -228,8 +228,8 @@ int fimc_set_scaler_info(struct fimc_ctx *ctx)
        const struct fimc_variant *variant = ctx->fimc_dev->variant;
        struct device *dev = &ctx->fimc_dev->pdev->dev;
        struct fimc_scaler *sc = &ctx->scaler;
-       struct fimc_frame *s_frame = &ctx->s_frame;
-       struct fimc_frame *d_frame = &ctx->d_frame;
+       const struct fimc_frame *s_frame = &ctx->s_frame;
+       const struct fimc_frame *d_frame = &ctx->d_frame;
        int tx, ty, sx, sy;
        int ret;
 
@@ -326,7 +326,7 @@ out:
 
 /* The color format (colplanes, memplanes) must be already configured. */
 int fimc_prepare_addr(struct fimc_ctx *ctx, struct vb2_buffer *vb,
-                     struct fimc_frame *frame, struct fimc_addr *addr)
+                     const struct fimc_frame *frame, struct fimc_addr *addr)
 {
        int ret = 0;
        u32 pix_size;
@@ -670,7 +670,7 @@ void fimc_alpha_ctrl_update(struct fimc_ctx *ctx)
        v4l2_ctrl_unlock(ctrl);
 }
 
-void __fimc_get_format(struct fimc_frame *frame, struct v4l2_format *f)
+void __fimc_get_format(const struct fimc_frame *frame, struct v4l2_format *f)
 {
        struct v4l2_pix_format_mplane *pixm = &f->fmt.pix_mp;
        int i;
@@ -695,7 +695,7 @@ void __fimc_get_format(struct fimc_frame *frame, struct v4l2_format *f)
  * @height: requested pixel height
  * @pix: multi-plane format to adjust
  */
-void fimc_adjust_mplane_format(struct fimc_fmt *fmt, u32 width, u32 height,
+void fimc_adjust_mplane_format(const struct fimc_fmt *fmt, u32 width, u32 height,
                               struct v4l2_pix_format_mplane *pix)
 {
        u32 bytesperline = 0;
@@ -752,10 +752,11 @@ void fimc_adjust_mplane_format(struct fimc_fmt *fmt, u32 width, u32 height,
  * @mask: the color flags to match
  * @index: offset in the fimc_formats array, ignored if negative
  */
-struct fimc_fmt *fimc_find_format(const u32 *pixelformat, const u32 *mbus_code,
-                                 unsigned int mask, int index)
+const struct fimc_fmt *fimc_find_format(const u32 *pixelformat,
+                                       const u32 *mbus_code,
+                                       unsigned int mask, int index)
 {
-       struct fimc_fmt *fmt, *def_fmt = NULL;
+       const struct fimc_fmt *fmt, *def_fmt = NULL;
        unsigned int i;
        int id = 0;
 
index 2b0760add092937e135dedfc14e2813368de8baf..63385152a2ffe82e9871497733971cfc8a1a1238 100644 (file)
@@ -257,7 +257,7 @@ struct fimc_frame {
        unsigned int            bytesperline[VIDEO_MAX_PLANES];
        struct fimc_addr        addr;
        struct fimc_dma_offset  dma_offset;
-       struct fimc_fmt         *fmt;
+       const struct fimc_fmt   *fmt;
        u8                      alpha;
 };
 
@@ -515,7 +515,7 @@ static inline void set_frame_crop(struct fimc_frame *f,
        f->height = height;
 }
 
-static inline u32 fimc_get_format_depth(struct fimc_fmt *ff)
+static inline u32 fimc_get_format_depth(const struct fimc_fmt *ff)
 {
        u32 i, depth = 0;
 
@@ -557,7 +557,7 @@ static inline bool fimc_ctx_state_is_set(u32 mask, struct fimc_ctx *ctx)
        return ret;
 }
 
-static inline int tiled_fmt(struct fimc_fmt *fmt)
+static inline int tiled_fmt(const struct fimc_fmt *fmt)
 {
        return fmt->fourcc == V4L2_PIX_FMT_NV12MT;
 }
@@ -575,7 +575,7 @@ static inline bool fimc_user_defined_mbus_fmt(u32 code)
 }
 
 /* Return the alpha component bit mask */
-static inline int fimc_get_alpha_mask(struct fimc_fmt *fmt)
+static inline int fimc_get_alpha_mask(const struct fimc_fmt *fmt)
 {
        switch (fmt->color) {
        case FIMC_FMT_RGB444:   return 0x0f;
@@ -610,25 +610,24 @@ static inline struct fimc_frame *ctx_get_frame(struct fimc_ctx *ctx,
 
 /* -----------------------------------------------------*/
 /* fimc-core.c */
-int fimc_vidioc_enum_fmt_mplane(struct file *file, void *priv,
-                               struct v4l2_fmtdesc *f);
 int fimc_ctrls_create(struct fimc_ctx *ctx);
 void fimc_ctrls_delete(struct fimc_ctx *ctx);
 void fimc_ctrls_activate(struct fimc_ctx *ctx, bool active);
 void fimc_alpha_ctrl_update(struct fimc_ctx *ctx);
-void __fimc_get_format(struct fimc_frame *frame, struct v4l2_format *f);
-void fimc_adjust_mplane_format(struct fimc_fmt *fmt, u32 width, u32 height,
+void __fimc_get_format(const struct fimc_frame *frame, struct v4l2_format *f);
+void fimc_adjust_mplane_format(const struct fimc_fmt *fmt, u32 width, u32 height,
                               struct v4l2_pix_format_mplane *pix);
-struct fimc_fmt *fimc_find_format(const u32 *pixelformat, const u32 *mbus_code,
-                                 unsigned int mask, int index);
-struct fimc_fmt *fimc_get_format(unsigned int index);
+const struct fimc_fmt *fimc_find_format(const u32 *pixelformat,
+                                       const u32 *mbus_code,
+                                       unsigned int mask, int index);
+const struct fimc_fmt *fimc_get_format(unsigned int index);
 
 int fimc_check_scaler_ratio(struct fimc_ctx *ctx, int sw, int sh,
                            int dw, int dh, int rotation);
 int fimc_set_scaler_info(struct fimc_ctx *ctx);
 int fimc_prepare_config(struct fimc_ctx *ctx, u32 flags);
 int fimc_prepare_addr(struct fimc_ctx *ctx, struct vb2_buffer *vb,
-                     struct fimc_frame *frame, struct fimc_addr *addr);
+                     const struct fimc_frame *frame, struct fimc_addr *addr);
 void fimc_prepare_dma_offset(struct fimc_ctx *ctx, struct fimc_frame *f);
 void fimc_set_yuv_order(struct fimc_ctx *ctx);
 void fimc_capture_irq_handler(struct fimc_dev *fimc, int deq_buf);
index a08c87ef6e2d87c8f2adb59442d3f4d68df34403..39aab667910dec3836e07db3114ed70e90ef53f2 100644 (file)
@@ -175,7 +175,7 @@ static int fimc_is_parse_sensor_config(struct fimc_is *is, unsigned int index,
                return -EINVAL;
        }
 
-       ep = of_graph_get_next_endpoint(node, NULL);
+       ep = of_graph_get_endpoint_by_regs(node, 0, -1);
        if (!ep)
                return -ENXIO;
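
The replacement is behavior-preserving for the common single-port case, but more explicit; a note on the new helper (signature from linux/of_graph.h):

    /*
     * of_graph_get_endpoint_by_regs(node, 0, -1) returns the endpoint under
     * port@0 with any endpoint reg value (-1 acts as a wildcard), whereas
     * of_graph_get_next_endpoint(node, NULL) simply returned the first
     * endpoint found, whatever its port.
     */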
 
index 8fa26969c4111ecdb97b1174f579187df6520add..06c4352562b38fb7cce395b31698f3798b06bc77 100644 (file)
@@ -40,7 +40,7 @@ static int isp_video_capture_queue_setup(struct vb2_queue *vq,
                        unsigned int sizes[], struct device *alloc_devs[])
 {
        struct fimc_isp *isp = vb2_get_drv_priv(vq);
-       struct v4l2_pix_format_mplane *vid_fmt = &isp->video_capture.pixfmt;
+       const struct v4l2_pix_format_mplane *vid_fmt = &isp->video_capture.pixfmt;
        const struct fimc_fmt *fmt = isp->video_capture.format;
        unsigned int wh, i;
 
index 57996b4104b46f59938fc71138bb3bf269e50a25..2483277a6cb0e1f0bb754bc90318aba0938d42c6 100644 (file)
@@ -124,7 +124,7 @@ static const u32 src_pixfmt_map[8][3] = {
 };
 
 /* Set camera input pixel format and resolution */
-void flite_hw_set_source_format(struct fimc_lite *dev, struct flite_frame *f)
+void flite_hw_set_source_format(struct fimc_lite *dev, const struct flite_frame *f)
 {
        u32 pixelcode = f->fmt->mbus_code;
        int i = ARRAY_SIZE(src_pixfmt_map);
@@ -155,7 +155,7 @@ void flite_hw_set_source_format(struct fimc_lite *dev, struct flite_frame *f)
 }
 
 /* Set the camera host input window offsets (cropping) */
-void flite_hw_set_window_offset(struct fimc_lite *dev, struct flite_frame *f)
+void flite_hw_set_window_offset(struct fimc_lite *dev, const struct flite_frame *f)
 {
        u32 hoff2, voff2;
        u32 cfg;
@@ -186,7 +186,7 @@ static void flite_hw_set_camera_port(struct fimc_lite *dev, int id)
 
 /* Select serial or parallel bus, camera port (A,B) and set signals polarity */
 void flite_hw_set_camera_bus(struct fimc_lite *dev,
-                            struct fimc_source_info *si)
+                            const struct fimc_source_info *si)
 {
        u32 cfg = readl(dev->regs + FLITE_REG_CIGCTRL);
        unsigned int flags = si->flags;
@@ -226,7 +226,8 @@ static void flite_hw_set_pack12(struct fimc_lite *dev, int on)
        writel(cfg, dev->regs + FLITE_REG_CIODMAFMT);
 }
 
-static void flite_hw_set_out_order(struct fimc_lite *dev, struct flite_frame *f)
+static void flite_hw_set_out_order(struct fimc_lite *dev,
+                                  const struct flite_frame *f)
 {
        static const u32 pixcode[4][2] = {
                { MEDIA_BUS_FMT_YUYV8_2X8, FLITE_REG_CIODMAFMT_YCBYCR },
@@ -244,7 +245,7 @@ static void flite_hw_set_out_order(struct fimc_lite *dev, struct flite_frame *f)
        writel(cfg | pixcode[i][1], dev->regs + FLITE_REG_CIODMAFMT);
 }
 
-void flite_hw_set_dma_window(struct fimc_lite *dev, struct flite_frame *f)
+void flite_hw_set_dma_window(struct fimc_lite *dev, const struct flite_frame *f)
 {
        u32 cfg;
 
@@ -294,7 +295,7 @@ void flite_hw_mask_dma_buffer(struct fimc_lite *dev, u32 index)
 }
 
 /* Enable/disable output DMA, set output pixel size and offsets (composition) */
-void flite_hw_set_output_dma(struct fimc_lite *dev, struct flite_frame *f,
+void flite_hw_set_output_dma(struct fimc_lite *dev, const struct flite_frame *f,
                             bool enable)
 {
        u32 cfg = readl(dev->regs + FLITE_REG_CIGCTRL);
index c5656e902750c83dd089cbd0d1189b4cd7809aea..c5ec36dfb2f9f6baf42f631ed616afe0de6d3268 100644 (file)
@@ -133,15 +133,13 @@ void flite_hw_set_interrupt_mask(struct fimc_lite *dev);
 void flite_hw_capture_start(struct fimc_lite *dev);
 void flite_hw_capture_stop(struct fimc_lite *dev);
 void flite_hw_set_camera_bus(struct fimc_lite *dev,
-                            struct fimc_source_info *s_info);
-void flite_hw_set_camera_polarity(struct fimc_lite *dev,
-                                 struct fimc_source_info *cam);
-void flite_hw_set_window_offset(struct fimc_lite *dev, struct flite_frame *f);
-void flite_hw_set_source_format(struct fimc_lite *dev, struct flite_frame *f);
+                            const struct fimc_source_info *s_info);
+void flite_hw_set_window_offset(struct fimc_lite *dev, const struct flite_frame *f);
+void flite_hw_set_source_format(struct fimc_lite *dev, const struct flite_frame *f);
 
-void flite_hw_set_output_dma(struct fimc_lite *dev, struct flite_frame *f,
+void flite_hw_set_output_dma(struct fimc_lite *dev, const struct flite_frame *f,
                             bool enable);
-void flite_hw_set_dma_window(struct fimc_lite *dev, struct flite_frame *f);
+void flite_hw_set_dma_window(struct fimc_lite *dev, const struct flite_frame *f);
 void flite_hw_set_test_pattern(struct fimc_lite *dev, bool on);
 void flite_hw_dump_regs(struct fimc_lite *dev, const char *label);
 void flite_hw_set_dma_buffer(struct fimc_lite *dev, struct flite_buffer *buf);
index 7898c9bebb04679062226ccc234280cdd25ef0a6..d1d860fa3454de1f3413211e313782681104a5ac 100644 (file)
@@ -738,7 +738,7 @@ static int fimc_lite_try_fmt_mplane(struct file *file, void *fh,
 static int fimc_lite_s_fmt_mplane(struct file *file, void *priv,
                                  struct v4l2_format *f)
 {
-       struct v4l2_pix_format_mplane *pixm = &f->fmt.pix_mp;
+       const struct v4l2_pix_format_mplane *pixm = &f->fmt.pix_mp;
        struct fimc_lite *fimc = video_drvdata(file);
        struct flite_frame *frame = &fimc->out_frame;
        const struct fimc_fmt *fmt = NULL;
index ddf29e0b5b1cdcc6e9217d6fde4d22a51dec170f..2d96fb00a5c68c0d17bed0b4394400ef79bb09a1 100644 (file)
@@ -117,8 +117,6 @@ struct flite_buffer {
  * @ctrl_handler: v4l2 control handler
  * @test_pattern: test pattern controls
  * @index: FIMC-LITE platform device index
- * @pipeline: video capture pipeline data structure
- * @pipeline_ops: media pipeline ops for the video node driver
  * @slock: spinlock protecting this data structure and the hw registers
  * @lock: mutex serializing video device and the subdev operations
  * @clock: FIMC-LITE gate clock
@@ -134,7 +132,6 @@ struct flite_buffer {
  * @active_buf_q: the queue head of buffers scheduled in hardware
  * @vb_queue: vb2 buffers queue
  * @buf_index: helps to keep track of the DMA start address register index
- * @active_buf_count: number of video buffers scheduled in hardware
  * @frame_count: the captured frames counter
  * @reqbufs_count: the number of buffers requested with REQBUFS ioctl
  * @events: event info
index df8e2aa454d8fa920b3cfcc3d9fd213ef85f504a..199997eec1cc25bbc3872c960e323128fac430d2 100644 (file)
@@ -170,7 +170,7 @@ static int fimc_queue_setup(struct vb2_queue *vq,
                            unsigned int sizes[], struct device *alloc_devs[])
 {
        struct fimc_ctx *ctx = vb2_get_drv_priv(vq);
-       struct fimc_frame *f;
+       const struct fimc_frame *f;
        int i;
 
        f = ctx_get_frame(ctx, vq->type);
@@ -192,7 +192,7 @@ static int fimc_queue_setup(struct vb2_queue *vq,
 static int fimc_buf_prepare(struct vb2_buffer *vb)
 {
        struct fimc_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
-       struct fimc_frame *frame;
+       const struct fimc_frame *frame;
        int i;
 
        frame = ctx_get_frame(ctx, vb->vb2_queue->type);
@@ -237,7 +237,7 @@ static int fimc_m2m_querycap(struct file *file, void *fh,
 static int fimc_m2m_enum_fmt(struct file *file, void *priv,
                             struct v4l2_fmtdesc *f)
 {
-       struct fimc_fmt *fmt;
+       const struct fimc_fmt *fmt;
 
        fmt = fimc_find_format(NULL, NULL, get_m2m_fmt_flags(f->type),
                               f->index);
@@ -252,7 +252,7 @@ static int fimc_m2m_g_fmt_mplane(struct file *file, void *fh,
                                 struct v4l2_format *f)
 {
        struct fimc_ctx *ctx = fh_to_ctx(fh);
-       struct fimc_frame *frame = ctx_get_frame(ctx, f->type);
+       const struct fimc_frame *frame = ctx_get_frame(ctx, f->type);
 
        if (IS_ERR(frame))
                return PTR_ERR(frame);
@@ -266,7 +266,7 @@ static int fimc_try_fmt_mplane(struct fimc_ctx *ctx, struct v4l2_format *f)
        struct fimc_dev *fimc = ctx->fimc_dev;
        const struct fimc_variant *variant = fimc->variant;
        struct v4l2_pix_format_mplane *pix = &f->fmt.pix_mp;
-       struct fimc_fmt *fmt;
+       const struct fimc_fmt *fmt;
        u32 max_w, mod_x, mod_y;
 
        if (!IS_M2M(f->type))
@@ -314,8 +314,9 @@ static int fimc_m2m_try_fmt_mplane(struct file *file, void *fh,
        return fimc_try_fmt_mplane(ctx, f);
 }
 
-static void __set_frame_format(struct fimc_frame *frame, struct fimc_fmt *fmt,
-                              struct v4l2_pix_format_mplane *pixm)
+static void __set_frame_format(struct fimc_frame *frame,
+                              const struct fimc_fmt *fmt,
+                              const struct v4l2_pix_format_mplane *pixm)
 {
        int i;
 
@@ -340,7 +341,7 @@ static int fimc_m2m_s_fmt_mplane(struct file *file, void *fh,
 {
        struct fimc_ctx *ctx = fh_to_ctx(fh);
        struct fimc_dev *fimc = ctx->fimc_dev;
-       struct fimc_fmt *fmt;
+       const struct fimc_fmt *fmt;
        struct vb2_queue *vq;
        struct fimc_frame *frame;
        int ret;
@@ -378,7 +379,7 @@ static int fimc_m2m_g_selection(struct file *file, void *fh,
                                struct v4l2_selection *s)
 {
        struct fimc_ctx *ctx = fh_to_ctx(fh);
-       struct fimc_frame *frame;
+       const struct fimc_frame *frame;
 
        frame = ctx_get_frame(ctx, s->type);
        if (IS_ERR(frame))
@@ -428,7 +429,7 @@ static int fimc_m2m_try_selection(struct fimc_ctx *ctx,
                                  struct v4l2_selection *s)
 {
        struct fimc_dev *fimc = ctx->fimc_dev;
-       struct fimc_frame *f;
+       const struct fimc_frame *f;
        u32 min_size, halign, depth = 0;
        int i;
 
@@ -588,7 +589,7 @@ static int fimc_m2m_set_default_format(struct fimc_ctx *ctx)
                        .sizeimage = 800 * 4 * 600,
                },
        };
-       struct fimc_fmt *fmt;
+       const struct fimc_fmt *fmt;
 
        fmt = fimc_find_format(&pixm.pixelformat, NULL, FMT_FLAGS_M2M, 0);
        if (!fmt)
index 95165a2cc7d1ceb8e67f36245066e1d79e4de546..b4ee39e471e71bfecbd05c8bf51f540de6ed75ce 100644 (file)
@@ -105,7 +105,7 @@ void fimc_hw_set_target_format(struct fimc_ctx *ctx)
 {
        u32 cfg;
        struct fimc_dev *dev = ctx->fimc_dev;
-       struct fimc_frame *frame = &ctx->d_frame;
+       const struct fimc_frame *frame = &ctx->d_frame;
 
        dbg("w= %d, h= %d color: %d", frame->width,
            frame->height, frame->fmt->color);
@@ -147,7 +147,7 @@ void fimc_hw_set_target_format(struct fimc_ctx *ctx)
 static void fimc_hw_set_out_dma_size(struct fimc_ctx *ctx)
 {
        struct fimc_dev *dev = ctx->fimc_dev;
-       struct fimc_frame *frame = &ctx->d_frame;
+       const struct fimc_frame *frame = &ctx->d_frame;
        u32 cfg;
 
        cfg = (frame->f_height << 16) | frame->f_width;
@@ -166,9 +166,9 @@ static void fimc_hw_set_out_dma_size(struct fimc_ctx *ctx)
 void fimc_hw_set_out_dma(struct fimc_ctx *ctx)
 {
        struct fimc_dev *dev = ctx->fimc_dev;
-       struct fimc_frame *frame = &ctx->d_frame;
-       struct fimc_dma_offset *offset = &frame->dma_offset;
-       struct fimc_fmt *fmt = frame->fmt;
+       const struct fimc_frame *frame = &ctx->d_frame;
+       const struct fimc_dma_offset *offset = &frame->dma_offset;
+       const struct fimc_fmt *fmt = frame->fmt;
        u32 cfg;
 
        /* Set the input dma offsets. */
@@ -248,8 +248,8 @@ static void fimc_hw_set_scaler(struct fimc_ctx *ctx)
 {
        struct fimc_dev *dev = ctx->fimc_dev;
        struct fimc_scaler *sc = &ctx->scaler;
-       struct fimc_frame *src_frame = &ctx->s_frame;
-       struct fimc_frame *dst_frame = &ctx->d_frame;
+       const struct fimc_frame *src_frame = &ctx->s_frame;
+       const struct fimc_frame *dst_frame = &ctx->d_frame;
 
        u32 cfg = readl(dev->regs + FIMC_REG_CISCCTRL);
 
@@ -388,7 +388,7 @@ void fimc_hw_set_effect(struct fimc_ctx *ctx)
 void fimc_hw_set_rgb_alpha(struct fimc_ctx *ctx)
 {
        struct fimc_dev *dev = ctx->fimc_dev;
-       struct fimc_frame *frame = &ctx->d_frame;
+       const struct fimc_frame *frame = &ctx->d_frame;
        u32 cfg;
 
        if (!(frame->fmt->flags & FMT_HAS_ALPHA))
@@ -403,7 +403,7 @@ void fimc_hw_set_rgb_alpha(struct fimc_ctx *ctx)
 static void fimc_hw_set_in_dma_size(struct fimc_ctx *ctx)
 {
        struct fimc_dev *dev = ctx->fimc_dev;
-       struct fimc_frame *frame = &ctx->s_frame;
+       const struct fimc_frame *frame = &ctx->s_frame;
        u32 cfg_o = 0;
        u32 cfg_r = 0;
 
@@ -420,8 +420,8 @@ static void fimc_hw_set_in_dma_size(struct fimc_ctx *ctx)
 void fimc_hw_set_in_dma(struct fimc_ctx *ctx)
 {
        struct fimc_dev *dev = ctx->fimc_dev;
-       struct fimc_frame *frame = &ctx->s_frame;
-       struct fimc_dma_offset *offset = &frame->dma_offset;
+       const struct fimc_frame *frame = &ctx->s_frame;
+       const struct fimc_dma_offset *offset = &frame->dma_offset;
        u32 cfg;
 
        /* Set the pixel offsets. */
@@ -526,7 +526,7 @@ void fimc_hw_set_output_path(struct fimc_ctx *ctx)
        writel(cfg, dev->regs + FIMC_REG_CISCCTRL);
 }
 
-void fimc_hw_set_input_addr(struct fimc_dev *dev, struct fimc_addr *addr)
+void fimc_hw_set_input_addr(struct fimc_dev *dev, const struct fimc_addr *addr)
 {
        u32 cfg = readl(dev->regs + FIMC_REG_CIREAL_ISIZE);
        cfg |= FIMC_REG_CIREAL_ISIZE_ADDR_CH_DIS;
@@ -541,7 +541,7 @@ void fimc_hw_set_input_addr(struct fimc_dev *dev, struct fimc_addr *addr)
 }
 
 void fimc_hw_set_output_addr(struct fimc_dev *dev,
-                            struct fimc_addr *addr, int index)
+                            const struct fimc_addr *addr, int index)
 {
        int i = (index == -1) ? 0 : index;
        do {
@@ -554,7 +554,7 @@ void fimc_hw_set_output_addr(struct fimc_dev *dev,
 }
 
 int fimc_hw_set_camera_polarity(struct fimc_dev *fimc,
-                               struct fimc_source_info *cam)
+                               const struct fimc_source_info *cam)
 {
        u32 cfg = readl(fimc->regs + FIMC_REG_CIGCTRL);
 
@@ -598,8 +598,8 @@ static const struct mbus_pixfmt_desc pix_desc[] = {
 int fimc_hw_set_camera_source(struct fimc_dev *fimc,
                              struct fimc_source_info *source)
 {
-       struct fimc_vid_cap *vc = &fimc->vid_cap;
-       struct fimc_frame *f = &vc->ctx->s_frame;
+       const struct fimc_vid_cap *vc = &fimc->vid_cap;
+       const struct fimc_frame *f = &vc->ctx->s_frame;
        u32 bus_width, cfg = 0;
        int i;
 
@@ -648,7 +648,7 @@ int fimc_hw_set_camera_source(struct fimc_dev *fimc,
        return 0;
 }
 
-void fimc_hw_set_camera_offset(struct fimc_dev *fimc, struct fimc_frame *f)
+void fimc_hw_set_camera_offset(struct fimc_dev *fimc, const struct fimc_frame *f)
 {
        u32 hoff2, voff2;
 
@@ -668,9 +668,9 @@ void fimc_hw_set_camera_offset(struct fimc_dev *fimc, struct fimc_frame *f)
 }
 
 int fimc_hw_set_camera_type(struct fimc_dev *fimc,
-                           struct fimc_source_info *source)
+                           const struct fimc_source_info *source)
 {
-       struct fimc_vid_cap *vid_cap = &fimc->vid_cap;
+       const struct fimc_vid_cap *vid_cap = &fimc->vid_cap;
        u32 csis_data_alignment = 32;
        u32 cfg, tmp;
 
index b9b33aa1f12fef9b6746db0a49bfac88a1ef1717..9714f4309655cfde5d1585882d0f34ea7d55d5bb 100644
@@ -302,16 +302,16 @@ void fimc_hw_set_rgb_alpha(struct fimc_ctx *ctx);
 void fimc_hw_set_in_dma(struct fimc_ctx *ctx);
 void fimc_hw_set_input_path(struct fimc_ctx *ctx);
 void fimc_hw_set_output_path(struct fimc_ctx *ctx);
-void fimc_hw_set_input_addr(struct fimc_dev *fimc, struct fimc_addr *addr);
-void fimc_hw_set_output_addr(struct fimc_dev *fimc, struct fimc_addr *addr,
+void fimc_hw_set_input_addr(struct fimc_dev *fimc, const struct fimc_addr *addr);
+void fimc_hw_set_output_addr(struct fimc_dev *fimc, const struct fimc_addr *addr,
                             int index);
 int fimc_hw_set_camera_source(struct fimc_dev *fimc,
                              struct fimc_source_info *cam);
-void fimc_hw_set_camera_offset(struct fimc_dev *fimc, struct fimc_frame *f);
+void fimc_hw_set_camera_offset(struct fimc_dev *fimc, const struct fimc_frame *f);
 int fimc_hw_set_camera_polarity(struct fimc_dev *fimc,
-                               struct fimc_source_info *cam);
+                               const struct fimc_source_info *cam);
 int fimc_hw_set_camera_type(struct fimc_dev *fimc,
-                           struct fimc_source_info *cam);
+                           const struct fimc_source_info *cam);
 void fimc_hw_clear_irq(struct fimc_dev *dev);
 void fimc_hw_enable_scaler(struct fimc_dev *dev, bool on);
 void fimc_hw_activate_input_dma(struct fimc_dev *dev, bool on);
index aae8a8b2c0f4c0a17ba1132ce56334954d9a9875..4b9b20ba35041c58d670caec11848f504e454f71 100644
@@ -727,7 +727,8 @@ static int s5pcsis_parse_dt(struct platform_device *pdev,
                                 &state->max_num_lanes))
                return -EINVAL;
 
-       node = of_graph_get_next_endpoint(node, NULL);
+       /* from port@3 or port@4 */
+       node = of_graph_get_endpoint_by_regs(node, -1, -1);
        if (!node) {
                dev_err(&pdev->dev, "No port node at %pOF\n",
                                pdev->dev.of_node);
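
The hunk above swaps of_graph_get_next_endpoint(node, NULL) for of_graph_get_endpoint_by_regs(node, -1, -1), which looks the endpoint up by its port/endpoint "reg" values, with -1 acting as a wildcard, instead of iterating. A hedged usage sketch with a hypothetical example_parse_ep(), not the s5pcsis code itself:

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/of.h>
#include <linux/of_graph.h>

static int example_parse_ep(struct device *dev)
{
        struct device_node *ep;

        /* -1, -1: first endpoint under any port, any endpoint reg */
        ep = of_graph_get_endpoint_by_regs(dev->of_node, -1, -1);
        if (!ep)
                return -ENODEV;

        /* ... parse endpoint properties here ... */

        of_node_put(ep);        /* the helper returns a refcounted node */
        return 0;
}
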
index fbb047eadf5afffe4c8db70660a6fc630dc56d57..50451984d59f752c20143d5aa8affda341d20570 100644
@@ -183,7 +183,7 @@ static void s5p_mfc_watchdog_worker(struct work_struct *work)
                mfc_err("Error: some instance may be closing/opening\n");
        spin_lock_irqsave(&dev->irqlock, flags);
 
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
 
        for (i = 0; i < MFC_NUM_CONTEXTS; i++) {
                ctx = dev->ctx[i];
@@ -211,9 +211,9 @@ static void s5p_mfc_watchdog_worker(struct work_struct *work)
                        mfc_err("Failed to reload FW\n");
                        goto unlock;
                }
-               s5p_mfc_clock_on();
+               s5p_mfc_clock_on(dev);
                ret = s5p_mfc_init_hw(dev);
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
                if (ret)
                        mfc_err("Failed to reinit FW\n");
        }
@@ -393,7 +393,7 @@ static void s5p_mfc_handle_frame(struct s5p_mfc_ctx *ctx,
                s5p_mfc_hw_call(dev->mfc_ops, clear_int_flags, dev);
                wake_up_ctx(ctx, reason, err);
                WARN_ON(test_and_clear_bit(0, &dev->hw_lock) == 0);
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
                s5p_mfc_hw_call(dev->mfc_ops, try_run, dev);
                return;
        }
@@ -465,7 +465,7 @@ leave_handle_frame:
        s5p_mfc_hw_call(dev->mfc_ops, clear_int_flags, dev);
        wake_up_ctx(ctx, reason, err);
        WARN_ON(test_and_clear_bit(0, &dev->hw_lock) == 0);
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
        /* if suspending, wake up device and do not try_run again*/
        if (test_bit(0, &dev->enter_suspend))
                wake_up_dev(dev, reason, err);
@@ -509,7 +509,7 @@ static void s5p_mfc_handle_error(struct s5p_mfc_dev *dev,
        }
        WARN_ON(test_and_clear_bit(0, &dev->hw_lock) == 0);
        s5p_mfc_hw_call(dev->mfc_ops, clear_int_flags, dev);
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
        wake_up_dev(dev, reason, err);
 }
 
@@ -565,7 +565,7 @@ static void s5p_mfc_handle_seq_done(struct s5p_mfc_ctx *ctx,
        s5p_mfc_hw_call(dev->mfc_ops, clear_int_flags, dev);
        clear_work_bit(ctx);
        WARN_ON(test_and_clear_bit(0, &dev->hw_lock) == 0);
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
        s5p_mfc_hw_call(dev->mfc_ops, try_run, dev);
        wake_up_ctx(ctx, reason, err);
 }
@@ -601,7 +601,7 @@ static void s5p_mfc_handle_init_buffers(struct s5p_mfc_ctx *ctx,
                }
                WARN_ON(test_and_clear_bit(0, &dev->hw_lock) == 0);
 
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
 
                wake_up(&ctx->queue);
                if (ctx->src_queue_cnt >= 1 && ctx->dst_queue_cnt >= 1)
@@ -610,7 +610,7 @@ static void s5p_mfc_handle_init_buffers(struct s5p_mfc_ctx *ctx,
        } else {
                WARN_ON(test_and_clear_bit(0, &dev->hw_lock) == 0);
 
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
 
                wake_up(&ctx->queue);
        }
@@ -638,7 +638,7 @@ static void s5p_mfc_handle_stream_complete(struct s5p_mfc_ctx *ctx)
 
        WARN_ON(test_and_clear_bit(0, &dev->hw_lock) == 0);
 
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
        wake_up(&ctx->queue);
        s5p_mfc_hw_call(dev->mfc_ops, try_run, dev);
 }
@@ -690,7 +690,7 @@ static irqreturn_t s5p_mfc_irq(int irq, void *priv)
                        }
                        s5p_mfc_hw_call(dev->mfc_ops, clear_int_flags, dev);
                        WARN_ON(test_and_clear_bit(0, &dev->hw_lock) == 0);
-                       s5p_mfc_clock_off();
+                       s5p_mfc_clock_off(dev);
                        wake_up_ctx(ctx, reason, err);
                        s5p_mfc_hw_call(dev->mfc_ops, try_run, dev);
                } else {
@@ -754,7 +754,7 @@ irq_cleanup_hw:
        if (test_and_clear_bit(0, &dev->hw_lock) == 0)
                mfc_err("Failed to unlock hw\n");
 
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
        clear_work_bit(ctx);
        wake_up(&ctx->queue);
 
@@ -841,20 +841,20 @@ static int s5p_mfc_open(struct file *file)
                dev->watchdog_timer.expires = jiffies +
                                        msecs_to_jiffies(MFC_WATCHDOG_INTERVAL);
                add_timer(&dev->watchdog_timer);
-               ret = s5p_mfc_power_on();
+               ret = s5p_mfc_power_on(dev);
                if (ret < 0) {
                        mfc_err("power on failed\n");
                        goto err_pwr_enable;
                }
-               s5p_mfc_clock_on();
+               s5p_mfc_clock_on(dev);
                ret = s5p_mfc_load_firmware(dev);
                if (ret) {
-                       s5p_mfc_clock_off();
+                       s5p_mfc_clock_off(dev);
                        goto err_load_fw;
                }
                /* Init the FW */
                ret = s5p_mfc_init_hw(dev);
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
                if (ret)
                        goto err_init_hw;
        }
@@ -931,7 +931,7 @@ err_init_hw:
 err_load_fw:
 err_pwr_enable:
        if (dev->num_inst == 1) {
-               if (s5p_mfc_power_off() < 0)
+               if (s5p_mfc_power_off(dev) < 0)
                        mfc_err("power off failed\n");
                del_timer_sync(&dev->watchdog_timer);
        }
@@ -963,7 +963,7 @@ static int s5p_mfc_release(struct file *file)
        vb2_queue_release(&ctx->vq_src);
        vb2_queue_release(&ctx->vq_dst);
        if (dev) {
-               s5p_mfc_clock_on();
+               s5p_mfc_clock_on(dev);
 
                /* Mark context as idle */
                clear_work_bit_irqsave(ctx);
@@ -983,12 +983,12 @@ static int s5p_mfc_release(struct file *file)
                        mfc_debug(2, "Last instance\n");
                        s5p_mfc_deinit_hw(dev);
                        del_timer_sync(&dev->watchdog_timer);
-                       s5p_mfc_clock_off();
-                       if (s5p_mfc_power_off() < 0)
+                       s5p_mfc_clock_off(dev);
+                       if (s5p_mfc_power_off(dev) < 0)
                                mfc_err("Power off failed\n");
                } else {
                        mfc_debug(2, "Shutting down clock\n");
-                       s5p_mfc_clock_off();
+                       s5p_mfc_clock_off(dev);
                }
        }
        if (dev)
@@ -1520,20 +1520,20 @@ static const struct dev_pm_ops s5p_mfc_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(s5p_mfc_suspend, s5p_mfc_resume)
 };
 
-static struct s5p_mfc_buf_size_v5 mfc_buf_size_v5 = {
+static const struct s5p_mfc_buf_size_v5 mfc_buf_size_v5 = {
        .h264_ctx       = MFC_H264_CTX_BUF_SIZE,
        .non_h264_ctx   = MFC_CTX_BUF_SIZE,
        .dsc            = DESC_BUF_SIZE,
        .shm            = SHARED_BUF_SIZE,
 };
 
-static struct s5p_mfc_buf_size buf_size_v5 = {
+static const struct s5p_mfc_buf_size buf_size_v5 = {
        .fw     = MAX_FW_SIZE,
        .cpb    = MAX_CPB_SIZE,
        .priv   = &mfc_buf_size_v5,
 };
 
-static struct s5p_mfc_variant mfc_drvdata_v5 = {
+static const struct s5p_mfc_variant mfc_drvdata_v5 = {
        .version        = MFC_VERSION,
        .version_bit    = MFC_V5_BIT,
        .port_num       = MFC_NUM_PORTS,
@@ -1544,7 +1544,7 @@ static struct s5p_mfc_variant mfc_drvdata_v5 = {
        .use_clock_gating = true,
 };
 
-static struct s5p_mfc_buf_size_v6 mfc_buf_size_v6 = {
+static const struct s5p_mfc_buf_size_v6 mfc_buf_size_v6 = {
        .dev_ctx        = MFC_CTX_BUF_SIZE_V6,
        .h264_dec_ctx   = MFC_H264_DEC_CTX_BUF_SIZE_V6,
        .other_dec_ctx  = MFC_OTHER_DEC_CTX_BUF_SIZE_V6,
@@ -1552,13 +1552,13 @@ static struct s5p_mfc_buf_size_v6 mfc_buf_size_v6 = {
        .other_enc_ctx  = MFC_OTHER_ENC_CTX_BUF_SIZE_V6,
 };
 
-static struct s5p_mfc_buf_size buf_size_v6 = {
+static const struct s5p_mfc_buf_size buf_size_v6 = {
        .fw     = MAX_FW_SIZE_V6,
        .cpb    = MAX_CPB_SIZE_V6,
        .priv   = &mfc_buf_size_v6,
 };
 
-static struct s5p_mfc_variant mfc_drvdata_v6 = {
+static const struct s5p_mfc_variant mfc_drvdata_v6 = {
        .version        = MFC_VERSION_V6,
        .version_bit    = MFC_V6_BIT,
        .port_num       = MFC_NUM_PORTS_V6,
@@ -1573,7 +1573,7 @@ static struct s5p_mfc_variant mfc_drvdata_v6 = {
        .num_clocks     = 1,
 };
 
-static struct s5p_mfc_buf_size_v6 mfc_buf_size_v7 = {
+static const struct s5p_mfc_buf_size_v6 mfc_buf_size_v7 = {
        .dev_ctx        = MFC_CTX_BUF_SIZE_V7,
        .h264_dec_ctx   = MFC_H264_DEC_CTX_BUF_SIZE_V7,
        .other_dec_ctx  = MFC_OTHER_DEC_CTX_BUF_SIZE_V7,
@@ -1581,13 +1581,13 @@ static struct s5p_mfc_buf_size_v6 mfc_buf_size_v7 = {
        .other_enc_ctx  = MFC_OTHER_ENC_CTX_BUF_SIZE_V7,
 };
 
-static struct s5p_mfc_buf_size buf_size_v7 = {
+static const struct s5p_mfc_buf_size buf_size_v7 = {
        .fw     = MAX_FW_SIZE_V7,
        .cpb    = MAX_CPB_SIZE_V7,
        .priv   = &mfc_buf_size_v7,
 };
 
-static struct s5p_mfc_variant mfc_drvdata_v7 = {
+static const struct s5p_mfc_variant mfc_drvdata_v7 = {
        .version        = MFC_VERSION_V7,
        .version_bit    = MFC_V7_BIT,
        .port_num       = MFC_NUM_PORTS_V7,
@@ -1597,7 +1597,7 @@ static struct s5p_mfc_variant mfc_drvdata_v7 = {
        .num_clocks     = 1,
 };
 
-static struct s5p_mfc_variant mfc_drvdata_v7_3250 = {
+static const struct s5p_mfc_variant mfc_drvdata_v7_3250 = {
        .version        = MFC_VERSION_V7,
        .version_bit    = MFC_V7_BIT,
        .port_num       = MFC_NUM_PORTS_V7,
@@ -1607,7 +1607,7 @@ static struct s5p_mfc_variant mfc_drvdata_v7_3250 = {
        .num_clocks     = 2,
 };
 
-static struct s5p_mfc_buf_size_v6 mfc_buf_size_v8 = {
+static const struct s5p_mfc_buf_size_v6 mfc_buf_size_v8 = {
        .dev_ctx        = MFC_CTX_BUF_SIZE_V8,
        .h264_dec_ctx   = MFC_H264_DEC_CTX_BUF_SIZE_V8,
        .other_dec_ctx  = MFC_OTHER_DEC_CTX_BUF_SIZE_V8,
@@ -1615,13 +1615,13 @@ static struct s5p_mfc_buf_size_v6 mfc_buf_size_v8 = {
        .other_enc_ctx  = MFC_OTHER_ENC_CTX_BUF_SIZE_V8,
 };
 
-static struct s5p_mfc_buf_size buf_size_v8 = {
+static const struct s5p_mfc_buf_size buf_size_v8 = {
        .fw     = MAX_FW_SIZE_V8,
        .cpb    = MAX_CPB_SIZE_V8,
        .priv   = &mfc_buf_size_v8,
 };
 
-static struct s5p_mfc_variant mfc_drvdata_v8 = {
+static const struct s5p_mfc_variant mfc_drvdata_v8 = {
        .version        = MFC_VERSION_V8,
        .version_bit    = MFC_V8_BIT,
        .port_num       = MFC_NUM_PORTS_V8,
@@ -1631,7 +1631,7 @@ static struct s5p_mfc_variant mfc_drvdata_v8 = {
        .num_clocks     = 1,
 };
 
-static struct s5p_mfc_variant mfc_drvdata_v8_5433 = {
+static const struct s5p_mfc_variant mfc_drvdata_v8_5433 = {
        .version        = MFC_VERSION_V8,
        .version_bit    = MFC_V8_BIT,
        .port_num       = MFC_NUM_PORTS_V8,
@@ -1641,7 +1641,7 @@ static struct s5p_mfc_variant mfc_drvdata_v8_5433 = {
        .num_clocks     = 3,
 };
 
-static struct s5p_mfc_buf_size_v6 mfc_buf_size_v10 = {
+static const struct s5p_mfc_buf_size_v6 mfc_buf_size_v10 = {
        .dev_ctx        = MFC_CTX_BUF_SIZE_V10,
        .h264_dec_ctx   = MFC_H264_DEC_CTX_BUF_SIZE_V10,
        .other_dec_ctx  = MFC_OTHER_DEC_CTX_BUF_SIZE_V10,
@@ -1650,13 +1650,13 @@ static struct s5p_mfc_buf_size_v6 mfc_buf_size_v10 = {
        .other_enc_ctx  = MFC_OTHER_ENC_CTX_BUF_SIZE_V10,
 };
 
-static struct s5p_mfc_buf_size buf_size_v10 = {
+static const struct s5p_mfc_buf_size buf_size_v10 = {
        .fw     = MAX_FW_SIZE_V10,
        .cpb    = MAX_CPB_SIZE_V10,
        .priv   = &mfc_buf_size_v10,
 };
 
-static struct s5p_mfc_variant mfc_drvdata_v10 = {
+static const struct s5p_mfc_variant mfc_drvdata_v10 = {
        .version        = MFC_VERSION_V10,
        .version_bit    = MFC_V10_BIT,
        .port_num       = MFC_NUM_PORTS_V10,
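
Marking the mfc_drvdata_* variants and the buffer-size tables const works because driver match data is only ever read after probe. A sketch of the surrounding pattern under assumed names (example_variant, "vendor,example-v8"):

#include <linux/errno.h>
#include <linux/mod_devicetable.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>

struct example_variant {
        unsigned int version;
        int num_clocks;
};

static const struct example_variant example_variant_v8 = {
        .version        = 8,
        .num_clocks     = 1,
};

static const struct of_device_id example_match[] = {
        { .compatible = "vendor,example-v8", .data = &example_variant_v8 },
        { /* sentinel */ }
};

static int example_probe(struct platform_device *pdev)
{
        /* match data comes back as const void *, so const tables fit */
        const struct example_variant *variant =
                of_device_get_match_data(&pdev->dev);

        if (!variant)
                return -ENODEV;
        return 0;
}
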
index 774c573dc075b4e386ac85aab865ef06ea563ac0..196d8c99647b283ec7685e76263522e8db91f96b 100644
 #include "s5p_mfc_cmd_v5.h"
 #include "s5p_mfc_cmd_v6.h"
 
-static struct s5p_mfc_hw_cmds *s5p_mfc_cmds;
-
 void s5p_mfc_init_hw_cmds(struct s5p_mfc_dev *dev)
 {
        if (IS_MFCV6_PLUS(dev))
-               s5p_mfc_cmds = s5p_mfc_init_hw_cmds_v6();
+               dev->mfc_cmds = s5p_mfc_init_hw_cmds_v6();
        else
-               s5p_mfc_cmds = s5p_mfc_init_hw_cmds_v5();
-
-       dev->mfc_cmds = s5p_mfc_cmds;
+               dev->mfc_cmds = s5p_mfc_init_hw_cmds_v5();
 }
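
The hunk above deletes the file-scope s5p_mfc_cmds intermediary and assigns the selected table straight into dev->mfc_cmds, so the choice is per-device state rather than a shared global. The general shape of that cleanup, sketched with placeholder names:

struct example_dev;

struct example_ops {
        int (*start)(struct example_dev *dev);
};

struct example_dev {
        const struct example_ops *ops;
        int hw_rev;
};

static int start_v5(struct example_dev *dev)
{
        return 0;
}

static int start_v6(struct example_dev *dev)
{
        return 0;
}

static const struct example_ops example_ops_v5 = { .start = start_v5 };
static const struct example_ops example_ops_v6 = { .start = start_v6 };

/* no file-scope intermediary: the choice lands in the device itself */
static void example_init_ops(struct example_dev *dev)
{
        dev->ops = dev->hw_rev >= 6 ? &example_ops_v6 : &example_ops_v5;
}
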
index 945d12fdceb7d5b4dde07a3bdee30586b7efab7f..172c5a63b58ea2338d529283d4504e318f7ea7f5 100644
@@ -19,7 +19,7 @@ struct s5p_mfc_cmd_args {
 
 struct s5p_mfc_hw_cmds {
        int (*cmd_host2risc)(struct s5p_mfc_dev *dev, int cmd,
-                               struct s5p_mfc_cmd_args *args);
+                            const struct s5p_mfc_cmd_args *args);
        int (*sys_init_cmd)(struct s5p_mfc_dev *dev);
        int (*sleep_cmd)(struct s5p_mfc_dev *dev);
        int (*wakeup_cmd)(struct s5p_mfc_dev *dev);
index 327e54e7061140ee0ce94221176a2f5c2d64c6fa..82ee6d300c738f18448e726448e0ee0f383d534e 100644
@@ -14,7 +14,7 @@
 
 /* This function is used to send a command to the MFC */
 static int s5p_mfc_cmd_host2risc_v5(struct s5p_mfc_dev *dev, int cmd,
-                               struct s5p_mfc_cmd_args *args)
+                                   const struct s5p_mfc_cmd_args *args)
 {
        int cur_cmd;
        unsigned long timeout;
@@ -148,7 +148,7 @@ static int s5p_mfc_close_inst_cmd_v5(struct s5p_mfc_ctx *ctx)
 }
 
 /* Initialize cmd function pointers for MFC v5 */
-static struct s5p_mfc_hw_cmds s5p_mfc_cmds_v5 = {
+static const struct s5p_mfc_hw_cmds s5p_mfc_cmds_v5 = {
        .cmd_host2risc = s5p_mfc_cmd_host2risc_v5,
        .sys_init_cmd = s5p_mfc_sys_init_cmd_v5,
        .sleep_cmd = s5p_mfc_sleep_cmd_v5,
@@ -157,7 +157,7 @@ static struct s5p_mfc_hw_cmds s5p_mfc_cmds_v5 = {
        .close_inst_cmd = s5p_mfc_close_inst_cmd_v5,
 };
 
-struct s5p_mfc_hw_cmds *s5p_mfc_init_hw_cmds_v5(void)
+const struct s5p_mfc_hw_cmds *s5p_mfc_init_hw_cmds_v5(void)
 {
        return &s5p_mfc_cmds_v5;
 }
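
With s5p_mfc_cmds_v5 now const, the accessor has to return a pointer-to-const as well, and call sites keep dispatching through an indirection helper. A rough sketch of such a dispatch macro, in the spirit of s5p_mfc_hw_call(); the in-tree macro may differ in detail:

#include <linux/errno.h>

struct example_dev;

struct cmd_ops {
        int (*sleep_cmd)(struct example_dev *dev);
};

/* guard against a NULL table or a missing callback before dispatching */
#define hw_call(ops, op, args...) \
        (((ops) && (ops)->op) ? (ops)->op(args) : -ENODEV)

/* usage: ret = hw_call(dev->ops, sleep_cmd, dev); */
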
index 6eafa514aebca7c7fe73740097b2ca2f17d82b78..c626376053c45990b06fac5d3dedbfcad06cc86c 100644
@@ -11,6 +11,6 @@
 
 #include "s5p_mfc_common.h"
 
-struct s5p_mfc_hw_cmds *s5p_mfc_init_hw_cmds_v5(void);
+const struct s5p_mfc_hw_cmds *s5p_mfc_init_hw_cmds_v5(void);
 
 #endif /* S5P_MFC_CMD_H_ */
index f8588e52dfc823a52fbfc3a50b8ec97053d0ea19..47bc3014b5d8b874da5b3fa90b4163284c047a86 100644
@@ -15,7 +15,7 @@
 #include "s5p_mfc_cmd_v6.h"
 
 static int s5p_mfc_cmd_host2risc_v6(struct s5p_mfc_dev *dev, int cmd,
-                               struct s5p_mfc_cmd_args *args)
+                                   const struct s5p_mfc_cmd_args *args)
 {
        mfc_debug(2, "Issue the command: %d\n", cmd);
 
@@ -32,7 +32,7 @@ static int s5p_mfc_cmd_host2risc_v6(struct s5p_mfc_dev *dev, int cmd,
 static int s5p_mfc_sys_init_cmd_v6(struct s5p_mfc_dev *dev)
 {
        struct s5p_mfc_cmd_args h2r_args;
-       struct s5p_mfc_buf_size_v6 *buf_size = dev->variant->buf_size->priv;
+       const struct s5p_mfc_buf_size_v6 *buf_size = dev->variant->buf_size->priv;
        int ret;
 
        ret = s5p_mfc_hw_call(dev->mfc_ops, alloc_dev_context_buffer, dev);
@@ -154,7 +154,7 @@ static int s5p_mfc_close_inst_cmd_v6(struct s5p_mfc_ctx *ctx)
 }
 
 /* Initialize cmd function pointers for MFC v6 */
-static struct s5p_mfc_hw_cmds s5p_mfc_cmds_v6 = {
+static const struct s5p_mfc_hw_cmds s5p_mfc_cmds_v6 = {
        .cmd_host2risc = s5p_mfc_cmd_host2risc_v6,
        .sys_init_cmd = s5p_mfc_sys_init_cmd_v6,
        .sleep_cmd = s5p_mfc_sleep_cmd_v6,
@@ -163,7 +163,7 @@ static struct s5p_mfc_hw_cmds s5p_mfc_cmds_v6 = {
        .close_inst_cmd = s5p_mfc_close_inst_cmd_v6,
 };
 
-struct s5p_mfc_hw_cmds *s5p_mfc_init_hw_cmds_v6(void)
+const struct s5p_mfc_hw_cmds *s5p_mfc_init_hw_cmds_v6(void)
 {
        return &s5p_mfc_cmds_v6;
 }
index 9dc44460cc38d95f2e11d1d76f8377a478ba028a..29083436f517333cf4eeee58ee013f6e665f03a2 100644
@@ -11,6 +11,6 @@
 
 #include "s5p_mfc_common.h"
 
-struct s5p_mfc_hw_cmds *s5p_mfc_init_hw_cmds_v6(void);
+const struct s5p_mfc_hw_cmds *s5p_mfc_init_hw_cmds_v6(void);
 
 #endif /* S5P_MFC_CMD_H_ */
index 59450b324f7d0fa75a58a45bcc2c75482c19afc1..3cc2a4f5c40a61b086bd7383a84c7c9976aae85b 100644
@@ -221,15 +221,15 @@ struct s5p_mfc_buf_size_v6 {
 struct s5p_mfc_buf_size {
        unsigned int fw;
        unsigned int cpb;
-       void *priv;
+       const void *priv;
 };
 
 struct s5p_mfc_variant {
        unsigned int version;
        unsigned int port_num;
        u32 version_bit;
-       struct s5p_mfc_buf_size *buf_size;
-       char    *fw_name[MFC_FW_MAX_VERSIONS];
+       const struct s5p_mfc_buf_size *buf_size;
+       const char      *fw_name[MFC_FW_MAX_VERSIONS];
        const char      *clk_names[MFC_MAX_CLOCKS];
        int             num_clocks;
        bool            use_clock_gating;
@@ -340,8 +340,8 @@ struct s5p_mfc_dev {
 
        struct s5p_mfc_priv_buf ctx_buf;
        int warn_start;
-       struct s5p_mfc_hw_ops *mfc_ops;
-       struct s5p_mfc_hw_cmds *mfc_cmds;
+       const struct s5p_mfc_hw_ops *mfc_ops;
+       const struct s5p_mfc_hw_cmds *mfc_cmds;
        const struct s5p_mfc_regs *mfc_regs;
        enum s5p_mfc_fw_ver fw_ver;
        bool fw_get_done;
@@ -612,7 +612,6 @@ struct s5p_mfc_codec_ops {
  * @chroma_dpb_size:   dpb buffer size for chroma
  * @me_buffer_size:    size of the motion estimation buffer
  * @tmv_buffer_size:   size of temporal predictor motion vector buffer
- * @frame_type:                used to force the type of the next encoded frame
  * @ref_queue:         list of the reference buffers for encoding
  * @force_frame_type:  encoder's frame type forcing control
  * @ref_queue_cnt:     number of the buffers in the reference list
@@ -639,8 +638,8 @@ struct s5p_mfc_ctx {
        unsigned int int_err;
        wait_queue_head_t queue;
 
-       struct s5p_mfc_fmt *src_fmt;
-       struct s5p_mfc_fmt *dst_fmt;
+       const struct s5p_mfc_fmt *src_fmt;
+       const struct s5p_mfc_fmt *dst_fmt;
 
        struct vb2_queue vq_src;
        struct vb2_queue vq_dst;
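
Changing priv in s5p_mfc_buf_size to const void * lets the version-specific size blobs live in .rodata while the generic struct stays version-agnostic; consumers cast back to the concrete const type. A small sketch with invented field values:

struct buf_size {
        unsigned int fw;
        const void *priv;       /* version-specific blob, read-only */
};

struct buf_size_v6 {
        unsigned int dev_ctx;
};

/* invented values, for illustration only */
static const struct buf_size_v6 example_sizes_v6 = {
        .dev_ctx = 0x7800,
};

static const struct buf_size example_sizes = {
        .fw     = 0x100000,
        .priv   = &example_sizes_v6,
};

static unsigned int example_dev_ctx_size(const struct buf_size *bs)
{
        /* const void * converts implicitly to const struct ... * */
        const struct buf_size_v6 *v6 = bs->priv;

        return v6->dev_ctx;
}
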
index 503487f34a800fc0ea6e535be3f99aeb493926f5..625d77b2be0ffe45c6fe81e6761cc992d8f02d91 100644
@@ -221,7 +221,7 @@ int s5p_mfc_init_hw(struct s5p_mfc_dev *dev)
 
        /* 0. MFC reset */
        mfc_debug(2, "MFC reset..\n");
-       s5p_mfc_clock_on();
+       s5p_mfc_clock_on(dev);
        dev->risc_on = 0;
        ret = s5p_mfc_reset(dev);
        if (ret) {
@@ -249,7 +249,7 @@ int s5p_mfc_init_hw(struct s5p_mfc_dev *dev)
        if (s5p_mfc_wait_for_done_dev(dev, S5P_MFC_R2H_CMD_FW_STATUS_RET)) {
                mfc_err("Failed to load firmware\n");
                s5p_mfc_reset(dev);
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
                return -EIO;
        }
        s5p_mfc_clean_dev_int_flags(dev);
@@ -258,14 +258,14 @@ int s5p_mfc_init_hw(struct s5p_mfc_dev *dev)
        if (ret) {
                mfc_err("Failed to send command to MFC - timeout\n");
                s5p_mfc_reset(dev);
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
                return ret;
        }
        mfc_debug(2, "Ok, now will wait for completion of hardware init\n");
        if (s5p_mfc_wait_for_done_dev(dev, S5P_MFC_R2H_CMD_SYS_INIT_RET)) {
                mfc_err("Failed to init hardware\n");
                s5p_mfc_reset(dev);
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
                return -EIO;
        }
        dev->int_cond = 0;
@@ -275,7 +275,7 @@ int s5p_mfc_init_hw(struct s5p_mfc_dev *dev)
                mfc_err("Failed to init firmware - error: %d int: %d\n",
                                                dev->int_err, dev->int_type);
                s5p_mfc_reset(dev);
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
                return -EIO;
        }
        if (IS_MFCV6_PLUS(dev))
@@ -285,7 +285,7 @@ int s5p_mfc_init_hw(struct s5p_mfc_dev *dev)
 
        mfc_debug(2, "MFC F/W version : %02xyy, %02xmm, %02xdd\n",
                (ver >> 16) & 0xFF, (ver >> 8) & 0xFF, ver & 0xFF);
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
        mfc_debug_leave();
        return 0;
 }
@@ -294,12 +294,12 @@ int s5p_mfc_init_hw(struct s5p_mfc_dev *dev)
 /* Deinitialize hardware */
 void s5p_mfc_deinit_hw(struct s5p_mfc_dev *dev)
 {
-       s5p_mfc_clock_on();
+       s5p_mfc_clock_on(dev);
 
        s5p_mfc_reset(dev);
        s5p_mfc_hw_call(dev->mfc_ops, release_dev_context_buffer, dev);
 
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
 }
 
 int s5p_mfc_sleep(struct s5p_mfc_dev *dev)
@@ -307,7 +307,7 @@ int s5p_mfc_sleep(struct s5p_mfc_dev *dev)
        int ret;
 
        mfc_debug_enter();
-       s5p_mfc_clock_on();
+       s5p_mfc_clock_on(dev);
        s5p_mfc_clean_dev_int_flags(dev);
        ret = s5p_mfc_hw_call(dev->mfc_cmds, sleep_cmd, dev);
        if (ret) {
@@ -318,7 +318,7 @@ int s5p_mfc_sleep(struct s5p_mfc_dev *dev)
                mfc_err("Failed to sleep\n");
                return -EIO;
        }
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
        dev->int_cond = 0;
        if (dev->int_err != 0 || dev->int_type !=
                                                S5P_MFC_R2H_CMD_SLEEP_RET) {
@@ -390,12 +390,12 @@ int s5p_mfc_wakeup(struct s5p_mfc_dev *dev)
        mfc_debug_enter();
        /* 0. MFC reset */
        mfc_debug(2, "MFC reset..\n");
-       s5p_mfc_clock_on();
+       s5p_mfc_clock_on(dev);
        dev->risc_on = 0;
        ret = s5p_mfc_reset(dev);
        if (ret) {
                mfc_err("Failed to reset MFC - timeout\n");
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
                return ret;
        }
        mfc_debug(2, "Done MFC reset..\n");
@@ -410,7 +410,7 @@ int s5p_mfc_wakeup(struct s5p_mfc_dev *dev)
        else
                ret = s5p_mfc_wait_wakeup(dev);
 
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
        if (ret)
                return ret;
 
index 3957f28d4547c097c8a78aee35d7bf6a4ec4db93..91e102d4ec4e2ad9f432c662561e565de6cdc39e 100644
@@ -27,7 +27,7 @@
 #include "s5p_mfc_opr.h"
 #include "s5p_mfc_pm.h"
 
-static struct s5p_mfc_fmt formats[] = {
+static const struct s5p_mfc_fmt formats[] = {
        {
                .fourcc         = V4L2_PIX_FMT_NV12MT_16X16,
                .codec_mode     = S5P_MFC_CODEC_NONE,
@@ -177,7 +177,7 @@ static struct s5p_mfc_fmt formats[] = {
 #define NUM_FORMATS ARRAY_SIZE(formats)
 
 /* Find selected format description */
-static struct s5p_mfc_fmt *find_format(struct v4l2_format *f, unsigned int t)
+static const struct s5p_mfc_fmt *find_format(struct v4l2_format *f, unsigned int t)
 {
        unsigned int i;
 
@@ -406,7 +406,7 @@ static int vidioc_g_fmt(struct file *file, void *priv, struct v4l2_format *f)
 static int vidioc_try_fmt(struct file *file, void *priv, struct v4l2_format *f)
 {
        struct s5p_mfc_dev *dev = video_drvdata(file);
-       struct s5p_mfc_fmt *fmt;
+       const struct s5p_mfc_fmt *fmt;
 
        mfc_debug(2, "Type is %d\n", f->type);
        if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
@@ -445,7 +445,7 @@ static int vidioc_s_fmt(struct file *file, void *priv, struct v4l2_format *f)
        struct s5p_mfc_ctx *ctx = fh_to_ctx(priv);
        int ret = 0;
        struct v4l2_pix_format_mplane *pix_mp;
-       struct s5p_mfc_buf_size *buf_size = dev->variant->buf_size;
+       const struct s5p_mfc_buf_size *buf_size = dev->variant->buf_size;
 
        mfc_debug_enter();
        ret = vidioc_try_fmt(file, priv, f);
@@ -496,7 +496,7 @@ static int reqbufs_output(struct s5p_mfc_dev *dev, struct s5p_mfc_ctx *ctx,
 {
        int ret = 0;
 
-       s5p_mfc_clock_on();
+       s5p_mfc_clock_on(dev);
 
        if (reqbufs->count == 0) {
                mfc_debug(2, "Freeing buffers\n");
@@ -533,7 +533,7 @@ static int reqbufs_output(struct s5p_mfc_dev *dev, struct s5p_mfc_ctx *ctx,
                ret = -EINVAL;
        }
 out:
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
        if (ret)
                mfc_err("Failed allocating buffers for OUTPUT queue\n");
        return ret;
@@ -544,7 +544,7 @@ static int reqbufs_capture(struct s5p_mfc_dev *dev, struct s5p_mfc_ctx *ctx,
 {
        int ret = 0;
 
-       s5p_mfc_clock_on();
+       s5p_mfc_clock_on(dev);
 
        if (reqbufs->count == 0) {
                mfc_debug(2, "Freeing buffers\n");
@@ -587,7 +587,7 @@ static int reqbufs_capture(struct s5p_mfc_dev *dev, struct s5p_mfc_ctx *ctx,
                ret = -EINVAL;
        }
 out:
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(dev);
        if (ret)
                mfc_err("Failed allocating buffers for CAPTURE queue\n");
        return ret;
@@ -1159,7 +1159,7 @@ static void s5p_mfc_buf_queue(struct vb2_buffer *vb)
        s5p_mfc_hw_call(dev->mfc_ops, try_run, dev);
 }
 
-static struct vb2_ops s5p_mfc_dec_qops = {
+static const struct vb2_ops s5p_mfc_dec_qops = {
        .queue_setup            = s5p_mfc_queue_setup,
        .wait_prepare           = vb2_ops_wait_prepare,
        .wait_finish            = vb2_ops_wait_finish,
@@ -1174,7 +1174,7 @@ const struct s5p_mfc_codec_ops *get_dec_codec_ops(void)
        return &decoder_codec_ops;
 }
 
-struct vb2_ops *get_dec_queue_ops(void)
+const struct vb2_ops *get_dec_queue_ops(void)
 {
        return &s5p_mfc_dec_qops;
 }
index 0c52ab46cff730a6ab451e25bfc8cfda2e444c5d..47a6eb9a8fc0b2dbd42d8dd8f2bcc1e977423cd7 100644
@@ -10,9 +10,8 @@
 #define S5P_MFC_DEC_H_
 
 const struct s5p_mfc_codec_ops *get_dec_codec_ops(void);
-struct vb2_ops *get_dec_queue_ops(void);
+const struct vb2_ops *get_dec_queue_ops(void);
 const struct v4l2_ioctl_ops *get_dec_v4l2_ioctl_ops(void);
-struct s5p_mfc_fmt *get_dec_def_fmt(bool src);
 int s5p_mfc_dec_ctrls_setup(struct s5p_mfc_ctx *ctx);
 void s5p_mfc_dec_ctrls_delete(struct s5p_mfc_ctx *ctx);
 void s5p_mfc_dec_init(struct s5p_mfc_ctx *ctx);
index ef8bb40b9712e4cd39eb489a4fc83472d53a2a26..81cbb36fb382c0a0644730e7eb87b61884a8ed03 100644
@@ -30,7 +30,7 @@
 #define DEF_SRC_FMT_ENC        V4L2_PIX_FMT_NV12M
 #define DEF_DST_FMT_ENC        V4L2_PIX_FMT_H264
 
-static struct s5p_mfc_fmt formats[] = {
+static const struct s5p_mfc_fmt formats[] = {
        {
                .fourcc         = V4L2_PIX_FMT_NV12MT_16X16,
                .codec_mode     = S5P_MFC_CODEC_NONE,
@@ -111,7 +111,7 @@ static struct s5p_mfc_fmt formats[] = {
 };
 
 #define NUM_FORMATS ARRAY_SIZE(formats)
-static struct s5p_mfc_fmt *find_format(struct v4l2_format *f, unsigned int t)
+static const struct s5p_mfc_fmt *find_format(struct v4l2_format *f, unsigned int t)
 {
        unsigned int i;
 
@@ -1431,7 +1431,7 @@ static int vidioc_g_fmt(struct file *file, void *priv, struct v4l2_format *f)
 static int vidioc_try_fmt(struct file *file, void *priv, struct v4l2_format *f)
 {
        struct s5p_mfc_dev *dev = video_drvdata(file);
-       struct s5p_mfc_fmt *fmt;
+       const struct s5p_mfc_fmt *fmt;
        struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp;
 
        if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
@@ -2392,7 +2392,7 @@ static const struct v4l2_ioctl_ops s5p_mfc_enc_ioctl_ops = {
        .vidioc_unsubscribe_event = v4l2_event_unsubscribe,
 };
 
-static int check_vb_with_fmt(struct s5p_mfc_fmt *fmt, struct vb2_buffer *vb)
+static int check_vb_with_fmt(const struct s5p_mfc_fmt *fmt, struct vb2_buffer *vb)
 {
        int i;
 
@@ -2650,7 +2650,7 @@ static void s5p_mfc_buf_queue(struct vb2_buffer *vb)
        s5p_mfc_hw_call(dev->mfc_ops, try_run, dev);
 }
 
-static struct vb2_ops s5p_mfc_enc_qops = {
+static const struct vb2_ops s5p_mfc_enc_qops = {
        .queue_setup            = s5p_mfc_queue_setup,
        .wait_prepare           = vb2_ops_wait_prepare,
        .wait_finish            = vb2_ops_wait_finish,
@@ -2666,7 +2666,7 @@ const struct s5p_mfc_codec_ops *get_enc_codec_ops(void)
        return &encoder_codec_ops;
 }
 
-struct vb2_ops *get_enc_queue_ops(void)
+const struct vb2_ops *get_enc_queue_ops(void)
 {
        return &s5p_mfc_enc_qops;
 }
index 3f1b1a037a4f554aabf840f209708ccd77785b3d..62d6db67fd91474fff4b30b1cfb95d58de2a37d2 100644
@@ -10,9 +10,8 @@
 #define S5P_MFC_ENC_H_
 
 const struct s5p_mfc_codec_ops *get_enc_codec_ops(void);
-struct vb2_ops *get_enc_queue_ops(void);
+const struct vb2_ops *get_enc_queue_ops(void);
 const struct v4l2_ioctl_ops *get_enc_v4l2_ioctl_ops(void);
-struct s5p_mfc_fmt *get_enc_def_fmt(bool src);
 int s5p_mfc_enc_ctrls_setup(struct s5p_mfc_ctx *ctx);
 void s5p_mfc_enc_ctrls_delete(struct s5p_mfc_ctx *ctx);
 void s5p_mfc_enc_init(struct s5p_mfc_ctx *ctx);
index 673962301173cb6d0625c0bc197a917d10fe55f5..5ba791fa36763ffca21ef74a5cba1590c5dc0d00 100644
 #include "s5p_mfc_opr_v5.h"
 #include "s5p_mfc_opr_v6.h"
 
-static struct s5p_mfc_hw_ops *s5p_mfc_ops;
-
 void s5p_mfc_init_hw_ops(struct s5p_mfc_dev *dev)
 {
        if (IS_MFCV6_PLUS(dev)) {
-               s5p_mfc_ops = s5p_mfc_init_hw_ops_v6();
+               dev->mfc_ops = s5p_mfc_init_hw_ops_v6();
                dev->warn_start = S5P_FIMV_ERR_WARNINGS_START_V6;
        } else {
-               s5p_mfc_ops = s5p_mfc_init_hw_ops_v5();
+               dev->mfc_ops = s5p_mfc_init_hw_ops_v5();
                dev->warn_start = S5P_FIMV_ERR_WARNINGS_START;
        }
-       dev->mfc_ops = s5p_mfc_ops;
 }
 
 void s5p_mfc_init_regs(struct s5p_mfc_dev *dev)
index fcfaf125a5a1df814263c26a7cce65775932380f..365f552e604bd20d3f739cdf4dc015c677fa50cf 100644
@@ -34,7 +34,7 @@
 static int s5p_mfc_alloc_dec_temp_buffers_v5(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
-       struct s5p_mfc_buf_size_v5 *buf_size = dev->variant->buf_size->priv;
+       const struct s5p_mfc_buf_size_v5 *buf_size = dev->variant->buf_size->priv;
        int ret;
 
        ctx->dsc.size = buf_size->dsc;
@@ -200,7 +200,7 @@ static void s5p_mfc_release_codec_buffers_v5(struct s5p_mfc_ctx *ctx)
 static int s5p_mfc_alloc_instance_buffer_v5(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
-       struct s5p_mfc_buf_size_v5 *buf_size = dev->variant->buf_size->priv;
+       const struct s5p_mfc_buf_size_v5 *buf_size = dev->variant->buf_size->priv;
        int ret;
 
        if (ctx->codec_mode == S5P_MFC_CODEC_H264_DEC ||
@@ -345,7 +345,7 @@ static void s5p_mfc_enc_calc_src_size_v5(struct s5p_mfc_ctx *ctx)
 static void s5p_mfc_set_dec_desc_buffer(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
-       struct s5p_mfc_buf_size_v5 *buf_size = dev->variant->buf_size->priv;
+       const struct s5p_mfc_buf_size_v5 *buf_size = dev->variant->buf_size->priv;
 
        mfc_write(dev, OFFSETA(ctx->dsc.dma), S5P_FIMV_SI_CH0_DESC_ADR);
        mfc_write(dev, buf_size->dsc, S5P_FIMV_SI_CH0_DESC_SIZE);
@@ -676,7 +676,7 @@ static int s5p_mfc_set_enc_ref_buffer_v5(struct s5p_mfc_ctx *ctx)
 static int s5p_mfc_set_enc_params(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
-       struct s5p_mfc_enc_params *p = &ctx->enc_params;
+       const struct s5p_mfc_enc_params *p = &ctx->enc_params;
        unsigned int reg;
        unsigned int shm;
 
@@ -759,8 +759,8 @@ static int s5p_mfc_set_enc_params(struct s5p_mfc_ctx *ctx)
 static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
-       struct s5p_mfc_enc_params *p = &ctx->enc_params;
-       struct s5p_mfc_h264_enc_params *p_264 = &p->codec.h264;
+       const struct s5p_mfc_enc_params *p = &ctx->enc_params;
+       const struct s5p_mfc_h264_enc_params *p_264 = &p->codec.h264;
        unsigned int reg;
        unsigned int shm;
 
@@ -916,8 +916,8 @@ static int s5p_mfc_set_enc_params_h264(struct s5p_mfc_ctx *ctx)
 static int s5p_mfc_set_enc_params_mpeg4(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
-       struct s5p_mfc_enc_params *p = &ctx->enc_params;
-       struct s5p_mfc_mpeg4_enc_params *p_mpeg4 = &p->codec.mpeg4;
+       const struct s5p_mfc_enc_params *p = &ctx->enc_params;
+       const struct s5p_mfc_mpeg4_enc_params *p_mpeg4 = &p->codec.mpeg4;
        unsigned int reg;
        unsigned int shm;
        unsigned int framerate;
@@ -995,8 +995,8 @@ static int s5p_mfc_set_enc_params_mpeg4(struct s5p_mfc_ctx *ctx)
 static int s5p_mfc_set_enc_params_h263(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
-       struct s5p_mfc_enc_params *p = &ctx->enc_params;
-       struct s5p_mfc_mpeg4_enc_params *p_h263 = &p->codec.mpeg4;
+       const struct s5p_mfc_enc_params *p = &ctx->enc_params;
+       const struct s5p_mfc_mpeg4_enc_params *p_h263 = &p->codec.mpeg4;
        unsigned int reg;
        unsigned int shm;
 
@@ -1348,7 +1348,7 @@ static void s5p_mfc_try_run_v5(struct s5p_mfc_dev *dev)
         * Last frame has already been sent to MFC.
         * Now obtaining frames from MFC buffer
         */
-       s5p_mfc_clock_on();
+       s5p_mfc_clock_on(dev);
        s5p_mfc_clean_ctx_int_flags(ctx);
 
        if (ctx->type == MFCINST_DECODER) {
@@ -1424,7 +1424,7 @@ static void s5p_mfc_try_run_v5(struct s5p_mfc_dev *dev)
                 * scheduled, reduce the clock count as no one will
                 * ever do this, because no interrupt related to this try_run
                 * will ever come from hardware. */
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
        }
 }
 
@@ -1593,7 +1593,7 @@ static unsigned int s5p_mfc_get_crop_info_v_v5(struct s5p_mfc_ctx *ctx)
 }
 
 /* Initialize opr function pointers for MFC v5 */
-static struct s5p_mfc_hw_ops s5p_mfc_ops_v5 = {
+static const struct s5p_mfc_hw_ops s5p_mfc_ops_v5 = {
        .alloc_dec_temp_buffers = s5p_mfc_alloc_dec_temp_buffers_v5,
        .release_dec_desc_buffer = s5p_mfc_release_dec_desc_buffer_v5,
        .alloc_codec_buffers = s5p_mfc_alloc_codec_buffers_v5,
@@ -1633,7 +1633,7 @@ static struct s5p_mfc_hw_ops s5p_mfc_ops_v5 = {
        .get_crop_info_v = s5p_mfc_get_crop_info_v_v5,
 };
 
-struct s5p_mfc_hw_ops *s5p_mfc_init_hw_ops_v5(void)
+const struct s5p_mfc_hw_ops *s5p_mfc_init_hw_ops_v5(void)
 {
        return &s5p_mfc_ops_v5;
 }
index b53d376ead603b30f5c08d5cf1496769d6b40f74..0b98c619676e44253ca4bb1cf8fc966a0287fb9e 100644
@@ -78,5 +78,5 @@ enum MFC_SHM_OFS {
        FRAME_PACK_SEI_INFO     = 0x17c, /* E */
 };
 
-struct s5p_mfc_hw_ops *s5p_mfc_init_hw_ops_v5(void);
+const struct s5p_mfc_hw_ops *s5p_mfc_init_hw_ops_v5(void);
 #endif /* S5P_MFC_OPR_H_ */
index fd945211d28e7793176a06a6c400e476aad2b30f..73f7af674c01bdedaef7c5037cd1896cce9fd04c 100644
@@ -383,7 +383,7 @@ static void s5p_mfc_release_codec_buffers_v6(struct s5p_mfc_ctx *ctx)
 static int s5p_mfc_alloc_instance_buffer_v6(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
-       struct s5p_mfc_buf_size_v6 *buf_size = dev->variant->buf_size->priv;
+       const struct s5p_mfc_buf_size_v6 *buf_size = dev->variant->buf_size->priv;
        int ret;
 
        mfc_debug_enter();
@@ -443,7 +443,7 @@ static void s5p_mfc_release_instance_buffer_v6(struct s5p_mfc_ctx *ctx)
 /* Allocate context buffers for SYS_INIT */
 static int s5p_mfc_alloc_dev_context_buffer_v6(struct s5p_mfc_dev *dev)
 {
-       struct s5p_mfc_buf_size_v6 *buf_size = dev->variant->buf_size->priv;
+       const struct s5p_mfc_buf_size_v6 *buf_size = dev->variant->buf_size->priv;
        int ret;
 
        mfc_debug_enter();
@@ -587,7 +587,7 @@ static int s5p_mfc_set_dec_stream_buffer_v6(struct s5p_mfc_ctx *ctx,
 {
        struct s5p_mfc_dev *dev = ctx->dev;
        const struct s5p_mfc_regs *mfc_regs = dev->mfc_regs;
-       struct s5p_mfc_buf_size *buf_size = dev->variant->buf_size;
+       const struct s5p_mfc_buf_size *buf_size = dev->variant->buf_size;
 
        mfc_debug_enter();
        mfc_debug(2, "inst_no: %d, buf_addr: 0x%08x,\n"
@@ -863,7 +863,7 @@ static int s5p_mfc_set_enc_params(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
        const struct s5p_mfc_regs *mfc_regs = dev->mfc_regs;
-       struct s5p_mfc_enc_params *p = &ctx->enc_params;
+       const struct s5p_mfc_enc_params *p = &ctx->enc_params;
        unsigned int reg = 0;
 
        mfc_debug_enter();
@@ -1349,8 +1349,8 @@ static int s5p_mfc_set_enc_params_mpeg4(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
        const struct s5p_mfc_regs *mfc_regs = dev->mfc_regs;
-       struct s5p_mfc_enc_params *p = &ctx->enc_params;
-       struct s5p_mfc_mpeg4_enc_params *p_mpeg4 = &p->codec.mpeg4;
+       const struct s5p_mfc_enc_params *p = &ctx->enc_params;
+       const struct s5p_mfc_mpeg4_enc_params *p_mpeg4 = &p->codec.mpeg4;
        unsigned int reg = 0;
 
        mfc_debug_enter();
@@ -1431,8 +1431,8 @@ static int s5p_mfc_set_enc_params_h263(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
        const struct s5p_mfc_regs *mfc_regs = dev->mfc_regs;
-       struct s5p_mfc_enc_params *p = &ctx->enc_params;
-       struct s5p_mfc_mpeg4_enc_params *p_h263 = &p->codec.mpeg4;
+       const struct s5p_mfc_enc_params *p = &ctx->enc_params;
+       const struct s5p_mfc_mpeg4_enc_params *p_h263 = &p->codec.mpeg4;
        unsigned int reg = 0;
 
        mfc_debug_enter();
@@ -1501,8 +1501,8 @@ static int s5p_mfc_set_enc_params_vp8(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
        const struct s5p_mfc_regs *mfc_regs = dev->mfc_regs;
-       struct s5p_mfc_enc_params *p = &ctx->enc_params;
-       struct s5p_mfc_vp8_enc_params *p_vp8 = &p->codec.vp8;
+       const struct s5p_mfc_enc_params *p = &ctx->enc_params;
+       const struct s5p_mfc_vp8_enc_params *p_vp8 = &p->codec.vp8;
        unsigned int reg = 0;
        unsigned int val = 0;
 
@@ -1897,8 +1897,8 @@ static int s5p_mfc_h264_set_aso_slice_order_v6(struct s5p_mfc_ctx *ctx)
 {
        struct s5p_mfc_dev *dev = ctx->dev;
        const struct s5p_mfc_regs *mfc_regs = dev->mfc_regs;
-       struct s5p_mfc_enc_params *p = &ctx->enc_params;
-       struct s5p_mfc_h264_enc_params *p_h264 = &p->codec.h264;
+       const struct s5p_mfc_enc_params *p = &ctx->enc_params;
+       const struct s5p_mfc_h264_enc_params *p_h264 = &p->codec.h264;
        int i;
 
        if (p_h264->aso) {
@@ -2165,7 +2165,7 @@ static void s5p_mfc_try_run_v6(struct s5p_mfc_dev *dev)
        /* Last frame has already been sent to MFC
         * Now obtaining frames from MFC buffer */
 
-       s5p_mfc_clock_on();
+       s5p_mfc_clock_on(dev);
        s5p_mfc_clean_ctx_int_flags(ctx);
 
        if (ctx->type == MFCINST_DECODER) {
@@ -2245,7 +2245,7 @@ static void s5p_mfc_try_run_v6(struct s5p_mfc_dev *dev)
                 * scheduled, reduce the clock count as no one will
                 * ever do this, because no interrupt related to this try_run
                 * will ever come from hardware. */
-               s5p_mfc_clock_off();
+               s5p_mfc_clock_off(dev);
        }
 }
 
@@ -2261,9 +2261,9 @@ s5p_mfc_read_info_v6(struct s5p_mfc_ctx *ctx, unsigned long ofs)
 {
        int ret;
 
-       s5p_mfc_clock_on();
+       s5p_mfc_clock_on(ctx->dev);
        ret = readl((void __iomem *)ofs);
-       s5p_mfc_clock_off();
+       s5p_mfc_clock_off(ctx->dev);
 
        return ret;
 }
@@ -2657,7 +2657,7 @@ done:
 }
 
 /* Initialize opr function pointers for MFC v6 */
-static struct s5p_mfc_hw_ops s5p_mfc_ops_v6 = {
+static const struct s5p_mfc_hw_ops s5p_mfc_ops_v6 = {
        .alloc_dec_temp_buffers = s5p_mfc_alloc_dec_temp_buffers_v6,
        .release_dec_desc_buffer = s5p_mfc_release_dec_desc_buffer_v6,
        .alloc_codec_buffers = s5p_mfc_alloc_codec_buffers_v6,
@@ -2701,7 +2701,7 @@ static struct s5p_mfc_hw_ops s5p_mfc_ops_v6 = {
        .get_e_min_scratch_buf_size = s5p_mfc_get_e_min_scratch_buf_size,
 };
 
-struct s5p_mfc_hw_ops *s5p_mfc_init_hw_ops_v6(void)
+const struct s5p_mfc_hw_ops *s5p_mfc_init_hw_ops_v6(void)
 {
        return &s5p_mfc_ops_v6;
 }
index 94ecb0e6e7c73b096a8fc4df859110cd73a0cf51..7fc1307675d8d43a066157c4a55f23e612955a92 100644
@@ -51,6 +51,6 @@
 
 #define FRAME_DELTA_DEFAULT            1
 
-struct s5p_mfc_hw_ops *s5p_mfc_init_hw_ops_v6(void);
+const struct s5p_mfc_hw_ops *s5p_mfc_init_hw_ops_v6(void);
 const struct s5p_mfc_regs *s5p_mfc_init_regs_v6_plus(struct s5p_mfc_dev *dev);
 #endif /* S5P_MFC_OPR_V6_H_ */
index 187849841a28b28f0a204ebb38e2ebcbe22228aa..ae42414083831b287d1ed6b15fa9537fa9876d1a 100644
 #include "s5p_mfc_debug.h"
 #include "s5p_mfc_pm.h"
 
-static struct s5p_mfc_pm *pm;
-static struct s5p_mfc_dev *p_dev;
-static atomic_t clk_ref;
-
 int s5p_mfc_init_pm(struct s5p_mfc_dev *dev)
 {
+       struct s5p_mfc_pm *pm = &dev->pm;
        int i;
 
-       pm = &dev->pm;
-       p_dev = dev;
-
        pm->num_clocks = dev->variant->num_clocks;
        pm->clk_names = dev->variant->clk_names;
        pm->device = &dev->plat_dev->dev;
@@ -49,70 +43,63 @@ int s5p_mfc_init_pm(struct s5p_mfc_dev *dev)
                pm->clock_gate = pm->clocks[0];
 
        pm_runtime_enable(pm->device);
-       atomic_set(&clk_ref, 0);
        return 0;
 }
 
 void s5p_mfc_final_pm(struct s5p_mfc_dev *dev)
 {
-       pm_runtime_disable(pm->device);
+       pm_runtime_disable(dev->pm.device);
 }
 
-int s5p_mfc_clock_on(void)
+int s5p_mfc_clock_on(struct s5p_mfc_dev *dev)
 {
-       atomic_inc(&clk_ref);
-       mfc_debug(3, "+ %d\n", atomic_read(&clk_ref));
-
-       return clk_enable(pm->clock_gate);
+       return clk_enable(dev->pm.clock_gate);
 }
 
-void s5p_mfc_clock_off(void)
+void s5p_mfc_clock_off(struct s5p_mfc_dev *dev)
 {
-       atomic_dec(&clk_ref);
-       mfc_debug(3, "- %d\n", atomic_read(&clk_ref));
-
-       clk_disable(pm->clock_gate);
+       clk_disable(dev->pm.clock_gate);
 }
 
-int s5p_mfc_power_on(void)
+int s5p_mfc_power_on(struct s5p_mfc_dev *dev)
 {
        int i, ret = 0;
 
-       ret = pm_runtime_resume_and_get(pm->device);
+       ret = pm_runtime_resume_and_get(dev->pm.device);
        if (ret < 0)
                return ret;
 
        /* clock control */
-       for (i = 0; i < pm->num_clocks; i++) {
-               ret = clk_prepare_enable(pm->clocks[i]);
+       for (i = 0; i < dev->pm.num_clocks; i++) {
+               ret = clk_prepare_enable(dev->pm.clocks[i]);
                if (ret < 0) {
                        mfc_err("clock prepare failed for clock: %s\n",
-                               pm->clk_names[i]);
+                               dev->pm.clk_names[i]);
                        goto err;
                }
        }
 
        /* prepare for software clock gating */
-       clk_disable(pm->clock_gate);
+       clk_disable(dev->pm.clock_gate);
 
        return 0;
 err:
        while (--i >= 0)
-               clk_disable_unprepare(pm->clocks[i]);
-       pm_runtime_put(pm->device);
+               clk_disable_unprepare(dev->pm.clocks[i]);
+       pm_runtime_put(dev->pm.device);
        return ret;
 }
 
-int s5p_mfc_power_off(void)
+int s5p_mfc_power_off(struct s5p_mfc_dev *dev)
 {
        int i;
 
        /* finish software clock gating */
-       clk_enable(pm->clock_gate);
+       clk_enable(dev->pm.clock_gate);
 
-       for (i = 0; i < pm->num_clocks; i++)
-               clk_disable_unprepare(pm->clocks[i]);
+       for (i = 0; i < dev->pm.num_clocks; i++)
+               clk_disable_unprepare(dev->pm.clocks[i]);
 
-       return pm_runtime_put_sync(pm->device);
+       return pm_runtime_put_sync(dev->pm.device);
 }
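
The pm.c rewrite above drops three file-scope statics (pm, p_dev and the clk_ref debug counter) so nothing is shared between device instances; every helper now derives its state from the dev argument. Reduced to its essentials, with illustrative names:

#include <linux/clk.h>

struct example_pm {
        struct clk *clock_gate;
};

struct example_dev {
        struct example_pm pm;
};

/* all state comes in through dev; two probed instances cannot collide */
static int example_clock_on(struct example_dev *dev)
{
        return clk_enable(dev->pm.clock_gate);
}

static void example_clock_off(struct example_dev *dev)
{
        clk_disable(dev->pm.clock_gate);
}
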
 
index 4159d2364e8729a61db56804f9fef2f690717afb..9c71036f038534d7c36ec9ecf659412b213ccf8b 100644
@@ -12,9 +12,9 @@
 int s5p_mfc_init_pm(struct s5p_mfc_dev *dev);
 void s5p_mfc_final_pm(struct s5p_mfc_dev *dev);
 
-int s5p_mfc_clock_on(void);
-void s5p_mfc_clock_off(void);
-int s5p_mfc_power_on(void);
-int s5p_mfc_power_off(void);
+int s5p_mfc_clock_on(struct s5p_mfc_dev *dev);
+void s5p_mfc_clock_off(struct s5p_mfc_dev *dev);
+int s5p_mfc_power_on(struct s5p_mfc_dev *dev);
+int s5p_mfc_power_off(struct s5p_mfc_dev *dev);
 
 #endif /* S5P_MFC_PM_H_ */
index c4610e3055461a4400191dcd38827a7734d2ad88..ff3331af9406890147f02d497c06f093da6a4ac3 100644
@@ -1855,7 +1855,7 @@ static int dcmi_graph_init(struct stm32_dcmi *dcmi)
        struct device_node *ep;
        int ret;
 
-       ep = of_graph_get_next_endpoint(dcmi->dev->of_node, NULL);
+       ep = of_graph_get_endpoint_by_regs(dcmi->dev->of_node, 0, -1);
        if (!ep) {
                dev_err(dcmi->dev, "Failed to get next endpoint\n");
                return -EINVAL;
@@ -1907,7 +1907,7 @@ static int dcmi_probe(struct platform_device *pdev)
                                     "Could not get reset control\n");
 
        /* Get bus characteristics from devicetree */
-       np = of_graph_get_next_endpoint(np, NULL);
+       np = of_graph_get_endpoint_by_regs(np, 0, -1);
        if (!np) {
                dev_err(&pdev->dev, "Could not find the endpoint\n");
                return -ENODEV;
index 32c6619be9a26426b3a1f4760be81cde73c8bafd..bce821eb71cec59041827616106a93abea92f94b 100644
@@ -517,7 +517,7 @@ static int dcmipp_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int dcmipp_remove(struct platform_device *pdev)
+static void dcmipp_remove(struct platform_device *pdev)
 {
        struct dcmipp_device *dcmipp = platform_get_drvdata(pdev);
        unsigned int i;
@@ -534,8 +534,6 @@ static int dcmipp_remove(struct platform_device *pdev)
        media_device_cleanup(&dcmipp->mdev);
 
        v4l2_device_unregister(&dcmipp->v4l2_dev);
-
-       return 0;
 }
 
 static int dcmipp_runtime_suspend(struct device *dev)
@@ -588,7 +586,7 @@ static const struct dev_pm_ops dcmipp_pm_ops = {
 
 static struct platform_driver dcmipp_pdrv = {
        .probe          = dcmipp_probe,
-       .remove         = dcmipp_remove,
+       .remove_new     = dcmipp_remove,
        .driver         = {
                .name   = DCMIPP_PDEV_NAME,
                .of_match_table = dcmipp_of_match,
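
dcmipp_remove() is converted to the void-returning remove callback: the return value of a platform remove was effectively ignored by the core anyway, so .remove_new makes that explicit. A minimal skeleton of the convention, using a hypothetical "example" driver:

#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
        return 0;
}

/* nothing useful can be done with an error here, so return void */
static void example_remove(struct platform_device *pdev)
{
}

static struct platform_driver example_driver = {
        .probe          = example_probe,
        .remove_new     = example_remove,
        .driver         = {
                .name   = "example",
        },
};
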
index 954fabec27f63f169b197564a03eddea9ea5040d..a1c35a2b68ed9017ba4de6a089fbcf62ee623f9c 100644
@@ -66,6 +66,7 @@ static void deinterlace_device_run(void *priv)
        struct vb2_v4l2_buffer *src, *dst;
        unsigned int hstep, vstep;
        dma_addr_t addr;
+       int i;
 
        src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
        dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
@@ -160,6 +161,26 @@ static void deinterlace_device_run(void *priv)
        deinterlace_write(dev, DEINTERLACE_CH1_HORZ_FACT, hstep);
        deinterlace_write(dev, DEINTERLACE_CH1_VERT_FACT, vstep);
 
+       /* neutral filter coefficients */
+       deinterlace_set_bits(dev, DEINTERLACE_FRM_CTRL,
+                            DEINTERLACE_FRM_CTRL_COEF_ACCESS);
+       readl_poll_timeout(dev->base + DEINTERLACE_STATUS, val,
+                          val & DEINTERLACE_STATUS_COEF_STATUS, 2, 40);
+
+       for (i = 0; i < 32; i++) {
+               deinterlace_write(dev, DEINTERLACE_CH0_HORZ_COEF0 + i * 4,
+                                 DEINTERLACE_IDENTITY_COEF);
+               deinterlace_write(dev, DEINTERLACE_CH0_VERT_COEF + i * 4,
+                                 DEINTERLACE_IDENTITY_COEF);
+               deinterlace_write(dev, DEINTERLACE_CH1_HORZ_COEF0 + i * 4,
+                                 DEINTERLACE_IDENTITY_COEF);
+               deinterlace_write(dev, DEINTERLACE_CH1_VERT_COEF + i * 4,
+                                 DEINTERLACE_IDENTITY_COEF);
+       }
+
+       deinterlace_clr_set_bits(dev, DEINTERLACE_FRM_CTRL,
+                                DEINTERLACE_FRM_CTRL_COEF_ACCESS, 0);
+
        deinterlace_clr_set_bits(dev, DEINTERLACE_FIELD_CTRL,
                                 DEINTERLACE_FIELD_CTRL_FIELD_CNT_MSK,
                                 DEINTERLACE_FIELD_CTRL_FIELD_CNT(ctx->field));
@@ -248,7 +269,6 @@ static irqreturn_t deinterlace_irq(int irq, void *data)
 static void deinterlace_init(struct deinterlace_dev *dev)
 {
        u32 val;
-       int i;
 
        deinterlace_write(dev, DEINTERLACE_BYPASS,
                          DEINTERLACE_BYPASS_CSC);
@@ -284,27 +304,7 @@ static void deinterlace_init(struct deinterlace_dev *dev)
 
        deinterlace_clr_set_bits(dev, DEINTERLACE_CHROMA_DIFF,
                                 DEINTERLACE_CHROMA_DIFF_TH_MSK,
-                                DEINTERLACE_CHROMA_DIFF_TH(5));
-
-       /* neutral filter coefficients */
-       deinterlace_set_bits(dev, DEINTERLACE_FRM_CTRL,
-                            DEINTERLACE_FRM_CTRL_COEF_ACCESS);
-       readl_poll_timeout(dev->base + DEINTERLACE_STATUS, val,
-                          val & DEINTERLACE_STATUS_COEF_STATUS, 2, 40);
-
-       for (i = 0; i < 32; i++) {
-               deinterlace_write(dev, DEINTERLACE_CH0_HORZ_COEF0 + i * 4,
-                                 DEINTERLACE_IDENTITY_COEF);
-               deinterlace_write(dev, DEINTERLACE_CH0_VERT_COEF + i * 4,
-                                 DEINTERLACE_IDENTITY_COEF);
-               deinterlace_write(dev, DEINTERLACE_CH1_HORZ_COEF0 + i * 4,
-                                 DEINTERLACE_IDENTITY_COEF);
-               deinterlace_write(dev, DEINTERLACE_CH1_VERT_COEF + i * 4,
-                                 DEINTERLACE_IDENTITY_COEF);
-       }
-
-       deinterlace_clr_set_bits(dev, DEINTERLACE_FRM_CTRL,
-                                DEINTERLACE_FRM_CTRL_COEF_ACCESS, 0);
+                                DEINTERLACE_CHROMA_DIFF_TH(31));
 }
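
The two hunks above move the identity-coefficient programming out of deinterlace_init() and into deinterlace_device_run(), presumably so the coefficients are rewritten for every job rather than only once and thus survive runtime power cycles; the writes are gated by a coefficient-access bit that the hardware must acknowledge first. A compressed sketch of that handshake, with placeholder register and bit names rather than the real sun8i-di offsets:

#include <linux/bits.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/types.h>

#define EX_FRM_CTRL             0x00
#define EX_FRM_CTRL_COEF_ACCESS BIT(23)
#define EX_STATUS               0x04
#define EX_STATUS_COEF_READY    BIT(11)
#define EX_COEF_BASE            0x40
#define EX_IDENTITY_COEF        0x4000

static int example_write_coefs(void __iomem *base)
{
        u32 val;
        int i, ret;

        /* open the coefficient window and wait for the ack bit */
        writel(readl(base + EX_FRM_CTRL) | EX_FRM_CTRL_COEF_ACCESS,
               base + EX_FRM_CTRL);
        ret = readl_poll_timeout(base + EX_STATUS, val,
                                 val & EX_STATUS_COEF_READY, 2, 40);
        if (ret)
                return ret;

        for (i = 0; i < 32; i++)
                writel(EX_IDENTITY_COEF, base + EX_COEF_BASE + i * 4);

        /* close the window again so normal operation resumes */
        writel(readl(base + EX_FRM_CTRL) & ~EX_FRM_CTRL_COEF_ACCESS,
               base + EX_FRM_CTRL);
        return 0;
}
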
 
 static inline struct deinterlace_ctx *deinterlace_file2ctx(struct file *file)
@@ -929,11 +929,18 @@ static int deinterlace_runtime_resume(struct device *device)
                return ret;
        }
 
+       ret = reset_control_deassert(dev->rstc);
+       if (ret) {
+               dev_err(dev->dev, "Failed to apply reset\n");
+
+               goto err_exclusive_rate;
+       }
+
        ret = clk_prepare_enable(dev->bus_clk);
        if (ret) {
                dev_err(dev->dev, "Failed to enable bus clock\n");
 
-               goto err_exclusive_rate;
+               goto err_rst;
        }
 
        ret = clk_prepare_enable(dev->mod_clk);
@@ -950,23 +957,16 @@ static int deinterlace_runtime_resume(struct device *device)
                goto err_mod_clk;
        }
 
-       ret = reset_control_deassert(dev->rstc);
-       if (ret) {
-               dev_err(dev->dev, "Failed to apply reset\n");
-
-               goto err_ram_clk;
-       }
-
        deinterlace_init(dev);
 
        return 0;
 
-err_ram_clk:
-       clk_disable_unprepare(dev->ram_clk);
 err_mod_clk:
        clk_disable_unprepare(dev->mod_clk);
 err_bus_clk:
        clk_disable_unprepare(dev->bus_clk);
+err_rst:
+       reset_control_assert(dev->rstc);
 err_exclusive_rate:
        clk_rate_exclusive_put(dev->mod_clk);
 
@@ -977,11 +977,12 @@ static int deinterlace_runtime_suspend(struct device *device)
 {
        struct deinterlace_dev *dev = dev_get_drvdata(device);
 
-       reset_control_assert(dev->rstc);
-
        clk_disable_unprepare(dev->ram_clk);
        clk_disable_unprepare(dev->mod_clk);
        clk_disable_unprepare(dev->bus_clk);
+
+       reset_control_assert(dev->rstc);
+
        clk_rate_exclusive_put(dev->mod_clk);
 
        return 0;
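
Taken together, these hunks make the runtime PM sequences symmetric:
resume now deasserts the reset line before enabling any clock, the resume
error unwinding runs in exact reverse order, and suspend mirrors resume
step for step. A condensed sketch of that invariant (sketch_dev and its
fields are illustrative, not the driver's real types):

#include <linux/clk.h>
#include <linux/reset.h>

struct sketch_dev {
        struct reset_control *rstc;
        struct clk *bus_clk, *mod_clk;
};

static int sketch_resume(struct sketch_dev *d)
{
        int ret;

        ret = reset_control_deassert(d->rstc);          /* step A */
        if (ret)
                return ret;
        ret = clk_prepare_enable(d->bus_clk);           /* step B */
        if (ret)
                goto err_rst;
        ret = clk_prepare_enable(d->mod_clk);           /* step C */
        if (ret)
                goto err_bus;
        return 0;

err_bus:
        clk_disable_unprepare(d->bus_clk);
err_rst:
        reset_control_assert(d->rstc);
        return ret;
}

static int sketch_suspend(struct sketch_dev *d)
{
        clk_disable_unprepare(d->mod_clk);              /* undo C */
        clk_disable_unprepare(d->bus_clk);              /* undo B */
        reset_control_assert(d->rstc);                  /* undo A */
        return 0;
}
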
index 63cdfed37bc9bcbff5492aa4f0b5a631c451759b..f4e1fa76bf3724fe75443cede62ca96ba6908143 100644 (file)
@@ -465,8 +465,7 @@ static int vpif_probe(struct platform_device *pdev)
         * so their devices need to be registered manually here
         * for their legacy platform_drivers to work.
         */
-       endpoint = of_graph_get_next_endpoint(pdev->dev.of_node,
-                                             endpoint);
+       endpoint = of_graph_get_endpoint_by_regs(pdev->dev.of_node, 0, -1);
        if (!endpoint)
                return 0;
        of_node_put(endpoint);
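
of_graph_get_endpoint_by_regs(node, port, endpoint) looks an endpoint up
by its reg values, with -1 acting as a wildcard, so the new call means
"any endpoint of port 0" and drops the iterator-style previous-endpoint
argument of of_graph_get_next_endpoint(). The returned node carries a
reference, hence the of_node_put() above. The lookup in isolation, as a
hedged sketch:

#include <linux/of_graph.h>

static bool sketch_has_port0_endpoint(struct device_node *node)
{
        /* port 0, any endpoint (-1 is a wildcard) */
        struct device_node *ep = of_graph_get_endpoint_by_regs(node, 0, -1);

        if (!ep)
                return false;
        of_node_put(ep);        /* drop the reference taken by the lookup */
        return true;
}
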
index 59b30fc4314408aff806bb0ce697378c8d991a40..6da83d0cffaaedb41a0190485bd389a0f2228448 100644 (file)
@@ -158,6 +158,12 @@ static const struct ti_csi2rx_fmt ti_csi2rx_formats[] = {
                .csi_dt                 = MIPI_CSI2_DT_RAW8,
                .bpp                    = 8,
                .size                   = SHIM_DMACNTX_SIZE_8,
+       }, {
+               .fourcc                 = V4L2_PIX_FMT_GREY,
+               .code                   = MEDIA_BUS_FMT_Y8_1X8,
+               .csi_dt                 = MIPI_CSI2_DT_RAW8,
+               .bpp                    = 8,
+               .size                   = SHIM_DMACNTX_SIZE_8,
        }, {
                .fourcc                 = V4L2_PIX_FMT_SBGGR10,
                .code                   = MEDIA_BUS_FMT_SBGGR10_1X10,
@@ -182,6 +188,24 @@ static const struct ti_csi2rx_fmt ti_csi2rx_formats[] = {
                .csi_dt                 = MIPI_CSI2_DT_RAW10,
                .bpp                    = 16,
                .size                   = SHIM_DMACNTX_SIZE_16,
+       }, {
+               .fourcc                 = V4L2_PIX_FMT_RGB565X,
+               .code                   = MEDIA_BUS_FMT_RGB565_1X16,
+               .csi_dt                 = MIPI_CSI2_DT_RGB565,
+               .bpp                    = 16,
+               .size                   = SHIM_DMACNTX_SIZE_16,
+       }, {
+               .fourcc                 = V4L2_PIX_FMT_XBGR32,
+               .code                   = MEDIA_BUS_FMT_RGB888_1X24,
+               .csi_dt                 = MIPI_CSI2_DT_RGB888,
+               .bpp                    = 32,
+               .size                   = SHIM_DMACNTX_SIZE_32,
+       }, {
+               .fourcc                 = V4L2_PIX_FMT_RGBX32,
+               .code                   = MEDIA_BUS_FMT_BGR888_1X24,
+               .csi_dt                 = MIPI_CSI2_DT_RGB888,
+               .bpp                    = 32,
+               .size                   = SHIM_DMACNTX_SIZE_32,
        },
 
        /* More formats can be supported but they are not listed for now. */
@@ -1065,7 +1089,6 @@ static void ti_csi2rx_cleanup_vb2q(struct ti_csi2rx_dev *csi)
 static int ti_csi2rx_probe(struct platform_device *pdev)
 {
        struct ti_csi2rx_dev *csi;
-       struct resource *res;
        int ret;
 
        csi = devm_kzalloc(&pdev->dev, sizeof(*csi), GFP_KERNEL);
@@ -1076,9 +1099,7 @@ static int ti_csi2rx_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, csi);
 
        mutex_init(&csi->mutex);
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       csi->shim = devm_ioremap_resource(&pdev->dev, res);
+       csi->shim = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(csi->shim)) {
                ret = PTR_ERR(csi->shim);
                goto err_mutex;
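
devm_platform_ioremap_resource(pdev, index) folds the removed
platform_get_resource() + devm_ioremap_resource() pair into a single call
and still returns an ERR_PTR on any failure, so the IS_ERR() check below
it is unchanged. The helper is roughly equivalent to this sketch:

static void __iomem *sketch_ioremap(struct platform_device *pdev,
                                    unsigned int index)
{
        struct resource *res;

        res = platform_get_resource(pdev, IORESOURCE_MEM, index);
        /* devm_ioremap_resource() maps a NULL resource to -EINVAL */
        return devm_ioremap_resource(&pdev->dev, res);
}
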
@@ -1121,7 +1142,7 @@ err_mutex:
        return ret;
 }
 
-static int ti_csi2rx_remove(struct platform_device *pdev)
+static void ti_csi2rx_remove(struct platform_device *pdev)
 {
        struct ti_csi2rx_dev *csi = platform_get_drvdata(pdev);
 
@@ -1133,8 +1154,6 @@ static int ti_csi2rx_remove(struct platform_device *pdev)
        ti_csi2rx_cleanup_dma(csi);
 
        mutex_destroy(&csi->mutex);
-
-       return 0;
 }
 
 static const struct of_device_id ti_csi2rx_of_match[] = {
@@ -1145,7 +1164,7 @@ MODULE_DEVICE_TABLE(of, ti_csi2rx_of_match);
 
 static struct platform_driver ti_csi2rx_pdrv = {
        .probe = ti_csi2rx_probe,
-       .remove = ti_csi2rx_remove,
+       .remove_new = ti_csi2rx_remove,
        .driver = {
                .name = TI_CSI2RX_MODULE_NAME,
                .of_match_table = ti_csi2rx_of_match,
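
The .remove_new conversion switches to the void-returning variant of the
platform remove callback: the integer the old .remove returned was
ignored by the driver core apart from a warning, so the new prototype
simply makes it impossible to pretend the removal can fail. The converted
shape, with illustrative names throughout:

struct sketch_dev;                                      /* illustrative */
static void sketch_teardown(struct sketch_dev *d);      /* illustrative */
static int sketch_probe(struct platform_device *pdev);  /* illustrative */

static void sketch_remove(struct platform_device *pdev)
{
        struct sketch_dev *d = platform_get_drvdata(pdev);

        /* tear down unconditionally; remove has no error path */
        sketch_teardown(d);
}

static struct platform_driver sketch_driver = {
        .probe = sketch_probe,
        .remove_new = sketch_remove,
};
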
index 24b927d8f182e20deab7b5a5b51f5f3fb4f04ba1..9a34d14c6e40436bbd98ec8e7247a582fc28654b 100644 (file)
@@ -4,7 +4,7 @@ comment "Verisilicon media platform drivers"
 
 config VIDEO_HANTRO
        tristate "Hantro VPU driver"
-       depends on ARCH_MXC || ARCH_ROCKCHIP || ARCH_AT91 || ARCH_SUNXI || COMPILE_TEST
+       depends on ARCH_MXC || ARCH_ROCKCHIP || ARCH_AT91 || ARCH_SUNXI || ARCH_STM32 || COMPILE_TEST
        depends on V4L_MEM2MEM_DRIVERS
        depends on VIDEO_DEV
        select MEDIA_CONTROLLER
@@ -15,8 +15,8 @@ config VIDEO_HANTRO
        select V4L2_VP9
        help
          Support for the Hantro IP based Video Processing Units present on
-         Rockchip and NXP i.MX8M SoCs, which accelerate video and image
-         encoding and decoding.
+         Rockchip, NXP i.MX8M and STM32MP25 SoCs, which accelerate video
+         and image encoding and decoding.
          To compile this driver as a module, choose M here: the module
          will be called hantro-vpu.
 
@@ -51,3 +51,11 @@ config VIDEO_HANTRO_SUNXI
        default y
        help
          Enable support for H6 SoC.
+
+config VIDEO_HANTRO_STM32MP25
+       bool "Hantro STM32MP25 support"
+       depends on VIDEO_HANTRO
+       depends on ARCH_STM32 || COMPILE_TEST
+       default y
+       help
+         Enable support for STM32MP25 SoCs.
index 6ad2ef885920be35450d24d209d719de2eb8ac09..eb38a1833b02fa1c438d246b8e67ab26cbff375a 100644 (file)
@@ -39,3 +39,6 @@ hantro-vpu-$(CONFIG_VIDEO_HANTRO_ROCKCHIP) += \
 
 hantro-vpu-$(CONFIG_VIDEO_HANTRO_SUNXI) += \
                sunxi_vpu_hw.o
+
+hantro-vpu-$(CONFIG_VIDEO_HANTRO_STM32MP25) += \
+               stm32mp25_vpu_hw.o
index 6f5eb975d0e3300160180318a74528df5b77d96e..811260dc3c777a2df1a589e35d2074cc2ef125a2 100644 (file)
@@ -237,7 +237,6 @@ struct hantro_dev {
  * @codec_ops:         Set of operations related to codec mode.
  * @postproc:          Post-processing context.
  * @h264_dec:          H.264-decoding context.
- * @jpeg_enc:          JPEG-encoding context.
  * @mpeg2_dec:         MPEG-2-decoding context.
  * @vp8_dec:           VP8-decoding context.
  * @hevc_dec:          HEVC-decoding context.
index db3df6cc4513be898bff642e5c0d5bd255370064..34b123dafd890b0d1c5a9e1e4ee4d613e6c25017 100644 (file)
@@ -735,6 +735,10 @@ static const struct of_device_id of_hantro_match[] = {
 #endif
 #ifdef CONFIG_VIDEO_HANTRO_SUNXI
        { .compatible = "allwinner,sun50i-h6-vpu-g2", .data = &sunxi_vpu_variant, },
+#endif
+#ifdef CONFIG_VIDEO_HANTRO_STM32MP25
+       { .compatible = "st,stm32mp25-vdec", .data = &stm32mp25_vdec_variant, },
+       { .compatible = "st,stm32mp25-venc", .data = &stm32mp25_venc_variant, },
 #endif
        { /* sentinel */ }
 };
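
Each new of_device_id entry carries its hantro_variant in .data, and the
matched variant is what later selects clocks, interrupts and codec_ops.
A hedged sketch of the usual retrieval at probe time (the real hantro
probe code is outside this hunk):

#include <linux/of_device.h>

static const struct hantro_variant *sketch_get_variant(struct platform_device *pdev)
{
        /* .data of the matched of_device_id entry, or NULL if none */
        return of_device_get_match_data(&pdev->dev);
}
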
index 9de7f05eff2a6fe115a8a4d3d3669016d6df5319..ad5c1a6634f5c816740d3220ade01632a5cd6c0b 100644 (file)
@@ -243,7 +243,7 @@ static void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf)
                vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DIR_MV);
        }
 
-       /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */
+       /* Auxiliary buffer prepared in hantro_h264_dec_init(). */
        vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, G1_REG_ADDR_QTABLE);
 }
 
index 9aec8a79acdca4b39849872fa679a8644113a640..7737320cc8cc622dc857facaa24a9bb78ce1101e 100644 (file)
@@ -408,6 +408,8 @@ extern const struct hantro_variant rk3568_vpu_variant;
 extern const struct hantro_variant rk3588_vpu981_variant;
 extern const struct hantro_variant sama5d4_vdec_variant;
 extern const struct hantro_variant sunxi_vpu_variant;
+extern const struct hantro_variant stm32mp25_vdec_variant;
+extern const struct hantro_variant stm32mp25_venc_variant;
 
 extern const struct hantro_postproc_ops hantro_g1_postproc_ops;
 extern const struct hantro_postproc_ops hantro_g2_postproc_ops;
index 46c1a83bcc4e009d065b13de04634a7df1d7ccdc..6da87f5184bcbc777788fe5321bca46da36c647f 100644 (file)
@@ -460,7 +460,7 @@ static void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf)
                vdpu_write_relaxed(vpu, dst_dma + offset, VDPU_REG_DIR_MV_BASE);
        }
 
-       /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */
+       /* Auxiliary buffer prepared in hantro_h264_dec_init(). */
        vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, VDPU_REG_QTABLE_BASE);
 }
 
index 182e6c830ff696d06b97431371750b4eedb413ff..850ff0f844248d7b74b5dc1aa84356531ff25d58 100644 (file)
 #define av1_mcomp_filt_type            AV1_DEC_REG(11, 8, 0x7)
 #define av1_multicore_expect_context_update    AV1_DEC_REG(11, 11, 0x1)
 #define av1_multicore_sbx_offset       AV1_DEC_REG(11, 12, 0x7f)
-#define av1_ulticore_tile_col          AV1_DEC_REG(11, 19, 0x7f)
+#define av1_multicore_tile_col         AV1_DEC_REG(11, 19, 0x7f)
 #define av1_transform_mode             AV1_DEC_REG(11, 27, 0x7)
 #define av1_dec_tile_size_mag          AV1_DEC_REG(11, 30, 0x3)
 
diff --git a/drivers/media/platform/verisilicon/stm32mp25_vpu_hw.c b/drivers/media/platform/verisilicon/stm32mp25_vpu_hw.c
new file mode 100644 (file)
index 0000000..8338211
--- /dev/null
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * STM32MP25 video codec driver
+ *
+ * Copyright (C) STMicroelectronics SA 2024
+ * Authors: Hugues Fruchet <hugues.fruchet@foss.st.com>
+ *          for STMicroelectronics.
+ *
+ */
+
+#include "hantro.h"
+#include "hantro_jpeg.h"
+#include "hantro_h1_regs.h"
+
+/*
+ * Supported formats.
+ */
+
+static const struct hantro_fmt stm32mp25_vdec_fmts[] = {
+       {
+               .fourcc = V4L2_PIX_FMT_NV12,
+               .codec_mode = HANTRO_MODE_NONE,
+               .frmsize = {
+                       .min_width = FMT_MIN_WIDTH,
+                       .max_width = FMT_FHD_WIDTH,
+                       .step_width = MB_DIM,
+                       .min_height = FMT_MIN_HEIGHT,
+                       .max_height = FMT_FHD_HEIGHT,
+                       .step_height = MB_DIM,
+               },
+       },
+       {
+               .fourcc = V4L2_PIX_FMT_VP8_FRAME,
+               .codec_mode = HANTRO_MODE_VP8_DEC,
+               .max_depth = 2,
+               .frmsize = {
+                       .min_width = FMT_MIN_WIDTH,
+                       .max_width = FMT_FHD_WIDTH,
+                       .step_width = MB_DIM,
+                       .min_height = FMT_MIN_HEIGHT,
+                       .max_height = FMT_FHD_HEIGHT,
+                       .step_height = MB_DIM,
+               },
+       },
+       {
+               .fourcc = V4L2_PIX_FMT_H264_SLICE,
+               .codec_mode = HANTRO_MODE_H264_DEC,
+               .max_depth = 2,
+               .frmsize = {
+                       .min_width = FMT_MIN_WIDTH,
+                       .max_width = FMT_FHD_WIDTH,
+                       .step_width = MB_DIM,
+                       .min_height = FMT_MIN_HEIGHT,
+                       .max_height = FMT_FHD_HEIGHT,
+                       .step_height = MB_DIM,
+               },
+       },
+};
+
+static const struct hantro_fmt stm32mp25_venc_fmts[] = {
+       {
+               .fourcc = V4L2_PIX_FMT_YUV420M,
+               .codec_mode = HANTRO_MODE_NONE,
+               .enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUV420P,
+       },
+       {
+               .fourcc = V4L2_PIX_FMT_NV12M,
+               .codec_mode = HANTRO_MODE_NONE,
+               .enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUV420SP,
+       },
+       {
+               .fourcc = V4L2_PIX_FMT_YUYV,
+               .codec_mode = HANTRO_MODE_NONE,
+               .enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUYV422,
+       },
+       {
+               .fourcc = V4L2_PIX_FMT_UYVY,
+               .codec_mode = HANTRO_MODE_NONE,
+               .enc_fmt = ROCKCHIP_VPU_ENC_FMT_UYVY422,
+       },
+       {
+               .fourcc = V4L2_PIX_FMT_JPEG,
+               .codec_mode = HANTRO_MODE_JPEG_ENC,
+               .max_depth = 2,
+               .header_size = JPEG_HEADER_SIZE,
+               .frmsize = {
+                       .min_width = 96,
+                       .max_width = FMT_4K_WIDTH,
+                       .step_width = MB_DIM,
+                       .min_height = 96,
+                       .max_height = FMT_4K_HEIGHT,
+                       .step_height = MB_DIM,
+               },
+       },
+};
+
+static irqreturn_t stm32mp25_venc_irq(int irq, void *dev_id)
+{
+       struct hantro_dev *vpu = dev_id;
+       enum vb2_buffer_state state;
+       u32 status;
+
+       status = vepu_read(vpu, H1_REG_INTERRUPT);
+       state = (status & H1_REG_INTERRUPT_FRAME_RDY) ?
+               VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+       vepu_write(vpu, H1_REG_INTERRUPT_BIT, H1_REG_INTERRUPT);
+
+       hantro_irq_done(vpu, state);
+
+       return IRQ_HANDLED;
+}
+
+static void stm32mp25_venc_reset(struct hantro_ctx *ctx)
+{
+       struct hantro_dev *vpu = ctx->dev;
+
+       reset_control_reset(vpu->resets);
+}
+
+/*
+ * Supported codec ops.
+ */
+
+static const struct hantro_codec_ops stm32mp25_vdec_codec_ops[] = {
+       [HANTRO_MODE_VP8_DEC] = {
+               .run = hantro_g1_vp8_dec_run,
+               .reset = hantro_g1_reset,
+               .init = hantro_vp8_dec_init,
+               .exit = hantro_vp8_dec_exit,
+       },
+       [HANTRO_MODE_H264_DEC] = {
+               .run = hantro_g1_h264_dec_run,
+               .reset = hantro_g1_reset,
+               .init = hantro_h264_dec_init,
+               .exit = hantro_h264_dec_exit,
+       },
+};
+
+static const struct hantro_codec_ops stm32mp25_venc_codec_ops[] = {
+       [HANTRO_MODE_JPEG_ENC] = {
+               .run = hantro_h1_jpeg_enc_run,
+               .reset = stm32mp25_venc_reset,
+               .done = hantro_h1_jpeg_enc_done,
+       },
+};
+
+/*
+ * Variants.
+ */
+
+static const struct hantro_irq stm32mp25_vdec_irqs[] = {
+       { "vdec", hantro_g1_irq },
+};
+
+static const char * const stm32mp25_vdec_clk_names[] = { "vdec-clk" };
+
+const struct hantro_variant stm32mp25_vdec_variant = {
+       .dec_fmts = stm32mp25_vdec_fmts,
+       .num_dec_fmts = ARRAY_SIZE(stm32mp25_vdec_fmts),
+       .codec = HANTRO_VP8_DECODER | HANTRO_H264_DECODER,
+       .codec_ops = stm32mp25_vdec_codec_ops,
+       .irqs = stm32mp25_vdec_irqs,
+       .num_irqs = ARRAY_SIZE(stm32mp25_vdec_irqs),
+       .clk_names = stm32mp25_vdec_clk_names,
+       .num_clocks = ARRAY_SIZE(stm32mp25_vdec_clk_names),
+};
+
+static const struct hantro_irq stm32mp25_venc_irqs[] = {
+       { "venc", stm32mp25_venc_irq },
+};
+
+static const char * const stm32mp25_venc_clk_names[] = {
+       "venc-clk"
+};
+
+const struct hantro_variant stm32mp25_venc_variant = {
+       .enc_fmts = stm32mp25_venc_fmts,
+       .num_enc_fmts = ARRAY_SIZE(stm32mp25_venc_fmts),
+       .codec = HANTRO_JPEG_ENCODER,
+       .codec_ops = stm32mp25_venc_codec_ops,
+       .irqs = stm32mp25_venc_irqs,
+       .num_irqs = ARRAY_SIZE(stm32mp25_venc_irqs),
+       .clk_names = stm32mp25_venc_clk_names,
+       .num_clocks = ARRAY_SIZE(stm32mp25_venc_clk_names)
+};
index 93ef78bf62e6dd1b09e89efe653bf241aadf401d..601edd9acd5b91d40c7bbb2f2bb78c97ac659064 100644 (file)
@@ -26,10 +26,10 @@ config VIDEO_XILINX_TPG
        depends on VIDEO_XILINX
        select VIDEO_XILINX_VTC
        help
-          Driver for the Xilinx Video Test Pattern Generator
+         Driver for the Xilinx Video Test Pattern Generator
 
 config VIDEO_XILINX_VTC
        tristate "Xilinx Video Timing Controller"
        depends on VIDEO_XILINX
        help
-          Driver for the Xilinx Video Timing Controller
+         Driver for the Xilinx Video Timing Controller
index 1ce682e1b85c32d3020031b7daef995160c8e34f..fd75457d03b202bd150c4d9218c480aa9c204464 100644 (file)
@@ -49,7 +49,7 @@ static const uint8_t zigzag[64] = {
 
 /*
  * noinline_for_stack to work around
- * https://bugs.llvm.org/show_bug.cgi?id=38809
+ * https://llvm.org/pr38809
  */
 static int noinline_for_stack
 rlc(const s16 *in, __be16 *output, int blocktype)
index 8b04e12af286cc7ff4d0dccea1e9cc8e7ff3a35a..613949df897d34ea7c00da3803a79c964066f16e 100644 (file)
 #define LNB_HIGH_FREQ          10600000        /* transition frequency */
 
 static unsigned int drop_tslock_prob_on_low_snr;
-module_param(drop_tslock_prob_on_low_snr, uint, 0);
+module_param(drop_tslock_prob_on_low_snr, uint, 0444);
 MODULE_PARM_DESC(drop_tslock_prob_on_low_snr,
                 "Probability of losing the TS lock if the signal quality is bad");
 
 static unsigned int recover_tslock_prob_on_good_snr;
-module_param(recover_tslock_prob_on_good_snr, uint, 0);
+module_param(recover_tslock_prob_on_good_snr, uint, 0444);
 MODULE_PARM_DESC(recover_tslock_prob_on_good_snr,
                 "Probability recovering the TS lock when the signal improves");
 
 static unsigned int mock_power_up_delay_msec;
-module_param(mock_power_up_delay_msec, uint, 0);
+module_param(mock_power_up_delay_msec, uint, 0444);
 MODULE_PARM_DESC(mock_power_up_delay_msec, "Simulate a power up delay");
 
 static unsigned int mock_tune_delay_msec;
-module_param(mock_tune_delay_msec, uint, 0);
+module_param(mock_tune_delay_msec, uint, 0444);
 MODULE_PARM_DESC(mock_tune_delay_msec, "Simulate a tune delay");
 
 static unsigned int vidtv_valid_dvb_t_freqs[NUM_VALID_TUNER_FREQS] = {
        474000000
 };
 
-module_param_array(vidtv_valid_dvb_t_freqs, uint, NULL, 0);
+module_param_array(vidtv_valid_dvb_t_freqs, uint, NULL, 0444);
 MODULE_PARM_DESC(vidtv_valid_dvb_t_freqs,
                 "Valid DVB-T frequencies to simulate, in Hz");
 
@@ -74,19 +74,19 @@ static unsigned int vidtv_valid_dvb_c_freqs[NUM_VALID_TUNER_FREQS] = {
        474000000
 };
 
-module_param_array(vidtv_valid_dvb_c_freqs, uint, NULL, 0);
+module_param_array(vidtv_valid_dvb_c_freqs, uint, NULL, 0444);
 MODULE_PARM_DESC(vidtv_valid_dvb_c_freqs,
                 "Valid DVB-C frequencies to simulate, in Hz");
 
 static unsigned int vidtv_valid_dvb_s_freqs[NUM_VALID_TUNER_FREQS] = {
        11362000
 };
-module_param_array(vidtv_valid_dvb_s_freqs, uint, NULL, 0);
+module_param_array(vidtv_valid_dvb_s_freqs, uint, NULL, 0444);
 MODULE_PARM_DESC(vidtv_valid_dvb_s_freqs,
                 "Valid DVB-S/S2 frequencies to simulate at Ku-Band, in kHz");
 
 static unsigned int max_frequency_shift_hz;
-module_param(max_frequency_shift_hz, uint, 0);
+module_param(max_frequency_shift_hz, uint, 0444);
 MODULE_PARM_DESC(max_frequency_shift_hz,
                 "Maximum shift in HZ allowed when tuning in a channel");
 
@@ -96,24 +96,24 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nums);
  * Influences the signal acquisition time. See ISO/IEC 13818-1 : 2000. p. 113.
  */
 static unsigned int si_period_msec = 40;
-module_param(si_period_msec, uint, 0);
+module_param(si_period_msec, uint, 0444);
 MODULE_PARM_DESC(si_period_msec, "How often to send SI packets. Default: 40ms");
 
 static unsigned int pcr_period_msec = 40;
-module_param(pcr_period_msec, uint, 0);
+module_param(pcr_period_msec, uint, 0444);
 MODULE_PARM_DESC(pcr_period_msec,
                 "How often to send PCR packets. Default: 40ms");
 
 static unsigned int mux_rate_kbytes_sec = 4096;
-module_param(mux_rate_kbytes_sec, uint, 0);
+module_param(mux_rate_kbytes_sec, uint, 0444);
 MODULE_PARM_DESC(mux_rate_kbytes_sec, "Mux rate: will pad stream if below");
 
 static unsigned int pcr_pid = 0x200;
-module_param(pcr_pid, uint, 0);
+module_param(pcr_pid, uint, 0444);
 MODULE_PARM_DESC(pcr_pid, "PCR PID for all channels: defaults to 0x200");
 
 static unsigned int mux_buf_sz_pkts;
-module_param(mux_buf_sz_pkts, uint, 0);
+module_param(mux_buf_sz_pkts, uint, 0444);
 MODULE_PARM_DESC(mux_buf_sz_pkts,
                 "Size for the internal mux buffer in multiples of 188 bytes");
 
index 68dac896277b1385a541b76f4ba6288da5f5dbe1..c46464bcaf2e13e9c14d2ad8e0a0c751f75182fc 100644 (file)
@@ -64,30 +64,35 @@ MODULE_PARM_DESC(visl_transtime_ms, " simulated process time in milliseconds.");
  * particular number of frames
  */
 int visl_dprintk_frame_start = -1;
-module_param(visl_dprintk_frame_start, int, 0);
+module_param(visl_dprintk_frame_start, int, 0444);
 MODULE_PARM_DESC(visl_dprintk_frame_start,
                 " a frame number to start tracing with dprintk");
 
 unsigned int visl_dprintk_nframes;
-module_param(visl_dprintk_nframes, uint, 0);
+module_param(visl_dprintk_nframes, uint, 0444);
 MODULE_PARM_DESC(visl_dprintk_nframes,
                 " the number of frames to trace with dprintk");
 
 bool keep_bitstream_buffers;
-module_param(keep_bitstream_buffers, bool, false);
+module_param(keep_bitstream_buffers, bool, 0444);
 MODULE_PARM_DESC(keep_bitstream_buffers,
                 " keep bitstream buffers in debugfs after streaming is stopped");
 
 int bitstream_trace_frame_start = -1;
-module_param(bitstream_trace_frame_start, int, 0);
+module_param(bitstream_trace_frame_start, int, 0444);
 MODULE_PARM_DESC(bitstream_trace_frame_start,
                 " a frame number to start dumping the bitstream through debugfs");
 
 unsigned int bitstream_trace_nframes;
-module_param(bitstream_trace_nframes, uint, 0);
+module_param(bitstream_trace_nframes, uint, 0444);
 MODULE_PARM_DESC(bitstream_trace_nframes,
                 " the number of frames to dump the bitstream through debugfs");
 
+bool tpg_verbose;
+module_param(tpg_verbose, bool, 0644);
+MODULE_PARM_DESC(tpg_verbose,
+                " add more verbose information on the generated output frames");
+
 static const struct visl_ctrl_desc visl_fwht_ctrl_descs[] = {
        {
                .cfg.id = V4L2_CID_STATELESS_FWHT_PARAMS,
index f21260054e0fccef5d7db01ffa84a85d4a39e510..6a9639bd4d61ad6aaf5b636998a506dcbd1f45a8 100644 (file)
@@ -42,6 +42,22 @@ static void *plane_vaddr(struct tpg_data *tpg, struct vb2_buffer *buf,
        return vbuf;
 }
 
+static void visl_print_ts_idx(u8 **buf, __kernel_size_t *buflen, const char *name,
+                             u64 ts, struct vb2_buffer *vb2_buf)
+{
+       u32 len;
+
+       if (tpg_verbose && vb2_buf) {
+               len = scnprintf(*buf, *buflen, "%s: %lld, vb2_idx: %d\n", name,
+                               ts, vb2_buf->index);
+       } else {
+               len = scnprintf(*buf, *buflen, "%s: %lld\n", name, ts);
+       }
+
+       *buf += len;
+       *buflen -= len;
+}
+
 static void visl_get_ref_frames(struct visl_ctx *ctx, u8 *buf,
                                __kernel_size_t buflen, struct visl_run *run)
 {
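
visl_print_ts_idx(), added above, centralizes the append-to-buffer idiom
the rewritten cases below rely on: scnprintf() writes at most *buflen
bytes and returns the count actually written, so advancing *buf and
shrinking *buflen by that count keeps every later call inside the buffer.
It also prints the vb2 index only when tpg_verbose is set and a buffer
was found, which is what keeps the default output stable across runs.
Usage sketch, chaining two lines into one buffer:

static void sketch_two_lines(void)
{
        u8 tmp[64], *p = tmp;
        __kernel_size_t left = sizeof(tmp);

        /* with a NULL vb2 buffer the helper emits the plain "name: ts" form */
        visl_print_ts_idx(&p, &left, "backward_ref_ts", 1234ULL, NULL);
        visl_print_ts_idx(&p, &left, "forward_ref_ts", 5678ULL, NULL);
}
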
@@ -63,9 +79,9 @@ static void visl_get_ref_frames(struct visl_ctx *ctx, u8 *buf,
 
                vb2_buf = vb2_find_buffer(cap_q, run->fwht.params->backward_ref_ts);
 
-               scnprintf(buf, buflen, "backwards_ref_ts: %lld, vb2_idx: %d",
-                         run->fwht.params->backward_ref_ts,
-                         vb2_buf ? vb2_buf->index : -1);
+               visl_print_ts_idx(&buf, &buflen, "backwards_ref_ts",
+                                 run->fwht.params->backward_ref_ts, vb2_buf);
+
                break;
        }
 
@@ -76,13 +92,11 @@ static void visl_get_ref_frames(struct visl_ctx *ctx, u8 *buf,
                b_ref = vb2_find_buffer(cap_q, run->mpeg2.pic->backward_ref_ts);
                f_ref = vb2_find_buffer(cap_q, run->mpeg2.pic->forward_ref_ts);
 
-               scnprintf(buf, buflen,
-                         "backward_ref_ts: %llu, vb2_idx: %d\n"
-                         "forward_ref_ts: %llu, vb2_idx: %d\n",
-                         run->mpeg2.pic->backward_ref_ts,
-                         b_ref ? b_ref->index : -1,
-                         run->mpeg2.pic->forward_ref_ts,
-                         f_ref ? f_ref->index : -1);
+               visl_print_ts_idx(&buf, &buflen, "backward_ref_ts",
+                                 run->mpeg2.pic->backward_ref_ts, b_ref);
+               visl_print_ts_idx(&buf, &buflen, "forward_ref_ts",
+                                 run->mpeg2.pic->forward_ref_ts, f_ref);
+
                break;
        }
 
@@ -95,16 +109,13 @@ static void visl_get_ref_frames(struct visl_ctx *ctx, u8 *buf,
                golden = vb2_find_buffer(cap_q, run->vp8.frame->golden_frame_ts);
                alt = vb2_find_buffer(cap_q, run->vp8.frame->alt_frame_ts);
 
-               scnprintf(buf, buflen,
-                         "last_ref_ts: %llu, vb2_idx: %d\n"
-                         "golden_ref_ts: %llu, vb2_idx: %d\n"
-                         "alt_ref_ts: %llu, vb2_idx: %d\n",
-                         run->vp8.frame->last_frame_ts,
-                         last ? last->index : -1,
-                         run->vp8.frame->golden_frame_ts,
-                         golden ? golden->index : -1,
-                         run->vp8.frame->alt_frame_ts,
-                         alt ? alt->index : -1);
+               visl_print_ts_idx(&buf, &buflen, "last_ref_ts",
+                                 run->vp8.frame->last_frame_ts, last);
+               visl_print_ts_idx(&buf, &buflen, "golden_ref_ts",
+                                 run->vp8.frame->golden_frame_ts, golden);
+               visl_print_ts_idx(&buf, &buflen, "alt_ref_ts",
+                                 run->vp8.frame->alt_frame_ts, alt);
+
                break;
        }
 
@@ -117,28 +128,32 @@ static void visl_get_ref_frames(struct visl_ctx *ctx, u8 *buf,
                golden = vb2_find_buffer(cap_q, run->vp9.frame->golden_frame_ts);
                alt = vb2_find_buffer(cap_q, run->vp9.frame->alt_frame_ts);
 
-               scnprintf(buf, buflen,
-                         "last_ref_ts: %llu, vb2_idx: %d\n"
-                         "golden_ref_ts: %llu, vb2_idx: %d\n"
-                         "alt_ref_ts: %llu, vb2_idx: %d\n",
-                         run->vp9.frame->last_frame_ts,
-                         last ? last->index : -1,
-                         run->vp9.frame->golden_frame_ts,
-                         golden ? golden->index : -1,
-                         run->vp9.frame->alt_frame_ts,
-                         alt ? alt->index : -1);
+               visl_print_ts_idx(&buf, &buflen, "last_ref_ts",
+                                 run->vp9.frame->last_frame_ts, last);
+               visl_print_ts_idx(&buf, &buflen, "golden_ref_ts",
+                                 run->vp9.frame->golden_frame_ts, golden);
+               visl_print_ts_idx(&buf, &buflen, "alt_ref_ts",
+                                 run->vp9.frame->alt_frame_ts, alt);
+
                break;
        }
 
        case VISL_CODEC_H264: {
                char entry[] = "dpb[%d]:%u, vb2_index: %d\n";
+               char entry_stable[] = "dpb[%d]:%u\n";
                struct vb2_buffer *vb2_buf;
 
                for (i = 0; i < ARRAY_SIZE(run->h264.dpram->dpb); i++) {
-                       vb2_buf = vb2_find_buffer(cap_q, run->h264.dpram->dpb[i].reference_ts);
-                       len = scnprintf(buf, buflen, entry, i,
-                                       run->h264.dpram->dpb[i].reference_ts,
-                                       vb2_buf ? vb2_buf->index : -1);
+                       vb2_buf = vb2_find_buffer(cap_q,
+                                                 run->h264.dpram->dpb[i].reference_ts);
+                       if (tpg_verbose && vb2_buf) {
+                               len = scnprintf(buf, buflen, entry, i,
+                                               run->h264.dpram->dpb[i].reference_ts,
+                                               vb2_buf->index);
+                       } else {
+                               len = scnprintf(buf, buflen, entry_stable, i,
+                                               run->h264.dpram->dpb[i].reference_ts);
+                       }
                        buf += len;
                        buflen -= len;
                }
@@ -148,13 +163,20 @@ static void visl_get_ref_frames(struct visl_ctx *ctx, u8 *buf,
 
        case VISL_CODEC_HEVC: {
                char entry[] = "dpb[%d]:%u, vb2_index: %d\n";
+               char entry_stable[] = "dpb[%d]:%u\n";
                struct vb2_buffer *vb2_buf;
 
                for (i = 0; i < ARRAY_SIZE(run->hevc.dpram->dpb); i++) {
                        vb2_buf = vb2_find_buffer(cap_q, run->hevc.dpram->dpb[i].timestamp);
-                       len = scnprintf(buf, buflen, entry, i,
-                                       run->hevc.dpram->dpb[i].timestamp,
-                                       vb2_buf ? vb2_buf->index : -1);
+                       if (tpg_verbose && vb2_buf) {
+                               len = scnprintf(buf, buflen, entry, i,
+                                               run->hevc.dpram->dpb[i].timestamp,
+                                               vb2_buf->index);
+                       } else {
+                               len = scnprintf(buf, buflen, entry_stable, i,
+                                               run->hevc.dpram->dpb[i].timestamp);
+                       }
+
                        buf += len;
                        buflen -= len;
                }
@@ -171,43 +193,38 @@ static void visl_get_ref_frames(struct visl_ctx *ctx, u8 *buf,
                int idx_alt2 = run->av1.frame->ref_frame_idx[ALT2_BUF_IDX];
                int idx_alt = run->av1.frame->ref_frame_idx[ALT_BUF_IDX];
 
+               const u64 *reference_frame_ts = run->av1.frame->reference_frame_ts;
+
                struct vb2_buffer *ref_last =
-                       vb2_find_buffer(cap_q, run->av1.frame->reference_frame_ts[idx_last]);
+                       vb2_find_buffer(cap_q, reference_frame_ts[idx_last]);
                struct vb2_buffer *ref_last2 =
-                       vb2_find_buffer(cap_q, run->av1.frame->reference_frame_ts[idx_last2]);
+                       vb2_find_buffer(cap_q, reference_frame_ts[idx_last2]);
                struct vb2_buffer *ref_last3 =
-                       vb2_find_buffer(cap_q, run->av1.frame->reference_frame_ts[idx_last3]);
+                       vb2_find_buffer(cap_q, reference_frame_ts[idx_last3]);
                struct vb2_buffer *ref_golden =
-                       vb2_find_buffer(cap_q, run->av1.frame->reference_frame_ts[idx_golden]);
+                       vb2_find_buffer(cap_q, reference_frame_ts[idx_golden]);
                struct vb2_buffer *ref_bwd =
-                       vb2_find_buffer(cap_q, run->av1.frame->reference_frame_ts[idx_bwd]);
+                       vb2_find_buffer(cap_q, reference_frame_ts[idx_bwd]);
                struct vb2_buffer *ref_alt2 =
-                       vb2_find_buffer(cap_q, run->av1.frame->reference_frame_ts[idx_alt2]);
+                       vb2_find_buffer(cap_q, reference_frame_ts[idx_alt2]);
                struct vb2_buffer *ref_alt =
-                       vb2_find_buffer(cap_q, run->av1.frame->reference_frame_ts[idx_alt]);
-
-               scnprintf(buf, buflen,
-                         "ref_last_ts: %llu, vb2_idx: %d\n"
-                         "ref_last2_ts: %llu, vb2_idx: %d\n"
-                         "ref_last3_ts: %llu, vb2_idx: %d\n"
-                         "ref_golden_ts: %llu, vb2_idx: %d\n"
-                         "ref_bwd_ts: %llu, vb2_idx: %d\n"
-                         "ref_alt2_ts: %llu, vb2_idx: %d\n"
-                         "ref_alt_ts: %llu, vb2_idx: %d\n",
-                         run->av1.frame->reference_frame_ts[idx_last],
-                         ref_last ? ref_last->index : -1,
-                         run->av1.frame->reference_frame_ts[idx_last2],
-                         ref_last2 ? ref_last2->index : -1,
-                         run->av1.frame->reference_frame_ts[idx_last3],
-                         ref_last3 ? ref_last3->index : -1,
-                         run->av1.frame->reference_frame_ts[idx_golden],
-                         ref_golden ? ref_golden->index : -1,
-                         run->av1.frame->reference_frame_ts[idx_bwd],
-                         ref_bwd ? ref_bwd->index : -1,
-                         run->av1.frame->reference_frame_ts[idx_alt2],
-                         ref_alt2 ? ref_alt2->index : -1,
-                         run->av1.frame->reference_frame_ts[idx_alt],
-                         ref_alt ? ref_alt->index : -1);
+                       vb2_find_buffer(cap_q, reference_frame_ts[idx_alt]);
+
+               visl_print_ts_idx(&buf, &buflen, "ref_last_ts",
+                                 reference_frame_ts[idx_last], ref_last);
+               visl_print_ts_idx(&buf, &buflen, "ref_last2_ts",
+                                 reference_frame_ts[idx_last2], ref_last2);
+               visl_print_ts_idx(&buf, &buflen, "ref_last3_ts",
+                                 reference_frame_ts[idx_last3], ref_last3);
+               visl_print_ts_idx(&buf, &buflen, "ref_golden_ts",
+                                 reference_frame_ts[idx_golden], ref_golden);
+               visl_print_ts_idx(&buf, &buflen, "ref_bwd_ts",
+                                 reference_frame_ts[idx_bwd], ref_bwd);
+               visl_print_ts_idx(&buf, &buflen, "ref_alt2_ts",
+                                 reference_frame_ts[idx_alt2], ref_alt2);
+               visl_print_ts_idx(&buf, &buflen, "ref_alt_ts",
+                                 reference_frame_ts[idx_alt], ref_alt);
+
                break;
        }
        }
@@ -254,15 +271,23 @@ static void visl_tpg_fill_sequence(struct visl_ctx *ctx,
                                   struct visl_run *run, char buf[], size_t bufsz)
 {
        u32 stream_ms;
-
-       stream_ms = jiffies_to_msecs(get_jiffies_64() - ctx->capture_streamon_jiffies);
+       int len;
+
+       if (tpg_verbose) {
+               stream_ms = jiffies_to_msecs(get_jiffies_64() - ctx->capture_streamon_jiffies);
+
+               len = scnprintf(buf, bufsz,
+                               "stream time: %02d:%02d:%02d:%03d ",
+                               (stream_ms / (60 * 60 * 1000)) % 24,
+                               (stream_ms / (60 * 1000)) % 60,
+                               (stream_ms / 1000) % 60,
+                               stream_ms % 1000);
+               buf += len;
+               bufsz -= len;
+       }
 
        scnprintf(buf, bufsz,
-                 "stream time: %02d:%02d:%02d:%03d sequence:%u timestamp:%lld field:%s",
-                 (stream_ms / (60 * 60 * 1000)) % 24,
-                 (stream_ms / (60 * 1000)) % 60,
-                 (stream_ms / 1000) % 60,
-                 stream_ms % 1000,
+                 "sequence:%u timestamp:%lld field:%s",
                  run->dst->sequence,
                  run->dst->vb2_buf.timestamp,
                  (run->dst->field == V4L2_FIELD_ALTERNATE) ?
@@ -270,6 +295,35 @@ static void visl_tpg_fill_sequence(struct visl_ctx *ctx,
                  " top" : " bottom") : "none");
 }
 
+static bool visl_tpg_fill_codec_specific(struct visl_ctx *ctx,
+                                        struct visl_run *run,
+                                        char buf[], size_t bufsz)
+{
+       /*
+        * To add variability, we need a value that is stable for a given
+        * input but different from the fields already shown.
+        * The pic order count value defines the display order of the frames
+        * (which can be different from the decoding order that is shown with
+        * the sequence number).
+        * Therefore it is stable for a given input and will add a different
+        * value that is more specific to the way the input is encoded.
+        */
+       switch (ctx->current_codec) {
+       case VISL_CODEC_H264:
+               scnprintf(buf, bufsz,
+                         "H264: %u", run->h264.dpram->pic_order_cnt_lsb);
+               break;
+       case VISL_CODEC_HEVC:
+               scnprintf(buf, bufsz,
+                         "HEVC: %d", run->hevc.dpram->pic_order_cnt_val);
+               break;
+       default:
+               return false;
+       }
+
+       return true;
+}
+
 static void visl_tpg_fill(struct visl_ctx *ctx, struct visl_run *run)
 {
        u8 *basep[TPG_MAX_PLANES][2];
@@ -302,6 +356,13 @@ static void visl_tpg_fill(struct visl_ctx *ctx, struct visl_run *run)
        frame_dprintk(ctx->dev, run->dst->sequence, "");
        line++;
 
+       if (visl_tpg_fill_codec_specific(ctx, run, buf, TPG_STR_BUF_SZ)) {
+               tpg_gen_text(&ctx->tpg, basep, line++ * line_height, 16, buf);
+               frame_dprintk(ctx->dev, run->dst->sequence, "%s\n", buf);
+               frame_dprintk(ctx->dev, run->dst->sequence, "");
+               line++;
+       }
+
        visl_get_ref_frames(ctx, buf, TPG_STR_BUF_SZ, run);
 
        while ((line_str = strsep(&tmp, "\n")) && strlen(line_str)) {
@@ -338,35 +399,37 @@ static void visl_tpg_fill(struct visl_ctx *ctx, struct visl_run *run)
                frame_dprintk(ctx->dev, run->dst->sequence, "%s\n", buf);
        }
 
-       line++;
-       frame_dprintk(ctx->dev, run->dst->sequence, "");
-       scnprintf(buf, TPG_STR_BUF_SZ, "Output queue status:");
-       tpg_gen_text(&ctx->tpg, basep, line++ * line_height, 16, buf);
-       frame_dprintk(ctx->dev, run->dst->sequence, "%s\n", buf);
+       if (tpg_verbose) {
+               line++;
+               frame_dprintk(ctx->dev, run->dst->sequence, "");
+               scnprintf(buf, TPG_STR_BUF_SZ, "Output queue status:");
+               tpg_gen_text(&ctx->tpg, basep, line++ * line_height, 16, buf);
+               frame_dprintk(ctx->dev, run->dst->sequence, "%s\n", buf);
 
-       len = 0;
-       for (i = 0; i < vb2_get_num_buffers(out_q); i++) {
-               char entry[] = "index: %u, state: %s, request_fd: %d, ";
-               u32 old_len = len;
-               struct vb2_buffer *vb2;
-               char *q_status;
+               len = 0;
+               for (i = 0; i < vb2_get_num_buffers(out_q); i++) {
+                       char entry[] = "index: %u, state: %s, request_fd: %d, ";
+                       u32 old_len = len;
+                       struct vb2_buffer *vb2;
+                       char *q_status;
 
-               vb2 = vb2_get_buffer(out_q, i);
-               if (!vb2)
-                       continue;
+                       vb2 = vb2_get_buffer(out_q, i);
+                       if (!vb2)
+                               continue;
 
-               q_status = visl_get_vb2_state(vb2->state);
+                       q_status = visl_get_vb2_state(vb2->state);
 
-               len += scnprintf(&buf[len], TPG_STR_BUF_SZ - len,
-                                entry, i, q_status,
-                                to_vb2_v4l2_buffer(vb2)->request_fd);
+                       len += scnprintf(&buf[len], TPG_STR_BUF_SZ - len,
+                                        entry, i, q_status,
+                                        to_vb2_v4l2_buffer(vb2)->request_fd);
 
-               len += visl_fill_bytesused(to_vb2_v4l2_buffer(vb2),
-                                          &buf[len],
-                                          TPG_STR_BUF_SZ - len);
+                       len += visl_fill_bytesused(to_vb2_v4l2_buffer(vb2),
+                                                  &buf[len],
+                                                  TPG_STR_BUF_SZ - len);
 
-               tpg_gen_text(&ctx->tpg, basep, line++ * line_height, 16, &buf[old_len]);
-               frame_dprintk(ctx->dev, run->dst->sequence, "%s", &buf[old_len]);
+                       tpg_gen_text(&ctx->tpg, basep, line++ * line_height, 16, &buf[old_len]);
+                       frame_dprintk(ctx->dev, run->dst->sequence, "%s", &buf[old_len]);
+               }
        }
 
        line++;
@@ -398,32 +461,34 @@ static void visl_tpg_fill(struct visl_ctx *ctx, struct visl_run *run)
                frame_dprintk(ctx->dev, run->dst->sequence, "%s\n", buf);
        }
 
-       line++;
-       frame_dprintk(ctx->dev, run->dst->sequence, "");
-       scnprintf(buf, TPG_STR_BUF_SZ, "Capture queue status:");
-       tpg_gen_text(&ctx->tpg, basep, line++ * line_height, 16, buf);
-       frame_dprintk(ctx->dev, run->dst->sequence, "%s\n", buf);
+       if (tpg_verbose) {
+               line++;
+               frame_dprintk(ctx->dev, run->dst->sequence, "");
+               scnprintf(buf, TPG_STR_BUF_SZ, "Capture queue status:");
+               tpg_gen_text(&ctx->tpg, basep, line++ * line_height, 16, buf);
+               frame_dprintk(ctx->dev, run->dst->sequence, "%s\n", buf);
 
-       len = 0;
-       for (i = 0; i < vb2_get_num_buffers(cap_q); i++) {
-               u32 old_len = len;
-               struct vb2_buffer *vb2;
-               char *q_status;
+               len = 0;
+               for (i = 0; i < vb2_get_num_buffers(cap_q); i++) {
+                       u32 old_len = len;
+                       struct vb2_buffer *vb2;
+                       char *q_status;
 
-               vb2 = vb2_get_buffer(cap_q, i);
-               if (!vb2)
-                       continue;
+                       vb2 = vb2_get_buffer(cap_q, i);
+                       if (!vb2)
+                               continue;
 
-               q_status = visl_get_vb2_state(vb2->state);
+                       q_status = visl_get_vb2_state(vb2->state);
 
-               len += scnprintf(&buf[len], TPG_STR_BUF_SZ - len,
-                                "index: %u, status: %s, timestamp: %llu, is_held: %d",
-                                vb2->index, q_status,
-                                vb2->timestamp,
-                                to_vb2_v4l2_buffer(vb2)->is_held);
+                       len += scnprintf(&buf[len], TPG_STR_BUF_SZ - len,
+                                        "index: %u, status: %s, timestamp: %llu, is_held: %d",
+                                        vb2->index, q_status,
+                                        vb2->timestamp,
+                                        to_vb2_v4l2_buffer(vb2)->is_held);
 
-               tpg_gen_text(&ctx->tpg, basep, line++ * line_height, 16, &buf[old_len]);
-               frame_dprintk(ctx->dev, run->dst->sequence, "%s", &buf[old_len]);
+                       tpg_gen_text(&ctx->tpg, basep, line++ * line_height, 16, &buf[old_len]);
+                       frame_dprintk(ctx->dev, run->dst->sequence, "%s", &buf[old_len]);
+               }
        }
 }
 
index c593b1337f116333afa865fdf89fec564d9ce678..434e9efbf9b21db4b1b903035d18e2a12f716947 100644 (file)
@@ -85,6 +85,7 @@ extern unsigned int visl_dprintk_nframes;
 extern bool keep_bitstream_buffers;
 extern int bitstream_trace_frame_start;
 extern unsigned int bitstream_trace_nframes;
+extern bool tpg_verbose;
 
 #define frame_dprintk(dev, current, fmt, arg...) \
        do { \
index f0371d004b36dcf3b3f2fc3b434d235cf632e2b8..a7e721baaa997f24e20e152dff8c5494fee839e5 100644 (file)
@@ -470,7 +470,6 @@ static int tda18271_powerscan(struct dvb_frontend *fe,
        /* algorithm initialization */
        sgn = 1;
        *freq_out = *freq_in;
-       bcal = 0;
        count = 0;
        wait = false;
 
index 57ded9ff3f04350df4c7705487c0110d0437d195..29bc63021c5aae978b7937b31b9eb81fc03a4fde 100644 (file)
@@ -1515,10 +1515,10 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq)
 {
        struct xc4000_priv *priv = fe->tuner_priv;
 
+       mutex_lock(&priv->lock);
        *freq = priv->freq_hz + priv->freq_offset;
 
        if (debug) {
-               mutex_lock(&priv->lock);
                if ((priv->cur_fw.type
                     & (BASE | FM | DTV6 | DTV7 | DTV78 | DTV8)) == BASE) {
                        u16     snr = 0;
@@ -1529,8 +1529,8 @@ static int xc4000_get_frequency(struct dvb_frontend *fe, u32 *freq)
                                return 0;
                        }
                }
-               mutex_unlock(&priv->lock);
        }
+       mutex_unlock(&priv->lock);
 
        dprintk(1, "%s()\n", __func__);
 
index 3b75d062e60258e0d674ef8b1b1fe447621b0ec8..343a4433ed24ca608a22cdd763ffed6207e60bb4 100644 (file)
@@ -1759,7 +1759,7 @@ int cx231xx_417_register(struct cx231xx *dev)
        dev->mpeg_ctrl_handler.ops = &cx231xx_ops;
        if (dev->sd_cx25840)
                v4l2_ctrl_add_handler(&dev->mpeg_ctrl_handler.hdl,
-                               dev->sd_cx25840->ctrl_handler, NULL, false);
+                               dev->sd_cx25840->ctrl_handler, NULL, true);
        if (dev->mpeg_ctrl_handler.hdl.error) {
                err = dev->mpeg_ctrl_handler.hdl.error;
                dprintk(3, "%s: can't add cx25840 controls\n", dev->name);
index 0990aa4a17bb9fc1e2955fa53a7498ada27396b6..cbb0541d4dc1f6fabe921a658f1872a2855bc4a4 100644 (file)
@@ -126,8 +126,6 @@ struct usb_data_stream_properties {
  * @caps: capabilities of the DVB USB device.
  * @pid_filter_count: number of PID filter position in the optional hardware
  *  PID-filter.
- * @num_frontends: number of frontends of the DVB USB adapter.
- * @frontend_ctrl: called to power on/off active frontend.
  * @streaming_ctrl: called to start and stop the MPEG2-TS streaming of the
  *  device (not URB submitting/killing).
  *  This callback will be called without data URBs being active - data URBs
index 4d037c92af7c58a29fc39b3ab7d2ce593f1503cd..bae76023cf71d33cc7a85cdc4402c6d3a26f0d4b 100644 (file)
@@ -4094,6 +4094,10 @@ static int em28xx_usb_probe(struct usb_interface *intf,
         * topology will likely change after the load of the em28xx subdrivers.
         */
 #ifdef CONFIG_MEDIA_CONTROLLER
+       /*
+        * No need to check the return value, the device will still be
+        * usable without the media controller API.
+        */
        retval = media_device_register(dev->media_dev);
 #endif
 
index 0c24e2984304874631c09dde658c3946463627ab..eb03f98b2ef1136079f9d1607f466e22cf0c9242 100644 (file)
@@ -80,7 +80,7 @@ static int go7007_load_encoder(struct go7007 *go)
        const struct firmware *fw_entry;
        char fw_name[] = "go7007/go7007fw.bin";
        void *bounce;
-       int fw_len, rv = 0;
+       int fw_len;
        u16 intr_val, intr_data;
 
        if (go->boot_fw == NULL) {
@@ -109,9 +109,11 @@ static int go7007_load_encoder(struct go7007 *go)
            go7007_read_interrupt(go, &intr_val, &intr_data) < 0 ||
                        (intr_val & ~0x1) != 0x5a5a) {
                v4l2_err(go, "error transferring firmware\n");
-               rv = -1;
+               kfree(go->boot_fw);
+               go->boot_fw = NULL;
+               return -1;
        }
-       return rv;
+       return 0;
 }
 
 MODULE_FIRMWARE("go7007/go7007fw.bin");
index eeb85981e02b67fcdb30307185e1a305c9a54d5e..762c13e49bfa5c76cf218dd942feb7217d4b9603 100644 (file)
@@ -1201,7 +1201,9 @@ static int go7007_usb_probe(struct usb_interface *intf,
                                u16 channel;
 
                                /* read channel number from GPIO[1:0] */
-                               go7007_read_addr(go, 0x3c81, &channel);
+                               if (go7007_read_addr(go, 0x3c81, &channel))
+                                       goto allocfail;
+
                                channel &= 0x3;
                                go->board_id = GO7007_BOARDID_ADLINK_MPG24;
                                usb->board = board = &board_adlink_mpg24;
index 1764674de98bc062faf5861681690aa19586e1ed..73c95ba2328a41b14a340847dbd864ae72040196 100644 (file)
@@ -90,8 +90,10 @@ static void pvr2_context_destroy(struct pvr2_context *mp)
 }
 
 
-static void pvr2_context_notify(struct pvr2_context *mp)
+static void pvr2_context_notify(void *ptr)
 {
+       struct pvr2_context *mp = ptr;
+
        pvr2_context_set_notify(mp,!0);
 }
 
@@ -106,9 +108,7 @@ static void pvr2_context_check(struct pvr2_context *mp)
                pvr2_trace(PVR2_TRACE_CTXT,
                           "pvr2_context %p (initialize)", mp);
                /* Finish hardware initialization */
-               if (pvr2_hdw_initialize(mp->hdw,
-                                       (void (*)(void *))pvr2_context_notify,
-                                       mp)) {
+               if (pvr2_hdw_initialize(mp->hdw, pvr2_context_notify, mp)) {
                        mp->video_stream.stream =
                                pvr2_hdw_get_video_stream(mp->hdw);
                        /* Trigger interface initialization.  By doing this
@@ -267,9 +267,9 @@ static void pvr2_context_exit(struct pvr2_context *mp)
 void pvr2_context_disconnect(struct pvr2_context *mp)
 {
        pvr2_hdw_disconnect(mp->hdw);
-       mp->disconnect_flag = !0;
        if (!pvr2_context_shutok())
                pvr2_context_notify(mp);
+       mp->disconnect_flag = !0;
 }
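
The pvrusb2 hunks above and below all make the same change: callbacks
that used to be registered through casts such as
(void (*)(void *))pvr2_context_notify now take void * natively and
recover the typed pointer inside. Calling a function through a pointer of
a different type is undefined behaviour and is flagged at runtime by
kernel CFI, so the casts had to go. The pattern in isolation:

#include <linux/wait.h>

struct sketch_ctx {
        wait_queue_head_t wait;                 /* illustrative */
};

/* prototype matches what registration expects: void (*)(void *) */
static void sketch_notify(void *ptr)
{
        struct sketch_ctx *ctx = ptr;           /* recover the real type */

        wake_up(&ctx->wait);
}
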
 
 
index 26811efe0fb58b32dbfe05c63cb1380e3704aa5c..3610139fb9ad7ffd49a7c01e8acd0d9db7c47b40 100644 (file)
@@ -33,9 +33,6 @@ static int pvr2_dvb_feed_func(struct pvr2_dvb_adapter *adap)
        for (;;) {
                if (kthread_should_stop()) break;
 
-               /* Not sure about this... */
-               try_to_freeze();
-
                bp = pvr2_stream_get_ready_buffer(stream);
                if (bp != NULL) {
                        count = pvr2_buffer_get_count(bp);
@@ -62,8 +59,7 @@ static int pvr2_dvb_feed_func(struct pvr2_dvb_adapter *adap)
 
                /* Wait until more buffers become available or we're
                   told not to wait any longer. */
-               ret = wait_event_interruptible(
-                   adap->buffer_wait_data,
+               ret = wait_event_freezable(adap->buffer_wait_data,
                    (pvr2_stream_get_ready_count(stream) > 0) ||
                    kthread_should_stop());
                if (ret < 0) break;
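
wait_event_freezable() is wait_event_interruptible() plus freezer
cooperation: the task is marked freezable while it sleeps, so the
explicit try_to_freeze() poll deleted above becomes redundant and the
thread can now be frozen even while blocked on the wait queue. A hedged
sketch of a converted feeder loop (sketch_adap and its fields are
illustrative):

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/wait.h>

struct sketch_adap {
        wait_queue_head_t buffer_wait_data;
        atomic_t ready;
};

static int sketch_feed_func(struct sketch_adap *adap)
{
        for (;;) {
                if (kthread_should_stop())
                        break;

                /* ... drain ready buffers here ... */

                if (wait_event_freezable(adap->buffer_wait_data,
                                         atomic_read(&adap->ready) > 0 ||
                                         kthread_should_stop()) < 0)
                        break;  /* interrupted by a signal */
        }
        return 0;
}
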
@@ -88,8 +84,10 @@ static int pvr2_dvb_feed_thread(void *data)
        return stat;
 }
 
-static void pvr2_dvb_notify(struct pvr2_dvb_adapter *adap)
+static void pvr2_dvb_notify(void *ptr)
 {
+       struct pvr2_dvb_adapter *adap = ptr;
+
        wake_up(&adap->buffer_wait_data);
 }
 
@@ -149,7 +147,7 @@ static int pvr2_dvb_stream_do_start(struct pvr2_dvb_adapter *adap)
        }
 
        pvr2_stream_set_callback(pvr->video_stream.stream,
-                                (pvr2_stream_callback) pvr2_dvb_notify, adap);
+                                pvr2_dvb_notify, adap);
 
        ret = pvr2_stream_set_buffer_count(stream, PVR2_DVB_BUFFER_COUNT);
        if (ret < 0) return ret;
index c04ab7258d645266c05bf9adc117566e9a1d962e..d608b793fa847b72c19ea78b4250283b9bf05f4b 100644 (file)
@@ -1033,8 +1033,10 @@ static int pvr2_v4l2_open(struct file *file)
 }
 
 
-static void pvr2_v4l2_notify(struct pvr2_v4l2_fh *fhp)
+static void pvr2_v4l2_notify(void *ptr)
 {
+       struct pvr2_v4l2_fh *fhp = ptr;
+
        wake_up(&fhp->wait_data);
 }
 
@@ -1067,7 +1069,7 @@ static int pvr2_v4l2_iosetup(struct pvr2_v4l2_fh *fh)
 
        hdw = fh->channel.mc_head->hdw;
        sp = fh->pdi->stream->stream;
-       pvr2_stream_set_callback(sp,(pvr2_stream_callback)pvr2_v4l2_notify,fh);
+       pvr2_stream_set_callback(sp, pvr2_v4l2_notify, fh);
        pvr2_hdw_set_stream_type(hdw,fh->pdi->config);
        if ((ret = pvr2_hdw_set_streaming(hdw,!0)) < 0) return ret;
        return pvr2_ioread_set_enabled(fh->rhp,!0);
@@ -1198,11 +1200,6 @@ static void pvr2_v4l2_dev_init(struct pvr2_v4l2_dev *dip,
                dip->minor_type = pvr2_v4l_type_video;
                nr_ptr = video_nr;
                caps |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_AUDIO;
-               if (!dip->stream) {
-                       pr_err(KBUILD_MODNAME
-                               ": Failed to set up pvrusb2 v4l video dev due to missing stream instance\n");
-                       return;
-               }
                break;
        case VFL_TYPE_VBI:
                dip->config = pvr2_config_vbi;
index 3c2627712fe9d54273431b99911c9565fb70a046..8e1de1e8bd127dfdfbb6434d8e55a7f55f016d80 100644 (file)
@@ -1906,9 +1906,10 @@ static int s2255_get_fx2fw(struct s2255_dev *dev)
 {
        int fw;
        int ret;
-       unsigned char transBuffer[64];
-       ret = s2255_vendor_req(dev, S2255_VR_FW, 0, 0, transBuffer, 2,
-                              S2255_VR_IN);
+       u8 transBuffer[2] = {};
+
+       ret = s2255_vendor_req(dev, S2255_VR_FW, 0, 0, transBuffer,
+                              sizeof(transBuffer), S2255_VR_IN);
        if (ret < 0)
                dprintk(dev, 2, "get fw error: %x\n", ret);
        fw = transBuffer[0] + (transBuffer[1] << 8);
index 9d9e14c858e670d6588e87808c41c058a8d2e1e9..723510520d092b112474a4571ef486333c279f3e 100644 (file)
@@ -724,5 +724,5 @@ static struct usb_driver smsusb_driver = {
 module_usb_driver(smsusb_driver);
 
 MODULE_DESCRIPTION("Driver for the Siano SMS1xxx USB dongle");
-MODULE_AUTHOR("Siano Mobile Silicon, INC. (uris@siano-ms.com)");
+MODULE_AUTHOR("Siano Mobile Silicon, Inc. <uris@siano-ms.com>");
 MODULE_LICENSE("GPL");
index 62a583040cd480f22e61759949972f86aba9735d..702f1c8bd2ab3d8cff01bff379dbfc9a11497dc2 100644 (file)
@@ -963,15 +963,8 @@ ctrl_fail:
 
 void usbtv_video_free(struct usbtv *usbtv)
 {
-       mutex_lock(&usbtv->vb2q_lock);
-       mutex_lock(&usbtv->v4l2_lock);
-
-       usbtv_stop(usbtv);
        vb2_video_unregister_device(&usbtv->vdev);
        v4l2_device_disconnect(&usbtv->v4l2_dev);
 
-       mutex_unlock(&usbtv->v4l2_lock);
-       mutex_unlock(&usbtv->vb2q_lock);
-
        v4l2_device_put(&usbtv->v4l2_dev);
 }
index 10005c80f43b5f1dc83ae3cafd72f8e3511f6261..ee3475bed37fab2c370387efa3355522f3713a2e 100644 (file)
@@ -32,7 +32,7 @@ int cci_read(struct regmap *map, u32 reg, u64 *val, int *err)
 
        ret = regmap_bulk_read(map, reg, buf, len);
        if (ret) {
-               dev_err(regmap_get_device(map), "Error reading reg 0x%4x: %d\n",
+               dev_err(regmap_get_device(map), "Error reading reg 0x%04x: %d\n",
                        reg, ret);
                goto out;
        }
@@ -131,7 +131,7 @@ int cci_write(struct regmap *map, u32 reg, u64 val, int *err)
 
        ret = regmap_bulk_write(map, reg, buf, len);
        if (ret)
-               dev_err(regmap_get_device(map), "Error writing reg 0x%4x: %d\n",
+               dev_err(regmap_get_device(map), "Error writing reg 0x%04x: %d\n",
                        reg, ret);
 
 out:
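
The format tweak is small but worth noting: "%4x" pads with spaces while
"%04x" pads with zeros, so a small register offset now prints as 0x0012
instead of the misleading-looking 0x  12. For example:

#include <linux/printk.h>

static void sketch_show_padding(void)
{
        pr_info("reg 0x%4x\n", 0x12);   /* prints "reg 0x  12" */
        pr_info("reg 0x%04x\n", 0x12);  /* prints "reg 0x0012" */
}
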
index 273d83de2a876316ce9baf55320e21e081a95ce8..d34d210908d967445c374cc8e396a529af6a4ed5 100644 (file)
@@ -585,3 +585,50 @@ u32 v4l2_fraction_to_interval(u32 numerator, u32 denominator)
        return denominator ? numerator * multiplier / denominator : 0;
 }
 EXPORT_SYMBOL_GPL(v4l2_fraction_to_interval);
+
+int v4l2_link_freq_to_bitmap(struct device *dev, const u64 *fw_link_freqs,
+                            unsigned int num_of_fw_link_freqs,
+                            const s64 *driver_link_freqs,
+                            unsigned int num_of_driver_link_freqs,
+                            unsigned long *bitmap)
+{
+       unsigned int i;
+
+       *bitmap = 0;
+
+       if (!num_of_fw_link_freqs) {
+               dev_err(dev, "no link frequencies in firmware\n");
+               return -ENODATA;
+       }
+
+       for (i = 0; i < num_of_fw_link_freqs; i++) {
+               unsigned int j;
+
+               for (j = 0; j < num_of_driver_link_freqs; j++) {
+                       if (fw_link_freqs[i] != driver_link_freqs[j])
+                               continue;
+
+                       dev_dbg(dev, "enabling link frequency %lld Hz\n",
+                               driver_link_freqs[j]);
+                       *bitmap |= BIT(j);
+                       break;
+               }
+       }
+
+       if (!*bitmap) {
+               dev_err(dev, "no matching link frequencies found\n");
+
+               dev_dbg(dev, "specified in firmware:\n");
+               for (i = 0; i < num_of_fw_link_freqs; i++)
+                       dev_dbg(dev, "\t%llu Hz\n", fw_link_freqs[i]);
+
+               dev_dbg(dev, "driver supported:\n");
+               for (i = 0; i < num_of_driver_link_freqs; i++)
+                       dev_dbg(dev, "\t%lld Hz\n", driver_link_freqs[i]);
+
+               return -ENOENT;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(v4l2_link_freq_to_bitmap);
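
A hypothetical sensor-driver caller of the new helper (driver name, frequencies
and fwnode parsing are illustrative, not taken from the patch):

	static const s64 foo_link_freqs[] = {
		480000000,	/* index 0 */
		240000000,	/* index 1 */
	};

	static int foo_check_link_freqs(struct device *dev,
					const struct v4l2_fwnode_endpoint *ep)
	{
		unsigned long bitmap;
		int ret;

		/* Intersect firmware-listed frequencies with driver support. */
		ret = v4l2_link_freq_to_bitmap(dev, ep->link_frequencies,
					       ep->nr_of_link_frequencies,
					       foo_link_freqs,
					       ARRAY_SIZE(foo_link_freqs),
					       &bitmap);
		if (ret)
			return ret;

		/* BIT(i) is now set for each usable foo_link_freqs[i]. */
		return 0;
	}
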
index 002ea6588edf163d116ccfff8be7cf18cfe733da..d9a422017bd9d05bb0f8c74a4bc0cb7210316bf3 100644 (file)
@@ -1179,7 +1179,7 @@ int v4l2_querymenu(struct v4l2_ctrl_handler *hdl, struct v4l2_querymenu *qm)
                return -EINVAL;
 
        /* Use mask to see if this menu item should be skipped */
-       if (ctrl->menu_skip_mask & (1ULL << i))
+       if (i < BITS_PER_LONG_LONG && (ctrl->menu_skip_mask & BIT_ULL(i)))
                return -EINVAL;
        /* Empty menu items should also be skipped */
        if (ctrl->type == V4L2_CTRL_TYPE_MENU) {
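
The added bounds check matters because menu_skip_mask is a 64-bit mask and
shifting a 64-bit value by 64 or more positions is undefined behaviour in C.
A minimal sketch of the guarded test (illustrative only):

	/* An out-of-range index is treated as invalid rather than
	 * evaluating BIT_ULL(i) with i >= 64, which would be UB. */
	static bool menu_item_skipped(u64 skip_mask, u32 i)
	{
		return i < BITS_PER_LONG_LONG && (skip_mask & BIT_ULL(i));
	}
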
index a662fb60f73f42166b53c983a6468a948f6b4d3e..c4d995f32191c9dfce3ff0dbc430f89a02a61d31 100644 (file)
@@ -1504,11 +1504,12 @@ int check_range(enum v4l2_ctrl_type type,
                return 0;
        case V4L2_CTRL_TYPE_MENU:
        case V4L2_CTRL_TYPE_INTEGER_MENU:
-               if (min > max || def < min || def > max)
+               if (min > max || def < min || def > max ||
+                   min < 0 || (step && max >= BITS_PER_LONG_LONG))
                        return -ERANGE;
                /* Note: step == menu_skip_mask for menu controls.
                   So here we check if the default value is masked out. */
-               if (step && ((1 << def) & step))
+               if (def < BITS_PER_LONG_LONG && (step & BIT_ULL(def)))
                        return -EINVAL;
                return 0;
        case V4L2_CTRL_TYPE_STRING:
@@ -2503,7 +2504,8 @@ int v4l2_ctrl_handler_setup(struct v4l2_ctrl_handler *hdl)
 EXPORT_SYMBOL(v4l2_ctrl_handler_setup);
 
 /* Log the control name and value */
-static void log_ctrl(const struct v4l2_ctrl *ctrl,
+static void log_ctrl(const struct v4l2_ctrl_handler *hdl,
+                    struct v4l2_ctrl *ctrl,
                     const char *prefix, const char *colon)
 {
        if (ctrl->flags & (V4L2_CTRL_FLAG_DISABLED | V4L2_CTRL_FLAG_WRITE_ONLY))
@@ -2513,7 +2515,11 @@ static void log_ctrl(const struct v4l2_ctrl *ctrl,
 
        pr_info("%s%s%s: ", prefix, colon, ctrl->name);
 
+       if (ctrl->handler != hdl)
+               v4l2_ctrl_lock(ctrl);
        ctrl->type_ops->log(ctrl);
+       if (ctrl->handler != hdl)
+               v4l2_ctrl_unlock(ctrl);
 
        if (ctrl->flags & (V4L2_CTRL_FLAG_INACTIVE |
                           V4L2_CTRL_FLAG_GRABBED |
@@ -2532,7 +2538,7 @@ static void log_ctrl(const struct v4l2_ctrl *ctrl,
 void v4l2_ctrl_handler_log_status(struct v4l2_ctrl_handler *hdl,
                                  const char *prefix)
 {
-       struct v4l2_ctrl *ctrl;
+       struct v4l2_ctrl_ref *ref;
        const char *colon = "";
        int len;
 
@@ -2544,9 +2550,12 @@ void v4l2_ctrl_handler_log_status(struct v4l2_ctrl_handler *hdl,
        if (len && prefix[len - 1] != ' ')
                colon = ": ";
        mutex_lock(hdl->lock);
-       list_for_each_entry(ctrl, &hdl->ctrls, node)
-               if (!(ctrl->flags & V4L2_CTRL_FLAG_DISABLED))
-                       log_ctrl(ctrl, prefix, colon);
+       list_for_each_entry(ref, &hdl->ctrl_refs, node) {
+               if (ref->from_other_dev ||
+                   (ref->ctrl->flags & V4L2_CTRL_FLAG_DISABLED))
+                       continue;
+               log_ctrl(hdl, ref->ctrl, prefix, colon);
+       }
        mutex_unlock(hdl->lock);
 }
 EXPORT_SYMBOL(v4l2_ctrl_handler_log_status);
index 33076af4dfdbd4d6298085801cf23ebc353225ae..6e7b8b682d13f4b3c739dafe962ee7a3006c4772 100644 (file)
@@ -3028,7 +3028,7 @@ static long __video_do_ioctl(struct file *file,
        if (v4l2_is_known_ioctl(cmd)) {
                info = &v4l2_ioctls[_IOC_NR(cmd)];
 
-               if (!test_bit(_IOC_NR(cmd), vfd->valid_ioctls) &&
+               if (!is_valid_ioctl(vfd, cmd) &&
                    !((info->flags & INFO_FL_CTRL) && vfh && vfh->ctrl_handler))
                        goto done;
 
index 52d349e72b8ca52217d0f67d1071020a1f0b29d0..4bb91359e3a9a7f6c5477668ddf13cf49f85e73c 100644 (file)
@@ -337,12 +337,18 @@ int v4l2_create_fwnode_links_to_pad(struct v4l2_subdev *src_sd,
                src_idx = media_entity_get_fwnode_pad(&src_sd->entity,
                                                      endpoint,
                                                      MEDIA_PAD_FL_SOURCE);
-               if (src_idx < 0)
+               if (src_idx < 0) {
+                       dev_dbg(src_sd->dev, "no source pad found for %pfw\n",
+                               endpoint);
                        continue;
+               }
 
                remote_ep = fwnode_graph_get_remote_endpoint(endpoint);
-               if (!remote_ep)
+               if (!remote_ep) {
+                       dev_dbg(src_sd->dev, "no remote ep found for %pfw\n",
+                               endpoint);
                        continue;
+               }
 
                /*
                 * ask the sink to verify it owns the remote endpoint,
@@ -353,8 +359,12 @@ int v4l2_create_fwnode_links_to_pad(struct v4l2_subdev *src_sd,
                                                       MEDIA_PAD_FL_SINK);
                fwnode_handle_put(remote_ep);
 
-               if (sink_idx < 0 || sink_idx != sink->index)
+               if (sink_idx < 0 || sink_idx != sink->index) {
+                       dev_dbg(src_sd->dev,
+                               "sink pad index mismatch or error (is %d, expected %u)\n",
+                               sink_idx, sink->index);
                        continue;
+               }
 
                /*
                 * the source endpoint corresponds to one of its source pads,
@@ -367,8 +377,13 @@ int v4l2_create_fwnode_links_to_pad(struct v4l2_subdev *src_sd,
                src = &src_sd->entity.pads[src_idx];
 
                /* skip if link already exists */
-               if (media_entity_find_link(src, sink))
+               if (media_entity_find_link(src, sink)) {
+                       dev_dbg(src_sd->dev,
+                               "link %s:%d -> %s:%d already exists\n",
+                               src_sd->entity.name, src_idx,
+                               sink->entity->name, sink_idx);
                        continue;
+               }
 
                dev_dbg(src_sd->dev, "creating link %s:%d -> %s:%d\n",
                        src_sd->entity.name, src_idx,
index 9e983176542be07d0ce0a974d0ddb68ac490ae9f..75517134a5e94586f2a05dd0b16aa2319ec45e8a 100644 (file)
@@ -1087,11 +1087,17 @@ static int v4l2_m2m_register_entity(struct media_device *mdev,
        entity->function = function;
 
        ret = media_entity_pads_init(entity, num_pads, pads);
-       if (ret)
+       if (ret) {
+               kfree(entity->name);
+               entity->name = NULL;
                return ret;
+       }
        ret = media_device_register_entity(mdev, entity);
-       if (ret)
+       if (ret) {
+               kfree(entity->name);
+               entity->name = NULL;
                return ret;
+       }
 
        return 0;
 }
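
Both error paths release a string that the function allocated earlier; a
condensed sketch of the unwind pattern (the kasprintf() allocation is implied
by the kfree() calls, not shown in this hunk):

	entity->name = kasprintf(GFP_KERNEL, "%s-%s", base, suffix);
	if (!entity->name)
		return -ENOMEM;

	ret = media_entity_pads_init(entity, num_pads, pads);
	if (ret) {
		kfree(entity->name);	/* don't leak the name on failure */
		entity->name = NULL;
		return ret;
	}
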
index 0581f855c72e89154188412a35dc823a4d706d08..c459f709107b7cf65782d900dab7a50c7db3b346 100644 (file)
@@ -1401,7 +1401,6 @@ static struct pci_driver mptfc_driver = {
 static int
 mptfc_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
 {
-       MPT_SCSI_HOST *hd;
        u8 event = le32_to_cpu(pEvReply->Event) & 0xFF;
        unsigned long flags;
        int rc=1;
@@ -1412,8 +1411,7 @@ mptfc_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
        devtverboseprintk(ioc, printk(MYIOC_s_DEBUG_FMT "MPT event (=%02Xh) routed to SCSI host driver!\n",
                        ioc->name, event));
 
-       if (ioc->sh == NULL ||
-               ((hd = shost_priv(ioc->sh)) == NULL))
+       if (ioc->sh == NULL || shost_priv(ioc->sh) == NULL)
                return 1;
 
        switch (event) {
index 629edb6486deaef514ba15a52176b84fc17465e7..3557d78ee47a27bb61d6248bfa7c2133a19bb917 100644 (file)
@@ -227,7 +227,7 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
        if (unlikely(pmd_none(*pmdp)))
                goto err;
 #ifdef CONFIG_X86_64
-       if (unlikely(pmd_large(*pmdp)))
+       if (unlikely(pmd_leaf(*pmdp)))
                pte = ptep_get((pte_t *)pmdp);
        else
 #endif
index df589d9b4d705553ead2b0948fa8d3e19ade13d9..9f2223d3e8e119a96e22fd1bbe399d63abf9bcfc 100644 (file)
@@ -2411,7 +2411,7 @@ static int cfi_amdstd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
 static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 {
        struct cfi_private *cfi = map->fldrv_priv;
-       unsigned long timeo = jiffies + HZ;
+       unsigned long timeo;
        unsigned long int adr;
        DECLARE_WAITQUEUE(wait, current);
        int ret;
@@ -2512,7 +2512,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr, int len, void *thunk)
 {
        struct cfi_private *cfi = map->fldrv_priv;
-       unsigned long timeo = jiffies + HZ;
+       unsigned long timeo;
        DECLARE_WAITQUEUE(wait, current);
        int ret;
        int retry_cnt = 0;
index e098ae937ce88a4deb7e1f3fee659146f4661358..8a8b19874e2392f076ead223ce3f443434dd85d7 100644 (file)
@@ -341,13 +341,6 @@ config MTD_UCLINUX
        help
          Map driver to support image based filesystems for uClinux.
 
-config MTD_INTEL_VR_NOR
-       tristate "NOR flash on Intel Vermilion Range Expansion Bus CS0"
-       depends on PCI
-       help
-         Map driver for a NOR flash bank located on the Expansion Bus of the
-         Intel Vermilion Range chipset.
-
 config MTD_PLATRAM
        tristate "Map driver for platform device RAM (mtd-ram)"
        select MTD_RAM
index 094cfb244086516ba2ed6bb557191e4ca73b6976..a9083c888e3b8accfedbc7158d7e642434552741 100644 (file)
@@ -40,6 +40,5 @@ obj-$(CONFIG_MTD_UCLINUX)     += uclinux.o
 obj-$(CONFIG_MTD_NETtel)       += nettel.o
 obj-$(CONFIG_MTD_SCB2_FLASH)   += scb2_flash.o
 obj-$(CONFIG_MTD_PLATRAM)      += plat-ram.o
-obj-$(CONFIG_MTD_INTEL_VR_NOR) += intel_vr_nor.o
 obj-$(CONFIG_MTD_VMU)          += vmu-flash.o
 obj-$(CONFIG_MTD_LANTIQ)       += lantiq-flash.o
diff --git a/drivers/mtd/maps/intel_vr_nor.c b/drivers/mtd/maps/intel_vr_nor.c
deleted file mode 100644 (file)
index d67b845..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * drivers/mtd/maps/intel_vr_nor.c
- *
- * An MTD map driver for a NOR flash bank on the Expansion Bus of the Intel
- * Vermilion Range chipset.
- *
- * The Vermilion Range Expansion Bus supports four chip selects, each of which
- * has 64MiB of address space.  The 2nd BAR of the Expansion Bus PCI Device
- * is a 256MiB memory region containing the address spaces for all four of the
- * chip selects, with start addresses hardcoded on 64MiB boundaries.
- *
- * This map driver only supports NOR flash on chip select 0.  The buswidth
- * (either 8 bits or 16 bits) is determined by reading the Expansion Bus Timing
- * and Control Register for Chip Select 0 (EXP_TIMING_CS0).  This driver does
- * not modify the value in the EXP_TIMING_CS0 register except to enable writing
- * and disable boot acceleration.  The timing parameters in the register are
- * assumed to have been properly initialized by the BIOS.  The reset default
- * timing parameters are maximally conservative (slow), so access to the flash
- * will be slower than it should be if the BIOS has not initialized the timing
- * parameters.
- *
- * Author: Andy Lowe <alowe@mvista.com>
- *
- * 2006 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/map.h>
-#include <linux/mtd/partitions.h>
-#include <linux/mtd/cfi.h>
-#include <linux/mtd/flashchip.h>
-
-#define DRV_NAME "vr_nor"
-
-struct vr_nor_mtd {
-       void __iomem *csr_base;
-       struct map_info map;
-       struct mtd_info *info;
-       struct pci_dev *dev;
-};
-
-/* Expansion Bus Configuration and Status Registers are in BAR 0 */
-#define EXP_CSR_MBAR 0
-/* Expansion Bus Memory Window is BAR 1 */
-#define EXP_WIN_MBAR 1
-/* Maximum address space for Chip Select 0 is 64MiB */
-#define CS0_SIZE 0x04000000
-/* Chip Select 0 is at offset 0 in the Memory Window */
-#define CS0_START 0x0
-/* Chip Select 0 Timing Register is at offset 0 in CSR */
-#define EXP_TIMING_CS0 0x00
-#define TIMING_CS_EN           (1 << 31)       /* Chip Select Enable */
-#define TIMING_BOOT_ACCEL_DIS  (1 <<  8)       /* Boot Acceleration Disable */
-#define TIMING_WR_EN           (1 <<  1)       /* Write Enable */
-#define TIMING_BYTE_EN         (1 <<  0)       /* 8-bit vs 16-bit bus */
-#define TIMING_MASK            0x3FFF0000
-
-static void vr_nor_destroy_partitions(struct vr_nor_mtd *p)
-{
-       mtd_device_unregister(p->info);
-}
-
-static int vr_nor_init_partitions(struct vr_nor_mtd *p)
-{
-       /* register the flash bank */
-       /* partition the flash bank */
-       return mtd_device_register(p->info, NULL, 0);
-}
-
-static void vr_nor_destroy_mtd_setup(struct vr_nor_mtd *p)
-{
-       map_destroy(p->info);
-}
-
-static int vr_nor_mtd_setup(struct vr_nor_mtd *p)
-{
-       static const char * const probe_types[] =
-           { "cfi_probe", "jedec_probe", NULL };
-       const char * const *type;
-
-       for (type = probe_types; !p->info && *type; type++)
-               p->info = do_map_probe(*type, &p->map);
-       if (!p->info)
-               return -ENODEV;
-
-       p->info->dev.parent = &p->dev->dev;
-
-       return 0;
-}
-
-static void vr_nor_destroy_maps(struct vr_nor_mtd *p)
-{
-       unsigned int exp_timing_cs0;
-
-       /* write-protect the flash bank */
-       exp_timing_cs0 = readl(p->csr_base + EXP_TIMING_CS0);
-       exp_timing_cs0 &= ~TIMING_WR_EN;
-       writel(exp_timing_cs0, p->csr_base + EXP_TIMING_CS0);
-
-       /* unmap the flash window */
-       iounmap(p->map.virt);
-
-       /* unmap the csr window */
-       iounmap(p->csr_base);
-}
-
-/*
- * Initialize the map_info structure and map the flash.
- * Returns 0 on success, nonzero otherwise.
- */
-static int vr_nor_init_maps(struct vr_nor_mtd *p)
-{
-       unsigned long csr_phys, csr_len;
-       unsigned long win_phys, win_len;
-       unsigned int exp_timing_cs0;
-       int err;
-
-       csr_phys = pci_resource_start(p->dev, EXP_CSR_MBAR);
-       csr_len = pci_resource_len(p->dev, EXP_CSR_MBAR);
-       win_phys = pci_resource_start(p->dev, EXP_WIN_MBAR);
-       win_len = pci_resource_len(p->dev, EXP_WIN_MBAR);
-
-       if (!csr_phys || !csr_len || !win_phys || !win_len)
-               return -ENODEV;
-
-       if (win_len < (CS0_START + CS0_SIZE))
-               return -ENXIO;
-
-       p->csr_base = ioremap(csr_phys, csr_len);
-       if (!p->csr_base)
-               return -ENOMEM;
-
-       exp_timing_cs0 = readl(p->csr_base + EXP_TIMING_CS0);
-       if (!(exp_timing_cs0 & TIMING_CS_EN)) {
-               dev_warn(&p->dev->dev, "Expansion Bus Chip Select 0 "
-                      "is disabled.\n");
-               err = -ENODEV;
-               goto release;
-       }
-       if ((exp_timing_cs0 & TIMING_MASK) == TIMING_MASK) {
-               dev_warn(&p->dev->dev, "Expansion Bus Chip Select 0 "
-                      "is configured for maximally slow access times.\n");
-       }
-       p->map.name = DRV_NAME;
-       p->map.bankwidth = (exp_timing_cs0 & TIMING_BYTE_EN) ? 1 : 2;
-       p->map.phys = win_phys + CS0_START;
-       p->map.size = CS0_SIZE;
-       p->map.virt = ioremap(p->map.phys, p->map.size);
-       if (!p->map.virt) {
-               err = -ENOMEM;
-               goto release;
-       }
-       simple_map_init(&p->map);
-
-       /* Enable writes to flash bank */
-       exp_timing_cs0 |= TIMING_BOOT_ACCEL_DIS | TIMING_WR_EN;
-       writel(exp_timing_cs0, p->csr_base + EXP_TIMING_CS0);
-
-       return 0;
-
-      release:
-       iounmap(p->csr_base);
-       return err;
-}
-
-static const struct pci_device_id vr_nor_pci_ids[] = {
-       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x500D)},
-       {0,}
-};
-
-static void vr_nor_pci_remove(struct pci_dev *dev)
-{
-       struct vr_nor_mtd *p = pci_get_drvdata(dev);
-
-       vr_nor_destroy_partitions(p);
-       vr_nor_destroy_mtd_setup(p);
-       vr_nor_destroy_maps(p);
-       kfree(p);
-       pci_release_regions(dev);
-       pci_disable_device(dev);
-}
-
-static int vr_nor_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
-       struct vr_nor_mtd *p = NULL;
-       unsigned int exp_timing_cs0;
-       int err;
-
-       err = pci_enable_device(dev);
-       if (err)
-               goto out;
-
-       err = pci_request_regions(dev, DRV_NAME);
-       if (err)
-               goto disable_dev;
-
-       p = kzalloc(sizeof(*p), GFP_KERNEL);
-       err = -ENOMEM;
-       if (!p)
-               goto release;
-
-       p->dev = dev;
-
-       err = vr_nor_init_maps(p);
-       if (err)
-               goto release;
-
-       err = vr_nor_mtd_setup(p);
-       if (err)
-               goto destroy_maps;
-
-       err = vr_nor_init_partitions(p);
-       if (err)
-               goto destroy_mtd_setup;
-
-       pci_set_drvdata(dev, p);
-
-       return 0;
-
-      destroy_mtd_setup:
-       map_destroy(p->info);
-
-      destroy_maps:
-       /* write-protect the flash bank */
-       exp_timing_cs0 = readl(p->csr_base + EXP_TIMING_CS0);
-       exp_timing_cs0 &= ~TIMING_WR_EN;
-       writel(exp_timing_cs0, p->csr_base + EXP_TIMING_CS0);
-
-       /* unmap the flash window */
-       iounmap(p->map.virt);
-
-       /* unmap the csr window */
-       iounmap(p->csr_base);
-
-      release:
-       kfree(p);
-       pci_release_regions(dev);
-
-      disable_dev:
-       pci_disable_device(dev);
-
-      out:
-       return err;
-}
-
-static struct pci_driver vr_nor_pci_driver = {
-       .name = DRV_NAME,
-       .probe = vr_nor_pci_probe,
-       .remove = vr_nor_pci_remove,
-       .id_table = vr_nor_pci_ids,
-};
-
-module_pci_driver(vr_nor_pci_driver);
-
-MODULE_AUTHOR("Andy Lowe");
-MODULE_DESCRIPTION("MTD map driver for NOR flash on Intel Vermilion Range");
-MODULE_LICENSE("GPL");
-MODULE_DEVICE_TABLE(pci, vr_nor_pci_ids);
index 746a27d15d44006c810edc186ee1cdaffc4a2907..96eb2e782c382644273e63fc58ea4f7bf0648fcd 100644 (file)
@@ -518,7 +518,7 @@ static int physmap_flash_probe(struct platform_device *dev)
                if (!info->maps[i].phys)
                        info->maps[i].phys = res->start;
 
-               info->win_order = get_bitmask_order(resource_size(res)) - 1;
+               info->win_order = fls64(resource_size(res)) - 1;
                info->maps[i].size = BIT(info->win_order +
                                         (info->gpios ?
                                          info->gpios->ndescs : 0));
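
Worked example of the order computation (hypothetical 16 MiB window, no
address GPIOs):

	/* resource_size(res) = 0x01000000 (16 MiB)
	 * fls64(0x01000000)  = 25, so win_order = 24
	 * maps[i].size       = BIT(24) = 16 MiB
	 *
	 * fls64() takes a u64, so a window of 4 GiB or larger no longer
	 * has its size truncated before the order is computed. */
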
index f58cfb15d6e85e55a339faef62cae0c5eaa3719d..b69dade3f7ad07ddca34b863369a1540ecc64521 100644 (file)
@@ -47,7 +47,7 @@ struct map_info uflash_map_templ = {
        .bankwidth =    UFLASH_BUSWIDTH,
 };
 
-int uflash_devinit(struct platform_device *op, struct device_node *dp)
+static int uflash_devinit(struct platform_device *op, struct device_node *dp)
 {
        struct uflash_dev *up;
 
index 4cb478bbee4a480ebb32113c9fe4eb8250101796..dc75d50d52e84e29e55484070d66858bd83aadfa 100644 (file)
@@ -1378,7 +1378,7 @@ static int atmel_smc_nand_prepare_smcconf(struct atmel_nand *nand,
                return ret;
 
        /*
-        * The write cycle timing is directly matching tWC, but is also
+        * The read cycle timing is directly matching tRC, but is also
         * dependent on the setup and hold timings we calculated earlier,
         * which gives:
         *
index 9907e3ec4bb2d8bc275e247354d39dd143ab8e84..0536568c646727ea5a1a03398453834892fe3a3b 100644 (file)
@@ -2,7 +2,7 @@
 # link order matters; don't link the more generic brcmstb_nand.o before the
 # more specific iproc_nand.o, for instance
 obj-$(CONFIG_MTD_NAND_BRCMNAND_IPROC)  += iproc_nand.o
-obj-$(CONFIG_MTD_NAND_BRCMNAND_BCMBCA) += bcm63138_nand.o
+obj-$(CONFIG_MTD_NAND_BRCMNAND_BCMBCA) += bcmbca_nand.o
 obj-$(CONFIG_MTD_NAND_BRCMNAND_BCM63XX)        += bcm6368_nand.o
 obj-$(CONFIG_MTD_NAND_BRCMNAND_BRCMSTB)        += brcmstb_nand.o
 obj-$(CONFIG_MTD_NAND_BRCMNAND)                += brcmnand.o
diff --git a/drivers/mtd/nand/raw/brcmnand/bcm63138_nand.c b/drivers/mtd/nand/raw/brcmnand/bcm63138_nand.c
deleted file mode 100644 (file)
index 968c5b6..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright © 2015 Broadcom Corporation
- */
-
-#include <linux/device.h>
-#include <linux/io.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-#include "brcmnand.h"
-
-struct bcm63138_nand_soc {
-       struct brcmnand_soc soc;
-       void __iomem *base;
-};
-
-#define BCM63138_NAND_INT_STATUS               0x00
-#define BCM63138_NAND_INT_EN                   0x04
-
-enum {
-       BCM63138_CTLRDY         = BIT(4),
-};
-
-static bool bcm63138_nand_intc_ack(struct brcmnand_soc *soc)
-{
-       struct bcm63138_nand_soc *priv =
-                       container_of(soc, struct bcm63138_nand_soc, soc);
-       void __iomem *mmio = priv->base + BCM63138_NAND_INT_STATUS;
-       u32 val = brcmnand_readl(mmio);
-
-       if (val & BCM63138_CTLRDY) {
-               brcmnand_writel(val & ~BCM63138_CTLRDY, mmio);
-               return true;
-       }
-
-       return false;
-}
-
-static void bcm63138_nand_intc_set(struct brcmnand_soc *soc, bool en)
-{
-       struct bcm63138_nand_soc *priv =
-                       container_of(soc, struct bcm63138_nand_soc, soc);
-       void __iomem *mmio = priv->base + BCM63138_NAND_INT_EN;
-       u32 val = brcmnand_readl(mmio);
-
-       if (en)
-               val |= BCM63138_CTLRDY;
-       else
-               val &= ~BCM63138_CTLRDY;
-
-       brcmnand_writel(val, mmio);
-}
-
-static int bcm63138_nand_probe(struct platform_device *pdev)
-{
-       struct device *dev = &pdev->dev;
-       struct bcm63138_nand_soc *priv;
-       struct brcmnand_soc *soc;
-
-       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-       if (!priv)
-               return -ENOMEM;
-       soc = &priv->soc;
-
-       priv->base = devm_platform_ioremap_resource_byname(pdev, "nand-int-base");
-       if (IS_ERR(priv->base))
-               return PTR_ERR(priv->base);
-
-       soc->ctlrdy_ack = bcm63138_nand_intc_ack;
-       soc->ctlrdy_set_enabled = bcm63138_nand_intc_set;
-
-       return brcmnand_probe(pdev, soc);
-}
-
-static const struct of_device_id bcm63138_nand_of_match[] = {
-       { .compatible = "brcm,nand-bcm63138" },
-       {},
-};
-MODULE_DEVICE_TABLE(of, bcm63138_nand_of_match);
-
-static struct platform_driver bcm63138_nand_driver = {
-       .probe                  = bcm63138_nand_probe,
-       .remove_new             = brcmnand_remove,
-       .driver = {
-               .name           = "bcm63138_nand",
-               .pm             = &brcmnand_pm_ops,
-               .of_match_table = bcm63138_nand_of_match,
-       }
-};
-module_platform_driver(bcm63138_nand_driver);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Brian Norris");
-MODULE_DESCRIPTION("NAND driver for BCM63138");
diff --git a/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c b/drivers/mtd/nand/raw/brcmnand/bcmbca_nand.c
new file mode 100644 (file)
index 0000000..ea53485
--- /dev/null
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2015 Broadcom Corporation
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "brcmnand.h"
+
+struct bcmbca_nand_soc {
+       struct brcmnand_soc soc;
+       void __iomem *base;
+};
+
+#define BCMBCA_NAND_INT_STATUS         0x00
+#define BCMBCA_NAND_INT_EN             0x04
+
+enum {
+       BCMBCA_CTLRDY           = BIT(4),
+};
+
+#if defined(CONFIG_ARM64)
+#define ALIGN_REQ              8
+#else
+#define ALIGN_REQ              4
+#endif
+
+static inline bool bcmbca_nand_is_buf_aligned(void *flash_cache, void *buffer)
+{
+       return IS_ALIGNED((uintptr_t)buffer, ALIGN_REQ) &&
+                               IS_ALIGNED((uintptr_t)flash_cache, ALIGN_REQ);
+}
+
+static bool bcmbca_nand_intc_ack(struct brcmnand_soc *soc)
+{
+       struct bcmbca_nand_soc *priv =
+                       container_of(soc, struct bcmbca_nand_soc, soc);
+       void __iomem *mmio = priv->base + BCMBCA_NAND_INT_STATUS;
+       u32 val = brcmnand_readl(mmio);
+
+       if (val & BCMBCA_CTLRDY) {
+               brcmnand_writel(val & ~BCMBCA_CTLRDY, mmio);
+               return true;
+       }
+
+       return false;
+}
+
+static void bcmbca_nand_intc_set(struct brcmnand_soc *soc, bool en)
+{
+       struct bcmbca_nand_soc *priv =
+                       container_of(soc, struct bcmbca_nand_soc, soc);
+       void __iomem *mmio = priv->base + BCMBCA_NAND_INT_EN;
+       u32 val = brcmnand_readl(mmio);
+
+       if (en)
+               val |= BCMBCA_CTLRDY;
+       else
+               val &= ~BCMBCA_CTLRDY;
+
+       brcmnand_writel(val, mmio);
+}
+
+static void bcmbca_read_data_bus(struct brcmnand_soc *soc,
+                                void __iomem *flash_cache, u32 *buffer, int fc_words)
+{
+       /*
+        * memcpy can do unaligned access depending on the source and
+        * destination addresses, which is incompatible with the NAND
+        * cache. Fall back to memcpy_fromio in that case.
+        */
+       if (bcmbca_nand_is_buf_aligned((void __force *)flash_cache, buffer))
+               memcpy((void *)buffer, (void __force *)flash_cache, fc_words * 4);
+       else
+               memcpy_fromio((void *)buffer, flash_cache, fc_words * 4);
+}
+
+static int bcmbca_nand_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct bcmbca_nand_soc *priv;
+       struct brcmnand_soc *soc;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+       soc = &priv->soc;
+
+       priv->base = devm_platform_ioremap_resource_byname(pdev, "nand-int-base");
+       if (IS_ERR(priv->base))
+               return PTR_ERR(priv->base);
+
+       soc->ctlrdy_ack = bcmbca_nand_intc_ack;
+       soc->ctlrdy_set_enabled = bcmbca_nand_intc_set;
+       soc->read_data_bus = bcmbca_read_data_bus;
+
+       return brcmnand_probe(pdev, soc);
+}
+
+static const struct of_device_id bcmbca_nand_of_match[] = {
+       { .compatible = "brcm,nand-bcm63138" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, bcmbca_nand_of_match);
+
+static struct platform_driver bcmbca_nand_driver = {
+       .probe                  = bcmbca_nand_probe,
+       .remove_new             = brcmnand_remove,
+       .driver = {
+               .name           = "bcmbca_nand",
+               .pm             = &brcmnand_pm_ops,
+               .of_match_table = bcmbca_nand_of_match,
+       }
+};
+module_platform_driver(bcmbca_nand_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Brian Norris");
+MODULE_DESCRIPTION("NAND driver for BCMBCA");
index 8faca43ae1ff9c51a98297f77e90a14e04ea9f34..a8d12c71f987be95817a69b4ee92c229ec8bf814 100644 (file)
@@ -625,7 +625,7 @@ enum {
 /* Only for v7.2 */
 #define        ACC_CONTROL_ECC_EXT_SHIFT               13
 
-static u8 brcmnand_status(struct brcmnand_host *host);
+static int brcmnand_status(struct brcmnand_host *host);
 
 static inline bool brcmnand_non_mmio_ops(struct brcmnand_controller *ctrl)
 {
@@ -851,6 +851,20 @@ static inline u32 edu_readl(struct brcmnand_controller *ctrl,
        return brcmnand_readl(ctrl->edu_base + offs);
 }
 
+static inline void brcmnand_read_data_bus(struct brcmnand_controller *ctrl,
+                                         void __iomem *flash_cache, u32 *buffer, int fc_words)
+{
+       struct brcmnand_soc *soc = ctrl->soc;
+       int i;
+
+       if (soc->read_data_bus) {
+               soc->read_data_bus(soc, flash_cache, buffer, fc_words);
+       } else {
+               for (i = 0; i < fc_words; i++)
+                       buffer[i] = brcmnand_read_fc(ctrl, i);
+       }
+}
+
 static void brcmnand_clear_ecc_addr(struct brcmnand_controller *ctrl)
 {
 
@@ -1024,6 +1038,22 @@ static inline int brcmnand_sector_1k_shift(struct brcmnand_controller *ctrl)
                return -1;
 }
 
+static bool brcmnand_get_sector_size_1k(struct brcmnand_host *host)
+{
+       struct brcmnand_controller *ctrl = host->ctrl;
+       int sector_size_bit = brcmnand_sector_1k_shift(ctrl);
+       u16 acc_control_offs = brcmnand_cs_offset(ctrl, host->cs,
+                                                 BRCMNAND_CS_ACC_CONTROL);
+       u32 acc_control;
+
+       if (sector_size_bit < 0)
+               return false;
+
+       acc_control = nand_readreg(ctrl, acc_control_offs);
+
+       return ((acc_control & BIT(sector_size_bit)) != 0);
+}
+
 static void brcmnand_set_sector_size_1k(struct brcmnand_host *host, int val)
 {
        struct brcmnand_controller *ctrl = host->ctrl;
@@ -1041,6 +1071,43 @@ static void brcmnand_set_sector_size_1k(struct brcmnand_host *host, int val)
        nand_writereg(ctrl, acc_control_offs, tmp);
 }
 
+static int brcmnand_get_spare_size(struct brcmnand_host *host)
+{
+       struct brcmnand_controller *ctrl = host->ctrl;
+       u16 acc_control_offs = brcmnand_cs_offset(ctrl, host->cs,
+                                                 BRCMNAND_CS_ACC_CONTROL);
+       u32 acc = nand_readreg(ctrl, acc_control_offs);
+
+       return (acc & brcmnand_spare_area_mask(ctrl));
+}
+
+static void brcmnand_get_ecc_settings(struct brcmnand_host *host, struct nand_chip *chip)
+{
+       struct brcmnand_controller *ctrl = host->ctrl;
+       u16 acc_control_offs = brcmnand_cs_offset(ctrl, host->cs,
+                                                 BRCMNAND_CS_ACC_CONTROL);
+       bool sector_size_1k = brcmnand_get_sector_size_1k(host);
+       int spare_area_size, ecc_level;
+       u32 acc;
+
+       spare_area_size = brcmnand_get_spare_size(host);
+       acc = nand_readreg(ctrl, acc_control_offs);
+       ecc_level = (acc & brcmnand_ecc_level_mask(ctrl)) >> ctrl->ecc_level_shift;
+       if (sector_size_1k)
+               chip->ecc.strength = ecc_level * 2;
+       else if (spare_area_size == 16 && ecc_level == 15)
+               chip->ecc.strength = 1; /* Hamming */
+       else
+               chip->ecc.strength = ecc_level;
+
+       if (chip->ecc.size == 0) {
+               if (sector_size_1k)
+                       chip->ecc.size = 1024;
+               else
+                       chip->ecc.size = 512;
+       }
+}
+
 /***********************************************************************
  * CS_NAND_SELECT
  ***********************************************************************/
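
How the strap-derived ECC strength works out in practice (illustrative
numbers, applying the rules in brcmnand_get_ecc_settings() above):

	/* ecc_level = 8, 512-byte sectors -> strength 8, ecc.size 512
	 * ecc_level = 8, 1 KiB sectors    -> strength 16, ecc.size 1024
	 *                                    (level is per 512 bytes,
	 *                                    doubled for 1 KiB steps)
	 * spare 16,  ecc_level = 15       -> Hamming, strength 1 */
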
@@ -1084,8 +1151,8 @@ static int bcmnand_ctrl_poll_status(struct brcmnand_host *host,
        if ((val & mask) == expected_val)
                return 0;
 
-       dev_warn(ctrl->dev, "timeout on status poll (expected %x got %x)\n",
-                expected_val, val & mask);
+       dev_err(ctrl->dev, "timeout on status poll (expected %x got %x)\n",
+               expected_val, val & mask);
 
        return -ETIMEDOUT;
 }
@@ -1690,7 +1757,7 @@ static int brcmnand_waitfunc(struct nand_chip *chip)
                                 INTFC_FLASH_STATUS;
 }
 
-static u8 brcmnand_status(struct brcmnand_host *host)
+static int brcmnand_status(struct brcmnand_host *host)
 {
        struct nand_chip *chip = &host->chip;
        struct mtd_info *mtd = nand_to_mtd(chip);
@@ -1701,7 +1768,7 @@ static u8 brcmnand_status(struct brcmnand_host *host)
        return brcmnand_waitfunc(chip);
 }
 
-static u8 brcmnand_reset(struct brcmnand_host *host)
+static int brcmnand_reset(struct brcmnand_host *host)
 {
        struct nand_chip *chip = &host->chip;
 
@@ -1975,7 +2042,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
 {
        struct brcmnand_host *host = nand_get_controller_data(chip);
        struct brcmnand_controller *ctrl = host->ctrl;
-       int i, j, ret = 0;
+       int i, ret = 0;
 
        brcmnand_clear_ecc_addr(ctrl);
 
@@ -1988,8 +2055,8 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
                if (likely(buf)) {
                        brcmnand_soc_data_bus_prepare(ctrl->soc, false);
 
-                       for (j = 0; j < FC_WORDS; j++, buf++)
-                               *buf = brcmnand_read_fc(ctrl, j);
+                       brcmnand_read_data_bus(ctrl, ctrl->nand_fc, buf, FC_WORDS);
+                       buf += FC_WORDS;
 
                        brcmnand_soc_data_bus_unprepare(ctrl->soc, false);
                }
@@ -2137,7 +2204,7 @@ try_dmaread:
                                return err;
                }
 
-               dev_dbg(ctrl->dev, "uncorrectable error at 0x%llx\n",
+               dev_err(ctrl->dev, "uncorrectable error at 0x%llx\n",
                        (unsigned long long)err_addr);
                mtd->ecc_stats.failed++;
                /* NAND layer expects zero on ECC errors */
@@ -2339,7 +2406,7 @@ static int brcmnand_write_oob_raw(struct nand_chip *chip, int page)
 }
 
 static int brcmnand_exec_instr(struct brcmnand_host *host, int i,
-                               const struct nand_operation *op)
+               const struct nand_operation *op)
 {
        const struct nand_op_instr *instr = &op->instrs[i];
        struct brcmnand_controller *ctrl = host->ctrl;
@@ -2353,7 +2420,7 @@ static int brcmnand_exec_instr(struct brcmnand_host *host, int i,
         * (WAITRDY excepted).
         */
        last_op = ((i == (op->ninstrs - 1)) && (instr->type != NAND_OP_WAITRDY_INSTR)) ||
-                 ((i == (op->ninstrs - 2)) && (op->instrs[i+1].type == NAND_OP_WAITRDY_INSTR));
+                 ((i == (op->ninstrs - 2)) && (op->instrs[i + 1].type == NAND_OP_WAITRDY_INSTR));
 
        switch (instr->type) {
        case NAND_OP_CMD_INSTR:
@@ -2398,10 +2465,10 @@ static int brcmnand_exec_instr(struct brcmnand_host *host, int i,
 
 static int brcmnand_op_is_status(const struct nand_operation *op)
 {
-       if ((op->ninstrs == 2) &&
-           (op->instrs[0].type == NAND_OP_CMD_INSTR) &&
-           (op->instrs[0].ctx.cmd.opcode == NAND_CMD_STATUS) &&
-           (op->instrs[1].type == NAND_OP_DATA_IN_INSTR))
+       if (op->ninstrs == 2 &&
+           op->instrs[0].type == NAND_OP_CMD_INSTR &&
+           op->instrs[0].ctx.cmd.opcode == NAND_CMD_STATUS &&
+           op->instrs[1].type == NAND_OP_DATA_IN_INSTR)
                return 1;
 
        return 0;
@@ -2409,10 +2476,10 @@ static int brcmnand_op_is_status(const struct nand_operation *op)
 
 static int brcmnand_op_is_reset(const struct nand_operation *op)
 {
-       if ((op->ninstrs == 2) &&
-           (op->instrs[0].type == NAND_OP_CMD_INSTR) &&
-           (op->instrs[0].ctx.cmd.opcode == NAND_CMD_RESET) &&
-           (op->instrs[1].type == NAND_OP_WAITRDY_INSTR))
+       if (op->ninstrs == 2 &&
+           op->instrs[0].type == NAND_OP_CMD_INSTR &&
+           op->instrs[0].ctx.cmd.opcode == NAND_CMD_RESET &&
+           op->instrs[1].type == NAND_OP_WAITRDY_INSTR)
                return 1;
 
        return 0;
@@ -2433,11 +2500,14 @@ static int brcmnand_exec_op(struct nand_chip *chip,
 
        if (brcmnand_op_is_status(op)) {
                status = op->instrs[1].ctx.data.buf.in;
-               *status = brcmnand_status(host);
+               ret = brcmnand_status(host);
+               if (ret < 0)
+                       return ret;
+
+               *status = ret & 0xFF;
 
                return 0;
-       }
-       else if (brcmnand_op_is_reset(op)) {
+       } else if (brcmnand_op_is_reset(op)) {
                ret = brcmnand_reset(host);
                if (ret < 0)
                        return ret;
@@ -2608,19 +2678,37 @@ static int brcmnand_setup_dev(struct brcmnand_host *host)
                nanddev_get_memorg(&chip->base);
        struct brcmnand_controller *ctrl = host->ctrl;
        struct brcmnand_cfg *cfg = &host->hwcfg;
-       char msg[128];
+       struct device_node *np = nand_get_flash_node(chip);
        u32 offs, tmp, oob_sector;
+       bool use_strap = false;
+       char msg[128];
        int ret;
 
        memset(cfg, 0, sizeof(*cfg));
+       use_strap = of_property_read_bool(np, "brcm,nand-ecc-use-strap");
+
+       /*
+        * Either nand-ecc-xxx or brcm,nand-ecc-use-strap can be set. Error out
+        * if both exist.
+        */
+       if (chip->ecc.strength && use_strap) {
+               dev_err(ctrl->dev,
+                       "ECC strap and DT ECC configuration properties are mutually exclusive\n");
+               return -EINVAL;
+       }
+
+       if (use_strap)
+               brcmnand_get_ecc_settings(host, chip);
 
-       ret = of_property_read_u32(nand_get_flash_node(chip),
-                                  "brcm,nand-oob-sector-size",
+       ret = of_property_read_u32(np, "brcm,nand-oob-sector-size",
                                   &oob_sector);
        if (ret) {
-               /* Use detected size */
-               cfg->spare_area_size = mtd->oobsize /
-                                       (mtd->writesize >> FC_SHIFT);
+               if (use_strap)
+                       cfg->spare_area_size = brcmnand_get_spare_size(host);
+               else
+                       /* Use detected size */
+                       cfg->spare_area_size = mtd->oobsize /
+                                               (mtd->writesize >> FC_SHIFT);
        } else {
                cfg->spare_area_size = oob_sector;
        }
@@ -3135,6 +3223,10 @@ int brcmnand_probe(struct platform_device *pdev, struct brcmnand_soc *soc)
        /* Disable XOR addressing */
        brcmnand_rmw_reg(ctrl, BRCMNAND_CS_XOR, 0xff, 0, 0);
 
+       /* Check if the board connects the WP pin */
+       if (of_property_read_bool(dn, "brcm,wp-not-connected"))
+               wp_on = 0;
+
        if (ctrl->features & BRCMNAND_HAS_WP) {
                /* Permanently disable write protection */
                if (wp_on == 2)
index 928114c0be5ef0c16b4822ee9bb8cabff004dbca..9f171252a2ae211756a529e695fbcda3a3873f65 100644 (file)
@@ -24,6 +24,8 @@ struct brcmnand_soc {
        void (*ctlrdy_set_enabled)(struct brcmnand_soc *soc, bool en);
        void (*prepare_data_bus)(struct brcmnand_soc *soc, bool prepare,
                                 bool is_param);
+       void (*read_data_bus)(struct brcmnand_soc *soc, void __iomem *flash_cache,
+                             u32 *buffer, int fc_words);
        const struct brcmnand_io_ops *ops;
 };
 
index 1e3a80f06f3308c303811cc8d57286124a4ca7bc..df6a0d5c86bb30684124cb21c1dca021866a3c23 100644 (file)
@@ -869,7 +869,8 @@ static int fsl_elbc_nand_probe(struct platform_device *pdev)
        struct mtd_info *mtd;
 
        if (!fsl_lbc_ctrl_dev || !fsl_lbc_ctrl_dev->regs)
-               return -ENODEV;
+               return dev_err_probe(&pdev->dev, -EPROBE_DEFER, "lbc_ctrl_dev missing\n");
+
        lbc = fsl_lbc_ctrl_dev->regs;
        dev = fsl_lbc_ctrl_dev->dev;
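
dev_err_probe() both logs and returns the error; for -EPROBE_DEFER the
message is recorded in debugfs ("devices_deferred") instead of the kernel
log, so deferred probes stay quiet. The general pattern (illustrative):

	if (IS_ERR(clk))
		return dev_err_probe(dev, PTR_ERR(clk),
				     "failed to get clock\n");
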
 
index 488fd452611a66ea70afa3023d1d95cacfdad478..677fcb03f9bef14a9717543c221d188ea7dfedc3 100644 (file)
@@ -303,8 +303,9 @@ static int lpc32xx_nand_device_ready(struct nand_chip *nand_chip)
        return 0;
 }
 
-static irqreturn_t lpc3xxx_nand_irq(int irq, struct lpc32xx_nand_host *host)
+static irqreturn_t lpc3xxx_nand_irq(int irq, void *data)
 {
+       struct lpc32xx_nand_host *host = data;
        uint8_t sr;
 
        /* Clear interrupt flag by reading status */
@@ -780,7 +781,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
                goto release_dma_chan;
        }
 
-       if (request_irq(host->irq, (irq_handler_t)&lpc3xxx_nand_irq,
+       if (request_irq(host->irq, &lpc3xxx_nand_irq,
                        IRQF_TRIGGER_HIGH, DRV_NAME, host)) {
                dev_err(&pdev->dev, "Error requesting NAND IRQ\n");
                res = -ENXIO;
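
The cast is dropped because request_irq() expects the canonical handler
prototype; calling through a mismatched function-pointer type is undefined
behaviour and breaks kCFI. The expected shape (handler and device struct
names are illustrative):

	static irqreturn_t foo_irq(int irq, void *data)
	{
		struct foo_dev *fdev = data;	/* cookie passed to request_irq() */

		/* ... acknowledge the interrupt ... */
		return IRQ_HANDLED;
	}
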
index cdb58aca59c083c0d08606532882a637f1db4b3c..2a96a87cf79ce0d3e6bd14c0699310cba1bec8ce 100644 (file)
@@ -63,7 +63,7 @@
 #define CMDRWGEN(cmd_dir, ran, bch, short_mode, page_size, pages)      \
        (                                                               \
                (cmd_dir)                       |                       \
-               ((ran) << 19)                   |                       \
+               (ran)                           |                       \
                ((bch) << 14)                   |                       \
                ((short_mode) << 13)            |                       \
                (((page_size) & 0x7f) << 6)     |                       \
index 60198e33d2d553f39aa09636ace80899392d30b3..17477bb2d48ff0d0b8423f0611fc9d8a9ea27de6 100644 (file)
@@ -1356,7 +1356,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
                return -EINVAL;
        }
 
-       chip = devm_kzalloc(dev, sizeof(*chip) + nsels * sizeof(u8),
+       chip = devm_kzalloc(dev, struct_size(chip, sels, nsels),
                            GFP_KERNEL);
        if (!chip)
                return -ENOMEM;
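
struct_size() computes the same allocation size with overflow checking,
assuming the trailing member is a flexible array (as the old open-coded
nsels * sizeof(u8) implies):

	/* Hypothetical shape of the structure being sized: */
	struct mtk_nfc_nand_chip {
		/* ... */
		u8 sels[];	/* one entry per chip select */
	};

	/* struct_size(chip, sels, nsels)
	 *   == sizeof(*chip) + nsels * sizeof(chip->sels[0]),
	 * saturating at SIZE_MAX on overflow instead of wrapping. */
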
index 3b3ce2926f5d11f3e18d66b4df8bfab04c14ed70..d7dbbd469b892536bdf2ea01b4f226006b75a8dc 100644 (file)
@@ -1211,21 +1211,36 @@ static int nand_lp_exec_read_page_op(struct nand_chip *chip, unsigned int page,
        return nand_exec_op(chip, &op);
 }
 
+static unsigned int rawnand_last_page_of_lun(unsigned int pages_per_lun, unsigned int lun)
+{
+       /* lun is expected to be very small */
+       return (lun * pages_per_lun) + pages_per_lun - 1;
+}
+
 static void rawnand_cap_cont_reads(struct nand_chip *chip)
 {
        struct nand_memory_organization *memorg;
-       unsigned int pages_per_lun, first_lun, last_lun;
+       unsigned int ppl, first_lun, last_lun;
 
        memorg = nanddev_get_memorg(&chip->base);
-       pages_per_lun = memorg->pages_per_eraseblock * memorg->eraseblocks_per_lun;
-       first_lun = chip->cont_read.first_page / pages_per_lun;
-       last_lun = chip->cont_read.last_page / pages_per_lun;
+       ppl = memorg->pages_per_eraseblock * memorg->eraseblocks_per_lun;
+       first_lun = chip->cont_read.first_page / ppl;
+       last_lun = chip->cont_read.last_page / ppl;
 
        /* Prevent sequential cache reads across LUN boundaries */
        if (first_lun != last_lun)
-               chip->cont_read.pause_page = first_lun * pages_per_lun + pages_per_lun - 1;
+               chip->cont_read.pause_page = rawnand_last_page_of_lun(ppl, first_lun);
        else
                chip->cont_read.pause_page = chip->cont_read.last_page;
+
+       if (chip->cont_read.first_page == chip->cont_read.pause_page) {
+               chip->cont_read.first_page++;
+               chip->cont_read.pause_page = min(chip->cont_read.last_page,
+                                                rawnand_last_page_of_lun(ppl, first_lun + 1));
+       }
+
+       if (chip->cont_read.first_page >= chip->cont_read.last_page)
+               chip->cont_read.ongoing = false;
 }
 
 static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int page,
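
A worked example of the new capping logic (hypothetical geometry):

	/* ppl (pages per LUN) = 1024, window = [1000, 1100]:
	 *   first_lun = 0, last_lun = 1 -> pause_page = 1023 (end of LUN 0)
	 *
	 * If first_page is itself the pause page, e.g. window = [1023, 1100]:
	 *   first_page becomes 1024 and pause_page becomes
	 *   min(1100, last page of LUN 1 = 2047) = 1100.
	 *
	 * If bumping first_page leaves an empty window, ongoing is cleared. */
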
@@ -1292,12 +1307,11 @@ static int nand_lp_exec_cont_read_page_op(struct nand_chip *chip, unsigned int p
        if (!chip->cont_read.ongoing)
                return 0;
 
-       if (page == chip->cont_read.pause_page &&
-           page != chip->cont_read.last_page) {
-               chip->cont_read.first_page = chip->cont_read.pause_page + 1;
-               rawnand_cap_cont_reads(chip);
-       } else if (page == chip->cont_read.last_page) {
+       if (page == chip->cont_read.last_page) {
                chip->cont_read.ongoing = false;
+       } else if (page == chip->cont_read.pause_page) {
+               chip->cont_read.first_page++;
+               rawnand_cap_cont_reads(chip);
        }
 
        return 0;
@@ -3466,30 +3480,36 @@ static void rawnand_enable_cont_reads(struct nand_chip *chip, unsigned int page,
                                      u32 readlen, int col)
 {
        struct mtd_info *mtd = nand_to_mtd(chip);
-       unsigned int end_page, end_col;
+       unsigned int first_page, last_page;
 
        chip->cont_read.ongoing = false;
 
        if (!chip->controller->supported_op.cont_read)
                return;
 
-       end_page = DIV_ROUND_UP(col + readlen, mtd->writesize);
-       end_col = (col + readlen) % mtd->writesize;
+       /*
+        * Don't bother making any calculations if the length is too small.
+        * Side effect: avoids possible integer underflows below.
+        */
+       if (readlen < (2 * mtd->writesize))
+               return;
 
+       /* Derive the page where continuous read should start (the first full page read) */
+       first_page = page;
        if (col)
-               page++;
+               first_page++;
 
-       if (end_col && end_page)
-               end_page--;
+       /* Derive the page where continuous read should stop (the last full page read) */
+       last_page = page + ((col + readlen) / mtd->writesize) - 1;
 
-       if (page + 1 > end_page)
-               return;
-
-       chip->cont_read.first_page = page;
-       chip->cont_read.last_page = end_page;
-       chip->cont_read.ongoing = true;
-
-       rawnand_cap_cont_reads(chip);
+       /* Configure and enable continuous read when suitable */
+       if (first_page < last_page) {
+               chip->cont_read.first_page = first_page;
+               chip->cont_read.last_page = last_page;
+               chip->cont_read.ongoing = true;
+               /* May reset the ongoing flag */
+               rawnand_cap_cont_reads(chip);
+       }
 }
 
 static void rawnand_cont_read_skip_first_page(struct nand_chip *chip, unsigned int page)
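
A worked example of the rewritten window derivation (hypothetical values):

	/* writesize = 4096, page = 10, col = 512, readlen = 20480:
	 *
	 * readlen >= 2 * writesize, so a window is considered.
	 *   first_page = 10 + 1 = 11                  (col != 0: page 10 is partial)
	 *   last_page  = 10 + (512 + 20480)/4096 - 1 = 14
	 *
	 * Pages 11..14 are the only whole-page reads, so the window is
	 * [11, 14]; the partial head (page 10) and partial tail (page 15)
	 * still use normal page reads. first_page < last_page, so the
	 * window is enabled and then capped at LUN boundaries. */
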
@@ -3498,10 +3518,7 @@ static void rawnand_cont_read_skip_first_page(struct nand_chip *chip, unsigned i
                return;
 
        chip->cont_read.first_page++;
-       if (chip->cont_read.first_page == chip->cont_read.pause_page)
-               chip->cont_read.first_page++;
-       if (chip->cont_read.first_page >= chip->cont_read.last_page)
-               chip->cont_read.ongoing = false;
+       rawnand_cap_cont_reads(chip);
 }
 
 /**
@@ -3577,7 +3594,8 @@ static int nand_do_read_ops(struct nand_chip *chip, loff_t from,
        oob = ops->oobbuf;
        oob_required = oob ? 1 : 0;
 
-       rawnand_enable_cont_reads(chip, page, readlen, col);
+       if (likely(ops->mode != MTD_OPS_RAW))
+               rawnand_enable_cont_reads(chip, page, readlen, col);
 
        while (1) {
                struct mtd_ecc_stats ecc_stats = mtd->ecc_stats;
@@ -3710,6 +3728,9 @@ read_retry:
        }
        nand_deselect_target(chip);
 
+       if (WARN_ON_ONCE(chip->cont_read.ongoing))
+               chip->cont_read.ongoing = false;
+
        ops->retlen = ops->len - (size_t) readlen;
        if (oob)
                ops->oobretlen = ops->ooblen - oobreadlen;
@@ -5195,6 +5216,15 @@ static void rawnand_late_check_supported_ops(struct nand_chip *chip)
        if (!nand_has_exec_op(chip))
                return;
 
+       /*
+        * For now, continuous reads can only be used with the core page helpers.
+        * This can be extended later.
+        */
+       if (!(chip->ecc.read_page == nand_read_page_hwecc ||
+             chip->ecc.read_page == nand_read_page_syndrome ||
+             chip->ecc.read_page == nand_read_page_swecc))
+               return;
+
        rawnand_check_cont_read_support(chip);
 }
 
index e4664fa6fd9ef2b199f07a27c05f5299b6c5a2ec..a8fba5f39f591ca3a24f408b88c1b01bb8c0ac29 100644 (file)
@@ -576,7 +576,6 @@ static int search_bbt(struct nand_chip *this, uint8_t *buf,
                startblock &= bbtblocks - 1;
        } else {
                chips = 1;
-               bbtblocks = mtd->size >> this->bbt_erase_shift;
        }
 
        for (i = 0; i < chips; i++) {
index 39076735a3fbb0d155ad95d6c3d555f4d73e6116..a74e64e0cfa3231ddce31310c93dba2ca6425592 100644 (file)
@@ -31,7 +31,6 @@ struct hynix_read_retry {
 
 /**
  * struct hynix_nand - private Hynix NAND struct
- * @nand_technology: manufacturing process expressed in picometer
  * @read_retry: read-retry information
  */
 struct hynix_nand {
index 88811139aaf5b9966a790c403d3fb8519350a74f..264556939a00f0a66a09ef708b4da70ba8736bfb 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
@@ -37,7 +38,7 @@
 #define FMC2_MAX_SG                    16
 
 /* Max chip enable */
-#define FMC2_MAX_CE                    2
+#define FMC2_MAX_CE                    4
 
 /* Max ECC buffer length */
 #define FMC2_MAX_ECC_BUF_LEN           (FMC2_BCHDSRS_LEN * FMC2_MAX_SG)
@@ -243,6 +244,13 @@ static inline struct stm32_fmc2_nand *to_fmc2_nand(struct nand_chip *chip)
        return container_of(chip, struct stm32_fmc2_nand, chip);
 }
 
+struct stm32_fmc2_nfc;
+
+struct stm32_fmc2_nfc_data {
+       int max_ncs;
+       int (*set_cdev)(struct stm32_fmc2_nfc *nfc);
+};
+
 struct stm32_fmc2_nfc {
        struct nand_controller base;
        struct stm32_fmc2_nand nand;
@@ -256,6 +264,7 @@ struct stm32_fmc2_nfc {
        phys_addr_t data_phys_addr[FMC2_MAX_CE];
        struct clk *clk;
        u8 irq_state;
+       const struct stm32_fmc2_nfc_data *data;
 
        struct dma_chan *dma_tx_ch;
        struct dma_chan *dma_rx_ch;
@@ -264,6 +273,8 @@ struct stm32_fmc2_nfc {
        struct sg_table dma_ecc_sg;
        u8 *ecc_buf;
        int dma_ecc_len;
+       u32 tx_dma_max_burst;
+       u32 rx_dma_max_burst;
 
        struct completion complete;
        struct completion dma_data_complete;
@@ -347,20 +358,26 @@ static int stm32_fmc2_nfc_select_chip(struct nand_chip *chip, int chipnr)
        stm32_fmc2_nfc_setup(chip);
        stm32_fmc2_nfc_timings_init(chip);
 
-       if (nfc->dma_tx_ch && nfc->dma_rx_ch) {
+       if (nfc->dma_tx_ch) {
                memset(&dma_cfg, 0, sizeof(dma_cfg));
-               dma_cfg.src_addr = nfc->data_phys_addr[nfc->cs_sel];
                dma_cfg.dst_addr = nfc->data_phys_addr[nfc->cs_sel];
-               dma_cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
                dma_cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-               dma_cfg.src_maxburst = 32;
-               dma_cfg.dst_maxburst = 32;
+               dma_cfg.dst_maxburst = nfc->tx_dma_max_burst /
+                                      dma_cfg.dst_addr_width;
 
                ret = dmaengine_slave_config(nfc->dma_tx_ch, &dma_cfg);
                if (ret) {
                        dev_err(nfc->dev, "tx DMA engine slave config failed\n");
                        return ret;
                }
+       }
+
+       if (nfc->dma_rx_ch) {
+               memset(&dma_cfg, 0, sizeof(dma_cfg));
+               dma_cfg.src_addr = nfc->data_phys_addr[nfc->cs_sel];
+               dma_cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+               dma_cfg.src_maxburst = nfc->rx_dma_max_burst /
+                                      dma_cfg.src_addr_width;
 
                ret = dmaengine_slave_config(nfc->dma_rx_ch, &dma_cfg);
                if (ret) {
@@ -1545,6 +1562,7 @@ static int stm32_fmc2_nfc_setup_interface(struct nand_chip *chip, int chipnr,
 
 static int stm32_fmc2_nfc_dma_setup(struct stm32_fmc2_nfc *nfc)
 {
+       struct dma_slave_caps caps;
        int ret = 0;
 
        nfc->dma_tx_ch = dma_request_chan(nfc->dev, "tx");
@@ -1557,6 +1575,11 @@ static int stm32_fmc2_nfc_dma_setup(struct stm32_fmc2_nfc *nfc)
                goto err_dma;
        }
 
+       ret = dma_get_slave_caps(nfc->dma_tx_ch, &caps);
+       if (ret)
+               return ret;
+       nfc->tx_dma_max_burst = caps.max_burst;
+
        nfc->dma_rx_ch = dma_request_chan(nfc->dev, "rx");
        if (IS_ERR(nfc->dma_rx_ch)) {
                ret = PTR_ERR(nfc->dma_rx_ch);
@@ -1567,6 +1590,11 @@ static int stm32_fmc2_nfc_dma_setup(struct stm32_fmc2_nfc *nfc)
                goto err_dma;
        }
 
+       ret = dma_get_slave_caps(nfc->dma_rx_ch, &caps);
+       if (ret)
+               return ret;
+       nfc->rx_dma_max_burst = caps.max_burst;
+
        nfc->dma_ecc_ch = dma_request_chan(nfc->dev, "ecc");
        if (IS_ERR(nfc->dma_ecc_ch)) {
                ret = PTR_ERR(nfc->dma_ecc_ch);
@@ -1790,7 +1818,7 @@ static int stm32_fmc2_nfc_parse_child(struct stm32_fmc2_nfc *nfc,
                        return ret;
                }
 
-               if (cs >= FMC2_MAX_CE) {
+               if (cs >= nfc->data->max_ncs) {
                        dev_err(nfc->dev, "invalid reg value: %d\n", cs);
                        return -EINVAL;
                }
@@ -1896,9 +1924,17 @@ static int stm32_fmc2_nfc_probe(struct platform_device *pdev)
        nand_controller_init(&nfc->base);
        nfc->base.ops = &stm32_fmc2_nfc_controller_ops;
 
-       ret = stm32_fmc2_nfc_set_cdev(nfc);
-       if (ret)
-               return ret;
+       nfc->data = of_device_get_match_data(dev);
+       if (!nfc->data)
+               return -EINVAL;
+
+       if (nfc->data->set_cdev) {
+               ret = nfc->data->set_cdev(nfc);
+               if (ret)
+                       return ret;
+       } else {
+               nfc->cdev = dev->parent;
+       }
 
        ret = stm32_fmc2_nfc_parse_dt(nfc);
        if (ret)
@@ -1917,7 +1953,7 @@ static int stm32_fmc2_nfc_probe(struct platform_device *pdev)
        if (nfc->dev == nfc->cdev)
                start_region = 1;
 
-       for (chip_cs = 0, mem_region = start_region; chip_cs < FMC2_MAX_CE;
+       for (chip_cs = 0, mem_region = start_region; chip_cs < nfc->data->max_ncs;
             chip_cs++, mem_region += 3) {
                if (!(nfc->cs_assigned & BIT(chip_cs)))
                        continue;
@@ -2073,7 +2109,7 @@ static int __maybe_unused stm32_fmc2_nfc_resume(struct device *dev)
 
        stm32_fmc2_nfc_wp_disable(nand);
 
-       for (chip_cs = 0; chip_cs < FMC2_MAX_CE; chip_cs++) {
+       for (chip_cs = 0; chip_cs < nfc->data->max_ncs; chip_cs++) {
                if (!(nfc->cs_assigned & BIT(chip_cs)))
                        continue;
 
@@ -2086,9 +2122,28 @@ static int __maybe_unused stm32_fmc2_nfc_resume(struct device *dev)
 static SIMPLE_DEV_PM_OPS(stm32_fmc2_nfc_pm_ops, stm32_fmc2_nfc_suspend,
                         stm32_fmc2_nfc_resume);
 
+static const struct stm32_fmc2_nfc_data stm32_fmc2_nfc_mp1_data = {
+       .max_ncs = 2,
+       .set_cdev = stm32_fmc2_nfc_set_cdev,
+};
+
+static const struct stm32_fmc2_nfc_data stm32_fmc2_nfc_mp25_data = {
+       .max_ncs = 4,
+};
+
 static const struct of_device_id stm32_fmc2_nfc_match[] = {
-       {.compatible = "st,stm32mp15-fmc2"},
-       {.compatible = "st,stm32mp1-fmc2-nfc"},
+       {
+               .compatible = "st,stm32mp15-fmc2",
+               .data = &stm32_fmc2_nfc_mp1_data,
+       },
+       {
+               .compatible = "st,stm32mp1-fmc2-nfc",
+               .data = &stm32_fmc2_nfc_mp1_data,
+       },
+       {
+               .compatible = "st,stm32mp25-fmc2-nfc",
+               .data = &stm32_fmc2_nfc_mp25_data,
+       },
        {}
 };
 MODULE_DEVICE_TABLE(of, stm32_fmc2_nfc_match);
index 31c439a557b1840067a8eca69244362bf926b05f..4597a82de23a45919adf97f36ac0ea87d6ef6f53 100644 (file)
@@ -104,7 +104,8 @@ static const struct mtd_ooblayout_ops f50l1g41lb_ooblayout = {
 
 static const struct spinand_info esmt_c8_spinand_table[] = {
        SPINAND_INFO("F50L1G41LB",
-                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0x01),
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0x01, 0x7f,
+                               0x7f, 0x7f),
                     NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
                     NAND_ECCREQ(1, 512),
                     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
@@ -113,7 +114,8 @@ static const struct spinand_info esmt_c8_spinand_table[] = {
                     0,
                     SPINAND_ECCINFO(&f50l1g41lb_ooblayout, NULL)),
        SPINAND_INFO("F50D1G41LB",
-                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0x11),
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0x11, 0x7f,
+                               0x7f, 0x7f),
                     NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
                     NAND_ECCREQ(1, 512),
                     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
@@ -122,7 +124,8 @@ static const struct spinand_info esmt_c8_spinand_table[] = {
                     0,
                     SPINAND_ECCINFO(&f50l1g41lb_ooblayout, NULL)),
        SPINAND_INFO("F50D2G41KA",
-                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0x51),
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0x51, 0x7f,
+                               0x7f, 0x7f),
                     NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 1, 1, 1),
                     NAND_ECCREQ(8, 512),
                     SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
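
Growing the table IDs from one device byte to four tightens matching: the chip must also echo the trailing 0x7f bytes (the JEDEC continuation code these parts return in the remaining READID positions), so another vendor's device that merely shares the first ID byte can no longer bind to this entry. The comparison itself stays a plain prefix match over devid.len bytes; a simplified sketch of that check (the real logic lives in the SPI NAND core):

    static bool sketch_id_matches(const u8 *readid, const u8 *devid,
                                  unsigned int len)
    {
            /* len is now 4, not 1, so the padding bytes must match too */
            return !memcmp(readid, devid, len);
    }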
index 1a473021cca5110ac045d98963b58ddf5f940f90..ba7c813b9542bc9f7551bde9289b8a5b4a59fb47 100644 (file)
@@ -15,6 +15,8 @@
 
 #define WINBOND_CFG_BUF_READ           BIT(3)
 
+#define W25N04KV_STATUS_ECC_5_8_BITFLIPS       (3 << 4)
+
 static SPINAND_OP_VARIANTS(read_cache_variants,
                SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0),
                SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0),
@@ -118,6 +120,7 @@ static int w25n02kv_ecc_get_status(struct spinand_device *spinand,
                return -EBADMSG;
 
        case STATUS_ECC_HAS_BITFLIPS:
+       case W25N04KV_STATUS_ECC_5_8_BITFLIPS:
                /*
                 * Let's try to retrieve the real maximum number of bitflips
                 * in order to avoid forcing the wear-leveling layer to move
@@ -214,6 +217,15 @@ static const struct spinand_info winbond_spinand_table[] = {
                                              &update_cache_variants),
                     0,
                     SPINAND_ECCINFO(&w25m02gv_ooblayout, w25n02kv_ecc_get_status)),
+       SPINAND_INFO("W25N04KV",
+                    SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xaa, 0x23),
+                    NAND_MEMORG(1, 2048, 128, 64, 4096, 40, 2, 1, 1),
+                    NAND_ECCREQ(8, 512),
+                    SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
+                                             &write_cache_variants,
+                                             &update_cache_variants),
+                    0,
+                    SPINAND_ECCINFO(&w25n02kv_ooblayout, w25n02kv_ecc_get_status)),
 };
 
 static int winbond_spinand_init(struct spinand_device *spinand)
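
The status register's two-bit ECC field gains a fourth meaning on the W25N04KV: 3 << 4 reports 5-8 corrected bitflips rather than an uncorrectable error, so it is folded into the existing "has bitflips" handling. A sketch of the resulting decode (the STATUS_ECC_* constants come from <linux/mtd/spinand.h>; the 5-8 flip value is the driver-local define added above):

    #include <linux/errno.h>
    #include <linux/mtd/spinand.h>

    #define W25N04KV_STATUS_ECC_5_8_BITFLIPS        (3 << 4)

    static int sketch_ecc_status(u8 status)
    {
            switch (status & STATUS_ECC_MASK) {
            case STATUS_ECC_NO_BITFLIPS:
                    return 0;
            case STATUS_ECC_UNCOR_ERROR:
                    return -EBADMSG;
            case STATUS_ECC_HAS_BITFLIPS:                   /* up to 4 flips */
            case W25N04KV_STATUS_ECC_5_8_BITFLIPS:          /* 5 to 8 flips */
                    /* Corrected data either way; the driver re-reads to
                     * report the real maximum instead of a pessimistic
                     * constant. 8 here is a conservative stand-in. */
                    return 8;
            }
            return -EINVAL;
    }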
index 4129764fad8cf5faaeb7763b2576805dee1ebf78..3e1f1913536bf52f14b91e0569cd28e0a7228271 100644 (file)
@@ -1158,7 +1158,7 @@ static u8 spi_nor_convert_3to4_erase(u8 opcode)
 
 static bool spi_nor_has_uniform_erase(const struct spi_nor *nor)
 {
-       return !!nor->params->erase_map.uniform_erase_type;
+       return !!nor->params->erase_map.uniform_region.erase_mask;
 }
 
 static void spi_nor_set_4byte_opcodes(struct spi_nor *nor)
@@ -1542,7 +1542,6 @@ spi_nor_find_best_erase_type(const struct spi_nor_erase_map *map,
        const struct spi_nor_erase_type *erase;
        u32 rem;
        int i;
-       u8 erase_mask = region->offset & SNOR_ERASE_TYPE_MASK;
 
        /*
         * Erase types are ordered by size, with the smallest erase type at
@@ -1550,7 +1549,7 @@ spi_nor_find_best_erase_type(const struct spi_nor_erase_map *map,
         */
        for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
                /* Does the erase region support the tested erase type? */
-               if (!(erase_mask & BIT(i)))
+               if (!(region->erase_mask & BIT(i)))
                        continue;
 
                erase = &map->erase_type[i];
@@ -1558,8 +1557,7 @@ spi_nor_find_best_erase_type(const struct spi_nor_erase_map *map,
                        continue;
 
                /* Alignment is not mandatory for overlaid regions */
-               if (region->offset & SNOR_OVERLAID_REGION &&
-                   region->size <= len)
+               if (region->overlaid && region->size <= len)
                        return erase;
 
                /* Don't erase more than what the user has asked for. */
@@ -1574,59 +1572,6 @@ spi_nor_find_best_erase_type(const struct spi_nor_erase_map *map,
        return NULL;
 }
 
-static u64 spi_nor_region_is_last(const struct spi_nor_erase_region *region)
-{
-       return region->offset & SNOR_LAST_REGION;
-}
-
-static u64 spi_nor_region_end(const struct spi_nor_erase_region *region)
-{
-       return (region->offset & ~SNOR_ERASE_FLAGS_MASK) + region->size;
-}
-
-/**
- * spi_nor_region_next() - get the next spi nor region
- * @region:    pointer to a structure that describes a SPI NOR erase region
- *
- * Return: the next spi nor region or NULL if last region.
- */
-struct spi_nor_erase_region *
-spi_nor_region_next(struct spi_nor_erase_region *region)
-{
-       if (spi_nor_region_is_last(region))
-               return NULL;
-       region++;
-       return region;
-}
-
-/**
- * spi_nor_find_erase_region() - find the region of the serial flash memory in
- *                              which the offset fits
- * @map:       the erase map of the SPI NOR
- * @addr:      offset in the serial flash memory
- *
- * Return: a pointer to the spi_nor_erase_region struct, ERR_PTR(-errno)
- *        otherwise.
- */
-static struct spi_nor_erase_region *
-spi_nor_find_erase_region(const struct spi_nor_erase_map *map, u64 addr)
-{
-       struct spi_nor_erase_region *region = map->regions;
-       u64 region_start = region->offset & ~SNOR_ERASE_FLAGS_MASK;
-       u64 region_end = region_start + region->size;
-
-       while (addr < region_start || addr >= region_end) {
-               region = spi_nor_region_next(region);
-               if (!region)
-                       return ERR_PTR(-EINVAL);
-
-               region_start = region->offset & ~SNOR_ERASE_FLAGS_MASK;
-               region_end = region_start + region->size;
-       }
-
-       return region;
-}
-
 /**
  * spi_nor_init_erase_cmd() - initialize an erase command
  * @region:    pointer to a structure that describes a SPI NOR erase region
@@ -1649,7 +1594,7 @@ spi_nor_init_erase_cmd(const struct spi_nor_erase_region *region,
        cmd->opcode = erase->opcode;
        cmd->count = 1;
 
-       if (region->offset & SNOR_OVERLAID_REGION)
+       if (region->overlaid)
                cmd->size = region->size;
        else
                cmd->size = erase->size;
@@ -1693,44 +1638,36 @@ static int spi_nor_init_erase_cmd_list(struct spi_nor *nor,
        struct spi_nor_erase_region *region;
        struct spi_nor_erase_command *cmd = NULL;
        u64 region_end;
+       unsigned int i;
        int ret = -EINVAL;
 
-       region = spi_nor_find_erase_region(map, addr);
-       if (IS_ERR(region))
-               return PTR_ERR(region);
-
-       region_end = spi_nor_region_end(region);
+       for (i = 0; i < map->n_regions && len; i++) {
+               region = &map->regions[i];
+               region_end = region->offset + region->size;
 
-       while (len) {
-               erase = spi_nor_find_best_erase_type(map, region, addr, len);
-               if (!erase)
-                       goto destroy_erase_cmd_list;
-
-               if (prev_erase != erase ||
-                   erase->size != cmd->size ||
-                   region->offset & SNOR_OVERLAID_REGION) {
-                       cmd = spi_nor_init_erase_cmd(region, erase);
-                       if (IS_ERR(cmd)) {
-                               ret = PTR_ERR(cmd);
+               while (len && addr >= region->offset && addr < region_end) {
+                       erase = spi_nor_find_best_erase_type(map, region, addr,
+                                                            len);
+                       if (!erase)
                                goto destroy_erase_cmd_list;
-                       }
-
-                       list_add_tail(&cmd->list, erase_list);
-               } else {
-                       cmd->count++;
-               }
 
-               addr += cmd->size;
-               len -= cmd->size;
+                       if (prev_erase != erase || erase->size != cmd->size ||
+                           region->overlaid) {
+                               cmd = spi_nor_init_erase_cmd(region, erase);
+                               if (IS_ERR(cmd)) {
+                                       ret = PTR_ERR(cmd);
+                                       goto destroy_erase_cmd_list;
+                               }
+
+                               list_add_tail(&cmd->list, erase_list);
+                       } else {
+                               cmd->count++;
+                       }
 
-               if (len && addr >= region_end) {
-                       region = spi_nor_region_next(region);
-                       if (!region)
-                               goto destroy_erase_cmd_list;
-                       region_end = spi_nor_region_end(region);
+                       len -= cmd->size;
+                       addr += cmd->size;
+                       prev_erase = erase;
                }
-
-               prev_erase = erase;
        }
 
        return 0;
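
With the region array now carrying an explicit n_regions count, the command-list builder needs neither the sentinel-flag walk (spi_nor_region_next()) nor a separate lookup of the starting region: it indexes the regions in order and consumes the request while the cursor stays inside the current one. The shape, reduced to a sketch with hypothetical types:

    #include <linux/types.h>

    struct region { u64 offset, size; };

    static void sketch_walk(const struct region *regions,
                            unsigned int n_regions, u64 addr, u64 len,
                            u64 step)
    {
            unsigned int i;

            for (i = 0; i < n_regions && len; i++) {
                    u64 end = regions[i].offset + regions[i].size;

                    /* emit/extend erase commands while inside region i */
                    while (len && addr >= regions[i].offset && addr < end) {
                            addr += step;
                            len -= step;
                    }
            }
    }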
@@ -2468,12 +2405,11 @@ void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase)
 void spi_nor_init_uniform_erase_map(struct spi_nor_erase_map *map,
                                    u8 erase_mask, u64 flash_size)
 {
-       /* Offset 0 with erase_mask and SNOR_LAST_REGION bit set */
-       map->uniform_region.offset = (erase_mask & SNOR_ERASE_TYPE_MASK) |
-                                    SNOR_LAST_REGION;
+       map->uniform_region.offset = 0;
        map->uniform_region.size = flash_size;
+       map->uniform_region.erase_mask = erase_mask;
        map->regions = &map->uniform_region;
-       map->uniform_erase_type = erase_mask;
+       map->n_regions = 1;
 }
 
 int spi_nor_post_bfpt_fixups(struct spi_nor *nor,
@@ -2560,7 +2496,7 @@ spi_nor_select_uniform_erase(struct spi_nor_erase_map *map)
 {
        const struct spi_nor_erase_type *tested_erase, *erase = NULL;
        int i;
-       u8 uniform_erase_type = map->uniform_erase_type;
+       u8 uniform_erase_type = map->uniform_region.erase_mask;
 
        /*
         * Search for the biggest erase size, except for when compiled
@@ -2599,8 +2535,7 @@ spi_nor_select_uniform_erase(struct spi_nor_erase_map *map)
                return NULL;
 
        /* Disable all other Sector Erase commands. */
-       map->uniform_erase_type &= ~SNOR_ERASE_TYPE_MASK;
-       map->uniform_erase_type |= BIT(erase - map->erase_type);
+       map->uniform_region.erase_mask = BIT(erase - map->erase_type);
        return erase;
 }
 
@@ -3434,7 +3369,54 @@ static const struct flash_info *spi_nor_get_flash_info(struct spi_nor *nor,
        return info;
 }
 
-static void spi_nor_set_mtd_info(struct spi_nor *nor)
+static u32
+spi_nor_get_region_erasesize(const struct spi_nor_erase_region *region,
+                            const struct spi_nor_erase_type *erase_type)
+{
+       int i;
+
+       if (region->overlaid)
+               return region->size;
+
+       for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
+               if (region->erase_mask & BIT(i))
+                       return erase_type[i].size;
+       }
+
+       return 0;
+}
+
+static int spi_nor_set_mtd_eraseregions(struct spi_nor *nor)
+{
+       const struct spi_nor_erase_map *map = &nor->params->erase_map;
+       const struct spi_nor_erase_region *region = map->regions;
+       struct mtd_erase_region_info *mtd_region;
+       struct mtd_info *mtd = &nor->mtd;
+       u32 erasesize, i;
+
+       mtd_region = devm_kcalloc(nor->dev, map->n_regions, sizeof(*mtd_region),
+                                 GFP_KERNEL);
+       if (!mtd_region)
+               return -ENOMEM;
+
+       for (i = 0; i < map->n_regions; i++) {
+               erasesize = spi_nor_get_region_erasesize(&region[i],
+                                                        map->erase_type);
+               if (!erasesize)
+                       return -EINVAL;
+
+               mtd_region[i].erasesize = erasesize;
+               mtd_region[i].numblocks = div64_ul(region[i].size, erasesize);
+               mtd_region[i].offset = region[i].offset;
+       }
+
+       mtd->numeraseregions = map->n_regions;
+       mtd->eraseregions = mtd_region;
+
+       return 0;
+}
+
+static int spi_nor_set_mtd_info(struct spi_nor *nor)
 {
        struct mtd_info *mtd = &nor->mtd;
        struct device *dev = nor->dev;
@@ -3465,6 +3447,11 @@ static void spi_nor_set_mtd_info(struct spi_nor *nor)
        mtd->_resume = spi_nor_resume;
        mtd->_get_device = spi_nor_get_device;
        mtd->_put_device = spi_nor_put_device;
+
+       if (!spi_nor_has_uniform_erase(nor))
+               return spi_nor_set_mtd_eraseregions(nor);
+
+       return 0;
 }
 
 static int spi_nor_hw_reset(struct spi_nor *nor)
@@ -3555,7 +3542,9 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
                return ret;
 
        /* No mtd_info fields should be used up to this point. */
-       spi_nor_set_mtd_info(nor);
+       ret = spi_nor_set_mtd_info(nor);
+       if (ret)
+               return ret;
 
        dev_dbg(dev, "Manufacturer and device ID: %*phN\n",
                SPI_NOR_MAX_ID_LEN, nor->id);
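
Publishing the layout through mtd->eraseregions means non-uniform flashes are finally visible to userspace via the standard MTD region ioctls. A small self-contained example of reading them back (MEMGETREGIONCOUNT/MEMGETREGIONINFO are the standard ioctls from <mtd/mtd-user.h>; the device path is illustrative):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <mtd/mtd-user.h>

    int main(void)
    {
            struct region_info_user ri;
            int count = 0, i;
            int fd = open("/dev/mtd0", O_RDONLY);

            if (fd < 0)
                    return 1;
            if (ioctl(fd, MEMGETREGIONCOUNT, &count))
                    return 1;
            for (i = 0; i < count; i++) {
                    ri.regionindex = i;
                    if (ioctl(fd, MEMGETREGIONINFO, &ri))
                            break;
                    printf("region %d: offset %#x erasesize %#x blocks %u\n",
                           i, ri.offset, ri.erasesize, ri.numblocks);
            }
            close(fd);
            return 0;
    }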
index d36c0e0729548b79970d713c391ceeea47911bfc..442786685515862e6990e60788a6d0205644070f 100644 (file)
@@ -240,27 +240,21 @@ struct spi_nor_erase_command {
 /**
  * struct spi_nor_erase_region - Structure to describe a SPI NOR erase region
  * @offset:            the offset in the data array of erase region start.
- *                     LSB bits are used as a bitmask encoding flags to
- *                     determine if this region is overlaid, if this region is
- *                     the last in the SPI NOR flash memory and to indicate
- *                     all the supported erase commands inside this region.
- *                     The erase types are sorted in ascending order with the
- *                     smallest Erase Type size being at BIT(0).
  * @size:              the size of the region in bytes.
+ * @erase_mask:                bitmask to indicate all the supported erase commands
+ *                     inside this region. The erase types are sorted in
+ *                     ascending order with the smallest Erase Type size being
+ *                     at BIT(0).
+ * @overlaid:          true if this region is overlaid.
  */
 struct spi_nor_erase_region {
        u64             offset;
        u64             size;
+       u8              erase_mask;
+       bool            overlaid;
 };
 
 #define SNOR_ERASE_TYPE_MAX    4
-#define SNOR_ERASE_TYPE_MASK   GENMASK_ULL(SNOR_ERASE_TYPE_MAX - 1, 0)
-
-#define SNOR_LAST_REGION       BIT(4)
-#define SNOR_OVERLAID_REGION   BIT(5)
-
-#define SNOR_ERASE_FLAGS_MAX   6
-#define SNOR_ERASE_FLAGS_MASK  GENMASK_ULL(SNOR_ERASE_FLAGS_MAX - 1, 0)
 
 /**
  * struct spi_nor_erase_map - Structure to describe the SPI NOR erase map
@@ -273,17 +267,13 @@ struct spi_nor_erase_region {
  *                     The erase types are sorted in ascending order, with the
  *                     smallest Erase Type size being the first member in the
  *                     erase_type array.
- * @uniform_erase_type:        bitmask encoding erase types that can erase the
- *                     entire memory. This member is completed at init by
- *                     uniform and non-uniform SPI NOR flash memories if they
- *                     support at least one erase type that can erase the
- *                     entire memory.
+ * @n_regions:         number of erase regions.
  */
 struct spi_nor_erase_map {
        struct spi_nor_erase_region     *regions;
        struct spi_nor_erase_region     uniform_region;
        struct spi_nor_erase_type       erase_type[SNOR_ERASE_TYPE_MAX];
-       u8                              uniform_erase_type;
+       unsigned int                    n_regions;
 };
 
 /**
@@ -675,8 +665,6 @@ void spi_nor_set_pp_settings(struct spi_nor_pp_command *pp, u8 opcode,
 void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size,
                            u8 opcode);
 void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase);
-struct spi_nor_erase_region *
-spi_nor_region_next(struct spi_nor_erase_region *region);
 void spi_nor_init_uniform_erase_map(struct spi_nor_erase_map *map,
                                    u8 erase_mask, u64 flash_size);
 
index 2dbda6b6938abd9caceff084a7421e0ec46ea033..fa6956144d2e4458ff0e78c5c317a52748b02047 100644 (file)
@@ -78,10 +78,10 @@ static int spi_nor_params_show(struct seq_file *s, void *data)
        struct spi_nor *nor = s->private;
        struct spi_nor_flash_parameter *params = nor->params;
        struct spi_nor_erase_map *erase_map = &params->erase_map;
-       struct spi_nor_erase_region *region;
+       struct spi_nor_erase_region *region = erase_map->regions;
        const struct flash_info *info = nor->info;
        char buf[16], *str;
-       int i;
+       unsigned int i;
 
        seq_printf(s, "name\t\t%s\n", info->name);
        seq_printf(s, "id\t\t%*ph\n", SPI_NOR_MAX_ID_LEN, nor->id);
@@ -142,22 +142,20 @@ static int spi_nor_params_show(struct seq_file *s, void *data)
        }
 
        seq_puts(s, "\nsector map\n");
-       seq_puts(s, " region (in hex)   | erase mask | flags\n");
+       seq_puts(s, " region (in hex)   | erase mask | overlaid\n");
        seq_puts(s, " ------------------+------------+----------\n");
-       for (region = erase_map->regions;
-            region;
-            region = spi_nor_region_next(region)) {
-               u64 start = region->offset & ~SNOR_ERASE_FLAGS_MASK;
-               u64 flags = region->offset & SNOR_ERASE_FLAGS_MASK;
-               u64 end = start + region->size - 1;
+       for (i = 0; i < erase_map->n_regions; i++) {
+               u64 start = region[i].offset;
+               u64 end = start + region[i].size - 1;
+               u8 erase_mask = region[i].erase_mask;
 
                seq_printf(s, " %08llx-%08llx |     [%c%c%c%c] | %s\n",
                           start, end,
-                          flags & BIT(0) ? '0' : ' ',
-                          flags & BIT(1) ? '1' : ' ',
-                          flags & BIT(2) ? '2' : ' ',
-                          flags & BIT(3) ? '3' : ' ',
-                          flags & SNOR_OVERLAID_REGION ? "overlaid" : "");
+                          erase_mask & BIT(0) ? '0' : ' ',
+                          erase_mask & BIT(1) ? '1' : ' ',
+                          erase_mask & BIT(2) ? '2' : ' ',
+                          erase_mask & BIT(3) ? '3' : ' ',
+                          region[i].overlaid ? "yes" : "no");
        }
 
        return 0;
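
For reference, the reworked debugfs dump renders each region with its erase-type mask and an explicit overlaid column; illustrative output for a hypothetical two-region part (values invented to match the format string above):

    sector map
     region (in hex)   | erase mask | overlaid
     ------------------+------------+----------
     00000000-0003ffff |     [01 3] | no
     00040000-01ffffff |     [ 1 3] | no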
index 57713de3283278970c5d125bde422c36dfd71b5e..5b1117265bd289702a283e8440ac956912478add 100644 (file)
@@ -389,19 +389,15 @@ static u8 spi_nor_sort_erase_mask(struct spi_nor_erase_map *map, u8 erase_mask)
 static void spi_nor_regions_sort_erase_types(struct spi_nor_erase_map *map)
 {
        struct spi_nor_erase_region *region = map->regions;
-       u8 region_erase_mask, sorted_erase_mask;
+       u8 sorted_erase_mask;
+       unsigned int i;
 
-       while (region) {
-               region_erase_mask = region->offset & SNOR_ERASE_TYPE_MASK;
-
-               sorted_erase_mask = spi_nor_sort_erase_mask(map,
-                                                           region_erase_mask);
+       for (i = 0; i < map->n_regions; i++) {
+               sorted_erase_mask =
+                       spi_nor_sort_erase_mask(map, region[i].erase_mask);
 
                /* Overwrite erase mask. */
-               region->offset = (region->offset & ~SNOR_ERASE_TYPE_MASK) |
-                                sorted_erase_mask;
-
-               region = spi_nor_region_next(region);
+               region[i].erase_mask = sorted_erase_mask;
        }
 }
 
@@ -554,8 +550,6 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
         * selecting the uniform erase.
         */
        spi_nor_regions_sort_erase_types(map);
-       map->uniform_erase_type = map->uniform_region.offset &
-                                 SNOR_ERASE_TYPE_MASK;
 
        /* Stop here if not JESD216 rev A or later. */
        if (bfpt_header->length == BFPT_DWORD_MAX_JESD216)
@@ -806,16 +800,6 @@ out:
        return ret;
 }
 
-static void spi_nor_region_mark_end(struct spi_nor_erase_region *region)
-{
-       region->offset |= SNOR_LAST_REGION;
-}
-
-static void spi_nor_region_mark_overlay(struct spi_nor_erase_region *region)
-{
-       region->offset |= SNOR_OVERLAID_REGION;
-}
-
 /**
  * spi_nor_region_check_overlay() - set overlay bit when the region is overlaid
  * @region:    pointer to a structure that describes a SPI NOR erase region
@@ -833,7 +817,7 @@ spi_nor_region_check_overlay(struct spi_nor_erase_region *region,
                if (!(erase[i].size && erase_type & BIT(erase[i].idx)))
                        continue;
                if (region->size & erase[i].size_mask) {
-                       spi_nor_region_mark_overlay(region);
+                       region->overlaid = true;
                        return;
                }
        }
@@ -868,6 +852,7 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
        if (!region)
                return -ENOMEM;
        map->regions = region;
+       map->n_regions = region_count;
 
        uniform_erase_type = 0xff;
        regions_erase_type = 0;
@@ -875,9 +860,10 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
        /* Populate regions. */
        for (i = 0; i < region_count; i++) {
                j = i + 1; /* index for the region dword */
+               region[i].offset = offset;
                region[i].size = SMPT_MAP_REGION_SIZE(smpt[j]);
                erase_type = SMPT_MAP_REGION_ERASE_TYPE(smpt[j]);
-               region[i].offset = offset | erase_type;
+               region[i].erase_mask = erase_type;
 
                spi_nor_region_check_overlay(&region[i], erase, erase_type);
 
@@ -893,21 +879,20 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
                 */
                regions_erase_type |= erase_type;
 
-               offset = (region[i].offset & ~SNOR_ERASE_FLAGS_MASK) +
-                        region[i].size;
+               offset = region[i].offset + region[i].size;
        }
-       spi_nor_region_mark_end(&region[i - 1]);
 
-       save_uniform_erase_type = map->uniform_erase_type;
-       map->uniform_erase_type = spi_nor_sort_erase_mask(map,
-                                                         uniform_erase_type);
+       save_uniform_erase_type = map->uniform_region.erase_mask;
+       map->uniform_region.erase_mask =
+                               spi_nor_sort_erase_mask(map,
+                                                       uniform_erase_type);
 
        if (!regions_erase_type) {
                /*
                 * Roll back to the previous uniform_erase_type mask, SMPT is
                 * broken.
                 */
-               map->uniform_erase_type = save_uniform_erase_type;
+               map->uniform_region.erase_mask = save_uniform_erase_type;
                return -EINVAL;
        }
 
index 211f279a33a9650a0270bb21df38a004e6b108b7..46c01fa2ec46fc5af8221b6a80e93139c31e04be 100644 (file)
@@ -295,7 +295,7 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
        if (cis_sector == -1)
                return;
 
-       ssfdc = kzalloc(sizeof(struct ssfdcr_record), GFP_KERNEL);
+       ssfdc = kzalloc(sizeof(*ssfdc), GFP_KERNEL);
        if (!ssfdc)
                return;
 
@@ -332,7 +332,7 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
                kmalloc_array(ssfdc->map_len,
                              sizeof(ssfdc->logic_block_map[0]), GFP_KERNEL);
        if (!ssfdc->logic_block_map)
-               goto out_err;
+               goto out_free_ssfdc;
        memset(ssfdc->logic_block_map, 0xff, sizeof(ssfdc->logic_block_map[0]) *
                ssfdc->map_len);
 
@@ -350,7 +350,8 @@ static void ssfdcr_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 
 out_err:
        kfree(ssfdc->logic_block_map);
-        kfree(ssfdc);
+out_free_ssfdc:
+       kfree(ssfdc);
 }
 
 static void ssfdcr_remove_dev(struct mtd_blktrans_dev *dev)
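
The ssfdc fix restores the canonical goto unwind ladder: each label undoes exactly one successful allocation, in reverse order, so an early failure never frees state that was not yet set up. A minimal self-contained sketch of the pattern (hypothetical struct and helper names):

    #include <linux/slab.h>
    #include <linux/string.h>

    struct rec {
            u32 *map;
    };

    static int sketch_build_map(struct rec *r, unsigned int n)
    {
            return 0;       /* stand-in for build_logical_block_map() */
    }

    static struct rec *rec_create(unsigned int n)
    {
            struct rec *r = kzalloc(sizeof(*r), GFP_KERNEL);

            if (!r)
                    return NULL;

            r->map = kmalloc_array(n, sizeof(*r->map), GFP_KERNEL);
            if (!r->map)
                    goto out_free_rec;      /* only 'r' exists so far */

            memset(r->map, 0xff, n * sizeof(*r->map));
            if (sketch_build_map(r, n))
                    goto out_free_map;      /* undo in reverse order */

            return r;

    out_free_map:
            kfree(r->map);
    out_free_rec:
            kfree(r);
            return NULL;
    }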
index 9e653e2925f78ae9047dceedd062e71c52c80008..292b1f9cd9e78e799aa359deceed4ee3958fbb22 100644 (file)
@@ -1591,7 +1591,7 @@ bmac_proc_info(char *buffer, char **start, off_t offset, int length)
 }
 #endif
 
-static int bmac_remove(struct macio_dev *mdev)
+static void bmac_remove(struct macio_dev *mdev)
 {
        struct net_device *dev = macio_get_drvdata(mdev);
        struct bmac_data *bp = netdev_priv(dev);
@@ -1609,8 +1609,6 @@ static int bmac_remove(struct macio_dev *mdev)
        macio_release_resources(mdev);
 
        free_netdev(dev);
-
-       return 0;
 }
 
 static const struct of_device_id bmac_match[] =
index fd1b008b7208c50917e9a5d9df9bcc2218de1304..e6350971c7076c259453ac04c95c7c6f706b4bae 100644 (file)
@@ -272,7 +272,7 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
        return rc;
 }
 
-static int mace_remove(struct macio_dev *mdev)
+static void mace_remove(struct macio_dev *mdev)
 {
        struct net_device *dev = macio_get_drvdata(mdev);
        struct mace_data *mp;
@@ -296,8 +296,6 @@ static int mace_remove(struct macio_dev *mdev)
        free_netdev(dev);
 
        macio_release_resources(mdev);
-
-       return 0;
 }
 
 static void dbdma_reset(volatile struct dbdma_regs __iomem *dma)
index eee759054aada6c75796242890b81bf24f5b1c78..62ff4381ac83cfd6a27a3763603f6ce8392043c6 100644 (file)
@@ -221,7 +221,7 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 #define FIRMWARE_TG3TSO                "tigon/tg3_tso.bin"
 #define FIRMWARE_TG3TSO5       "tigon/tg3_tso5.bin"
 
-MODULE_AUTHOR("David S. Miller (davem@redhat.com) and Jeff Garzik (jgarzik@pobox.com)");
+MODULE_AUTHOR("David S. Miller <davem@redhat.com> and Jeff Garzik <jgarzik@pobox.com>");
 MODULE_DESCRIPTION("Broadcom Tigon3 ethernet driver");
 MODULE_LICENSE("GPL");
 MODULE_FIRMWARE(FIRMWARE_TG3);
index b317b94864554a291be736dd0d0146ba920694c6..bfb90350636717bdc8a03c437530567a5a022f9e 100644 (file)
@@ -176,7 +176,7 @@ static char version[] =
 static int cassini_debug = -1; /* -1 == use CAS_DEF_MSG_ENABLE as value */
 static int link_mode;
 
-MODULE_AUTHOR("Adrian Sun (asun@darksunrising.com)");
+MODULE_AUTHOR("Adrian Sun <asun@darksunrising.com>");
 MODULE_DESCRIPTION("Sun Cassini(+) ethernet driver");
 MODULE_LICENSE("GPL");
 MODULE_FIRMWARE("sun/cassini.bin");
index 21431f43e4c223adcdee266eb6b4c09ce85e1316..f68aa813d4fb1099aaaecc72b6c6b2b840ad2029 100644 (file)
@@ -61,7 +61,7 @@ union niu_page {
 static char version[] =
        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
 
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("NIU ethernet driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
index b983b9c23be68a604aa4e55ef4162d3987f78f63..50ace461a1af4d14ac640c33c576c2d8c011ddda 100644 (file)
@@ -59,7 +59,7 @@
 
 #define DRV_NAME       "sunhme"
 
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("Sun HappyMealEthernet(HME) 10/100baseT ethernet driver");
 MODULE_LICENSE("GPL");
 
index e220620d0ffc9070cf07c063a09ccbe1ced18a37..2f30715e9b67f0d5ef5d6f29ccaa7c51d1b11375 100644 (file)
@@ -44,7 +44,7 @@
 
 static char version[] =
        DRV_MODULE_NAME " " DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")";
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("Sun LDOM virtual network driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
index 351609f4f011d71b5bd07923e78cadab9b2015c4..1cacb2a0ee034cdcd62b1a66eaa0de1d85e9caad 100644 (file)
@@ -39,7 +39,7 @@
  */
 #define        VNET_MAX_RETRIES        10
 
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("Sun LDOM virtual network support library");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1.1");
index 6833ef0c79305f15a5e9fa9633f6e8bff77a5d3d..689687bd2574bcd41e857584a4b692c42d2ccba7 100644 (file)
@@ -694,6 +694,6 @@ module_init(pptp_init_module);
 module_exit(pptp_exit_module);
 
 MODULE_DESCRIPTION("Point-to-Point Tunneling Protocol");
-MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_AUTHOR("D. Kozlov <xeb@mail.ru>");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_PPTP);
index 77b06d54cc62e03eb7c53ce7dce77cc45130774f..fde3e17c836c835c21b2cddb7288c8da96538425 100644 (file)
@@ -24,7 +24,7 @@ config BLK_DEV_PMEM
        select ND_PFN if NVDIMM_PFN
        help
          Memory ranges for PMEM are described by either an NFIT
-         (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a
+         (NVDIMM Firmware Interface Table, see CONFIG_ACPI_NFIT), a
          non-standard OEM-specific E820 memory type (type-12, see
          CONFIG_X86_PMEM_LEGACY), or it is manually specified by the
          'memmap=nn[KMG]!ss[KMG]' kernel command line (see
index ef3d0f83318b9643101b79f61c994d131c2fe43d..508aed017ddc0308ba81346082b60515106c8545 100644 (file)
@@ -271,7 +271,7 @@ EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
 
 static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);
 
-static struct bus_type nvdimm_bus_type = {
+static const struct bus_type nvdimm_bus_type = {
        .name = "nd",
        .uevent = nvdimm_bus_uevent,
        .match = nvdimm_bus_match,
index 8dcc10b6db5b12c6994e2a6e1283be3eda73af29..598fe2e89bda4583d89e1f9b183c8234cf67c4be 100644 (file)
@@ -562,18 +562,19 @@ static int pmem_attach_disk(struct device *dev,
        dax_dev = alloc_dax(pmem, &pmem_dax_ops);
        if (IS_ERR(dax_dev)) {
                rc = PTR_ERR(dax_dev);
-               goto out;
+               if (rc != -EOPNOTSUPP)
+                       goto out;
+       } else {
+               set_dax_nocache(dax_dev);
+               set_dax_nomc(dax_dev);
+               if (is_nvdimm_sync(nd_region))
+                       set_dax_synchronous(dax_dev);
+               pmem->dax_dev = dax_dev;
+               rc = dax_add_host(dax_dev, disk);
+               if (rc)
+                       goto out_cleanup_dax;
+               dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
        }
-       set_dax_nocache(dax_dev);
-       set_dax_nomc(dax_dev);
-       if (is_nvdimm_sync(nd_region))
-               set_dax_synchronous(dax_dev);
-       rc = dax_add_host(dax_dev, disk);
-       if (rc)
-               goto out_cleanup_dax;
-       dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
-       pmem->dax_dev = dax_dev;
-
        rc = device_add_disk(dev, disk, pmem_attribute_groups);
        if (rc)
                goto out_remove_host;
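
The pmem rework treats one specific errno as "feature absent": alloc_dax() returning -EOPNOTSUPP (e.g. DAX compiled out) no longer aborts the probe, and the device is registered as a plain block device instead. The pattern, distilled from the hunk above:

    dax_dev = alloc_dax(pmem, &pmem_dax_ops);
    if (IS_ERR(dax_dev)) {
            rc = PTR_ERR(dax_dev);
            if (rc != -EOPNOTSUPP)          /* a real failure */
                    goto out;
            /* -EOPNOTSUPP: continue without a dax_dev */
    } else {
            /* configure and register the DAX device as before */
    }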
diff --git a/drivers/of/.kunitconfig b/drivers/of/.kunitconfig
new file mode 100644 (file)
index 0000000..5a8fee1
--- /dev/null
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_OF=y
+CONFIG_OF_KUNIT_TEST=y
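
The new .kunitconfig lets the OF tests be configured and run in isolation. Assuming the usual kunit.py wrapper from the same tree:

    ./tools/testing/kunit/kunit.py run --kunitconfig=drivers/of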
index da9826accb1b5d7d90d37c7fd8a0d39bdc4fcb5e..dd726c7056bf1325d05e6ed137be90e481a6c04c 100644 (file)
@@ -14,9 +14,8 @@ if OF
 
 config OF_UNITTEST
        bool "Device Tree runtime unit tests"
-       depends on !SPARC
+       depends on OF_EARLY_FLATTREE
        select IRQ_DOMAIN
-       select OF_EARLY_FLATTREE
        select OF_RESOLVE
        help
          This option builds in test cases for the device tree infrastructure
@@ -37,6 +36,15 @@ config OF_UNITTEST
 
          If unsure, say N here. This option is not safe to enable.
 
+config OF_KUNIT_TEST
+       tristate "Devicetree KUnit Test" if !KUNIT_ALL_TESTS
+       depends on KUNIT
+       default KUNIT_ALL_TESTS
+       help
+         This option builds KUnit tests for the device tree infrastructure.
+
+         If unsure, say N here, but this option is safe to enable.
+
 config OF_ALL_DTBS
        bool "Build all Device Tree Blobs"
        depends on COMPILE_TEST
@@ -54,7 +62,7 @@ config OF_FLATTREE
        select CRC32
 
 config OF_EARLY_FLATTREE
-       bool
+       def_bool OF && !(SPARC || ALPHA || HEXAGON || M68K || PARISC || S390)
        select DMA_DECLARE_COHERENT if HAS_DMA && HAS_IOMEM
        select OF_FLATTREE
 
index eff624854575c56be5b7832cdbbcf83a3b7143ec..251d33532148254bebb6d362719af497337ee8a1 100644 (file)
@@ -2,7 +2,7 @@
 obj-y = base.o cpu.o device.o module.o platform.o property.o
 obj-$(CONFIG_OF_KOBJ) += kobj.o
 obj-$(CONFIG_OF_DYNAMIC) += dynamic.o
-obj-$(CONFIG_OF_FLATTREE) += fdt.o
+obj-$(CONFIG_OF_FLATTREE) += fdt.o empty_root.dtb.o
 obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o
 obj-$(CONFIG_OF_PROMTREE) += pdt.o
 obj-$(CONFIG_OF_ADDRESS)  += address.o
@@ -19,4 +19,6 @@ obj-y += kexec.o
 endif
 endif
 
+obj-$(CONFIG_OF_KUNIT_TEST) += of_test.o
+
 obj-$(CONFIG_OF_UNITTEST) += unittest-data/
index b0ad8fc06e80e099ab6eba7ebe10039875bc85c4..8856c67c466acd4dc98aee218f3ceaebf250a7a9 100644 (file)
@@ -395,25 +395,57 @@ int of_device_compatible_match(const struct device_node *device,
 EXPORT_SYMBOL_GPL(of_device_compatible_match);
 
 /**
- * of_machine_is_compatible - Test root of device tree for a given compatible value
- * @compat: compatible string to look for in root node's compatible property.
+ * of_machine_compatible_match - Test root of device tree against a compatible array
+ * @compats: NULL-terminated array of compatible strings to look for in the root node's compatible property.
  *
- * Return: A positive integer if the root node has the given value in its
+ * Return: true if the root node has any of the given compatible values in its
  * compatible property.
  */
-int of_machine_is_compatible(const char *compat)
+bool of_machine_compatible_match(const char *const *compats)
 {
        struct device_node *root;
        int rc = 0;
 
        root = of_find_node_by_path("/");
        if (root) {
-               rc = of_device_is_compatible(root, compat);
+               rc = of_device_compatible_match(root, compats);
                of_node_put(root);
        }
-       return rc;
+
+       return rc != 0;
+}
+EXPORT_SYMBOL(of_machine_compatible_match);
+
+static bool __of_device_is_status(const struct device_node *device,
+                                 const char * const*strings)
+{
+       const char *status;
+       int statlen;
+
+       if (!device)
+               return false;
+
+       status = __of_get_property(device, "status", &statlen);
+       if (status == NULL)
+               return false;
+
+       if (statlen > 0) {
+               while (*strings) {
+                       unsigned int len = strlen(*strings);
+
+                       if ((*strings)[len - 1] == '-') {
+                               if (!strncmp(status, *strings, len))
+                                       return true;
+                       } else {
+                               if (!strcmp(status, *strings))
+                                       return true;
+                       }
+                       strings++;
+               }
+       }
+
+       return false;
 }
-EXPORT_SYMBOL(of_machine_is_compatible);
 
 /**
  *  __of_device_is_available - check if a device is available for use
@@ -425,22 +457,27 @@ EXPORT_SYMBOL(of_machine_is_compatible);
  */
 static bool __of_device_is_available(const struct device_node *device)
 {
-       const char *status;
-       int statlen;
+       static const char * const ok[] = {"okay", "ok", NULL};
 
        if (!device)
                return false;
 
-       status = __of_get_property(device, "status", &statlen);
-       if (status == NULL)
-               return true;
+       return !__of_get_property(device, "status", NULL) ||
+               __of_device_is_status(device, ok);
+}
 
-       if (statlen > 0) {
-               if (!strcmp(status, "okay") || !strcmp(status, "ok"))
-                       return true;
-       }
+/**
+ *  __of_device_is_reserved - check if a device is reserved
+ *
+ *  @device: Node to check for a "reserved" status, with locks already held
+ *
+ *  Return: True if the status property is set to "reserved", false otherwise
+ */
+static bool __of_device_is_reserved(const struct device_node *device)
+{
+       static const char * const reserved[] = {"reserved", NULL};
 
-       return false;
+       return __of_device_is_status(device, reserved);
 }
 
 /**
@@ -474,16 +511,9 @@ EXPORT_SYMBOL(of_device_is_available);
  */
 static bool __of_device_is_fail(const struct device_node *device)
 {
-       const char *status;
+       static const char * const fail[] = {"fail", "fail-", NULL};
 
-       if (!device)
-               return false;
-
-       status = __of_get_property(device, "status", NULL);
-       if (status == NULL)
-               return false;
-
-       return !strcmp(status, "fail") || !strncmp(status, "fail-", 5);
+       return __of_device_is_status(device, fail);
 }
 
 /**
@@ -597,16 +627,9 @@ struct device_node *of_get_next_child(const struct device_node *node,
 }
 EXPORT_SYMBOL(of_get_next_child);
 
-/**
- * of_get_next_available_child - Find the next available child node
- * @node:      parent node
- * @prev:      previous child of the parent node, or NULL to get first
- *
- * This function is like of_get_next_child(), except that it
- * automatically skips any disabled nodes (i.e. status = "disabled").
- */
-struct device_node *of_get_next_available_child(const struct device_node *node,
-       struct device_node *prev)
+static struct device_node *of_get_next_status_child(const struct device_node *node,
+                                                   struct device_node *prev,
+                                                   bool (*checker)(const struct device_node *))
 {
        struct device_node *next;
        unsigned long flags;
@@ -617,7 +640,7 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
        raw_spin_lock_irqsave(&devtree_lock, flags);
        next = prev ? prev->sibling : node->child;
        for (; next; next = next->sibling) {
-               if (!__of_device_is_available(next))
+               if (!checker(next))
                        continue;
                if (of_node_get(next))
                        break;
@@ -626,8 +649,37 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
        raw_spin_unlock_irqrestore(&devtree_lock, flags);
        return next;
 }
+
+/**
+ * of_get_next_available_child - Find the next available child node
+ * @node:      parent node
+ * @prev:      previous child of the parent node, or NULL to get first
+ *
+ * This function is like of_get_next_child(), except that it
+ * automatically skips any disabled nodes (i.e. status = "disabled").
+ */
+struct device_node *of_get_next_available_child(const struct device_node *node,
+       struct device_node *prev)
+{
+       return of_get_next_status_child(node, prev, __of_device_is_available);
+}
 EXPORT_SYMBOL(of_get_next_available_child);
 
+/**
+ * of_get_next_reserved_child - Find the next reserved child node
+ * @node:      parent node
+ * @prev:      previous child of the parent node, or NULL to get first
+ *
+ * This function is like of_get_next_child(), except that it only
+ * returns children whose status property is set to "reserved" and
+ * skips all other nodes.
+ */
+struct device_node *of_get_next_reserved_child(const struct device_node *node,
+                                               struct device_node *prev)
+{
+       return of_get_next_status_child(node, prev, __of_device_is_reserved);
+}
+EXPORT_SYMBOL(of_get_next_reserved_child);
+
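
Usage mirrors of_get_next_available_child(): the helper drops the reference on the previous node on each step (the usual of_get_next_*_child contract), so a plain loop is leak-free. A sketch, with 'parent' assumed held by the caller:

    struct device_node *child = NULL;

    while ((child = of_get_next_reserved_child(parent, child)))
            pr_info("reserved child: %pOFn\n", child);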
 /**
  * of_get_next_cpu_node - Iterate on cpu nodes
  * @prev:      previous child of the /cpus node, or NULL to get first
@@ -1345,8 +1397,8 @@ int of_parse_phandle_with_args_map(const struct device_node *np,
        char *pass_name = NULL;
        struct device_node *cur, *new = NULL;
        const __be32 *map, *mask, *pass;
-       static const __be32 dummy_mask[] = { [0 ... MAX_PHANDLE_ARGS] = ~0 };
-       static const __be32 dummy_pass[] = { [0 ... MAX_PHANDLE_ARGS] = 0 };
+       static const __be32 dummy_mask[] = { [0 ... MAX_PHANDLE_ARGS] = cpu_to_be32(~0) };
+       static const __be32 dummy_pass[] = { [0 ... MAX_PHANDLE_ARGS] = cpu_to_be32(0) };
        __be32 initial_match_array[MAX_PHANDLE_ARGS];
        const __be32 *match_array = initial_match_array;
        int i, ret, map_len, match;
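
Note that the dummy_mask/dummy_pass change is type-only: ~0 is all-ones and 0 is all-zeroes regardless of endianness, so cpu_to_be32() leaves the bytes untouched and merely gives the initializers the __be32 (__bitwise) type that sparse expects:

    /* Identical bytes on any host; only the sparse annotation changes. */
    static const __be32 dummy_mask[] = { [0 ... MAX_PHANDLE_ARGS] = cpu_to_be32(~0) };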
diff --git a/drivers/of/empty_root.dts b/drivers/of/empty_root.dts
new file mode 100644 (file)
index 0000000..cf9e97a
--- /dev/null
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/dts-v1/;
+
+/ {
+
+};
index bf502ba8da9586112a5a63dd7e787fc3985dcf94..a8a04f27915b93f1d04e45aa882c48a041192c09 100644 (file)
@@ -8,6 +8,7 @@
 
 #define pr_fmt(fmt)    "OF: fdt: " fmt
 
+#include <linux/acpi.h>
 #include <linux/crash_dump.h>
 #include <linux/crc32.h>
 #include <linux/kernel.h>
@@ -16,7 +17,6 @@
 #include <linux/mutex.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
-#include <linux/of_reserved_mem.h>
 #include <linux/sizes.h>
 #include <linux/string.h>
 #include <linux/errno.h>
 
 #include "of_private.h"
 
+/*
+ * __dtb_empty_root_begin[] and __dtb_empty_root_end[] magically created by
+ * cmd_dt_S_dtb in scripts/Makefile.lib
+ */
+extern uint8_t __dtb_empty_root_begin[];
+extern uint8_t __dtb_empty_root_end[];
+
 /*
  * of_fdt_limit_memory - limit the number of regions in the /memory node
  * @limit: maximum entries
@@ -80,7 +87,7 @@ void __init of_fdt_limit_memory(int limit)
        }
 }
 
-static bool of_fdt_device_is_available(const void *blob, unsigned long node)
+bool of_fdt_device_is_available(const void *blob, unsigned long node)
 {
        const char *status = fdt_getprop(blob, node, "status", NULL);
 
@@ -476,126 +483,6 @@ void *initial_boot_params __ro_after_init;
 
 static u32 of_fdt_crc32;
 
-static int __init early_init_dt_reserve_memory(phys_addr_t base,
-                                              phys_addr_t size, bool nomap)
-{
-       if (nomap) {
-               /*
-                * If the memory is already reserved (by another region), we
-                * should not allow it to be marked nomap, but don't worry
-                * if the region isn't memory as it won't be mapped.
-                */
-               if (memblock_overlaps_region(&memblock.memory, base, size) &&
-                   memblock_is_region_reserved(base, size))
-                       return -EBUSY;
-
-               return memblock_mark_nomap(base, size);
-       }
-       return memblock_reserve(base, size);
-}
-
-/*
- * __reserved_mem_reserve_reg() - reserve all memory described in 'reg' property
- */
-static int __init __reserved_mem_reserve_reg(unsigned long node,
-                                            const char *uname)
-{
-       int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
-       phys_addr_t base, size;
-       int len;
-       const __be32 *prop;
-       int first = 1;
-       bool nomap;
-
-       prop = of_get_flat_dt_prop(node, "reg", &len);
-       if (!prop)
-               return -ENOENT;
-
-       if (len && len % t_len != 0) {
-               pr_err("Reserved memory: invalid reg property in '%s', skipping node.\n",
-                      uname);
-               return -EINVAL;
-       }
-
-       nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
-
-       while (len >= t_len) {
-               base = dt_mem_next_cell(dt_root_addr_cells, &prop);
-               size = dt_mem_next_cell(dt_root_size_cells, &prop);
-
-               if (size &&
-                   early_init_dt_reserve_memory(base, size, nomap) == 0)
-                       pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
-                               uname, &base, (unsigned long)(size / SZ_1M));
-               else
-                       pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
-                              uname, &base, (unsigned long)(size / SZ_1M));
-
-               len -= t_len;
-               if (first) {
-                       fdt_reserved_mem_save_node(node, uname, base, size);
-                       first = 0;
-               }
-       }
-       return 0;
-}
-
-/*
- * __reserved_mem_check_root() - check if #size-cells, #address-cells provided
- * in /reserved-memory matches the values supported by the current implementation,
- * also check if ranges property has been provided
- */
-static int __init __reserved_mem_check_root(unsigned long node)
-{
-       const __be32 *prop;
-
-       prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
-       if (!prop || be32_to_cpup(prop) != dt_root_size_cells)
-               return -EINVAL;
-
-       prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
-       if (!prop || be32_to_cpup(prop) != dt_root_addr_cells)
-               return -EINVAL;
-
-       prop = of_get_flat_dt_prop(node, "ranges", NULL);
-       if (!prop)
-               return -EINVAL;
-       return 0;
-}
-
-/*
- * fdt_scan_reserved_mem() - scan a single FDT node for reserved memory
- */
-static int __init fdt_scan_reserved_mem(void)
-{
-       int node, child;
-       const void *fdt = initial_boot_params;
-
-       node = fdt_path_offset(fdt, "/reserved-memory");
-       if (node < 0)
-               return -ENODEV;
-
-       if (__reserved_mem_check_root(node) != 0) {
-               pr_err("Reserved memory: unsupported node format, ignoring\n");
-               return -EINVAL;
-       }
-
-       fdt_for_each_subnode(child, fdt, node) {
-               const char *uname;
-               int err;
-
-               if (!of_fdt_device_is_available(fdt, child))
-                       continue;
-
-               uname = fdt_get_name(fdt, child, NULL);
-
-               err = __reserved_mem_reserve_reg(child, uname);
-               if (err == -ENOENT && of_get_flat_dt_prop(child, "size", NULL))
-                       fdt_reserved_mem_save_node(child, uname, 0, 0);
-       }
-       return 0;
-}
-
 /*
  * fdt_reserve_elfcorehdr() - reserves memory for elf core header
  *
@@ -1318,6 +1205,21 @@ bool __init early_init_dt_scan(void *params)
        return true;
 }
 
+static void *__init copy_device_tree(void *fdt)
+{
+       int size;
+       void *dt;
+
+       size = fdt_totalsize(fdt);
+       dt = early_init_dt_alloc_memory_arch(size,
+                                            roundup_pow_of_two(FDT_V17_SIZE));
+
+       if (dt)
+               memcpy(dt, fdt, size);
+
+       return dt;
+}
+
 /**
  * unflatten_device_tree - create tree of device_nodes from flat blob
  *
@@ -1328,7 +1230,29 @@ bool __init early_init_dt_scan(void *params)
  */
 void __init unflatten_device_tree(void)
 {
-       __unflatten_device_tree(initial_boot_params, NULL, &of_root,
+       void *fdt = initial_boot_params;
+
+       /* Don't use the bootloader provided DTB if ACPI is enabled */
+       if (!acpi_disabled)
+               fdt = NULL;
+
+       /*
+        * Populate an empty root node when ACPI is enabled or bootloader
+        * doesn't provide one.
+        */
+       if (!fdt) {
+               fdt = (void *) __dtb_empty_root_begin;
+               /* fdt_totalsize() will be used for copy size */
+               if (fdt_totalsize(fdt) >
+                   __dtb_empty_root_end - __dtb_empty_root_begin) {
+                       pr_err("invalid size in dtb_empty_root\n");
+                       return;
+               }
+               of_fdt_crc32 = crc32_be(~0, fdt, fdt_totalsize(fdt));
+               fdt = copy_device_tree(fdt);
+       }
+
+       __unflatten_device_tree(fdt, NULL, &of_root,
                                early_init_dt_alloc_memory_arch, false);
 
        /* Get pointer to "/chosen" and "/aliases" nodes for use everywhere */
@@ -1350,22 +1274,9 @@ void __init unflatten_device_tree(void)
  */
 void __init unflatten_and_copy_device_tree(void)
 {
-       int size;
-       void *dt;
+       if (initial_boot_params)
+               initial_boot_params = copy_device_tree(initial_boot_params);
 
-       if (!initial_boot_params) {
-               pr_warn("No valid device tree found, continuing without\n");
-               return;
-       }
-
-       size = fdt_totalsize(initial_boot_params);
-       dt = early_init_dt_alloc_memory_arch(size,
-                                            roundup_pow_of_two(FDT_V17_SIZE));
-
-       if (dt) {
-               memcpy(dt, initial_boot_params, size);
-               initial_boot_params = dt;
-       }
        unflatten_device_tree();
 }
 
index 68278340cecfe5c8a20805c4ef85633b257c5d4b..9ccde2fd77cbf5117b70af2c815b317e87f2c363 100644 (file)
@@ -395,6 +395,7 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
                if (ret)
                        goto out;
 
+#ifdef CONFIG_CRASH_DUMP
                /* add linux,usable-memory-range */
                ret = fdt_appendprop_addrrange(fdt, 0, chosen_node,
                                "linux,usable-memory-range", crashk_res.start,
@@ -410,6 +411,7 @@ void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
                        if (ret)
                                goto out;
                }
+#endif
        }
 
        /* add bootargs */
index f38397c7b58241799bfd8e8d4395180bf88b118e..485483524b7f1cf1e2c0833c063cd24637de7751 100644 (file)
@@ -175,8 +175,9 @@ static inline struct device_node *__of_get_dma_parent(const struct device_node *
 }
 #endif
 
+int fdt_scan_reserved_mem(void);
 void fdt_init_reserved_mem(void);
-void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
-                              phys_addr_t base, phys_addr_t size);
+
+bool of_fdt_device_is_available(const void *blob, unsigned long node);
 
 #endif /* _LINUX_OF_PRIVATE_H */
index 7ec94cfcbddb18e3645212e57b6b96d4ee1645c7..8236ecae29533694ae4efbc4251a0182c4af99d6 100644 (file)
@@ -12,6 +12,7 @@
 #define pr_fmt(fmt)    "OF: reserved mem: " fmt
 
 #include <linux/err.h>
+#include <linux/libfdt.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
@@ -58,8 +59,8 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 /*
  * fdt_reserved_mem_save_node() - save fdt node for second pass initialization
  */
-void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname,
-                                     phys_addr_t base, phys_addr_t size)
+static void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname,
+                                             phys_addr_t base, phys_addr_t size)
 {
        struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
 
@@ -77,6 +78,126 @@ void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname,
        return;
 }
 
+static int __init early_init_dt_reserve_memory(phys_addr_t base,
+                                              phys_addr_t size, bool nomap)
+{
+       if (nomap) {
+               /*
+                * If the memory is already reserved (by another region), we
+                * should not allow it to be marked nomap, but don't worry
+                * if the region isn't memory as it won't be mapped.
+                */
+               if (memblock_overlaps_region(&memblock.memory, base, size) &&
+                   memblock_is_region_reserved(base, size))
+                       return -EBUSY;
+
+               return memblock_mark_nomap(base, size);
+       }
+       return memblock_reserve(base, size);
+}
+
+/*
+ * __reserved_mem_reserve_reg() - reserve all memory described in 'reg' property
+ */
+static int __init __reserved_mem_reserve_reg(unsigned long node,
+                                            const char *uname)
+{
+       int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
+       phys_addr_t base, size;
+       int len;
+       const __be32 *prop;
+       int first = 1;
+       bool nomap;
+
+       prop = of_get_flat_dt_prop(node, "reg", &len);
+       if (!prop)
+               return -ENOENT;
+
+       if (len && len % t_len != 0) {
+               pr_err("Reserved memory: invalid reg property in '%s', skipping node.\n",
+                      uname);
+               return -EINVAL;
+       }
+
+       nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
+
+       while (len >= t_len) {
+               base = dt_mem_next_cell(dt_root_addr_cells, &prop);
+               size = dt_mem_next_cell(dt_root_size_cells, &prop);
+
+               if (size &&
+                   early_init_dt_reserve_memory(base, size, nomap) == 0)
+                       pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
+                               uname, &base, (unsigned long)(size / SZ_1M));
+               else
+                       pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
+                              uname, &base, (unsigned long)(size / SZ_1M));
+
+               len -= t_len;
+               if (first) {
+                       fdt_reserved_mem_save_node(node, uname, base, size);
+                       first = 0;
+               }
+       }
+       return 0;
+}
+
+/*
+ * __reserved_mem_check_root() - check if #size-cells, #address-cells provided
+ * in /reserved-memory matches the values supported by the current implementation,
+ * also check if ranges property has been provided
+ */
+static int __init __reserved_mem_check_root(unsigned long node)
+{
+       const __be32 *prop;
+
+       prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
+       if (!prop || be32_to_cpup(prop) != dt_root_size_cells)
+               return -EINVAL;
+
+       prop = of_get_flat_dt_prop(node, "#address-cells", NULL);
+       if (!prop || be32_to_cpup(prop) != dt_root_addr_cells)
+               return -EINVAL;
+
+       prop = of_get_flat_dt_prop(node, "ranges", NULL);
+       if (!prop)
+               return -EINVAL;
+       return 0;
+}
+
+/*
+ * fdt_scan_reserved_mem() - scan a single FDT node for reserved memory
+ */
+int __init fdt_scan_reserved_mem(void)
+{
+       int node, child;
+       const void *fdt = initial_boot_params;
+
+       node = fdt_path_offset(fdt, "/reserved-memory");
+       if (node < 0)
+               return -ENODEV;
+
+       if (__reserved_mem_check_root(node) != 0) {
+               pr_err("Reserved memory: unsupported node format, ignoring\n");
+               return -EINVAL;
+       }
+
+       fdt_for_each_subnode(child, fdt, node) {
+               const char *uname;
+               int err;
+
+               if (!of_fdt_device_is_available(fdt, child))
+                       continue;
+
+               uname = fdt_get_name(fdt, child, NULL);
+
+               err = __reserved_mem_reserve_reg(child, uname);
+               if (err == -ENOENT && of_get_flat_dt_prop(child, "size", NULL))
+                       fdt_reserved_mem_save_node(child, uname, 0, 0);
+       }
+       return 0;
+}
+
 /*
  * __reserved_mem_alloc_in_range() - allocate reserved memory described with
  *     'alloc-ranges'. Choose bottom-up/top-down depending on nearby existing
diff --git a/drivers/of/of_test.c b/drivers/of/of_test.c
new file mode 100644 (file)
index 0000000..a9301d2
--- /dev/null
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit tests for OF APIs
+ */
+#include <linux/module.h>
+#include <linux/of.h>
+
+#include <kunit/test.h>
+
+/*
+ * Test that the root node "/" can be found by path.
+ */
+static void of_dtb_root_node_found_by_path(struct kunit *test)
+{
+       struct device_node *np;
+
+       np = of_find_node_by_path("/");
+       KUNIT_EXPECT_NOT_ERR_OR_NULL(test, np);
+       of_node_put(np);
+}
+
+/*
+ * Test that the 'of_root' global variable is always populated when DT code is
+ * enabled. Remove this test once of_root is removed from global access.
+ */
+static void of_dtb_root_node_populates_of_root(struct kunit *test)
+{
+       KUNIT_EXPECT_NOT_ERR_OR_NULL(test, of_root);
+}
+
+static struct kunit_case of_dtb_test_cases[] = {
+       KUNIT_CASE(of_dtb_root_node_found_by_path),
+       KUNIT_CASE(of_dtb_root_node_populates_of_root),
+       {}
+};
+
+static int of_dtb_test_init(struct kunit *test)
+{
+       if (!IS_ENABLED(CONFIG_OF_EARLY_FLATTREE))
+               kunit_skip(test, "requires CONFIG_OF_EARLY_FLATTREE");
+
+       return 0;
+}
+
+/*
+ * Test suite to confirm a DTB is loaded.
+ */
+static struct kunit_suite of_dtb_suite = {
+       .name = "of_dtb",
+       .test_cases = of_dtb_test_cases,
+       .init = of_dtb_test_init,
+};
+
+kunit_test_suites(
+       &of_dtb_suite,
+);
+MODULE_LICENSE("GPL");
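
A further case can be added to this suite by defining a function taking a
struct kunit * and listing it in of_dtb_test_cases[]. A minimal sketch
(hypothetical test, not part of this change), using the same KUnit macros:

	static void of_dtb_root_node_has_no_parent(struct kunit *test)
	{
		struct device_node *np;

		np = of_find_node_by_path("/");
		KUNIT_ASSERT_NOT_ERR_OR_NULL(test, np);
		/* the root node is expected to have no parent */
		KUNIT_EXPECT_PTR_EQ(test, np->parent, (struct device_node *)NULL);
		of_node_put(np);
	}

The suite can then be run with the KUnit wrapper, e.g. something like
./tools/testing/kunit/kunit.py run --kunitconfig=drivers/of, assuming a
.kunitconfig accompanies these tests.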
index b7708a06dc7844d2a4ee5eb67b1d3970c78b30df..389d4ea6bfc1591dee8321e99a322916c23b9017 100644 (file)
@@ -166,6 +166,8 @@ static struct platform_device *of_platform_device_create_pdata(
 {
        struct platform_device *dev;
 
+       pr_debug("create platform device: %pOF\n", np);
+
        if (!of_device_is_available(np) ||
            of_node_test_and_set_flag(np, OF_POPULATED))
                return NULL;
@@ -510,9 +512,6 @@ static int __init of_platform_default_populate_init(void)
 
        device_links_supplier_sync_state_pause();
 
-       if (!of_have_populated_dt())
-               return -ENODEV;
-
        if (IS_ENABLED(CONFIG_PPC)) {
                struct device_node *boot_display = NULL;
                struct platform_device *dev;
index fa8cd33be1312dc57f075cf6557270794dcc2939..c907478ef89e67157763d26ea5cddbabfb02ec16 100644 (file)
@@ -665,7 +665,7 @@ struct device_node *of_graph_get_next_endpoint(const struct device_node *parent,
                of_node_put(node);
 
                if (!port) {
-                       pr_err("graph: no port node found in %pOF\n", parent);
+                       pr_debug("graph: no port node found in %pOF\n", parent);
                        return NULL;
                }
        } else {
@@ -814,10 +814,16 @@ struct device_node *of_graph_get_remote_port(const struct device_node *node)
 }
 EXPORT_SYMBOL(of_graph_get_remote_port);
 
-int of_graph_get_endpoint_count(const struct device_node *np)
+/**
+ * of_graph_get_endpoint_count() - get the number of endpoints in a device node
+ * @np: parent device node containing ports and endpoints
+ *
+ * Return: the number of endpoints in this device node
+ */
+unsigned int of_graph_get_endpoint_count(const struct device_node *np)
 {
        struct device_node *endpoint;
-       int num = 0;
+       unsigned int num = 0;
 
        for_each_endpoint_of_node(np, endpoint)
                num++;
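
Note the signature change: the endpoint count is now unsigned int, so
callers must not test the return value for negative error codes; zero
simply means the node describes no endpoints. A caller sketch (illustrative
only):

	unsigned int n = of_graph_get_endpoint_count(dev->of_node);

	if (!n)
		dev_dbg(dev, "no graph endpoints described\n");  /* not an error */
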
index d7593bde2d02f39c2532ae4d0be41cccaec38526..6b5c36b6a75862010f443321da4aa9d8d0c1c5eb 100644 (file)
@@ -239,27 +239,22 @@ static void __init of_unittest_dynamic(void)
 
 static int __init of_unittest_check_node_linkage(struct device_node *np)
 {
-       struct device_node *child;
        int count = 0, rc;
 
-       for_each_child_of_node(np, child) {
+       for_each_child_of_node_scoped(np, child) {
                if (child->parent != np) {
                        pr_err("Child node %pOFn links to wrong parent %pOFn\n",
                                 child, np);
-                       rc = -EINVAL;
-                       goto put_child;
+                       return -EINVAL;
                }
 
                rc = of_unittest_check_node_linkage(child);
                if (rc < 0)
-                       goto put_child;
+                       return rc;
                count += rc;
        }
 
        return count + 1;
-put_child:
-       of_node_put(child);
-       return rc;
 }
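
The conversion works because for_each_child_of_node_scoped() declares
'child' as an automatically managed reference that is put when it goes out
of scope, so early returns no longer need an unwind label. The general
pattern (a sketch; is_bad_child() is a hypothetical predicate):

	for_each_child_of_node_scoped(np, child) {
		if (is_bad_child(child))
			return -EINVAL;  /* no of_node_put() needed on exit */
	}
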
 
 static void __init of_unittest_check_tree_linkage(void)
@@ -1750,20 +1745,16 @@ static int __init unittest_data_add(void)
                return -EINVAL;
        }
 
+       /* attach the sub-tree to live tree */
        if (!of_root) {
-               of_root = unittest_data_node;
-               for_each_of_allnodes(np)
-                       __of_attach_node_sysfs(np);
-               of_aliases = of_find_node_by_path("/aliases");
-               of_chosen = of_find_node_by_path("/chosen");
-               of_overlay_mutex_unlock();
-               return 0;
+               pr_warn("%s: no live tree to attach sub-tree\n", __func__);
+               kfree(unittest_data);
+               return -ENODEV;
        }
 
        EXPECT_BEGIN(KERN_INFO,
                     "Duplicate name in testcase-data, renamed to \"duplicate-name#1\"");
 
-       /* attach the sub-tree to live tree */
        np = unittest_data_node->child;
        while (np) {
                struct device_node *next = np->sibling;
@@ -4093,10 +4084,6 @@ static int __init of_unittest(void)
        add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
 
        /* adding data for unittest */
-
-       if (IS_ENABLED(CONFIG_UML))
-               unittest_unflatten_overlay_base();
-
        res = unittest_data_add();
        if (res)
                return res;
index 1f75d2416001e9c1a575ae16c0063383d19d5f81..b49cb010a4d80ce75d8c7cafa98c3a9d5f53506f 100644 (file)
@@ -308,15 +308,13 @@ static int hppa_led_generic_probe(struct platform_device *pdev,
        return 0;
 }
 
-static int platform_led_remove(struct platform_device *pdev)
+static void platform_led_remove(struct platform_device *pdev)
 {
        struct hppa_drvdata *p = platform_get_drvdata(pdev);
        int i;
 
        for (i = 0; i < NUM_LEDS_PER_BOARD; i++)
                led_classdev_unregister(&p->leds[i].led_cdev);
-
-       return 0;
 }
 
 static struct led_type mainboard_led_types[NUM_LEDS_PER_BOARD] = {
@@ -371,7 +369,7 @@ MODULE_ALIAS("platform:platform-leds");
 
 static struct platform_driver hppa_mainboard_led_driver = {
        .probe          = platform_led_probe,
-       .remove         = platform_led_remove,
+       .remove_new     = platform_led_remove,
        .driver         = {
                .name   = "platform-leds",
        },
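
The remove callback conversion reflects that the driver core ignores the
return value of a platform driver's remove: .remove_new takes the
void-returning form, so drivers stop returning an always-zero status. The
shape of such a callback (sketch, hypothetical driver):

	static void example_remove(struct platform_device *pdev)
	{
		/* release resources; there is nothing meaningful to return */
	}
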
index b33be1e63c98fbf85bb1e39b5f5e3bac9c44716c..c75f55e1250a30af9f30093c8b4409de0f151711 100644 (file)
@@ -892,7 +892,7 @@ static const struct dev_pm_ops pcmcia_socket_pm_ops = {
 
 #endif /* CONFIG_PM */
 
-struct class pcmcia_socket_class = {
+const struct class pcmcia_socket_class = {
        .name = "pcmcia_socket",
        .dev_uevent = pcmcia_socket_uevent,
        .dev_release = pcmcia_release_socket,
index 580369f3c0b06d7e6a2d4ee174c9656c24350ab2..02a83ca44e775034f532e32102efa71919e56c0d 100644 (file)
@@ -113,7 +113,7 @@ struct pcmcia_callback{
 /* cs.c */
 extern struct rw_semaphore pcmcia_socket_list_rwsem;
 extern struct list_head pcmcia_socket_list;
-extern struct class pcmcia_socket_class;
+extern const struct class pcmcia_socket_class;
 
 int pccard_register_pcmcia(struct pcmcia_socket *s, struct pcmcia_callback *c);
 struct pcmcia_socket *pcmcia_get_socket_by_nr(unsigned int nr);
@@ -132,7 +132,7 @@ void pcmcia_put_socket(struct pcmcia_socket *skt);
  * Stuff internal to module "pcmcia".
  */
 /* ds.c */
-extern struct bus_type pcmcia_bus_type;
+extern const struct bus_type pcmcia_bus_type;
 
 struct pcmcia_device;
 
index b4b8363d1de21f3997ee6d7e5a3af65f169a56a5..d3cfd353fb935e2333a459009630dc7179415d06 100644 (file)
@@ -1406,7 +1406,7 @@ static const struct dev_pm_ops pcmcia_bus_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(pcmcia_dev_suspend, pcmcia_dev_resume)
 };
 
-struct bus_type pcmcia_bus_type = {
+const struct bus_type pcmcia_bus_type = {
        .name = "pcmcia",
        .uevent = pcmcia_bus_uevent,
        .match = pcmcia_bus_match,
index e53a9a9317bc2592475c9e5e2085dd28a4858e44..b0f19e95060107431810197bddfd16f292526129 100644 (file)
@@ -683,7 +683,7 @@ static int sun4i_usb_phy0_vbus_notify(struct notifier_block *nb,
 }
 
 static struct phy *sun4i_usb_phy_xlate(struct device *dev,
-                                       struct of_phandle_args *args)
+                                       const struct of_phandle_args *args)
 {
        struct sun4i_usb_phy_data *data = dev_get_drvdata(dev);
 
index 2712c4bd549d87a4607f7f7907ae65d74e2a159d..5468831d6ab9bb5b8ff057d6f8c6061616ff626d 100644 (file)
@@ -350,7 +350,7 @@ static int phy_g12a_usb3_pcie_exit(struct phy *phy)
 }
 
 static struct phy *phy_g12a_usb3_pcie_xlate(struct device *dev,
-                                           struct of_phandle_args *args)
+                                           const struct of_phandle_args *args)
 {
        struct phy_g12a_usb3_pcie_priv *priv = dev_get_drvdata(dev);
        unsigned int mode;
index 8a4aadf166cf9e778af011f235b971d625877215..ff9b3862bf7af42473cf80f739123e54d114787d 100644 (file)
@@ -195,7 +195,7 @@ static const struct phy_ops sr_paxc_phy_ops = {
 };
 
 static struct phy *sr_pcie_phy_xlate(struct device *dev,
-                                    struct of_phandle_args *args)
+                                    const struct of_phandle_args *args)
 {
        struct sr_pcie_phy_core *core;
        int phy_idx;
index b0bd18a5df8794fcc192a5b927ebfa3065c5fa6e..6bcfe83609c865d3c54c87252c8ef622d78b304e 100644 (file)
@@ -209,7 +209,7 @@ static const struct phy_ops sr_phy_ops = {
 };
 
 static struct phy *bcm_usb_phy_xlate(struct device *dev,
-                                    struct of_phandle_args *args)
+                                    const struct of_phandle_args *args)
 {
        struct bcm_usb_phy_cfg *phy_cfg;
        int phy_idx;
index f8183dea774b697a2cfab9fb6f04926029f0c353..647644de041bba86ded64484b751bf781345ac43 100644 (file)
@@ -366,7 +366,7 @@ static const struct phy_ops bcm63xx_usbh_phy_ops = {
 };
 
 static struct phy *bcm63xx_usbh_phy_xlate(struct device *dev,
-                                         struct of_phandle_args *args)
+                                         const struct of_phandle_args *args)
 {
        struct bcm63xx_usbh_phy *usbh = dev_get_drvdata(dev);
 
index a16f0b58eb74538e9bd7a31fe0a1723563f3abeb..ad2eec0956016d96bfe61729545901909d8c593a 100644 (file)
@@ -175,7 +175,7 @@ static const struct phy_ops brcm_usb_phy_ops = {
 };
 
 static struct phy *brcm_usb_phy_xlate(struct device *dev,
-                                     struct of_phandle_args *args)
+                                     const struct of_phandle_args *args)
 {
        struct brcm_usb_phy_data *data = dev_get_drvdata(dev);
 
index a75c96385c57acd9400b67d70f657d4fea1942e7..95924a09960cce0bd0fdeb88639940d584226aad 100644 (file)
@@ -355,7 +355,9 @@ struct cdns_torrent_phy {
        struct reset_control *apb_rst;
        struct device *dev;
        struct clk *clk;
+       struct clk *clk1;
        enum cdns_torrent_ref_clk ref_clk_rate;
+       enum cdns_torrent_ref_clk ref_clk1_rate;
        struct cdns_torrent_inst phys[MAX_NUM_LANES];
        int nsubnodes;
        const struct cdns_torrent_data *init_data;
@@ -2460,9 +2462,11 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
 {
        const struct cdns_torrent_data *init_data = cdns_phy->init_data;
        struct cdns_torrent_vals *cmn_vals, *tx_ln_vals, *rx_ln_vals;
+       enum cdns_torrent_ref_clk ref_clk1 = cdns_phy->ref_clk1_rate;
        enum cdns_torrent_ref_clk ref_clk = cdns_phy->ref_clk_rate;
        struct cdns_torrent_vals *link_cmn_vals, *xcvr_diag_vals;
        enum cdns_torrent_phy_type phy_t1, phy_t2;
+       struct cdns_torrent_vals *phy_pma_cmn_vals;
        struct cdns_torrent_vals *pcs_cmn_vals;
        int i, j, node, mlane, num_lanes, ret;
        struct cdns_reg_pairs *reg_pairs;
@@ -2489,6 +2493,7 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
                         * Get the array values as [phy_t2][phy_t1][ssc].
                         */
                        swap(phy_t1, phy_t2);
+                       swap(ref_clk, ref_clk1);
                }
 
                mlane = cdns_phy->phys[node].mlane;
@@ -2552,9 +2557,22 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
                                             reg_pairs[i].val);
                }
 
+               /* PHY PMA common registers configurations */
+               phy_pma_cmn_vals = cdns_torrent_get_tbl_vals(&init_data->phy_pma_cmn_vals_tbl,
+                                                            CLK_ANY, CLK_ANY,
+                                                            phy_t1, phy_t2, ANY_SSC);
+               if (phy_pma_cmn_vals) {
+                       reg_pairs = phy_pma_cmn_vals->reg_pairs;
+                       num_regs = phy_pma_cmn_vals->num_regs;
+                       regmap = cdns_phy->regmap_phy_pma_common_cdb;
+                       for (i = 0; i < num_regs; i++)
+                               regmap_write(regmap, reg_pairs[i].off,
+                                            reg_pairs[i].val);
+               }
+
                /* PMA common registers configurations */
                cmn_vals = cdns_torrent_get_tbl_vals(&init_data->cmn_vals_tbl,
-                                                    ref_clk, ref_clk,
+                                                    ref_clk, ref_clk1,
                                                     phy_t1, phy_t2, ssc);
                if (cmn_vals) {
                        reg_pairs = cmn_vals->reg_pairs;
@@ -2567,7 +2585,7 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
 
                /* PMA TX lane registers configurations */
                tx_ln_vals = cdns_torrent_get_tbl_vals(&init_data->tx_ln_vals_tbl,
-                                                      ref_clk, ref_clk,
+                                                      ref_clk, ref_clk1,
                                                       phy_t1, phy_t2, ssc);
                if (tx_ln_vals) {
                        reg_pairs = tx_ln_vals->reg_pairs;
@@ -2582,7 +2600,7 @@ int cdns_torrent_phy_configure_multilink(struct cdns_torrent_phy *cdns_phy)
 
                /* PMA RX lane registers configurations */
                rx_ln_vals = cdns_torrent_get_tbl_vals(&init_data->rx_ln_vals_tbl,
-                                                      ref_clk, ref_clk,
+                                                      ref_clk, ref_clk1,
                                                       phy_t1, phy_t2, ssc);
                if (rx_ln_vals) {
                        reg_pairs = rx_ln_vals->reg_pairs;
@@ -2684,9 +2702,11 @@ static int cdns_torrent_reset(struct cdns_torrent_phy *cdns_phy)
 static int cdns_torrent_clk(struct cdns_torrent_phy *cdns_phy)
 {
        struct device *dev = cdns_phy->dev;
+       unsigned long ref_clk1_rate;
        unsigned long ref_clk_rate;
        int ret;
 
+       /* refclk: Input reference clock for PLL0 */
        cdns_phy->clk = devm_clk_get(dev, "refclk");
        if (IS_ERR(cdns_phy->clk)) {
                dev_err(dev, "phy ref clock not found\n");
@@ -2695,15 +2715,15 @@ static int cdns_torrent_clk(struct cdns_torrent_phy *cdns_phy)
 
        ret = clk_prepare_enable(cdns_phy->clk);
        if (ret) {
-               dev_err(cdns_phy->dev, "Failed to prepare ref clock\n");
+               dev_err(cdns_phy->dev, "Failed to prepare ref clock: %d\n", ret);
                return ret;
        }
 
        ref_clk_rate = clk_get_rate(cdns_phy->clk);
        if (!ref_clk_rate) {
                dev_err(cdns_phy->dev, "Failed to get ref clock rate\n");
-               clk_disable_unprepare(cdns_phy->clk);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto disable_clk;
        }
 
        switch (ref_clk_rate) {
@@ -2720,12 +2740,62 @@ static int cdns_torrent_clk(struct cdns_torrent_phy *cdns_phy)
                cdns_phy->ref_clk_rate = CLK_156_25_MHZ;
                break;
        default:
-               dev_err(cdns_phy->dev, "Invalid Ref Clock Rate\n");
-               clk_disable_unprepare(cdns_phy->clk);
-               return -EINVAL;
+               dev_err(cdns_phy->dev, "Invalid ref clock rate\n");
+               ret = -EINVAL;
+               goto disable_clk;
+       }
+
+       /* refclk1: Input reference clock for PLL1 */
+       cdns_phy->clk1 = devm_clk_get_optional(dev, "pll1_refclk");
+       if (IS_ERR(cdns_phy->clk1)) {
+               dev_err(dev, "failed to get PLL1 ref clock\n");
+               ret = PTR_ERR(cdns_phy->clk1);
+               goto disable_clk;
+       }
+
+       if (cdns_phy->clk1) {
+               ret = clk_prepare_enable(cdns_phy->clk1);
+               if (ret) {
+                       dev_err(cdns_phy->dev, "Failed to prepare PLL1 ref clock: %d\n", ret);
+                       goto disable_clk;
+               }
+
+               ref_clk1_rate = clk_get_rate(cdns_phy->clk1);
+               if (!ref_clk1_rate) {
+                       dev_err(cdns_phy->dev, "Failed to get PLL1 ref clock rate\n");
+                       ret = -EINVAL;
+                       goto disable_clk1;
+               }
+
+               switch (ref_clk1_rate) {
+               case REF_CLK_19_2MHZ:
+                       cdns_phy->ref_clk1_rate = CLK_19_2_MHZ;
+                       break;
+               case REF_CLK_25MHZ:
+                       cdns_phy->ref_clk1_rate = CLK_25_MHZ;
+                       break;
+               case REF_CLK_100MHZ:
+                       cdns_phy->ref_clk1_rate = CLK_100_MHZ;
+                       break;
+               case REF_CLK_156_25MHZ:
+                       cdns_phy->ref_clk1_rate = CLK_156_25_MHZ;
+                       break;
+               default:
+                       dev_err(cdns_phy->dev, "Invalid PLL1 ref clock rate\n");
+                       ret = -EINVAL;
+                       goto disable_clk1;
+               }
+       } else {
+               cdns_phy->ref_clk1_rate = cdns_phy->ref_clk_rate;
        }
 
        return 0;
+
+disable_clk1:
+       clk_disable_unprepare(cdns_phy->clk1);
+disable_clk:
+       clk_disable_unprepare(cdns_phy->clk);
+       return ret;
 }
 
 static int cdns_torrent_phy_probe(struct platform_device *pdev)
@@ -2980,6 +3050,7 @@ put_lnk_rst:
                reset_control_put(cdns_phy->phys[i].lnk_rst);
        of_node_put(child);
        reset_control_assert(cdns_phy->apb_rst);
+       clk_disable_unprepare(cdns_phy->clk1);
        clk_disable_unprepare(cdns_phy->clk);
 clk_cleanup:
        cdns_torrent_clk_cleanup(cdns_phy);
@@ -2998,6 +3069,7 @@ static void cdns_torrent_phy_remove(struct platform_device *pdev)
                reset_control_put(cdns_phy->phys[i].lnk_rst);
        }
 
+       clk_disable_unprepare(cdns_phy->clk1);
        clk_disable_unprepare(cdns_phy->clk);
        cdns_torrent_clk_cleanup(cdns_phy);
 }
@@ -3034,6 +3106,216 @@ static struct cdns_torrent_vals dp_usb_xcvr_diag_ln_vals = {
        .num_regs = ARRAY_SIZE(dp_usb_xcvr_diag_ln_regs),
 };
 
+/* USXGMII and SGMII/QSGMII link configuration */
+static struct cdns_reg_pairs usxgmii_sgmii_link_cmn_regs[] = {
+       {0x0002, PHY_PLL_CFG},
+       {0x0400, CMN_PDIAG_PLL0_CLK_SEL_M0},
+       {0x0601, CMN_PDIAG_PLL1_CLK_SEL_M0}
+};
+
+static struct cdns_reg_pairs usxgmii_sgmii_xcvr_diag_ln_regs[] = {
+       {0x0000, XCVR_DIAG_HSCLK_SEL},
+       {0x0001, XCVR_DIAG_HSCLK_DIV},
+       {0x0001, XCVR_DIAG_PLLDRC_CTRL}
+};
+
+static struct cdns_reg_pairs sgmii_usxgmii_xcvr_diag_ln_regs[] = {
+       {0x0111, XCVR_DIAG_HSCLK_SEL},
+       {0x0103, XCVR_DIAG_HSCLK_DIV},
+       {0x0A9B, XCVR_DIAG_PLLDRC_CTRL}
+};
+
+static struct cdns_torrent_vals usxgmii_sgmii_link_cmn_vals = {
+       .reg_pairs = usxgmii_sgmii_link_cmn_regs,
+       .num_regs = ARRAY_SIZE(usxgmii_sgmii_link_cmn_regs),
+};
+
+static struct cdns_torrent_vals usxgmii_sgmii_xcvr_diag_ln_vals = {
+       .reg_pairs = usxgmii_sgmii_xcvr_diag_ln_regs,
+       .num_regs = ARRAY_SIZE(usxgmii_sgmii_xcvr_diag_ln_regs),
+};
+
+static struct cdns_torrent_vals sgmii_usxgmii_xcvr_diag_ln_vals = {
+       .reg_pairs = sgmii_usxgmii_xcvr_diag_ln_regs,
+       .num_regs = ARRAY_SIZE(sgmii_usxgmii_xcvr_diag_ln_regs),
+};
+
+/* Multilink USXGMII, using PLL0, 156.25 MHz Ref clk, no SSC */
+static struct cdns_reg_pairs ml_usxgmii_pll0_156_25_no_ssc_cmn_regs[] = {
+       {0x0014, CMN_PLL0_DSM_FBH_OVRD_M0},
+       {0x0005, CMN_PLL0_DSM_FBL_OVRD_M0},
+       {0x061B, CMN_PLL0_VCOCAL_INIT_TMR},
+       {0x0019, CMN_PLL0_VCOCAL_ITER_TMR},
+       {0x1354, CMN_PLL0_VCOCAL_REFTIM_START},
+       {0x1354, CMN_PLL0_VCOCAL_PLLCNT_START},
+       {0x0003, CMN_PLL0_VCOCAL_TCTRL},
+       {0x0138, CMN_PLL0_LOCK_REFCNT_START},
+       {0x0138, CMN_PLL0_LOCK_PLLCNT_START}
+};
+
+static struct cdns_torrent_vals ml_usxgmii_pll0_156_25_no_ssc_cmn_vals = {
+       .reg_pairs = ml_usxgmii_pll0_156_25_no_ssc_cmn_regs,
+       .num_regs = ARRAY_SIZE(ml_usxgmii_pll0_156_25_no_ssc_cmn_regs),
+};
+
+/* Multilink SGMII/QSGMII, using PLL1, 100 MHz Ref clk, no SSC */
+static struct cdns_reg_pairs ml_sgmii_pll1_100_no_ssc_cmn_regs[] = {
+       {0x0028, CMN_PDIAG_PLL1_CP_PADJ_M0},
+       {0x001E, CMN_PLL1_DSM_FBH_OVRD_M0},
+       {0x000C, CMN_PLL1_DSM_FBL_OVRD_M0},
+       {0x0003, CMN_PLL1_VCOCAL_TCTRL},
+       {0x007F, CMN_TXPUCAL_TUNE},
+       {0x007F, CMN_TXPDCAL_TUNE}
+};
+
+static struct cdns_torrent_vals ml_sgmii_pll1_100_no_ssc_cmn_vals = {
+       .reg_pairs = ml_sgmii_pll1_100_no_ssc_cmn_regs,
+       .num_regs = ARRAY_SIZE(ml_sgmii_pll1_100_no_ssc_cmn_regs),
+};
+
+/* TI J7200, Multilink USXGMII, using PLL0, 156.25 MHz Ref clk, no SSC */
+static struct cdns_reg_pairs j7200_ml_usxgmii_pll0_156_25_no_ssc_cmn_regs[] = {
+       {0x0014, CMN_SSM_BIAS_TMR},
+       {0x0028, CMN_PLLSM0_PLLPRE_TMR},
+       {0x00A4, CMN_PLLSM0_PLLLOCK_TMR},
+       {0x0062, CMN_BGCAL_INIT_TMR},
+       {0x0062, CMN_BGCAL_ITER_TMR},
+       {0x0014, CMN_IBCAL_INIT_TMR},
+       {0x0018, CMN_TXPUCAL_INIT_TMR},
+       {0x0005, CMN_TXPUCAL_ITER_TMR},
+       {0x0018, CMN_TXPDCAL_INIT_TMR},
+       {0x0005, CMN_TXPDCAL_ITER_TMR},
+       {0x024A, CMN_RXCAL_INIT_TMR},
+       {0x0005, CMN_RXCAL_ITER_TMR},
+       {0x000B, CMN_SD_CAL_REFTIM_START},
+       {0x0132, CMN_SD_CAL_PLLCNT_START},
+       {0x0014, CMN_PLL0_DSM_FBH_OVRD_M0},
+       {0x0005, CMN_PLL0_DSM_FBL_OVRD_M0},
+       {0x061B, CMN_PLL0_VCOCAL_INIT_TMR},
+       {0x0019, CMN_PLL0_VCOCAL_ITER_TMR},
+       {0x1354, CMN_PLL0_VCOCAL_REFTIM_START},
+       {0x1354, CMN_PLL0_VCOCAL_PLLCNT_START},
+       {0x0003, CMN_PLL0_VCOCAL_TCTRL},
+       {0x0138, CMN_PLL0_LOCK_REFCNT_START},
+       {0x0138, CMN_PLL0_LOCK_PLLCNT_START}
+};
+
+static struct cdns_torrent_vals j7200_ml_usxgmii_pll0_156_25_no_ssc_cmn_vals = {
+       .reg_pairs = j7200_ml_usxgmii_pll0_156_25_no_ssc_cmn_regs,
+       .num_regs = ARRAY_SIZE(j7200_ml_usxgmii_pll0_156_25_no_ssc_cmn_regs),
+};
+
+/* TI J7200, Multilink SGMII/QSGMII, using PLL1, 100 MHz Ref clk, no SSC */
+static struct cdns_reg_pairs j7200_ml_sgmii_pll1_100_no_ssc_cmn_regs[] = {
+       {0x0028, CMN_PLLSM1_PLLPRE_TMR},
+       {0x00A4, CMN_PLLSM1_PLLLOCK_TMR},
+       {0x0028, CMN_PDIAG_PLL1_CP_PADJ_M0},
+       {0x001E, CMN_PLL1_DSM_FBH_OVRD_M0},
+       {0x000C, CMN_PLL1_DSM_FBL_OVRD_M0},
+       {0x0003, CMN_PLL1_VCOCAL_TCTRL},
+       {0x007F, CMN_TXPUCAL_TUNE},
+       {0x007F, CMN_TXPDCAL_TUNE}
+};
+
+static struct cdns_torrent_vals j7200_ml_sgmii_pll1_100_no_ssc_cmn_vals = {
+       .reg_pairs = j7200_ml_sgmii_pll1_100_no_ssc_cmn_regs,
+       .num_regs = ARRAY_SIZE(j7200_ml_sgmii_pll1_100_no_ssc_cmn_regs),
+};
+
+/* PCIe and USXGMII link configuration */
+static struct cdns_reg_pairs pcie_usxgmii_link_cmn_regs[] = {
+       {0x0003, PHY_PLL_CFG},
+       {0x0601, CMN_PDIAG_PLL0_CLK_SEL_M0},
+       {0x0400, CMN_PDIAG_PLL0_CLK_SEL_M1},
+       {0x0400, CMN_PDIAG_PLL1_CLK_SEL_M0}
+};
+
+static struct cdns_reg_pairs pcie_usxgmii_xcvr_diag_ln_regs[] = {
+       {0x0000, XCVR_DIAG_HSCLK_SEL},
+       {0x0001, XCVR_DIAG_HSCLK_DIV},
+       {0x0012, XCVR_DIAG_PLLDRC_CTRL}
+};
+
+static struct cdns_reg_pairs usxgmii_pcie_xcvr_diag_ln_regs[] = {
+       {0x0011, XCVR_DIAG_HSCLK_SEL},
+       {0x0001, XCVR_DIAG_HSCLK_DIV},
+       {0x0089, XCVR_DIAG_PLLDRC_CTRL}
+};
+
+static struct cdns_torrent_vals pcie_usxgmii_link_cmn_vals = {
+       .reg_pairs = pcie_usxgmii_link_cmn_regs,
+       .num_regs = ARRAY_SIZE(pcie_usxgmii_link_cmn_regs),
+};
+
+static struct cdns_torrent_vals pcie_usxgmii_xcvr_diag_ln_vals = {
+       .reg_pairs = pcie_usxgmii_xcvr_diag_ln_regs,
+       .num_regs = ARRAY_SIZE(pcie_usxgmii_xcvr_diag_ln_regs),
+};
+
+static struct cdns_torrent_vals usxgmii_pcie_xcvr_diag_ln_vals = {
+       .reg_pairs = usxgmii_pcie_xcvr_diag_ln_regs,
+       .num_regs = ARRAY_SIZE(usxgmii_pcie_xcvr_diag_ln_regs),
+};
+
+/* Multilink USXGMII, using PLL1, 156.25 MHz Ref clk, no SSC */
+static struct cdns_reg_pairs ml_usxgmii_pll1_156_25_no_ssc_cmn_regs[] = {
+       {0x0028, CMN_PDIAG_PLL1_CP_PADJ_M0},
+       {0x0014, CMN_PLL1_DSM_FBH_OVRD_M0},
+       {0x0005, CMN_PLL1_DSM_FBL_OVRD_M0},
+       {0x061B, CMN_PLL1_VCOCAL_INIT_TMR},
+       {0x0019, CMN_PLL1_VCOCAL_ITER_TMR},
+       {0x1354, CMN_PLL1_VCOCAL_REFTIM_START},
+       {0x1354, CMN_PLL1_VCOCAL_PLLCNT_START},
+       {0x0003, CMN_PLL1_VCOCAL_TCTRL},
+       {0x0138, CMN_PLL1_LOCK_REFCNT_START},
+       {0x0138, CMN_PLL1_LOCK_PLLCNT_START},
+       {0x007F, CMN_TXPUCAL_TUNE},
+       {0x007F, CMN_TXPDCAL_TUNE}
+};
+
+static struct cdns_reg_pairs ml_usxgmii_156_25_no_ssc_tx_ln_regs[] = {
+       {0x00F3, TX_PSC_A0},
+       {0x04A2, TX_PSC_A2},
+       {0x04A2, TX_PSC_A3},
+       {0x0000, TX_TXCC_CPOST_MULT_00},
+       {0x0000, XCVR_DIAG_PSC_OVRD}
+};
+
+static struct cdns_reg_pairs ml_usxgmii_156_25_no_ssc_rx_ln_regs[] = {
+       {0x091D, RX_PSC_A0},
+       {0x0900, RX_PSC_A2},
+       {0x0100, RX_PSC_A3},
+       {0x0030, RX_REE_SMGM_CTRL1},
+       {0x03C7, RX_REE_GCSM1_EQENM_PH1},
+       {0x01C7, RX_REE_GCSM1_EQENM_PH2},
+       {0x0000, RX_DIAG_DFE_CTRL},
+       {0x0019, RX_REE_TAP1_CLIP},
+       {0x0019, RX_REE_TAP2TON_CLIP},
+       {0x00B9, RX_DIAG_NQST_CTRL},
+       {0x0C21, RX_DIAG_DFE_AMP_TUNE_2},
+       {0x0002, RX_DIAG_DFE_AMP_TUNE_3},
+       {0x0033, RX_DIAG_PI_RATE},
+       {0x0001, RX_DIAG_ACYA},
+       {0x018C, RX_CDRLF_CNFG}
+};
+
+static struct cdns_torrent_vals ml_usxgmii_pll1_156_25_no_ssc_cmn_vals = {
+       .reg_pairs = ml_usxgmii_pll1_156_25_no_ssc_cmn_regs,
+       .num_regs = ARRAY_SIZE(ml_usxgmii_pll1_156_25_no_ssc_cmn_regs),
+};
+
+static struct cdns_torrent_vals ml_usxgmii_156_25_no_ssc_tx_ln_vals = {
+       .reg_pairs = ml_usxgmii_156_25_no_ssc_tx_ln_regs,
+       .num_regs = ARRAY_SIZE(ml_usxgmii_156_25_no_ssc_tx_ln_regs),
+};
+
+static struct cdns_torrent_vals ml_usxgmii_156_25_no_ssc_rx_ln_vals = {
+       .reg_pairs = ml_usxgmii_156_25_no_ssc_rx_ln_regs,
+       .num_regs = ARRAY_SIZE(ml_usxgmii_156_25_no_ssc_rx_ln_regs),
+};
+
 /* TI USXGMII configuration: Enable cmn_refclk_rcv_out_en */
 static struct cdns_reg_pairs ti_usxgmii_phy_pma_cmn_regs[] = {
        {0x0040, PHY_PMA_CMN_CTRL1},
@@ -3811,6 +4093,50 @@ static struct cdns_torrent_vals sgmii_100_no_ssc_rx_ln_vals = {
        .num_regs = ARRAY_SIZE(sgmii_100_no_ssc_rx_ln_regs),
 };
 
+/* TI J7200, multilink SGMII */
+static struct cdns_reg_pairs j7200_sgmii_100_no_ssc_tx_ln_regs[] = {
+       {0x07A2, TX_RCVDET_ST_TMR},
+       {0x00F3, TX_PSC_A0},
+       {0x04A2, TX_PSC_A2},
+       {0x04A2, TX_PSC_A3},
+       {0x0000, TX_TXCC_CPOST_MULT_00},
+       {0x00B3, DRV_DIAG_TX_DRV},
+       {0x0002, XCVR_DIAG_PSC_OVRD},
+       {0x4000, XCVR_DIAG_RXCLK_CTRL}
+};
+
+static struct cdns_torrent_vals j7200_sgmii_100_no_ssc_tx_ln_vals = {
+       .reg_pairs = j7200_sgmii_100_no_ssc_tx_ln_regs,
+       .num_regs = ARRAY_SIZE(j7200_sgmii_100_no_ssc_tx_ln_regs),
+};
+
+static struct cdns_reg_pairs j7200_sgmii_100_no_ssc_rx_ln_regs[] = {
+       {0x0014, RX_SDCAL0_INIT_TMR},
+       {0x0062, RX_SDCAL0_ITER_TMR},
+       {0x0014, RX_SDCAL1_INIT_TMR},
+       {0x0062, RX_SDCAL1_ITER_TMR},
+       {0x091D, RX_PSC_A0},
+       {0x0900, RX_PSC_A2},
+       {0x0100, RX_PSC_A3},
+       {0x03C7, RX_REE_GCSM1_EQENM_PH1},
+       {0x01C7, RX_REE_GCSM1_EQENM_PH2},
+       {0x0000, RX_DIAG_DFE_CTRL},
+       {0x0019, RX_REE_TAP1_CLIP},
+       {0x0019, RX_REE_TAP2TON_CLIP},
+       {0x0098, RX_DIAG_NQST_CTRL},
+       {0x0C01, RX_DIAG_DFE_AMP_TUNE_2},
+       {0x0000, RX_DIAG_DFE_AMP_TUNE_3},
+       {0x0000, RX_DIAG_PI_CAP},
+       {0x0010, RX_DIAG_PI_RATE},
+       {0x0001, RX_DIAG_ACYA},
+       {0x018C, RX_CDRLF_CNFG}
+};
+
+static struct cdns_torrent_vals j7200_sgmii_100_no_ssc_rx_ln_vals = {
+       .reg_pairs = j7200_sgmii_100_no_ssc_rx_ln_regs,
+       .num_regs = ARRAY_SIZE(j7200_sgmii_100_no_ssc_rx_ln_regs),
+};
+
 /* SGMII 100 MHz Ref clk, internal SSC */
 static struct cdns_reg_pairs sgmii_100_int_ssc_cmn_regs[] = {
        {0x0004, CMN_PLL0_DSM_DIAG_M0},
@@ -3944,6 +4270,51 @@ static struct cdns_torrent_vals qsgmii_100_no_ssc_rx_ln_vals = {
        .num_regs = ARRAY_SIZE(qsgmii_100_no_ssc_rx_ln_regs),
 };
 
+/* TI J7200, multilink QSGMII */
+static struct cdns_reg_pairs j7200_qsgmii_100_no_ssc_tx_ln_regs[] = {
+       {0x07A2, TX_RCVDET_ST_TMR},
+       {0x00F3, TX_PSC_A0},
+       {0x04A2, TX_PSC_A2},
+       {0x04A2, TX_PSC_A3},
+       {0x0000, TX_TXCC_CPOST_MULT_00},
+       {0x0011, TX_TXCC_MGNFS_MULT_100},
+       {0x0003, DRV_DIAG_TX_DRV},
+       {0x0002, XCVR_DIAG_PSC_OVRD},
+       {0x4000, XCVR_DIAG_RXCLK_CTRL}
+};
+
+static struct cdns_torrent_vals j7200_qsgmii_100_no_ssc_tx_ln_vals = {
+       .reg_pairs = j7200_qsgmii_100_no_ssc_tx_ln_regs,
+       .num_regs = ARRAY_SIZE(j7200_qsgmii_100_no_ssc_tx_ln_regs),
+};
+
+static struct cdns_reg_pairs j7200_qsgmii_100_no_ssc_rx_ln_regs[] = {
+       {0x0014, RX_SDCAL0_INIT_TMR},
+       {0x0062, RX_SDCAL0_ITER_TMR},
+       {0x0014, RX_SDCAL1_INIT_TMR},
+       {0x0062, RX_SDCAL1_ITER_TMR},
+       {0x091D, RX_PSC_A0},
+       {0x0900, RX_PSC_A2},
+       {0x0100, RX_PSC_A3},
+       {0x03C7, RX_REE_GCSM1_EQENM_PH1},
+       {0x01C7, RX_REE_GCSM1_EQENM_PH2},
+       {0x0000, RX_DIAG_DFE_CTRL},
+       {0x0019, RX_REE_TAP1_CLIP},
+       {0x0019, RX_REE_TAP2TON_CLIP},
+       {0x0098, RX_DIAG_NQST_CTRL},
+       {0x0C01, RX_DIAG_DFE_AMP_TUNE_2},
+       {0x0000, RX_DIAG_DFE_AMP_TUNE_3},
+       {0x0000, RX_DIAG_PI_CAP},
+       {0x0010, RX_DIAG_PI_RATE},
+       {0x0001, RX_DIAG_ACYA},
+       {0x018C, RX_CDRLF_CNFG}
+};
+
+static struct cdns_torrent_vals j7200_qsgmii_100_no_ssc_rx_ln_vals = {
+       .reg_pairs = j7200_qsgmii_100_no_ssc_rx_ln_regs,
+       .num_regs = ARRAY_SIZE(j7200_qsgmii_100_no_ssc_rx_ln_regs),
+};
+
 /* QSGMII 100 MHz Ref clk, internal SSC */
 static struct cdns_reg_pairs qsgmii_100_int_ssc_cmn_regs[] = {
        {0x0004, CMN_PLL0_DSM_DIAG_M0},
@@ -4166,14 +4537,17 @@ static struct cdns_torrent_vals_entry link_cmn_vals_entries[] = {
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_QSGMII), &pcie_sgmii_link_cmn_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_USB), &pcie_usb_link_cmn_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_DP), &pcie_dp_link_cmn_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_USXGMII), &pcie_usxgmii_link_cmn_vals},
 
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_NONE), &sl_sgmii_link_cmn_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_PCIE), &pcie_sgmii_link_cmn_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_USB), &usb_sgmii_link_cmn_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_USXGMII), &usxgmii_sgmii_link_cmn_vals},
 
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_QSGMII, TYPE_NONE), &sl_sgmii_link_cmn_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_QSGMII, TYPE_PCIE), &pcie_sgmii_link_cmn_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_QSGMII, TYPE_USB), &usb_sgmii_link_cmn_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_QSGMII, TYPE_USXGMII), &usxgmii_sgmii_link_cmn_vals},
 
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_NONE), &sl_usb_link_cmn_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_PCIE), &pcie_usb_link_cmn_vals},
@@ -4182,6 +4556,9 @@ static struct cdns_torrent_vals_entry link_cmn_vals_entries[] = {
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_DP), &usb_dp_link_cmn_vals},
 
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_NONE), &sl_usxgmii_link_cmn_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_PCIE), &pcie_usxgmii_link_cmn_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_SGMII), &usxgmii_sgmii_link_cmn_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_QSGMII), &usxgmii_sgmii_link_cmn_vals},
 };
 
 static struct cdns_torrent_vals_entry xcvr_diag_vals_entries[] = {
@@ -4194,14 +4571,17 @@ static struct cdns_torrent_vals_entry xcvr_diag_vals_entries[] = {
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_QSGMII), &pcie_sgmii_xcvr_diag_ln_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_USB), &pcie_usb_xcvr_diag_ln_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_DP), &pcie_dp_xcvr_diag_ln_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_PCIE, TYPE_USXGMII), &pcie_usxgmii_xcvr_diag_ln_vals},
 
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_NONE), &sl_sgmii_xcvr_diag_ln_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_PCIE), &sgmii_pcie_xcvr_diag_ln_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_USB), &sgmii_usb_xcvr_diag_ln_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_SGMII, TYPE_USXGMII), &sgmii_usxgmii_xcvr_diag_ln_vals},
 
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_QSGMII, TYPE_NONE), &sl_sgmii_xcvr_diag_ln_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_QSGMII, TYPE_PCIE), &sgmii_pcie_xcvr_diag_ln_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_QSGMII, TYPE_USB), &sgmii_usb_xcvr_diag_ln_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_QSGMII, TYPE_USXGMII), &sgmii_usxgmii_xcvr_diag_ln_vals},
 
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_NONE), &sl_usb_xcvr_diag_ln_vals},
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_PCIE), &usb_pcie_xcvr_diag_ln_vals},
@@ -4210,6 +4590,9 @@ static struct cdns_torrent_vals_entry xcvr_diag_vals_entries[] = {
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_USB, TYPE_DP), &usb_dp_xcvr_diag_ln_vals},
 
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_NONE), &sl_usxgmii_xcvr_diag_ln_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_PCIE), &usxgmii_pcie_xcvr_diag_ln_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_SGMII), &usxgmii_sgmii_xcvr_diag_ln_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_QSGMII), &usxgmii_sgmii_xcvr_diag_ln_vals},
 };
 
 static struct cdns_torrent_vals_entry pcs_cmn_vals_entries[] = {
@@ -4285,6 +4668,17 @@ static struct cdns_torrent_vals_entry cmn_vals_entries[] = {
        {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_DP, NO_SSC), &usb_100_no_ssc_cmn_vals},
 
        {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_156_25_MHZ, TYPE_USXGMII, TYPE_NONE, NO_SSC), &sl_usxgmii_156_25_no_ssc_cmn_vals},
+
+       /* Dual refclk */
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_PCIE, TYPE_USXGMII, NO_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_SGMII, TYPE_USXGMII, NO_SSC), &ml_sgmii_pll1_100_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_QSGMII, TYPE_USXGMII, NO_SSC), &ml_sgmii_pll1_100_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_PCIE, NO_SSC), &ml_usxgmii_pll1_156_25_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_SGMII, NO_SSC), &ml_usxgmii_pll0_156_25_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_QSGMII, NO_SSC), &ml_usxgmii_pll0_156_25_no_ssc_cmn_vals},
 };
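
Each dual-refclk entry is keyed as (own refclk, other refclk, own PHY type,
other PHY type, SSC); the swap(ref_clk, ref_clk1) added in
cdns_torrent_phy_configure_multilink() mirrors the existing
swap(phy_t1, phy_t2), so every lane looks up its table values with its own
reference clock first. For example (a sketch of how the table above is
consulted):

	/*
	 * A USXGMII lane clocked at 156.25 MHz sharing the PHY with a
	 * 100 MHz SGMII lane resolves its common values through the key
	 * CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII,
	 *                  TYPE_SGMII, NO_SSC), i.e.
	 * ml_usxgmii_pll0_156_25_no_ssc_cmn_vals.
	 */
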
 
 static struct cdns_torrent_vals_entry cdns_tx_ln_vals_entries[] = {
@@ -4352,6 +4746,17 @@ static struct cdns_torrent_vals_entry cdns_tx_ln_vals_entries[] = {
        {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_DP, NO_SSC), &usb_100_no_ssc_tx_ln_vals},
 
        {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_156_25_MHZ, TYPE_USXGMII, TYPE_NONE, NO_SSC), &usxgmii_156_25_no_ssc_tx_ln_vals},
+
+       /* Dual refclk */
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_PCIE, TYPE_USXGMII, NO_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_SGMII, TYPE_USXGMII, NO_SSC), &sgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_QSGMII, TYPE_USXGMII, NO_SSC), &qsgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_PCIE, NO_SSC), &ml_usxgmii_156_25_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_SGMII, NO_SSC), &ml_usxgmii_156_25_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_QSGMII, NO_SSC), &ml_usxgmii_156_25_no_ssc_tx_ln_vals},
 };
 
 static struct cdns_torrent_vals_entry cdns_rx_ln_vals_entries[] = {
@@ -4419,6 +4824,17 @@ static struct cdns_torrent_vals_entry cdns_rx_ln_vals_entries[] = {
        {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_DP, NO_SSC), &usb_100_no_ssc_rx_ln_vals},
 
        {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_156_25_MHZ, TYPE_USXGMII, TYPE_NONE, NO_SSC), &usxgmii_156_25_no_ssc_rx_ln_vals},
+
+       /* Dual refclk */
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_PCIE, TYPE_USXGMII, NO_SSC), &pcie_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_SGMII, TYPE_USXGMII, NO_SSC), &sgmii_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_QSGMII, TYPE_USXGMII, NO_SSC), &qsgmii_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_PCIE, NO_SSC), &ml_usxgmii_156_25_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_SGMII, NO_SSC), &ml_usxgmii_156_25_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_QSGMII, NO_SSC), &ml_usxgmii_156_25_no_ssc_rx_ln_vals},
 };
 
 static const struct cdns_torrent_data cdns_map_torrent = {
@@ -4452,6 +4868,9 @@ static const struct cdns_torrent_data cdns_map_torrent = {
 
 static struct cdns_torrent_vals_entry j721e_phy_pma_cmn_vals_entries[] = {
        {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_NONE), &ti_usxgmii_phy_pma_cmn_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_PCIE), &ti_usxgmii_phy_pma_cmn_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_SGMII), &ti_usxgmii_phy_pma_cmn_vals},
+       {CDNS_TORRENT_KEY_ANYCLK(TYPE_USXGMII, TYPE_QSGMII), &ti_usxgmii_phy_pma_cmn_vals},
 };
 
 static struct cdns_torrent_vals_entry ti_tx_ln_vals_entries[] = {
@@ -4519,6 +4938,17 @@ static struct cdns_torrent_vals_entry ti_tx_ln_vals_entries[] = {
        {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_DP, NO_SSC), &usb_100_no_ssc_tx_ln_vals},
 
        {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_156_25_MHZ, TYPE_USXGMII, TYPE_NONE, NO_SSC), &usxgmii_156_25_no_ssc_tx_ln_vals},
+
+       /* Dual refclk */
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_PCIE, TYPE_USXGMII, NO_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_SGMII, TYPE_USXGMII, NO_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_QSGMII, TYPE_USXGMII, NO_SSC), &ti_qsgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_PCIE, NO_SSC), &ml_usxgmii_156_25_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_SGMII, NO_SSC), &ml_usxgmii_156_25_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_QSGMII, NO_SSC), &ml_usxgmii_156_25_no_ssc_tx_ln_vals},
 };
 
 static const struct cdns_torrent_data ti_j721e_map_torrent = {
@@ -4554,6 +4984,274 @@ static const struct cdns_torrent_data ti_j721e_map_torrent = {
        },
 };
 
+/* TI J7200 (Torrent SD0805) */
+static struct cdns_torrent_vals_entry ti_j7200_cmn_vals_entries[] = {
+       {CDNS_TORRENT_KEY(CLK_19_2_MHZ, CLK_19_2_MHZ, TYPE_DP, TYPE_NONE, NO_SSC), &sl_dp_19_2_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_25_MHZ, CLK_25_MHZ, TYPE_DP, TYPE_NONE, NO_SSC), &sl_dp_25_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_DP, TYPE_NONE, NO_SSC), &sl_dp_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_DP, TYPE_PCIE, NO_SSC), &dp_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_DP, TYPE_USB, NO_SSC), &sl_dp_100_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, NO_SSC), NULL},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, EXTERNAL_SSC), NULL},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, INTERNAL_SSC), &sl_pcie_100_int_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, NO_SSC), &pcie_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, EXTERNAL_SSC), &pcie_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, INTERNAL_SSC), &pcie_100_int_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_QSGMII, NO_SSC), &pcie_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_QSGMII, EXTERNAL_SSC), &pcie_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_QSGMII, INTERNAL_SSC), &pcie_100_int_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_USB, NO_SSC), &pcie_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_USB, EXTERNAL_SSC), &pcie_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_USB, INTERNAL_SSC), &pcie_100_int_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_DP, NO_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_NONE, NO_SSC), &sl_sgmii_100_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, NO_SSC), &sgmii_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, EXTERNAL_SSC), &sgmii_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, INTERNAL_SSC), &sgmii_100_int_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_USB, NO_SSC), &sgmii_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_USB, EXTERNAL_SSC), &sgmii_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_USB, INTERNAL_SSC), &sgmii_100_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_NONE, NO_SSC), &sl_qsgmii_100_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_PCIE, NO_SSC), &qsgmii_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_PCIE, EXTERNAL_SSC), &qsgmii_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_PCIE, INTERNAL_SSC), &qsgmii_100_int_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_USB, NO_SSC), &qsgmii_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_USB, EXTERNAL_SSC), &qsgmii_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_USB, INTERNAL_SSC), &qsgmii_100_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_NONE, NO_SSC), &sl_usb_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_NONE, EXTERNAL_SSC), &sl_usb_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_NONE, INTERNAL_SSC), &sl_usb_100_int_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, NO_SSC), &usb_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, EXTERNAL_SSC), &usb_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, INTERNAL_SSC), &usb_100_int_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, NO_SSC), &sl_usb_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, EXTERNAL_SSC), &sl_usb_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, INTERNAL_SSC), &sl_usb_100_int_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_QSGMII, NO_SSC), &sl_usb_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_QSGMII, EXTERNAL_SSC), &sl_usb_100_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_QSGMII, INTERNAL_SSC), &sl_usb_100_int_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_DP, NO_SSC), &usb_100_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_156_25_MHZ, TYPE_USXGMII, TYPE_NONE, NO_SSC), &sl_usxgmii_156_25_no_ssc_cmn_vals},
+
+       /* Dual refclk */
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_PCIE, TYPE_USXGMII, NO_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_SGMII, TYPE_USXGMII, NO_SSC), &j7200_ml_sgmii_pll1_100_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_QSGMII, TYPE_USXGMII, NO_SSC), &j7200_ml_sgmii_pll1_100_no_ssc_cmn_vals},
+
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_PCIE, NO_SSC), &ml_usxgmii_pll1_156_25_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_SGMII, NO_SSC), &j7200_ml_usxgmii_pll0_156_25_no_ssc_cmn_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_QSGMII, NO_SSC), &j7200_ml_usxgmii_pll0_156_25_no_ssc_cmn_vals},
+};
+
+static struct cdns_torrent_vals_entry ti_j7200_tx_ln_vals_entries[] = {
+       {CDNS_TORRENT_KEY(CLK_19_2_MHZ, CLK_19_2_MHZ, TYPE_DP, TYPE_NONE, NO_SSC), &sl_dp_19_2_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_25_MHZ, CLK_25_MHZ, TYPE_DP, TYPE_NONE, NO_SSC), &sl_dp_25_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_DP, TYPE_NONE, NO_SSC), &sl_dp_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_DP, TYPE_PCIE, NO_SSC), &dp_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_DP, TYPE_USB, NO_SSC), &dp_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, NO_SSC), NULL},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, EXTERNAL_SSC), NULL},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, INTERNAL_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, NO_SSC), NULL},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, EXTERNAL_SSC), NULL},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, INTERNAL_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_QSGMII, NO_SSC), NULL},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_QSGMII, EXTERNAL_SSC), NULL},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_QSGMII, INTERNAL_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_USB, NO_SSC), NULL},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_USB, EXTERNAL_SSC), NULL},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_USB, INTERNAL_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_DP, NO_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_NONE, NO_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, NO_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, EXTERNAL_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, INTERNAL_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_USB, NO_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_USB, EXTERNAL_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_USB, INTERNAL_SSC), &ti_sgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_NONE, NO_SSC), &ti_qsgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_PCIE, NO_SSC), &ti_qsgmii_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_PCIE, EXTERNAL_SSC), &ti_qsgmii_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_PCIE, INTERNAL_SSC), &ti_qsgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_USB, NO_SSC), &ti_qsgmii_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_USB, EXTERNAL_SSC), &ti_qsgmii_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_USB, INTERNAL_SSC), &ti_qsgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_NONE, NO_SSC), &usb_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_NONE, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_NONE, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, NO_SSC), &usb_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, NO_SSC), &usb_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_QSGMII, NO_SSC), &usb_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_QSGMII, EXTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_QSGMII, INTERNAL_SSC), &usb_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_DP, NO_SSC), &usb_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_156_25_MHZ, TYPE_USXGMII, TYPE_NONE, NO_SSC), &usxgmii_156_25_no_ssc_tx_ln_vals},
+
+       /* Dual refclk */
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_PCIE, TYPE_USXGMII, NO_SSC), NULL},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_SGMII, TYPE_USXGMII, NO_SSC), &j7200_sgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_QSGMII, TYPE_USXGMII, NO_SSC), &j7200_qsgmii_100_no_ssc_tx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_PCIE, NO_SSC), &ml_usxgmii_156_25_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_SGMII, NO_SSC), &usxgmii_156_25_no_ssc_tx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_QSGMII, NO_SSC), &usxgmii_156_25_no_ssc_tx_ln_vals},
+};
+
+static struct cdns_torrent_vals_entry ti_j7200_rx_ln_vals_entries[] = {
+       {CDNS_TORRENT_KEY(CLK_19_2_MHZ, CLK_19_2_MHZ, TYPE_DP, TYPE_NONE, NO_SSC), &sl_dp_19_2_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_25_MHZ, CLK_25_MHZ, TYPE_DP, TYPE_NONE, NO_SSC), &sl_dp_25_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_DP, TYPE_NONE, NO_SSC), &sl_dp_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_DP, TYPE_PCIE, NO_SSC), &dp_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_DP, TYPE_USB, NO_SSC), &dp_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, NO_SSC), &pcie_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, EXTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_NONE, INTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, NO_SSC), &pcie_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, EXTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_SGMII, INTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_QSGMII, NO_SSC), &pcie_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_QSGMII, EXTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_QSGMII, INTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_USB, NO_SSC), &pcie_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_USB, EXTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_USB, INTERNAL_SSC), &pcie_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_PCIE, TYPE_DP, NO_SSC), &pcie_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_NONE, NO_SSC), &sgmii_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, NO_SSC), &sgmii_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, EXTERNAL_SSC), &sgmii_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_PCIE, INTERNAL_SSC), &sgmii_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_USB, NO_SSC), &sgmii_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_USB, EXTERNAL_SSC), &sgmii_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_SGMII, TYPE_USB, INTERNAL_SSC), &sgmii_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_NONE, NO_SSC), &qsgmii_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_PCIE, NO_SSC), &qsgmii_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_PCIE, EXTERNAL_SSC), &qsgmii_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_PCIE, INTERNAL_SSC), &qsgmii_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_USB, NO_SSC), &qsgmii_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_USB, EXTERNAL_SSC), &qsgmii_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_QSGMII, TYPE_USB, INTERNAL_SSC), &qsgmii_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_NONE, NO_SSC), &usb_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_NONE, EXTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_NONE, INTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, NO_SSC), &usb_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, EXTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_PCIE, INTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, NO_SSC), &usb_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, EXTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_SGMII, INTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_QSGMII, NO_SSC), &usb_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_QSGMII, EXTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_QSGMII, INTERNAL_SSC), &usb_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_100_MHZ, TYPE_USB, TYPE_DP, NO_SSC), &usb_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_156_25_MHZ, TYPE_USXGMII, TYPE_NONE, NO_SSC), &usxgmii_156_25_no_ssc_rx_ln_vals},
+
+       /* Dual refclk */
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_PCIE, TYPE_USXGMII, NO_SSC), &pcie_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_SGMII, TYPE_USXGMII, NO_SSC), &j7200_sgmii_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_100_MHZ, CLK_156_25_MHZ, TYPE_QSGMII, TYPE_USXGMII, NO_SSC), &j7200_qsgmii_100_no_ssc_rx_ln_vals},
+
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_PCIE, NO_SSC), &ml_usxgmii_156_25_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_SGMII, NO_SSC), &usxgmii_156_25_no_ssc_rx_ln_vals},
+       {CDNS_TORRENT_KEY(CLK_156_25_MHZ, CLK_100_MHZ, TYPE_USXGMII, TYPE_QSGMII, NO_SSC), &usxgmii_156_25_no_ssc_rx_ln_vals},
+};
+
+static const struct cdns_torrent_data ti_j7200_map_torrent = {
+       .block_offset_shift = 0x0,
+       .reg_offset_shift = 0x1,
+       .link_cmn_vals_tbl = {
+               .entries = link_cmn_vals_entries,
+               .num_entries = ARRAY_SIZE(link_cmn_vals_entries),
+       },
+       .xcvr_diag_vals_tbl = {
+               .entries = xcvr_diag_vals_entries,
+               .num_entries = ARRAY_SIZE(xcvr_diag_vals_entries),
+       },
+       .pcs_cmn_vals_tbl = {
+               .entries = pcs_cmn_vals_entries,
+               .num_entries = ARRAY_SIZE(pcs_cmn_vals_entries),
+       },
+       .phy_pma_cmn_vals_tbl = {
+               .entries = j721e_phy_pma_cmn_vals_entries,
+               .num_entries = ARRAY_SIZE(j721e_phy_pma_cmn_vals_entries),
+       },
+       .cmn_vals_tbl = {
+               .entries = ti_j7200_cmn_vals_entries,
+               .num_entries = ARRAY_SIZE(ti_j7200_cmn_vals_entries),
+       },
+       .tx_ln_vals_tbl = {
+               .entries = ti_j7200_tx_ln_vals_entries,
+               .num_entries = ARRAY_SIZE(ti_j7200_tx_ln_vals_entries),
+       },
+       .rx_ln_vals_tbl = {
+               .entries = ti_j7200_rx_ln_vals_entries,
+               .num_entries = ARRAY_SIZE(ti_j7200_rx_ln_vals_entries),
+       },
+};
+
 static const struct of_device_id cdns_torrent_phy_of_match[] = {
        {
                .compatible = "cdns,torrent-phy",
@@ -4563,6 +5261,10 @@ static const struct of_device_id cdns_torrent_phy_of_match[] = {
                .compatible = "ti,j721e-serdes-10g",
                .data = &ti_j721e_map_torrent,
        },
+       {
+               .compatible = "ti,j7200-serdes-10g",
+               .data = &ti_j7200_map_torrent,
+       },
        {}
 };
 MODULE_DEVICE_TABLE(of, cdns_torrent_phy_of_match);
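The tables above are keyed by CDNS_TORRENT_KEY, which packs the two reference clock rates, the two link protocol types and the SSC mode into a single lookup value. A minimal sketch of how such a keyed table can be searched at configuration time follows; the function name and the entry field names (.key, .vals) are illustrative assumptions based on the fields shown above, not necessarily the driver's actual API:

	/* Illustrative sketch: find register values for one lane setup. */
	static const void *demo_find_vals(const struct cdns_torrent_vals_table *tbl,
					  u32 refclk0, u32 refclk1,
					  u32 link0, u32 link1, u32 ssc)
	{
		u32 key = CDNS_TORRENT_KEY(refclk0, refclk1, link0, link1, ssc);
		int i;

		for (i = 0; i < tbl->num_entries; i++)
			if (tbl->entries[i].key == key)	/* field names assumed */
				return tbl->entries[i].vals;

		return NULL;	/* no tuning data for this combination */
	}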
index 0ae052df3765525ebc1424f81c7fc7200592b08d..38388dd04bdc1d67018c113c1ad5779c50a07196 100644 (file)
@@ -294,7 +294,7 @@ static int mixel_lvds_phy_reset(struct device *dev)
 }
 
 static struct phy *mixel_lvds_phy_xlate(struct device *dev,
-                                       struct of_phandle_args *args)
+                                       const struct of_phandle_args *args)
 {
        struct mixel_lvds_phy_priv *priv = dev_get_drvdata(dev);
        unsigned int phy_id;
index e2187767ce00ce5658109e820c173a8a2fc04d12..b86da8e9daa46549148c2501666e85ddccd1e2e7 100644 (file)
@@ -556,7 +556,7 @@ static void lynx_28g_lane_read_configuration(struct lynx_28g_lane *lane)
 }
 
 static struct phy *lynx_28g_xlate(struct device *dev,
-                                 struct of_phandle_args *args)
+                                 const struct of_phandle_args *args)
 {
        struct lynx_28g_priv *priv = dev_get_drvdata(dev);
        int idx = args->args[0];
index c44588fd5a53e44d7391ec24db6d390fed0065ca..7436dcae398184d5644dc92594f8c316fc3d44d0 100644 (file)
@@ -163,7 +163,7 @@ static const struct phy_ops histb_combphy_ops = {
 };
 
 static struct phy *histb_combphy_xlate(struct device *dev,
-                                      struct of_phandle_args *args)
+                                      const struct of_phandle_args *args)
 {
        struct histb_combphy_priv *priv = dev_get_drvdata(dev);
        struct histb_combphy_mode *mode = &priv->mode;
index d32e267c0001d8a6ea47c8da522e1c11076b193e..f8e3054a9e5977c2ce9e9dd6492857185f506fd7 100644 (file)
@@ -508,7 +508,7 @@ static const struct phy_ops intel_cbphy_ops = {
 };
 
 static struct phy *intel_cbphy_xlate(struct device *dev,
-                                    struct of_phandle_args *args)
+                                    const struct of_phandle_args *args)
 {
        struct intel_combo_phy *cbphy = dev_get_drvdata(dev);
        u32 iphy_id;
index ef93bf2cba1053be5196c83a8bb8f6def7460e1f..406a87c8b7599084cc82a5a1d43cbcea3aa3fc53 100644 (file)
@@ -358,7 +358,7 @@ static const struct phy_ops ltq_vrx200_pcie_phy_ops = {
 };
 
 static struct phy *ltq_vrx200_pcie_phy_xlate(struct device *dev,
-                                            struct of_phandle_args *args)
+                                            const struct of_phandle_args *args)
 {
        struct ltq_vrx200_pcie_phy_priv *priv = dev_get_drvdata(dev);
        unsigned int mode;
index b141e3cd8a941998ef3140def0edfba331671d0b..3731f9b25655e4f23295f1416e706107766facf6 100644 (file)
@@ -61,7 +61,7 @@ static const struct phy_ops armada375_usb_phy_ops = {
  * USB3 case it is still optional and we use ENODEV.
  */
 static struct phy *armada375_usb_phy_xlate(struct device *dev,
-                                       struct of_phandle_args *args)
+                                       const struct of_phandle_args *args)
 {
        struct armada375_cluster_phy *cluster_phy = dev_get_drvdata(dev);
 
index b7d99861526a559fe2aaaed2f22105667a1f0ab7..5063361b01208eb59d81a9946a1e58c4dd4615fe 100644 (file)
@@ -47,8 +47,13 @@ struct a38x_comphy {
        struct a38x_comphy_lane lane[MAX_A38X_COMPHY];
 };
 
+/*
+ * Map serdes lanes and gbe ports to serdes mux configuration values:
+ * row index = serdes lane,
+ * column index = gbe port number.
+ */
 static const u8 gbe_mux[MAX_A38X_COMPHY][MAX_A38X_PORTS] = {
-       { 0, 0, 0 },
+       { 3, 0, 0 },
        { 4, 5, 0 },
        { 0, 4, 0 },
        { 0, 0, 4 },
@@ -155,7 +160,7 @@ static const struct phy_ops a38x_comphy_ops = {
 };
 
 static struct phy *a38x_comphy_xlate(struct device *dev,
-                                    struct of_phandle_args *args)
+                                    const struct of_phandle_args *args)
 {
        struct a38x_comphy_lane *lane;
        struct phy *phy;
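The gbe_mux change above corrects the lane 0 / port 0 selector from 0 (invalid) to 3. Given the row = lane, column = port layout documented in the new comment, a lookup presumably reduces to a bounds-checked table read; a hedged sketch (the surrounding code is assumed, not shown in this hunk):

	/* Illustrative sketch: resolve the serdes mux value for lane/port. */
	u8 mux;

	if (lane >= MAX_A38X_COMPHY || port >= MAX_A38X_PORTS)
		return -EINVAL;

	mux = gbe_mux[lane][port];
	if (!mux)
		return -EINVAL;	/* this lane cannot serve this GBE port */
	/* otherwise program 'mux' into the lane's pipe selector */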
index f972d78372eaf5fb19cdebdb0a177a58ad7fc39e..c90e2867900c3f372a688fdde8a271348df18a6b 100644 (file)
@@ -155,7 +155,7 @@ static int phy_berlin_sata_power_off(struct phy *phy)
 }
 
 static struct phy *phy_berlin_sata_phy_xlate(struct device *dev,
-                                            struct of_phandle_args *args)
+                                            const struct of_phandle_args *args)
 {
        struct phy_berlin_priv *priv = dev_get_drvdata(dev);
        int i;
index 24c3371e2bb294402a5be470520f4835bdd49522..41162d7228c919dadc65c526657e120eff268f59 100644 (file)
@@ -1213,7 +1213,7 @@ static const struct phy_ops mvebu_a3700_comphy_ops = {
 };
 
 static struct phy *mvebu_a3700_comphy_xlate(struct device *dev,
-                                           struct of_phandle_args *args)
+                                           const struct of_phandle_args *args)
 {
        struct mvebu_a3700_comphy_lane *lane;
        unsigned int port;
index b0dd133665986e377ca164b726257d15ffb0892e..da5e8f4057490ca581109d13c7c8e736cc4b136e 100644 (file)
@@ -917,7 +917,7 @@ static const struct phy_ops mvebu_comphy_ops = {
 };
 
 static struct phy *mvebu_comphy_xlate(struct device *dev,
-                                     struct of_phandle_args *args)
+                                     const struct of_phandle_args *args)
 {
        struct mvebu_comphy_lane *lane;
        struct phy *phy;
index 3125ecb5d119fec8f6a93655d95f306b9562e085..3849b7c87d287254a766817ff819dac2efd5b097 100644 (file)
@@ -58,6 +58,18 @@ config PHY_MTK_HDMI
        help
          Support HDMI PHY for Mediatek SoCs.
 
+config PHY_MTK_MIPI_CSI_0_5
+       tristate "MediaTek MIPI CSI CD-PHY v0.5 Driver"
+       depends on ARCH_MEDIATEK || COMPILE_TEST
+       depends on OF
+       select GENERIC_PHY
+       help
+         Enable this to support the MIPI CSI CD-PHY receiver version 0.5.
+         The driver supports multiple CSI CD-PHY ports simultaneously.
+
+         To compile this driver as a module, choose M here: the
+         module will be called phy-mtk-mipi-csi-0-5.
+
 config PHY_MTK_MIPI_DSI
        tristate "MediaTek MIPI-DSI Driver"
        depends on ARCH_MEDIATEK || COMPILE_TEST
index c9a50395533eb1f859baf8609864125939fbbb2a..f6e24a47e08153bbafc1b94fe6c30248c98e61e9 100644 (file)
@@ -15,6 +15,8 @@ phy-mtk-hdmi-drv-y                    += phy-mtk-hdmi-mt8173.o
 phy-mtk-hdmi-drv-y                     += phy-mtk-hdmi-mt8195.o
 obj-$(CONFIG_PHY_MTK_HDMI)             += phy-mtk-hdmi-drv.o
 
+obj-$(CONFIG_PHY_MTK_MIPI_CSI_0_5)     += phy-mtk-mipi-csi-0-5.o
+
 phy-mtk-mipi-dsi-drv-y                 := phy-mtk-mipi-dsi.o
 phy-mtk-mipi-dsi-drv-y                 += phy-mtk-mipi-dsi-mt8173.o
 phy-mtk-mipi-dsi-drv-y                 += phy-mtk-mipi-dsi-mt8183.o
diff --git a/drivers/phy/mediatek/phy-mtk-mipi-csi-0-5-rx-reg.h b/drivers/phy/mediatek/phy-mtk-mipi-csi-0-5-rx-reg.h
new file mode 100644 (file)
index 0000000..97b4c27
--- /dev/null
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023, MediaTek Inc.
+ * Copyright (c) 2023, BayLibre Inc.
+ */
+
+#ifndef __PHY_MTK_MIPI_CSI_V_0_5_RX_REG_H__
+#define __PHY_MTK_MIPI_CSI_V_0_5_RX_REG_H__
+
+/*
+ * CSI1 and CSI2 are identical, and similar to CSI0. All CSIX macros are
+ * applicable to the three PHYs. Where differences exist, they are denoted by
+ * macro names using CSI0 and CSI1, the latter being applicable to CSI1 and
+ * CSI2 alike.
+ */
+
+#define MIPI_RX_ANA00_CSIXA                    0x0000
+#define RG_CSI0A_CPHY_EN                       BIT(0)
+#define RG_CSIXA_EQ_PROTECT_EN                 BIT(1)
+#define RG_CSIXA_BG_LPF_EN                     BIT(2)
+#define RG_CSIXA_BG_CORE_EN                    BIT(3)
+#define RG_CSIXA_DPHY_L0_CKMODE_EN             BIT(5)
+#define RG_CSIXA_DPHY_L0_CKSEL                 BIT(6)
+#define RG_CSIXA_DPHY_L1_CKMODE_EN             BIT(8)
+#define RG_CSIXA_DPHY_L1_CKSEL                 BIT(9)
+#define RG_CSIXA_DPHY_L2_CKMODE_EN             BIT(11)
+#define RG_CSIXA_DPHY_L2_CKSEL                 BIT(12)
+
+#define MIPI_RX_ANA18_CSIXA                    0x0018
+#define RG_CSI0A_L0_T0AB_EQ_IS                 GENMASK(5, 4)
+#define RG_CSI0A_L0_T0AB_EQ_BW                 GENMASK(7, 6)
+#define RG_CSI0A_L1_T1AB_EQ_IS                 GENMASK(21, 20)
+#define RG_CSI0A_L1_T1AB_EQ_BW                 GENMASK(23, 22)
+#define RG_CSI0A_L2_T1BC_EQ_IS                 GENMASK(21, 20)
+#define RG_CSI0A_L2_T1BC_EQ_BW                 GENMASK(23, 22)
+#define RG_CSI1A_L0_EQ_IS                      GENMASK(5, 4)
+#define RG_CSI1A_L0_EQ_BW                      GENMASK(7, 6)
+#define RG_CSI1A_L1_EQ_IS                      GENMASK(21, 20)
+#define RG_CSI1A_L1_EQ_BW                      GENMASK(23, 22)
+#define RG_CSI1A_L2_EQ_IS                      GENMASK(5, 4)
+#define RG_CSI1A_L2_EQ_BW                      GENMASK(7, 6)
+
+#define MIPI_RX_ANA1C_CSIXA                    0x001c
+#define MIPI_RX_ANA20_CSI0A                    0x0020
+
+#define MIPI_RX_ANA24_CSIXA                    0x0024
+#define RG_CSIXA_RESERVE                       GENMASK(31, 24)
+
+#define MIPI_RX_ANA40_CSIXA                    0x0040
+#define RG_CSIXA_CPHY_FMCK_SEL                 GENMASK(1, 0)
+#define RG_CSIXA_ASYNC_OPTION                  GENMASK(7, 4)
+#define RG_CSIXA_CPHY_SPARE                    GENMASK(31, 16)
+
+#define MIPI_RX_WRAPPER80_CSIXA                        0x0080
+#define CSR_CSI_RST_MODE                       GENMASK(17, 16)
+
+#define MIPI_RX_ANAA8_CSIXA                    0x00a8
+#define RG_CSIXA_CDPHY_L0_T0_BYTECK_INVERT     BIT(0)
+#define RG_CSIXA_DPHY_L1_BYTECK_INVERT         BIT(1)
+#define RG_CSIXA_CDPHY_L2_T1_BYTECK_INVERT     BIT(2)
+
+#endif
diff --git a/drivers/phy/mediatek/phy-mtk-mipi-csi-0-5.c b/drivers/phy/mediatek/phy-mtk-mipi-csi-0-5.c
new file mode 100644 (file)
index 0000000..058e1d9
--- /dev/null
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MediaTek MIPI CSI v0.5 driver
+ *
+ * Copyright (c) 2023, MediaTek Inc.
+ * Copyright (c) 2023, BayLibre Inc.
+ */
+
+#include <dt-bindings/phy/phy.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "phy-mtk-io.h"
+#include "phy-mtk-mipi-csi-0-5-rx-reg.h"
+
+#define CSIXB_OFFSET           0x1000
+
+struct mtk_mipi_cdphy_port {
+       struct device *dev;
+       void __iomem *base;
+       struct phy *phy;
+       u32 type;
+       u32 mode;
+       u32 num_lanes;
+};
+
+enum PHY_TYPE {
+       DPHY = 0,
+       CPHY,
+       CDPHY,
+};
+
+static void mtk_phy_csi_cdphy_ana_eq_tune(void __iomem *base)
+{
+       mtk_phy_update_field(base + MIPI_RX_ANA18_CSIXA, RG_CSI0A_L0_T0AB_EQ_IS, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA18_CSIXA, RG_CSI0A_L0_T0AB_EQ_BW, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA1C_CSIXA, RG_CSI0A_L1_T1AB_EQ_IS, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA1C_CSIXA, RG_CSI0A_L1_T1AB_EQ_BW, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA20_CSI0A, RG_CSI0A_L2_T1BC_EQ_IS, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA20_CSI0A, RG_CSI0A_L2_T1BC_EQ_BW, 1);
+
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA18_CSIXA, RG_CSI0A_L0_T0AB_EQ_IS, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA18_CSIXA, RG_CSI0A_L0_T0AB_EQ_BW, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA1C_CSIXA, RG_CSI0A_L1_T1AB_EQ_IS, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA1C_CSIXA, RG_CSI0A_L1_T1AB_EQ_BW, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA20_CSI0A, RG_CSI0A_L2_T1BC_EQ_IS, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA20_CSI0A, RG_CSI0A_L2_T1BC_EQ_BW, 1);
+}
+
+static void mtk_phy_csi_dphy_ana_eq_tune(void __iomem *base)
+{
+       mtk_phy_update_field(base + MIPI_RX_ANA18_CSIXA, RG_CSI1A_L0_EQ_IS, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA18_CSIXA, RG_CSI1A_L0_EQ_BW, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA18_CSIXA, RG_CSI1A_L1_EQ_IS, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA18_CSIXA, RG_CSI1A_L1_EQ_BW, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA1C_CSIXA, RG_CSI1A_L2_EQ_IS, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA1C_CSIXA, RG_CSI1A_L2_EQ_BW, 1);
+
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA18_CSIXA, RG_CSI1A_L0_EQ_IS, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA18_CSIXA, RG_CSI1A_L0_EQ_BW, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA18_CSIXA, RG_CSI1A_L1_EQ_IS, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA18_CSIXA, RG_CSI1A_L1_EQ_BW, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA1C_CSIXA, RG_CSI1A_L2_EQ_IS, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA1C_CSIXA, RG_CSI1A_L2_EQ_BW, 1);
+}
+
+static int mtk_mipi_phy_power_on(struct phy *phy)
+{
+       struct mtk_mipi_cdphy_port *port = phy_get_drvdata(phy);
+       void __iomem *base = port->base;
+
+       /*
+        * The driver currently supports DPHY and CD-PHY PHYs, but the only
+        * supported mode is DPHY, so CD-PHY capable PHYs must be configured
+        * in DPHY mode.
+        */
+       if (port->type == CDPHY) {
+               mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSI0A_CPHY_EN, 0);
+               mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA,
+                                    RG_CSI0A_CPHY_EN, 0);
+       }
+
+       /*
+        * Lane configuration:
+        *
+        * Only 4 data + 1 clock lanes are supported for now, with the
+        * following mapping:
+        *
+        * CSIXA_LNR0 --> D2
+        * CSIXA_LNR1 --> D0
+        * CSIXA_LNR2 --> C
+        * CSIXB_LNR0 --> D1
+        * CSIXB_LNR1 --> D3
+        */
+       mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSIXA_DPHY_L0_CKMODE_EN, 0);
+       mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSIXA_DPHY_L0_CKSEL, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSIXA_DPHY_L1_CKMODE_EN, 0);
+       mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSIXA_DPHY_L1_CKSEL, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSIXA_DPHY_L2_CKMODE_EN, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSIXA_DPHY_L2_CKSEL, 1);
+
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA,
+                            RG_CSIXA_DPHY_L0_CKMODE_EN, 0);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA, RG_CSIXA_DPHY_L0_CKSEL, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA,
+                            RG_CSIXA_DPHY_L1_CKMODE_EN, 0);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA, RG_CSIXA_DPHY_L1_CKSEL, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA,
+                            RG_CSIXA_DPHY_L2_CKMODE_EN, 0);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA, RG_CSIXA_DPHY_L2_CKSEL, 1);
+
+       /* Byte clock invert */
+       mtk_phy_update_field(base + MIPI_RX_ANAA8_CSIXA, RG_CSIXA_CDPHY_L0_T0_BYTECK_INVERT, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANAA8_CSIXA, RG_CSIXA_DPHY_L1_BYTECK_INVERT, 1);
+       mtk_phy_update_field(base + MIPI_RX_ANAA8_CSIXA, RG_CSIXA_CDPHY_L2_T1_BYTECK_INVERT, 1);
+
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANAA8_CSIXA,
+                            RG_CSIXA_CDPHY_L0_T0_BYTECK_INVERT, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANAA8_CSIXA,
+                            RG_CSIXA_DPHY_L1_BYTECK_INVERT, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANAA8_CSIXA,
+                            RG_CSIXA_CDPHY_L2_T1_BYTECK_INVERT, 1);
+
+       /* Start ANA EQ tuning */
+       if (port->type == CDPHY)
+               mtk_phy_csi_cdphy_ana_eq_tune(base);
+       else
+               mtk_phy_csi_dphy_ana_eq_tune(base);
+
+       /* End ANA EQ tuning */
+       mtk_phy_set_bits(base + MIPI_RX_ANA40_CSIXA, 0x90);
+
+       mtk_phy_update_field(base + MIPI_RX_ANA24_CSIXA, RG_CSIXA_RESERVE, 0x40);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA24_CSIXA, RG_CSIXA_RESERVE, 0x40);
+       mtk_phy_update_field(base + MIPI_RX_WRAPPER80_CSIXA, CSR_CSI_RST_MODE, 0);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_WRAPPER80_CSIXA, CSR_CSI_RST_MODE, 0);
+       /* ANA power on */
+       mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSIXA_BG_CORE_EN, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA, RG_CSIXA_BG_CORE_EN, 1);
+       usleep_range(20, 40);
+       mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSIXA_BG_LPF_EN, 1);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA, RG_CSIXA_BG_LPF_EN, 1);
+
+       return 0;
+}
+
+static int mtk_mipi_phy_power_off(struct phy *phy)
+{
+       struct mtk_mipi_cdphy_port *port = phy_get_drvdata(phy);
+       void __iomem *base = port->base;
+
+       /* Disable MIPI BG. */
+       mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSIXA_BG_CORE_EN, 0);
+       mtk_phy_update_field(base + MIPI_RX_ANA00_CSIXA, RG_CSIXA_BG_LPF_EN, 0);
+
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA, RG_CSIXA_BG_CORE_EN, 0);
+       mtk_phy_update_field(base + CSIXB_OFFSET + MIPI_RX_ANA00_CSIXA, RG_CSIXA_BG_LPF_EN, 0);
+
+       return 0;
+}
+
+static struct phy *mtk_mipi_cdphy_xlate(struct device *dev,
+                                       const struct of_phandle_args *args)
+{
+       struct mtk_mipi_cdphy_port *priv = dev_get_drvdata(dev);
+
+       /*
+        * If the PHY is a CD-PHY, we need to get the operating mode.
+        * For now, only D-PHY mode is supported.
+        */
+       if (priv->type == CDPHY) {
+               if (args->args_count != 1) {
+                       dev_err(dev, "invalid number of arguments\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               switch (args->args[0]) {
+               case PHY_TYPE_DPHY:
+                       priv->mode = DPHY;
+                       if (priv->num_lanes != 4) {
+                               dev_err(dev, "Only 4D1C mode is supported for now!\n");
+                               return ERR_PTR(-EINVAL);
+                       }
+                       break;
+               default:
+                       dev_err(dev, "Unsupported PHY type: %i\n", args->args[0]);
+                       return ERR_PTR(-EINVAL);
+               }
+       } else {
+               if (args->args_count) {
+                       dev_err(dev, "invalid number of arguments\n");
+                       return ERR_PTR(-EINVAL);
+               }
+               priv->mode = DPHY;
+       }
+
+       return priv->phy;
+}
+
+static const struct phy_ops mtk_cdphy_ops = {
+       .power_on       = mtk_mipi_phy_power_on,
+       .power_off      = mtk_mipi_phy_power_off,
+       .owner          = THIS_MODULE,
+};
+
+static int mtk_mipi_cdphy_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct phy_provider *phy_provider;
+       struct mtk_mipi_cdphy_port *port;
+       struct phy *phy;
+       int ret;
+       u32 phy_type;
+
+       port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+       if (!port)
+               return -ENOMEM;
+
+       dev_set_drvdata(dev, port);
+
+       port->dev = dev;
+
+       port->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(port->base))
+               return PTR_ERR(port->base);
+
+       ret = of_property_read_u32(dev->of_node, "num-lanes", &port->num_lanes);
+       if (ret) {
+               dev_err(dev, "Failed to read num-lanes property: %i\n", ret);
+               return ret;
+       }
+
+       /*
+        * phy-type is optional; if not present, the PHY is considered
+        * to be a CD-PHY.
+        */
+       if (device_property_present(dev, "phy-type")) {
+               ret = of_property_read_u32(dev->of_node, "phy-type", &phy_type);
+               if (ret) {
+                       dev_err(dev, "Failed to read phy-type property: %i\n", ret);
+                       return ret;
+               }
+               switch (phy_type) {
+               case PHY_TYPE_DPHY:
+                       port->type = DPHY;
+                       break;
+               default:
+                       dev_err(dev, "Unsupported PHY type: %i\n", phy_type);
+                       return -EINVAL;
+               }
+       } else {
+               port->type = CDPHY;
+       }
+
+       phy = devm_phy_create(dev, NULL, &mtk_cdphy_ops);
+       if (IS_ERR(phy)) {
+               dev_err(dev, "Failed to create PHY: %ld\n", PTR_ERR(phy));
+               return PTR_ERR(phy);
+       }
+
+       port->phy = phy;
+       phy_set_drvdata(phy, port);
+
+       phy_provider = devm_of_phy_provider_register(dev, mtk_mipi_cdphy_xlate);
+       if (IS_ERR(phy_provider)) {
+               dev_err(dev, "Failed to register PHY provider: %ld\n",
+                       PTR_ERR(phy_provider));
+               return PTR_ERR(phy_provider);
+       }
+
+       return 0;
+}
+
+static const struct of_device_id mtk_mipi_cdphy_of_match[] = {
+       { .compatible = "mediatek,mt8365-csi-rx" },
+       { /* sentinel */},
+};
+MODULE_DEVICE_TABLE(of, mtk_mipi_cdphy_of_match);
+
+static struct platform_driver mipi_cdphy_pdrv = {
+       .probe = mtk_mipi_cdphy_probe,
+       .driver = {
+               .name   = "mtk-mipi-csi-0-5",
+               .of_match_table = mtk_mipi_cdphy_of_match,
+       },
+};
+module_platform_driver(mipi_cdphy_pdrv);
+
+MODULE_DESCRIPTION("MediaTek MIPI CSI CD-PHY v0.5 Driver");
+MODULE_AUTHOR("Louis Kuo <louis.kuo@mediatek.com>");
+MODULE_LICENSE("GPL");
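A consumer such as a camera interface driver would drive this PHY through the generic PHY framework rather than touching these registers directly. A minimal sketch, assuming the consumer's DT node carries a "dphy" entry in phy-names (that name is an assumption):

	struct phy *dphy;
	int ret;

	dphy = devm_phy_get(dev, "dphy");	/* consumer lookup by name */
	if (IS_ERR(dphy))
		return PTR_ERR(dphy);

	ret = phy_power_on(dphy);	/* ends up in mtk_mipi_phy_power_on() */
	if (ret)
		return ret;

	/* ... capture frames ... */

	phy_power_off(dphy);		/* ends up in mtk_mipi_phy_power_off() */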
index a4746f6cb8a187e2a1172fd5fc2dd1f7ceee7bb1..25b86bbb9cec049a069f54d17016738e72df522b 100644 (file)
@@ -1467,7 +1467,7 @@ static int mtk_phy_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 }
 
 static struct phy *mtk_phy_xlate(struct device *dev,
-                                       struct of_phandle_args *args)
+                                       const struct of_phandle_args *args)
 {
        struct mtk_tphy *tphy = dev_get_drvdata(dev);
        struct mtk_phy_instance *instance = NULL;
index b222fbbd71d18642cddbb4029cb1823510087853..064fd09417275e53581c8d47d5aa942c9bad7ddf 100644 (file)
@@ -378,7 +378,7 @@ static int mtk_phy_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 }
 
 static struct phy *mtk_phy_xlate(struct device *dev,
-                                struct of_phandle_args *args)
+                                const struct of_phandle_args *args)
 {
        struct mtk_xsphy *xsphy = dev_get_drvdata(dev);
        struct xsphy_instance *inst = NULL;
index b5ac2b7995e7156b73e348814ce4e29b6f71a874..835e369cdfc5fe6f56162fee4d9437e035b857d6 100644 (file)
@@ -518,7 +518,7 @@ static const struct phy_ops serdes_ops = {
 };
 
 static struct phy *serdes_simple_xlate(struct device *dev,
-                                      struct of_phandle_args *args)
+                                      const struct of_phandle_args *args)
 {
        struct serdes_ctrl *ctrl = dev_get_drvdata(dev);
        unsigned int port, idx, i;
index 01bd5ea620c5b7d26abb100909cfb67ce689a273..7cb85029fab399e61b3e7f973b3730e4ad3f295b 100644 (file)
@@ -2509,7 +2509,7 @@ static struct sparx5_serdes_io_resource sparx5_serdes_iomap[] =  {
 
 /* Client lookup function, uses serdes index */
 static struct phy *sparx5_serdes_xlate(struct device *dev,
-                                    struct of_phandle_args *args)
+                                    const struct of_phandle_args *args)
 {
        struct sparx5_serdes_private *priv = dev_get_drvdata(dev);
        int idx;
index d9443e865a780f11ec03105e373f2bcf9bded8aa..1cd1b5db2ad7cf4eb721a14a1f150b1f8ffe9834 100644 (file)
@@ -441,7 +441,7 @@ static const struct phy_ops serdes_ops = {
 };
 
 static struct phy *serdes_simple_xlate(struct device *dev,
-                                      struct of_phandle_args *args)
+                                      const struct of_phandle_args *args)
 {
        struct serdes_ctrl *ctrl = dev_get_drvdata(dev);
        unsigned int port, idx, i;
index d9be6a4d538387fac816d4d46c781ebd84cc29de..7f9b4de772eedfb9f28448511dccfd0a462b658a 100644 (file)
@@ -700,8 +700,8 @@ EXPORT_SYMBOL_GPL(devm_phy_put);
  * should provide a custom of_xlate function that reads the *args* and returns
  * the appropriate phy.
  */
-struct phy *of_phy_simple_xlate(struct device *dev, struct of_phandle_args
-       *args)
+struct phy *of_phy_simple_xlate(struct device *dev,
+                               const struct of_phandle_args *args)
 {
        struct phy *phy;
        struct class_dev_iter iter;
@@ -1095,7 +1095,7 @@ EXPORT_SYMBOL_GPL(devm_phy_destroy);
 struct phy_provider *__of_phy_provider_register(struct device *dev,
        struct device_node *children, struct module *owner,
        struct phy * (*of_xlate)(struct device *dev,
-                                struct of_phandle_args *args))
+                                const struct of_phandle_args *args))
 {
        struct phy_provider *phy_provider;
 
@@ -1158,7 +1158,7 @@ EXPORT_SYMBOL_GPL(__of_phy_provider_register);
 struct phy_provider *__devm_of_phy_provider_register(struct device *dev,
        struct device_node *children, struct module *owner,
        struct phy * (*of_xlate)(struct device *dev,
-                                struct of_phandle_args *args))
+                                const struct of_phandle_args *args))
 {
        struct phy_provider **ptr, *phy_provider;
 
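With these prototype changes, every of_xlate callback now receives the parsed phandle arguments as const, so providers can only read them. A provider-side sketch under the new signature (the foo_* names and the lane-count check are hypothetical):

	static struct phy *foo_phy_xlate(struct device *dev,
					 const struct of_phandle_args *args)
	{
		struct foo_priv *priv = dev_get_drvdata(dev);

		if (args->args_count < 1 || args->args[0] >= FOO_NUM_PHYS)
			return ERR_PTR(-ENODEV);

		return priv->phy[args->args[0]];	/* args used read-only */
	}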
index 1f0f908323f0e660a108622cb90ca55c7f595b32..5007dc7a357cb7b1d3df2958deead20a59ed7172 100644 (file)
@@ -1611,7 +1611,7 @@ static const struct phy_ops xgene_phy_ops = {
 };
 
 static struct phy *xgene_phy_xlate(struct device *dev,
-                                  struct of_phandle_args *args)
+                                  const struct of_phandle_args *args)
 {
        struct xgene_phy_ctx *ctx = dev_get_drvdata(dev);
 
index ffd609ac62336c1e7fcbb1d287d5a13ea69b399d..eb60e950ad53334a3ada3db618aa584afb33fb93 100644 (file)
@@ -7,7 +7,7 @@ obj-$(CONFIG_PHY_QCOM_IPQ806X_SATA)     += phy-qcom-ipq806x-sata.o
 obj-$(CONFIG_PHY_QCOM_M31_USB)         += phy-qcom-m31.o
 obj-$(CONFIG_PHY_QCOM_PCIE2)           += phy-qcom-pcie2.o
 
-obj-$(CONFIG_PHY_QCOM_QMP_COMBO)       += phy-qcom-qmp-combo.o
+obj-$(CONFIG_PHY_QCOM_QMP_COMBO)       += phy-qcom-qmp-combo.o phy-qcom-qmp-usbc.o
 obj-$(CONFIG_PHY_QCOM_QMP_PCIE)                += phy-qcom-qmp-pcie.o
 obj-$(CONFIG_PHY_QCOM_QMP_PCIE_8996)   += phy-qcom-qmp-pcie-msm8996.o
 obj-$(CONFIG_PHY_QCOM_QMP_UFS)         += phy-qcom-qmp-ufs.o
index 8e5078304646e9fcd4a758aabc712052b61369fe..9818d994c68b2ed30e10c8eecb7b800d034711be 100644 (file)
@@ -21,7 +21,8 @@
 
 #include <dt-bindings/phy/phy.h>
 
-#include "phy-qcom-qmp.h"
+#include "phy-qcom-qmp-dp-phy.h"
+#include "phy-qcom-qmp-qserdes-com-v4.h"
 
 /* EDP_PHY registers */
 #define DP_PHY_CFG                              0x0010
index 17c4ad7553a5edd0960e8ff7e64e97dd7f22b5f5..7d585a4bbbba950c803412d829bed4140d57d898 100644 (file)
 
 #include <dt-bindings/phy/phy-qcom-qmp.h>
 
+#include "phy-qcom-qmp-common.h"
+
 #include "phy-qcom-qmp.h"
 #include "phy-qcom-qmp-pcs-misc-v3.h"
 #include "phy-qcom-qmp-pcs-usb-v4.h"
 #include "phy-qcom-qmp-pcs-usb-v5.h"
 #include "phy-qcom-qmp-pcs-usb-v6.h"
 
-/* QPHY_SW_RESET bit */
-#define SW_RESET                               BIT(0)
-/* QPHY_POWER_DOWN_CONTROL */
-#define SW_PWRDN                               BIT(0)
-/* QPHY_START_CONTROL bits */
-#define SERDES_START                           BIT(0)
-#define PCS_START                              BIT(1)
-/* QPHY_PCS_STATUS bit */
-#define PHYSTATUS                              BIT(6)
+#include "phy-qcom-qmp-dp-com-v3.h"
+
+#include "phy-qcom-qmp-dp-phy.h"
+#include "phy-qcom-qmp-dp-phy-v3.h"
+#include "phy-qcom-qmp-dp-phy-v4.h"
+#include "phy-qcom-qmp-dp-phy-v5.h"
+#include "phy-qcom-qmp-dp-phy-v6.h"
 
 /* QPHY_V3_DP_COM_RESET_OVRD_CTRL register bits */
 /* DP PHY soft reset */
 #define USB3_MODE                              BIT(0) /* enables USB3 mode */
 #define DP_MODE                                        BIT(1) /* enables DP mode */
 
-/* QPHY_PCS_AUTONOMOUS_MODE_CTRL register bits */
-#define ARCVR_DTCT_EN                          BIT(0)
-#define ALFPS_DTCT_EN                          BIT(1)
-#define ARCVR_DTCT_EVENT_SEL                   BIT(4)
-
-/* QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR register bits */
-#define IRQ_CLEAR                              BIT(0)
-
-/* QPHY_V3_PCS_MISC_CLAMP_ENABLE register bits */
-#define CLAMP_EN                               BIT(0) /* enables i/o clamp_n */
-
 /* QPHY_V3_DP_COM_TYPEC_CTRL register bits */
 #define SW_PORTSELECT_VAL                      BIT(0)
 #define SW_PORTSELECT_MUX                      BIT(1)
 
 #define PHY_INIT_COMPLETE_TIMEOUT              10000
 
-struct qmp_phy_init_tbl {
-       unsigned int offset;
-       unsigned int val;
-       /*
-        * mask of lanes for which this register is written
-        * for cases when second lane needs different values
-        */
-       u8 lane_mask;
-};
-
-#define QMP_PHY_INIT_CFG(o, v)         \
-       {                               \
-               .offset = o,            \
-               .val = v,               \
-               .lane_mask = 0xff,      \
-       }
-
-#define QMP_PHY_INIT_CFG_LANE(o, v, l) \
-       {                               \
-               .offset = o,            \
-               .val = v,               \
-               .lane_mask = l,         \
-       }
-
 /* set of registers with offsets different per-PHY */
 enum qphy_reg_layout {
        /* PCS registers */
@@ -2031,55 +1996,29 @@ static const struct qmp_phy_cfg sm8550_usb3dpphy_cfg = {
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
 };
 
-static void qmp_combo_configure_lane(void __iomem *base,
-                                       const struct qmp_phy_init_tbl tbl[],
-                                       int num,
-                                       u8 lane_mask)
-{
-       int i;
-       const struct qmp_phy_init_tbl *t = tbl;
-
-       if (!t)
-               return;
-
-       for (i = 0; i < num; i++, t++) {
-               if (!(t->lane_mask & lane_mask))
-                       continue;
-
-               writel(t->val, base + t->offset);
-       }
-}
-
-static void qmp_combo_configure(void __iomem *base,
-                                  const struct qmp_phy_init_tbl tbl[],
-                                  int num)
-{
-       qmp_combo_configure_lane(base, tbl, num, 0xff);
-}
-
 static int qmp_combo_dp_serdes_init(struct qmp_combo *qmp)
 {
        const struct qmp_phy_cfg *cfg = qmp->cfg;
        void __iomem *serdes = qmp->dp_serdes;
        const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 
-       qmp_combo_configure(serdes, cfg->dp_serdes_tbl, cfg->dp_serdes_tbl_num);
+       qmp_configure(serdes, cfg->dp_serdes_tbl, cfg->dp_serdes_tbl_num);
 
        switch (dp_opts->link_rate) {
        case 1620:
-               qmp_combo_configure(serdes, cfg->serdes_tbl_rbr,
+               qmp_configure(serdes, cfg->serdes_tbl_rbr,
                                cfg->serdes_tbl_rbr_num);
                break;
        case 2700:
-               qmp_combo_configure(serdes, cfg->serdes_tbl_hbr,
+               qmp_configure(serdes, cfg->serdes_tbl_hbr,
                                cfg->serdes_tbl_hbr_num);
                break;
        case 5400:
-               qmp_combo_configure(serdes, cfg->serdes_tbl_hbr2,
+               qmp_configure(serdes, cfg->serdes_tbl_hbr2,
                                cfg->serdes_tbl_hbr2_num);
                break;
        case 8100:
-               qmp_combo_configure(serdes, cfg->serdes_tbl_hbr3,
+               qmp_configure(serdes, cfg->serdes_tbl_hbr3,
                                cfg->serdes_tbl_hbr3_num);
                break;
        default:
@@ -2370,7 +2309,7 @@ static int qmp_v456_configure_dp_phy(struct qmp_combo *qmp)
        u32 status;
        int ret;
 
-       writel(0x0f, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_CFG_1);
+       writel(0x0f, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG_1);
 
        qmp_combo_configure_dp_mode(qmp);
 
@@ -2681,8 +2620,8 @@ static int qmp_combo_dp_power_on(struct phy *phy)
 
        qmp_combo_dp_serdes_init(qmp);
 
-       qmp_combo_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1);
-       qmp_combo_configure_lane(tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2);
+       qmp_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1);
+       qmp_configure_lane(tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2);
 
        /* Configure special DP tx tunings */
        cfg->configure_dp_tx(qmp);
@@ -2724,7 +2663,7 @@ static int qmp_combo_usb_power_on(struct phy *phy)
        unsigned int val;
        int ret;
 
-       qmp_combo_configure(serdes, cfg->serdes_tbl, cfg->serdes_tbl_num);
+       qmp_configure(serdes, cfg->serdes_tbl, cfg->serdes_tbl_num);
 
        ret = clk_prepare_enable(qmp->pipe_clk);
        if (ret) {
@@ -2733,16 +2672,16 @@ static int qmp_combo_usb_power_on(struct phy *phy)
        }
 
        /* Tx, Rx, and PCS configurations */
-       qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
-       qmp_combo_configure_lane(tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
+       qmp_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
+       qmp_configure_lane(tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 
-       qmp_combo_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
-       qmp_combo_configure_lane(rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
+       qmp_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
+       qmp_configure_lane(rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
 
-       qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
+       qmp_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
        if (pcs_usb)
-               qmp_combo_configure(pcs_usb, cfg->pcs_usb_tbl, cfg->pcs_usb_tbl_num);
+               qmp_configure(pcs_usb, cfg->pcs_usb_tbl, cfg->pcs_usb_tbl_num);
 
        if (cfg->has_pwrdn_delay)
                usleep_range(10, 20);
@@ -3515,7 +3454,7 @@ static int qmp_combo_parse_dt(struct qmp_combo *qmp)
        return 0;
 }
 
-static struct phy *qmp_combo_phy_xlate(struct device *dev, struct of_phandle_args *args)
+static struct phy *qmp_combo_phy_xlate(struct device *dev, const struct of_phandle_args *args)
 {
        struct qmp_combo *qmp = dev_get_drvdata(dev);
 
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-common.h b/drivers/phy/qualcomm/phy-qcom-qmp-common.h
new file mode 100644 (file)
index 0000000..7993842
--- /dev/null
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef QCOM_PHY_QMP_COMMON_H_
+#define QCOM_PHY_QMP_COMMON_H_
+
+struct qmp_phy_init_tbl {
+       unsigned int offset;
+       unsigned int val;
+       /*
+        * Mask of lanes for which this register is written, for cases
+        * where the second lane needs different values.
+        */
+       u8 lane_mask;
+};
+
+#define QMP_PHY_INIT_CFG(o, v)         \
+       {                               \
+               .offset = o,            \
+               .val = v,               \
+               .lane_mask = 0xff,      \
+       }
+
+#define QMP_PHY_INIT_CFG_LANE(o, v, l) \
+       {                               \
+               .offset = o,            \
+               .val = v,               \
+               .lane_mask = l,         \
+       }
+
+static inline void qmp_configure_lane(void __iomem *base,
+                                          const struct qmp_phy_init_tbl tbl[],
+                                          int num,
+                                          u8 lane_mask)
+{
+       int i;
+       const struct qmp_phy_init_tbl *t = tbl;
+
+       if (!t)
+               return;
+
+       for (i = 0; i < num; i++, t++) {
+               if (!(t->lane_mask & lane_mask))
+                       continue;
+
+               writel(t->val, base + t->offset);
+       }
+}
+
+static inline void qmp_configure(void __iomem *base,
+                                     const struct qmp_phy_init_tbl tbl[],
+                                     int num)
+{
+       qmp_configure_lane(base, tbl, num, 0xff);
+}
+
+#endif
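The shared header consolidates the init-table plumbing that the QMP drivers below stop duplicating. A short usage sketch built only from the macros and helpers above (the register offsets and values are made up for illustration):

	static const struct qmp_phy_init_tbl demo_tx_tbl[] = {
		QMP_PHY_INIT_CFG(0x0040, 0x1d),			/* both lanes */
		QMP_PHY_INIT_CFG_LANE(0x0044, 0x10, 1),		/* lane 1 only */
		QMP_PHY_INIT_CFG_LANE(0x0044, 0x12, 2),		/* lane 2 only */
	};

	/* Program per-lane bases; entries are skipped unless masks overlap. */
	qmp_configure_lane(tx,  demo_tx_tbl, ARRAY_SIZE(demo_tx_tbl), 1);
	qmp_configure_lane(tx2, demo_tx_tbl, ARRAY_SIZE(demo_tx_tbl), 2);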
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-com-v3.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-com-v3.h
new file mode 100644 (file)
index 0000000..396179e
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef QCOM_PHY_QMP_DP_COM_V3_H_
+#define QCOM_PHY_QMP_DP_COM_V3_H_
+
+/* Only for QMP V3 & V4 PHY - DP COM registers */
+#define QPHY_V3_DP_COM_PHY_MODE_CTRL                   0x00
+#define QPHY_V3_DP_COM_SW_RESET                                0x04
+#define QPHY_V3_DP_COM_POWER_DOWN_CTRL                 0x08
+#define QPHY_V3_DP_COM_SWI_CTRL                                0x0c
+#define QPHY_V3_DP_COM_TYPEC_CTRL                      0x10
+#define QPHY_V3_DP_COM_TYPEC_PWRDN_CTRL                        0x14
+#define QPHY_V3_DP_COM_RESET_OVRD_CTRL                 0x1c
+
+#endif
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v3.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v3.h
new file mode 100644 (file)
index 0000000..00a9702
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef QCOM_PHY_QMP_DP_PHY_V3_H_
+#define QCOM_PHY_QMP_DP_PHY_V3_H_
+
+/* Only for QMP V3 PHY - DP PHY registers */
+#define QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK           0x048
+#define QSERDES_V3_DP_PHY_AUX_INTERRUPT_CLEAR          0x04c
+#define QSERDES_V3_DP_PHY_AUX_BIST_CFG                 0x050
+
+#define QSERDES_V3_DP_PHY_VCO_DIV                      0x064
+#define QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL             0x06c
+#define QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL             0x088
+
+#define QSERDES_V3_DP_PHY_SPARE0                       0x0ac
+#define QSERDES_V3_DP_PHY_STATUS                       0x0c0
+
+#endif
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v4.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v4.h
new file mode 100644 (file)
index 0000000..ed6795e
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef QCOM_PHY_QMP_DP_PHY_V4_H_
+#define QCOM_PHY_QMP_DP_PHY_V4_H_
+
+/* Only for QMP V4 PHY - DP PHY registers */
+#define QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK           0x054
+#define QSERDES_V4_DP_PHY_AUX_INTERRUPT_CLEAR          0x058
+#define QSERDES_V4_DP_PHY_VCO_DIV                      0x070
+#define QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL             0x078
+#define QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL             0x09c
+#define QSERDES_V4_DP_PHY_SPARE0                       0x0c8
+#define QSERDES_V4_DP_PHY_AUX_INTERRUPT_STATUS         0x0d8
+#define QSERDES_V4_DP_PHY_STATUS                       0x0dc
+
+#endif
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v5.h
new file mode 100644 (file)
index 0000000..f5cfacf
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef QCOM_PHY_QMP_DP_PHY_V5_H_
+#define QCOM_PHY_QMP_DP_PHY_V5_H_
+
+/* Only for QMP V5 PHY - DP PHY registers */
+#define QSERDES_V5_DP_PHY_AUX_INTERRUPT_STATUS         0x0d8
+#define QSERDES_V5_DP_PHY_STATUS                       0x0dc
+
+#endif
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy-v6.h
new file mode 100644 (file)
index 0000000..01a20d3
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef QCOM_PHY_QMP_DP_PHY_V6_H_
+#define QCOM_PHY_QMP_DP_PHY_V6_H_
+
+/* Only for QMP V6 PHY - DP PHY registers */
+#define QSERDES_V6_DP_PHY_AUX_INTERRUPT_STATUS         0x0e0
+#define QSERDES_V6_DP_PHY_STATUS                       0x0e4
+
+#endif
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy.h b/drivers/phy/qualcomm/phy-qcom-qmp-dp-phy.h
new file mode 100644 (file)
index 0000000..0ebd405
--- /dev/null
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef QCOM_PHY_QMP_DP_PHY_H_
+#define QCOM_PHY_QMP_DP_PHY_H_
+
+/* QMP PHY - DP PHY registers */
+#define QSERDES_DP_PHY_REVISION_ID0                    0x000
+#define QSERDES_DP_PHY_REVISION_ID1                    0x004
+#define QSERDES_DP_PHY_REVISION_ID2                    0x008
+#define QSERDES_DP_PHY_REVISION_ID3                    0x00c
+#define QSERDES_DP_PHY_CFG                             0x010
+#define QSERDES_DP_PHY_CFG_1                           0x014
+#define QSERDES_DP_PHY_PD_CTL                          0x018
+#define QSERDES_DP_PHY_MODE                            0x01c
+#define QSERDES_DP_PHY_AUX_CFG0                                0x020
+#define QSERDES_DP_PHY_AUX_CFG1                                0x024
+#define QSERDES_DP_PHY_AUX_CFG2                                0x028
+#define QSERDES_DP_PHY_AUX_CFG3                                0x02c
+#define QSERDES_DP_PHY_AUX_CFG4                                0x030
+#define QSERDES_DP_PHY_AUX_CFG5                                0x034
+#define QSERDES_DP_PHY_AUX_CFG6                                0x038
+#define QSERDES_DP_PHY_AUX_CFG7                                0x03c
+#define QSERDES_DP_PHY_AUX_CFG8                                0x040
+#define QSERDES_DP_PHY_AUX_CFG9                                0x044
+
+/* QSERDES COM_BIAS_EN_CLKBUFLR_EN bits */
+# define QSERDES_V3_COM_BIAS_EN                                0x0001
+# define QSERDES_V3_COM_BIAS_EN_MUX                    0x0002
+# define QSERDES_V3_COM_CLKBUF_R_EN                    0x0004
+# define QSERDES_V3_COM_CLKBUF_L_EN                    0x0008
+# define QSERDES_V3_COM_EN_SYSCLK_TX_SEL               0x0010
+# define QSERDES_V3_COM_CLKBUF_RX_DRIVE_L              0x0020
+# define QSERDES_V3_COM_CLKBUF_RX_DRIVE_R              0x0040
+
+/* QPHY_TX_TX_EMP_POST1_LVL bits */
+# define DP_PHY_TXn_TX_EMP_POST1_LVL_MASK              0x001f
+# define DP_PHY_TXn_TX_EMP_POST1_LVL_MUX_EN            0x0020
+
+/* QPHY_TX_TX_DRV_LVL bits */
+# define DP_PHY_TXn_TX_DRV_LVL_MASK                    0x001f
+# define DP_PHY_TXn_TX_DRV_LVL_MUX_EN                  0x0020
+
+/* QSERDES_DP_PHY_PD_CTL bits */
+# define DP_PHY_PD_CTL_PWRDN                           0x001
+# define DP_PHY_PD_CTL_PSR_PWRDN                       0x002
+# define DP_PHY_PD_CTL_AUX_PWRDN                       0x004
+# define DP_PHY_PD_CTL_LANE_0_1_PWRDN                  0x008
+# define DP_PHY_PD_CTL_LANE_2_3_PWRDN                  0x010
+# define DP_PHY_PD_CTL_PLL_PWRDN                       0x020
+# define DP_PHY_PD_CTL_DP_CLAMP_EN                     0x040
+
+/* QPHY_DP_PHY_AUX_INTERRUPT_STATUS bits */
+# define PHY_AUX_STOP_ERR_MASK                         0x01
+# define PHY_AUX_DEC_ERR_MASK                          0x02
+# define PHY_AUX_SYNC_ERR_MASK                         0x04
+# define PHY_AUX_ALIGN_ERR_MASK                                0x08
+# define PHY_AUX_REQ_ERR_MASK                          0x10
+
+#endif
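The PD_CTL bits above are simple power-down/enable masks for the DP block, so powering it typically amounts to OR-ing the wanted bits into QSERDES_DP_PHY_PD_CTL. A hedged sketch (the exact sequence and the base pointer are driver-specific assumptions):

	/* Illustrative sketch: power up AUX, all four lanes and the PLL. */
	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
	       DP_PHY_PD_CTL_LANE_0_1_PWRDN | DP_PHY_PD_CTL_LANE_2_3_PWRDN |
	       DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
	       dp_phy_base + QSERDES_DP_PHY_PD_CTL);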
index ab61a9c73b189eabffbbe64fc6889338a00091be..0442b31205638cdea16694e256c86e436a3186e6 100644 (file)
 #include <linux/reset.h>
 #include <linux/slab.h>
 
+#include "phy-qcom-qmp-common.h"
+
 #include "phy-qcom-qmp.h"
 
-/* QPHY_SW_RESET bit */
-#define SW_RESET                               BIT(0)
-/* QPHY_POWER_DOWN_CONTROL */
-#define SW_PWRDN                               BIT(0)
-#define REFCLK_DRV_DSBL                                BIT(1)
 /* QPHY_START_CONTROL bits */
-#define SERDES_START                           BIT(0)
-#define PCS_START                              BIT(1)
 #define PLL_READY_GATE_EN                      BIT(3)
-/* QPHY_PCS_STATUS bit */
-#define PHYSTATUS                              BIT(6)
+
 /* QPHY_COM_PCS_READY_STATUS bit */
 #define PCS_READY                              BIT(0)
 
 #define POWER_DOWN_DELAY_US_MIN                        10
 #define POWER_DOWN_DELAY_US_MAX                        20
 
-struct qmp_phy_init_tbl {
-       unsigned int offset;
-       unsigned int val;
-       /*
-        * mask of lanes for which this register is written
-        * for cases when second lane needs different values
-        */
-       u8 lane_mask;
-};
-
-#define QMP_PHY_INIT_CFG(o, v)         \
-       {                               \
-               .offset = o,            \
-               .val = v,               \
-               .lane_mask = 0xff,      \
-       }
-
-#define QMP_PHY_INIT_CFG_LANE(o, v, l) \
-       {                               \
-               .offset = o,            \
-               .val = v,               \
-               .lane_mask = l,         \
-       }
-
 /* set of registers with offsets different per-PHY */
 enum qphy_reg_layout {
        /* Common block control registers */
@@ -307,32 +277,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = {
        .regs                   = pciephy_regs_layout,
 };
 
-static void qmp_pcie_msm8996_configure_lane(void __iomem *base,
-                                       const struct qmp_phy_init_tbl tbl[],
-                                       int num,
-                                       u8 lane_mask)
-{
-       int i;
-       const struct qmp_phy_init_tbl *t = tbl;
-
-       if (!t)
-               return;
-
-       for (i = 0; i < num; i++, t++) {
-               if (!(t->lane_mask & lane_mask))
-                       continue;
-
-               writel(t->val, base + t->offset);
-       }
-}
-
-static void qmp_pcie_msm8996_configure(void __iomem *base,
-                                  const struct qmp_phy_init_tbl tbl[],
-                                  int num)
-{
-       qmp_pcie_msm8996_configure_lane(base, tbl, num, 0xff);
-}
-
 static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy)
 {
        struct qcom_qmp *qmp = qphy->qmp;
@@ -344,7 +288,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy)
        unsigned int val;
        int ret;
 
-       qmp_pcie_msm8996_configure(serdes, serdes_tbl, serdes_tbl_num);
+       qmp_configure(serdes, serdes_tbl, serdes_tbl_num);
 
        qphy_clrbits(serdes, cfg->regs[QPHY_COM_SW_RESET], SW_RESET);
        qphy_setbits(serdes, cfg->regs[QPHY_COM_START_CONTROL],
@@ -487,9 +431,9 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy)
        }
 
        /* Tx, Rx, and PCS configurations */
-       qmp_pcie_msm8996_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
-       qmp_pcie_msm8996_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
-       qmp_pcie_msm8996_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
+       qmp_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
+       qmp_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
+       qmp_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
        /*
         * Pull out PHY from POWER DOWN state.
index 2af7115ef96891ea33443bbcbf823c3d3c03fafd..8836bb1ff0cc1ee31b7778b23f7a5c766091a67c 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/reset.h>
 #include <linux/slab.h>
 
+#include "phy-qcom-qmp-common.h"
+
 #include "phy-qcom-qmp.h"
 #include "phy-qcom-qmp-pcs-misc-v3.h"
 #include "phy-qcom-qmp-pcs-pcie-v4.h"
 #include "phy-qcom-qmp-pcs-pcie-v6_20.h"
 #include "phy-qcom-qmp-pcie-qhp.h"
 
-/* QPHY_SW_RESET bit */
-#define SW_RESET                               BIT(0)
-/* QPHY_POWER_DOWN_CONTROL */
-#define SW_PWRDN                               BIT(0)
-#define REFCLK_DRV_DSBL                                BIT(1)
-/* QPHY_START_CONTROL bits */
-#define SERDES_START                           BIT(0)
-#define PCS_START                              BIT(1)
-/* QPHY_PCS_STATUS bit */
-#define PHYSTATUS                              BIT(6)
-#define PHYSTATUS_4_20                         BIT(7)
-
 #define PHY_INIT_COMPLETE_TIMEOUT              10000
 
-struct qmp_phy_init_tbl {
-       unsigned int offset;
-       unsigned int val;
-       /*
-        * mask of lanes for which this register is written
-        * for cases when second lane needs different values
-        */
-       u8 lane_mask;
-};
-
-#define QMP_PHY_INIT_CFG(o, v)         \
-       {                               \
-               .offset = o,            \
-               .val = v,               \
-               .lane_mask = 0xff,      \
-       }
-
-#define QMP_PHY_INIT_CFG_LANE(o, v, l) \
-       {                               \
-               .offset = o,            \
-               .val = v,               \
-               .lane_mask = l,         \
-       }
-
 /* set of registers with offsets different per-PHY */
 enum qphy_reg_layout {
        /* PCS registers */
@@ -116,6 +82,13 @@ static const unsigned int pciephy_v5_regs_layout[QPHY_LAYOUT_SIZE] = {
        [QPHY_PCS_POWER_DOWN_CONTROL]   = QPHY_V5_PCS_POWER_DOWN_CONTROL,
 };
 
+static const unsigned int pciephy_v6_regs_layout[QPHY_LAYOUT_SIZE] = {
+       [QPHY_SW_RESET]                 = QPHY_V6_PCS_SW_RESET,
+       [QPHY_START_CTRL]               = QPHY_V6_PCS_START_CONTROL,
+       [QPHY_PCS_STATUS]               = QPHY_V6_PCS_PCS_STATUS1,
+       [QPHY_PCS_POWER_DOWN_CONTROL]   = QPHY_V6_PCS_POWER_DOWN_CONTROL,
+};
+
 static const struct qmp_phy_init_tbl msm8998_pcie_serdes_tbl[] = {
        QMP_PHY_INIT_CFG(QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, 0x14),
        QMP_PHY_INIT_CFG(QSERDES_V3_COM_CLK_SELECT, 0x30),
@@ -982,6 +955,143 @@ static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl[] = {
        QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00),
 };
 
+static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_serdes_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE1, 0x26),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE1, 0x03),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE1, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE1, 0x16),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE1, 0x36),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORECLK_DIV_MODE1, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE1, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE1, 0x1a),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE1, 0x68),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE1, 0xab),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE1, 0xaa),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE1, 0x02),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x12),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE1_MODE0, 0xf8),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_STEP_SIZE2_MODE0, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x16),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x36),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CORE_CLK_DIV_MODE0, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x0d),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x41),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START1_MODE0, 0xab),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START2_MODE0, 0xaa),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_DIV_FRAC_START3_MODE0, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_HS_SWITCH_SEL_1, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_BG_TIMER, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_EN_CENTER, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER1, 0x62),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_SSC_PER2, 0x02),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_POST_DIV_MUX, 0x40),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_BIAS_EN_CLK_BUFLR_EN, 0x14),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CLK_ENABLE1, 0x90),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYS_CLK_CTRL, 0x82),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0x08),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x46),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_CFG, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x14),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CLK_SELECT, 0x34),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CORE_CLK_EN, 0xa0),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_MISC_1, 0x88),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_MODE, 0x14),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_VCO_DC_LEVEL_CTRL, 0x0f),
+};
+
+static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_ln_shrd_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RXCLK_DIV2_CTRL, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_DFE_DAC_ENABLE1, 0x88),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_TX_ADAPT_POST_THRESH1, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_TX_ADAPT_POST_THRESH2, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B0, 0xd4),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B1, 0x12),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B2, 0xdb),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B3, 0x9a),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B4, 0x32),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B5, 0xb6),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B6, 0x64),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH1_RATE210, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH1_RATE3, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH2_RATE210, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH2_RATE3, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH3_RATE210, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH3_RATE3, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH4_RATE3, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH5_RATE3, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH6_RATE3, 0x1f),
+};
+
+static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_tx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_TX_RES_CODE_LANE_OFFSET_TX, 0x1d),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_TX_RES_CODE_LANE_OFFSET_RX, 0x03),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_TX_LANE_MODE_1, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_TX_LANE_MODE_2, 0x10),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_TX_LANE_MODE_3, 0x51),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_TX_TRAN_DRVR_EMP_EN, 0x34),
+};
+
+static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_rx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_FO_GAIN_RATE_2, 0x0c),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_SO_GAIN_RATE_2, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_FO_GAIN_RATE_3, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_PI_CONTROLS, 0x16),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_SO_ACC_DEFAULT_VAL_RATE3, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_IVCM_CAL_CTRL2, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_IVCM_POSTCAL_OFFSET, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_BKUP_CTRL1, 0x15),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_1, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_2, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_3, 0x45),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_VGA_CAL_MAN_VAL, 0x0b),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_GM_CAL, 0x0d),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_EQU_ADAPTOR_CNTRL4, 0x0b),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_SIGDET_ENABLES, 0x1c),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_PHPRE_CTRL, 0x20),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_Q_PI_INTRINSIC_BIAS_RATE32, 0x39),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B0, 0x14),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B1, 0xb3),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B2, 0x58),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B3, 0x9a),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B4, 0x26),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B5, 0xb6),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE2_B6, 0xee),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B0, 0xe4),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B1, 0xa4),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B2, 0x60),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B3, 0xdf),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B4, 0x4b),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B5, 0x76),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B6, 0xff),
+};
+
+static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_pcs_tbl[] = {
+       QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_G3S2_PRE_GAIN, 0x2e),
+       QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_RX_SIGDET_LVL, 0xcc),
+       QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_EQ_CONFIG4, 0x00),
+       QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_EQ_CONFIG5, 0x22),
+};
+
+static const struct qmp_phy_init_tbl x1e80100_qmp_gen4x2_pcie_pcs_misc_tbl[] = {
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_ENDPOINT_REFCLK_DRIVE, 0xc1),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_OSC_DTCT_ATCIONS, 0x00),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_EQ_CONFIG1, 0x16),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_EQ_CONFIG5, 0x02),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G4_PRE_GAIN, 0x2e),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_RX_MARGINING_CONFIG1, 0x03),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_RX_MARGINING_CONFIG3, 0x28),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_TX_RX_CONFIG, 0xc0),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_POWER_STATE_CONFIG2, 0x1d),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_RX_MARGINING_CONFIG5, 0x0f),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G3_FOM_EQ_CONFIG5, 0xf2),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G4_FOM_EQ_CONFIG5, 0xf2),
+};
+
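All of these x1e80100 tables follow the driver's generic init-table pattern: each entry pairs a register offset with the value to program, plus a lane mask for entries that apply to only one lane. A minimal userspace model of that pattern, using the struct and macro names whose per-driver copies this series removes in favor of a shared header (the removal and the new phy-qcom-qmp-common.h include are both visible later in this diff, so the shared definitions presumably live there; the mmio write is stubbed with printf so the sketch actually runs, and the offsets/values are made up):

    #include <stdio.h>

    /* Mirrors struct qmp_phy_init_tbl as shared by the QMP drivers */
    struct qmp_phy_init_tbl {
            unsigned int offset;
            unsigned int val;
            unsigned char lane_mask;    /* lanes this write applies to */
    };

    #define QMP_PHY_INIT_CFG(o, v) { .offset = (o), .val = (v), .lane_mask = 0xff }

    /* Userspace stand-in for writel(val, base + offset) */
    static void fake_writel(unsigned int val, unsigned int offset)
    {
            printf("write 0x%02x -> +0x%03x\n", val, offset);
    }

    static const struct qmp_phy_init_tbl demo_tbl[] = {
            QMP_PHY_INIT_CFG(0x1d0, 0x03),      /* hypothetical entries */
            QMP_PHY_INIT_CFG(0x1dc, 0x06),
    };

    int main(void)
    {
            for (unsigned int i = 0; i < sizeof(demo_tbl) / sizeof(demo_tbl[0]); i++)
                    fake_writel(demo_tbl[i].val, demo_tbl[i].offset);
            return 0;
    }

Programming a block is then just a linear walk over the table, which is why adding x1e80100 support is almost entirely new data rather than new code.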
 static const struct qmp_phy_init_tbl sm8250_qmp_pcie_serdes_tbl[] = {
        QMP_PHY_INIT_CFG(QSERDES_V4_COM_SYSCLK_EN_SEL, 0x08),
        QMP_PHY_INIT_CFG(QSERDES_V4_COM_CLK_SELECT, 0x34),
@@ -1747,7 +1857,7 @@ static const struct qmp_phy_init_tbl sm8550_qmp_gen3x2_pcie_rx_tbl[] = {
        QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_10_HIGH2, 0x5b),
        QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_10_HIGH3, 0x1a),
        QMP_PHY_INIT_CFG(QSERDES_V6_RX_RX_MODE_10_HIGH4, 0x89),
-       QMP_PHY_INIT_CFG(QSERDES_V6_RX_TX_ADAPT_POST_THRESH, 0xf0),
+       QMP_PHY_INIT_CFG(QSERDES_V6_RX_TX_ADAPT_POST_THRESH, 0x00),
        QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_FO_GAIN, 0x09),
        QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_SO_GAIN, 0x05),
        QMP_PHY_INIT_CFG(QSERDES_V6_RX_UCDR_SB2_THRESH1, 0x08),
@@ -1767,6 +1877,8 @@ static const struct qmp_phy_init_tbl sm8550_qmp_gen3x2_pcie_pcs_tbl[] = {
 };
 
 static const struct qmp_phy_init_tbl sm8550_qmp_gen3x2_pcie_pcs_misc_tbl[] = {
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_PCS_PCIE_EQ_CONFIG1, 0x1e),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_PCS_PCIE_RXEQEVAL_TIME, 0x27),
        QMP_PHY_INIT_CFG(QPHY_PCIE_V6_PCS_PCIE_POWER_STATE_CONFIG2, 0x1d),
        QMP_PHY_INIT_CFG(QPHY_PCIE_V6_PCS_PCIE_POWER_STATE_CONFIG4, 0x07),
        QMP_PHY_INIT_CFG(QPHY_PCIE_V6_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
@@ -1823,10 +1935,9 @@ static const struct qmp_phy_init_tbl sm8550_qmp_gen4x2_pcie_serdes_tbl[] = {
 
 static const struct qmp_phy_init_tbl sm8550_qmp_gen4x2_pcie_ln_shrd_tbl[] = {
        QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RXCLK_DIV2_CTRL, 0x01),
-       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_Q_EN_RATES, 0xe),
        QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_DFE_DAC_ENABLE1, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_TX_ADAPT_POST_THRESH1, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_TX_ADAPT_POST_THRESH2, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_TX_ADAPT_POST_THRESH1, 0x02),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_TX_ADAPT_POST_THRESH2, 0x0d),
        QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B0, 0x12),
        QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B1, 0x12),
        QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MODE_RATE_0_1_B2, 0xdb),
@@ -1843,6 +1954,7 @@ static const struct qmp_phy_init_tbl sm8550_qmp_gen4x2_pcie_ln_shrd_tbl[] = {
        QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH4_RATE3, 0x1f),
        QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH5_RATE3, 0x1f),
        QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_MARG_COARSE_THRESH6_RATE3, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_LN_SHRD_RX_SUMMER_CAL_SPD_MODE, 0x5b),
 };
 
 static const struct qmp_phy_init_tbl sm8550_qmp_gen4x2_pcie_tx_tbl[] = {
@@ -1855,13 +1967,15 @@ static const struct qmp_phy_init_tbl sm8550_qmp_gen4x2_pcie_tx_tbl[] = {
 };
 
 static const struct qmp_phy_init_tbl sm8550_qmp_gen4x2_pcie_rx_tbl[] = {
-       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_FO_GAIN_RATE_2, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_FO_GAIN_RATE_2, 0x0c),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_FO_GAIN_RATE_3, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_SO_GAIN_RATE_2, 0x04),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_PI_CONTROLS, 0x16),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_UCDR_SO_ACC_DEFAULT_VAL_RATE3, 0x00),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_IVCM_CAL_CTRL2, 0x80),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_IVCM_POSTCAL_OFFSET, 0x7c),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_DFE_3, 0x05),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_TX_ADPT_CTRL, 0x10),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_VGA_CAL_MAN_VAL, 0x0a),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_GM_CAL, 0x0d),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_EQU_ADAPTOR_CNTRL4, 0x0b),
@@ -1883,11 +1997,13 @@ static const struct qmp_phy_init_tbl sm8550_qmp_gen4x2_pcie_rx_tbl[] = {
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B4, 0x78),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B5, 0x76),
        QMP_PHY_INIT_CFG(QSERDES_V6_20_RX_MODE_RATE3_B6, 0xff),
+       QMP_PHY_INIT_CFG(QSERDES_V6_20_VGA_CAL_CNTRL1, 0x00),
 };
 
 static const struct qmp_phy_init_tbl sm8550_qmp_gen4x2_pcie_pcs_tbl[] = {
+       QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_G12S1_TXDEEMPH_M6DB, 0x17),
        QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_G3S2_PRE_GAIN, 0x2e),
-       QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_COM_ELECIDLE_DLY_SEL, 0x25),
+       QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_RX_SIGDET_LVL, 0xcc),
        QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_EQ_CONFIG4, 0x00),
        QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_EQ_CONFIG5, 0x22),
        QMP_PHY_INIT_CFG(QPHY_V6_20_PCS_TX_RX_CONFIG1, 0x04),
@@ -1898,6 +2014,8 @@ static const struct qmp_phy_init_tbl sm8550_qmp_gen4x2_pcie_pcs_misc_tbl[] = {
        QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_ENDPOINT_REFCLK_DRIVE, 0xc1),
        QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_OSC_DTCT_ATCIONS, 0x00),
        QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_EQ_CONFIG1, 0x16),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G3_RXEQEVAL_TIME, 0x27),
+       QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G4_RXEQEVAL_TIME, 0x27),
        QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_EQ_CONFIG5, 0x02),
        QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_G4_PRE_GAIN, 0x2e),
        QMP_PHY_INIT_CFG(QPHY_PCIE_V6_20_PCS_RX_MARGINING_CONFIG1, 0x03),
@@ -2936,7 +3054,7 @@ static const struct qmp_phy_cfg sdx65_qmp_pciephy_cfg = {
        .num_resets             = ARRAY_SIZE(sdm845_pciephy_reset_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
-       .regs                   = pciephy_v5_regs_layout,
+       .regs                   = pciephy_v6_regs_layout,
 
        .pwrdn_ctrl             = SW_PWRDN,
        .phy_status             = PHYSTATUS_4_20,
@@ -3069,7 +3187,7 @@ static const struct qmp_phy_cfg sm8550_qmp_gen4x2_pciephy_cfg = {
        .num_resets             = ARRAY_SIZE(sdm845_pciephy_reset_l),
        .vreg_list              = sm8550_qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(sm8550_qmp_phy_vreg_l),
-       .regs                   = pciephy_v5_regs_layout,
+       .regs                   = pciephy_v6_regs_layout,
 
        .pwrdn_ctrl             = SW_PWRDN | REFCLK_DRV_DSBL,
        .phy_status             = PHYSTATUS_4_20,
@@ -3099,7 +3217,7 @@ static const struct qmp_phy_cfg sm8650_qmp_gen4x2_pciephy_cfg = {
        .num_resets             = ARRAY_SIZE(sdm845_pciephy_reset_l),
        .vreg_list              = sm8550_qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(sm8550_qmp_phy_vreg_l),
-       .regs                   = pciephy_v5_regs_layout,
+       .regs                   = pciephy_v6_regs_layout,
 
        .pwrdn_ctrl             = SW_PWRDN | REFCLK_DRV_DSBL,
        .phy_status             = PHYSTATUS_4_20,
@@ -3183,31 +3301,35 @@ static const struct qmp_phy_cfg sa8775p_qmp_gen4x4_pciephy_cfg = {
        .phy_status             = PHYSTATUS_4_20,
 };
 
-static void qmp_pcie_configure_lane(void __iomem *base,
-                                       const struct qmp_phy_init_tbl tbl[],
-                                       int num,
-                                       u8 lane_mask)
-{
-       int i;
-       const struct qmp_phy_init_tbl *t = tbl;
-
-       if (!t)
-               return;
+static const struct qmp_phy_cfg x1e80100_qmp_gen4x2_pciephy_cfg = {
+       .lanes = 2,
 
-       for (i = 0; i < num; i++, t++) {
-               if (!(t->lane_mask & lane_mask))
-                       continue;
+       .offsets                = &qmp_pcie_offsets_v6_20,
 
-               writel(t->val, base + t->offset);
-       }
-}
+       .tbls = {
+               .serdes                 = x1e80100_qmp_gen4x2_pcie_serdes_tbl,
+               .serdes_num             = ARRAY_SIZE(x1e80100_qmp_gen4x2_pcie_serdes_tbl),
+               .tx                     = x1e80100_qmp_gen4x2_pcie_tx_tbl,
+               .tx_num                 = ARRAY_SIZE(x1e80100_qmp_gen4x2_pcie_tx_tbl),
+               .rx                     = x1e80100_qmp_gen4x2_pcie_rx_tbl,
+               .rx_num                 = ARRAY_SIZE(x1e80100_qmp_gen4x2_pcie_rx_tbl),
+               .pcs                    = x1e80100_qmp_gen4x2_pcie_pcs_tbl,
+               .pcs_num                = ARRAY_SIZE(x1e80100_qmp_gen4x2_pcie_pcs_tbl),
+               .pcs_misc               = x1e80100_qmp_gen4x2_pcie_pcs_misc_tbl,
+               .pcs_misc_num           = ARRAY_SIZE(x1e80100_qmp_gen4x2_pcie_pcs_misc_tbl),
+               .ln_shrd                = x1e80100_qmp_gen4x2_pcie_ln_shrd_tbl,
+               .ln_shrd_num            = ARRAY_SIZE(x1e80100_qmp_gen4x2_pcie_ln_shrd_tbl),
+       },
+       .reset_list             = sdm845_pciephy_reset_l,
+       .num_resets             = ARRAY_SIZE(sdm845_pciephy_reset_l),
+       .vreg_list              = sm8550_qmp_phy_vreg_l,
+       .num_vregs              = ARRAY_SIZE(sm8550_qmp_phy_vreg_l),
+       .regs                   = pciephy_v6_regs_layout,
 
-static void qmp_pcie_configure(void __iomem *base,
-                                       const struct qmp_phy_init_tbl tbl[],
-                                       int num)
-{
-       qmp_pcie_configure_lane(base, tbl, num, 0xff);
-}
+       .pwrdn_ctrl             = SW_PWRDN | REFCLK_DRV_DSBL,
+       .phy_status             = PHYSTATUS_4_20,
+       .has_nocsr_reset        = true,
+};
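The qmp_pcie_configure_lane()/qmp_pcie_configure() pair deleted above is not lost: this series moves identical copies out of the PCIe and UFS QMP drivers into shared qmp_configure_lane()/qmp_configure() helpers, reached through the new phy-qcom-qmp-common.h include that appears in the UFS hunks below (the usb-legacy driver keeps a private copy in this diff). A runnable model of the lane filtering the helper performs, with the same skip-on-mask-mismatch logic as the removed code:

    #include <stdio.h>

    struct qmp_phy_init_tbl {
            unsigned int offset;
            unsigned int val;
            unsigned char lane_mask;
    };

    /* Model of qmp_configure_lane(): write only the entries whose
     * lane_mask intersects the mask passed by the caller. */
    static void configure_lane(const struct qmp_phy_init_tbl tbl[], int num,
                               unsigned char lane_mask)
    {
            for (int i = 0; i < num; i++) {
                    if (!(tbl[i].lane_mask & lane_mask))
                            continue;
                    printf("mask %#x: write 0x%02x -> +0x%03x\n",
                           lane_mask, tbl[i].val, tbl[i].offset);
            }
    }

    int main(void)
    {
            const struct qmp_phy_init_tbl tx[] = {
                    { 0x30, 0x1d, 0xff },       /* both lanes */
                    { 0x34, 0x03, 0x01 },       /* lane 1 only */
                    { 0x34, 0x05, 0x02 },       /* lane 2 wants another value */
            };

            configure_lane(tx, 3, 1);           /* programs entries 0 and 1 */
            configure_lane(tx, 3, 2);           /* programs entries 0 and 2 */
            return 0;
    }

qmp_configure() is simply the same walk with mask 0xff, which matches every entry, as the removed wrapper above shows.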
 
 static void qmp_pcie_init_port_b(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
 {
@@ -3220,11 +3342,11 @@ static void qmp_pcie_init_port_b(struct qmp_pcie *qmp, const struct qmp_phy_cfg_
        tx4 = qmp->port_b + offs->tx2;
        rx4 = qmp->port_b + offs->rx2;
 
-       qmp_pcie_configure_lane(tx3, tbls->tx, tbls->tx_num, 1);
-       qmp_pcie_configure_lane(rx3, tbls->rx, tbls->rx_num, 1);
+       qmp_configure_lane(tx3, tbls->tx, tbls->tx_num, 1);
+       qmp_configure_lane(rx3, tbls->rx, tbls->rx_num, 1);
 
-       qmp_pcie_configure_lane(tx4, tbls->tx, tbls->tx_num, 2);
-       qmp_pcie_configure_lane(rx4, tbls->rx, tbls->rx_num, 2);
+       qmp_configure_lane(tx4, tbls->tx, tbls->tx_num, 2);
+       qmp_configure_lane(rx4, tbls->rx, tbls->rx_num, 2);
 }
 
 static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
@@ -3242,25 +3364,25 @@ static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_c
        if (!tbls)
                return;
 
-       qmp_pcie_configure(serdes, tbls->serdes, tbls->serdes_num);
+       qmp_configure(serdes, tbls->serdes, tbls->serdes_num);
 
-       qmp_pcie_configure_lane(tx, tbls->tx, tbls->tx_num, 1);
-       qmp_pcie_configure_lane(rx, tbls->rx, tbls->rx_num, 1);
+       qmp_configure_lane(tx, tbls->tx, tbls->tx_num, 1);
+       qmp_configure_lane(rx, tbls->rx, tbls->rx_num, 1);
 
        if (cfg->lanes >= 2) {
-               qmp_pcie_configure_lane(tx2, tbls->tx, tbls->tx_num, 2);
-               qmp_pcie_configure_lane(rx2, tbls->rx, tbls->rx_num, 2);
+               qmp_configure_lane(tx2, tbls->tx, tbls->tx_num, 2);
+               qmp_configure_lane(rx2, tbls->rx, tbls->rx_num, 2);
        }
 
-       qmp_pcie_configure(pcs, tbls->pcs, tbls->pcs_num);
-       qmp_pcie_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num);
+       qmp_configure(pcs, tbls->pcs, tbls->pcs_num);
+       qmp_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num);
 
        if (cfg->lanes >= 4 && qmp->tcsr_4ln_config) {
-               qmp_pcie_configure(serdes, cfg->serdes_4ln_tbl, cfg->serdes_4ln_num);
+               qmp_configure(serdes, cfg->serdes_4ln_tbl, cfg->serdes_4ln_num);
                qmp_pcie_init_port_b(qmp, tbls);
        }
 
-       qmp_pcie_configure(ln_shrd, tbls->ln_shrd, tbls->ln_shrd_num);
+       qmp_configure(ln_shrd, tbls->ln_shrd, tbls->ln_shrd_num);
 }
 
 static int qmp_pcie_init(struct phy *phy)
@@ -3885,6 +4007,12 @@ static const struct of_device_id qmp_pcie_of_match_table[] = {
        }, {
                .compatible = "qcom,sm8650-qmp-gen4x2-pcie-phy",
                .data = &sm8650_qmp_gen4x2_pciephy_cfg,
+       }, {
+               .compatible = "qcom,x1e80100-qmp-gen3x2-pcie-phy",
+               .data = &sm8550_qmp_gen3x2_pciephy_cfg,
+       }, {
+               .compatible = "qcom,x1e80100-qmp-gen4x2-pcie-phy",
+               .data = &x1e80100_qmp_gen4x2_pciephy_cfg,
        },
        { },
 };
index 91e70002eb4777a2b0089f0f53787a8d32ef49f9..0ca79333d94261610f7274968c96362dcfb1f354 100644
@@ -7,6 +7,8 @@
 #define QCOM_PHY_QMP_PCS_PCIE_V6_H_
 
 /* Only for QMP V6 PHY - PCIe has different offsets than V5 */
+#define QPHY_PCIE_V6_PCS_PCIE_EQ_CONFIG1               0xa4
+#define QPHY_PCIE_V6_PCS_PCIE_RXEQEVAL_TIME            0xf4
 #define QPHY_PCIE_V6_PCS_PCIE_POWER_STATE_CONFIG2      0x0c
 #define QPHY_PCIE_V6_PCS_PCIE_POWER_STATE_CONFIG4      0x14
 #define QPHY_PCIE_V6_PCS_PCIE_ENDPOINT_REFCLK_DRIVE    0x20
index e3eb08776339d1d6d3bb6ea95259ceb20947bee3..dfcecf31a6060981b912646fcdf5db83906f5835 100644
@@ -12,6 +12,8 @@
 #define QPHY_PCIE_V6_20_PCS_ENDPOINT_REFCLK_DRIVE      0x01c
 #define QPHY_PCIE_V6_20_PCS_OSC_DTCT_ATCIONS           0x090
 #define QPHY_PCIE_V6_20_PCS_EQ_CONFIG1                 0x0a0
+#define QPHY_PCIE_V6_20_PCS_G3_RXEQEVAL_TIME           0x0f0
+#define QPHY_PCIE_V6_20_PCS_G4_RXEQEVAL_TIME           0x0f4
 #define QPHY_PCIE_V6_20_PCS_EQ_CONFIG5                 0x108
 #define QPHY_PCIE_V6_20_PCS_G4_PRE_GAIN                        0x15c
 #define QPHY_PCIE_V6_20_PCS_RX_MARGINING_CONFIG1       0x17c
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-sgmii.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-sgmii.h
new file mode 100644
index 0000000..4d8c962
--- /dev/null
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-sgmii.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023, Linaro Limited
+ */
+
+#ifndef QCOM_PHY_QMP_PCS_SGMII_H_
+#define QCOM_PHY_QMP_PCS_SGMII_H_
+
+#define QPHY_PCS_PHY_START                             0x000
+#define QPHY_PCS_POWER_DOWN_CONTROL                    0x004
+#define QPHY_PCS_SW_RESET                              0x008
+#define QPHY_PCS_LINE_RESET_TIME                       0x00c
+#define QPHY_PCS_TX_LARGE_AMP_DRV_LVL                  0x020
+#define QPHY_PCS_TX_SMALL_AMP_DRV_LVL                  0x028
+#define QPHY_PCS_PCS_READY_STATUS                      0x094
+#define QPHY_PCS_TX_MID_TERM_CTRL1                     0x0d8
+#define QPHY_PCS_TX_MID_TERM_CTRL2                     0x0dc
+#define QPHY_PCS_SGMII_MISC_CTRL8                      0x118
+
+#endif
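The new header only supplies offsets. A hypothetical bring-up sketch against a fake register file, reusing the SW_PWRDN/SERDES_START/PCS_START bit names and the PCS_READY poll that the sibling QMP drivers in this series use; whether the SGMII consumer follows exactly this sequence is an assumption:

    #include <stdint.h>
    #include <stdio.h>

    /* Offsets from phy-qcom-qmp-pcs-sgmii.h */
    #define QPHY_PCS_PHY_START              0x000
    #define QPHY_PCS_POWER_DOWN_CONTROL     0x004
    #define QPHY_PCS_SW_RESET               0x008
    #define QPHY_PCS_PCS_READY_STATUS       0x094

    /* Bit names as used by the other QMP drivers in this series */
    #define SW_PWRDN        (1u << 0)
    #define SERDES_START    (1u << 0)
    #define PCS_START       (1u << 1)
    #define PCS_READY       (1u << 0)

    static uint32_t regs[0x200 / 4];    /* fake PCS register file */

    static void wr(uint32_t off, uint32_t val) { regs[off / 4] = val; }
    static uint32_t rd(uint32_t off) { return regs[off / 4]; }

    int main(void)
    {
            wr(QPHY_PCS_POWER_DOWN_CONTROL, SW_PWRDN);  /* power up */
            wr(QPHY_PCS_SW_RESET, 0);                   /* release reset */
            wr(QPHY_PCS_PHY_START, SERDES_START | PCS_START);

            regs[QPHY_PCS_PCS_READY_STATUS / 4] = PCS_READY; /* hw would set this */
            printf("PCS ready: %s\n",
                   rd(QPHY_PCS_PCS_READY_STATUS) & PCS_READY ? "yes" : "no");
            return 0;
    }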
index fe6c450f612382b281ccceab453a086373a27ab0..970cc0667809465d83ba881f771c2f270c0cabac 100644
@@ -19,6 +19,7 @@
 #define QPHY_V6_PCS_UFS_BIST_FIXED_PAT_CTRL            0x060
 #define QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY           0x074
 #define QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY           0x0bc
+#define QPHY_V6_PCS_UFS_RX_HS_G5_SYNC_LENGTH_CAPABILITY        0x12c
 #define QPHY_V6_PCS_UFS_DEBUG_BUS_CLKSEL               0x158
 #define QPHY_V6_PCS_UFS_LINECFG_DISABLE                        0x17c
 #define QPHY_V6_PCS_UFS_RX_MIN_HIBERN8_TIME            0x184
@@ -28,5 +29,6 @@
 #define QPHY_V6_PCS_UFS_READY_STATUS                   0x1a8
 #define QPHY_V6_PCS_UFS_TX_MID_TERM_CTRL1              0x1f4
 #define QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1               0x1fc
+#define QPHY_V6_PCS_UFS_RX_HSG5_SYNC_WAIT_TIME         0x220
 
 #endif
index 9c3f1e4950e6ba4dffeaddb5c50e24df2f3cbe90..4d9615cc0383cee27f312cbb39771d7ae79f71bb 100644
@@ -7,6 +7,7 @@
 #define QCOM_PHY_QMP_PCS_V6_20_H_
 
 /* Only for QMP V6_20 PHY - USB/PCIe PCS registers */
+#define QPHY_V6_20_PCS_G12S1_TXDEEMPH_M6DB             0x170
 #define QPHY_V6_20_PCS_G3S2_PRE_GAIN                   0x178
 #define QPHY_V6_20_PCS_RX_SIGDET_LVL                   0x190
 #define QPHY_V6_20_PCS_COM_ELECIDLE_DLY_SEL            0x1b8
index ec7291424dd1f1bb7f706bbb5b77419f76d0bfda..328c6c0b0b09ae4ff5bf14e846772e6d0f31ce5a 100644
@@ -60,6 +60,8 @@
 #define QSERDES_V6_COM_SYSCLK_BUF_ENABLE                       0xe8
 #define QSERDES_V6_COM_PLL_IVCO                                        0xf4
 #define QSERDES_V6_COM_PLL_IVCO_MODE1                          0xf8
+#define QSERDES_V6_COM_CMN_IETRIM                              0xfc
+#define QSERDES_V6_COM_CMN_IPTRIM                              0x100
 #define QSERDES_V6_COM_SYSCLK_EN_SEL                           0x110
 #define QSERDES_V6_COM_RESETSM_CNTRL                           0x118
 #define QSERDES_V6_COM_LOCK_CMP_EN                             0x120
index 35d497fd9f9a4420e8c02ae8dd91de011c94c14a..d9a87bd95590811de5484e084d4fea1872b22bcb 100644
 
 #define QSERDES_UFS_V6_RX_UCDR_FASTLOCK_FO_GAIN_RATE2          0x08
 #define QSERDES_UFS_V6_RX_UCDR_FASTLOCK_FO_GAIN_RATE4          0x10
+#define QSERDES_UFS_V6_RX_UCDR_FASTLOCK_SO_GAIN_RATE4          0x24
 #define QSERDES_UFS_V6_RX_UCDR_SO_SATURATION                   0x28
+#define QSERDES_UFS_V6_RX_UCDR_FASTLOCK_COUNT_HIGH_RATE4       0x54
 #define QSERDES_UFS_V6_RX_UCDR_PI_CTRL1                                0x58
 #define QSERDES_UFS_V6_RX_RX_TERM_BW_CTRL0                     0xc4
 #define QSERDES_UFS_V6_RX_UCDR_FO_GAIN_RATE2                   0xd4
 #define QSERDES_UFS_V6_RX_UCDR_FO_GAIN_RATE4                   0xdc
+#define QSERDES_UFS_V6_RX_UCDR_SO_GAIN_RATE4                   0xf0
+#define QSERDES_UFS_V6_RX_UCDR_PI_CONTROLS                     0xf4
 #define QSERDES_UFS_V6_RX_VGA_CAL_MAN_VAL                      0x178
+#define QSERDES_UFS_V6_RX_EQ_OFFSET_ADAPTOR_CNTRL1             0x1bc
 #define QSERDES_UFS_V6_RX_INTERFACE_MODE                       0x1e0
+#define QSERDES_UFS_V6_RX_OFFSET_ADAPTOR_CNTRL3                        0x1c4
 #define QSERDES_UFS_V6_RX_MODE_RATE_0_1_B0                     0x208
 #define QSERDES_UFS_V6_RX_MODE_RATE_0_1_B1                     0x20c
 #define QSERDES_UFS_V6_RX_MODE_RATE_0_1_B3                     0x214
@@ -33,6 +39,8 @@
 #define QSERDES_UFS_V6_RX_MODE_RATE3_B5                                0x264
 #define QSERDES_UFS_V6_RX_MODE_RATE3_B8                                0x270
 #define QSERDES_UFS_V6_RX_MODE_RATE4_B3                                0x280
+#define QSERDES_UFS_V6_RX_MODE_RATE4_B4                                0x284
 #define QSERDES_UFS_V6_RX_MODE_RATE4_B6                                0x28c
+#define QSERDES_UFS_V6_RX_DLL0_FTUNE_CTRL                      0x2f8
 
 #endif
index 6ed5339fd2ea86dd4a69df077887ea9a0713dcdd..7bac5d5c6c34485bdb357bf3a3caa7658ba07aa0 100644
@@ -23,6 +23,8 @@
 #define QSERDES_V6_20_RX_DFE_1                                 0xac
 #define QSERDES_V6_20_RX_DFE_2                                 0xb0
 #define QSERDES_V6_20_RX_DFE_3                                 0xb4
+#define QSERDES_V6_20_RX_TX_ADPT_CTRL                          0xd4
+#define QSERDES_V6_20_VGA_CAL_CNTRL1                           0xe0
 #define QSERDES_V6_20_RX_VGA_CAL_MAN_VAL                       0xe8
 #define QSERDES_V6_20_RX_GM_CAL                                        0x10c
 #define QSERDES_V6_20_RX_EQU_ADAPTOR_CNTRL4                    0x120
index 3c2e6255e26f66d21fec72595c680e5c2cccc9c4..590432d581f97ded35047cd62695f7a72e76dc4e 100644
@@ -20,6 +20,9 @@
 #include <linux/slab.h>
 
 #include <ufs/unipro.h>
+
+#include "phy-qcom-qmp-common.h"
+
 #include "phy-qcom-qmp.h"
 #include "phy-qcom-qmp-pcs-ufs-v2.h"
 #include "phy-qcom-qmp-pcs-ufs-v3.h"
 
 #include "phy-qcom-qmp-qserdes-txrx-ufs-v6.h"
 
-/* QPHY_SW_RESET bit */
-#define SW_RESET                               BIT(0)
-/* QPHY_POWER_DOWN_CONTROL */
-#define SW_PWRDN                               BIT(0)
-/* QPHY_START_CONTROL bits */
-#define SERDES_START                           BIT(0)
-#define PCS_START                              BIT(1)
 /* QPHY_PCS_READY_STATUS bit */
 #define PCS_READY                              BIT(0)
 
 #define PHY_INIT_COMPLETE_TIMEOUT              10000
 
-struct qmp_phy_init_tbl {
-       unsigned int offset;
-       unsigned int val;
-       /*
-        * mask of lanes for which this register is written
-        * for cases when second lane needs different values
-        */
-       u8 lane_mask;
-};
-
-#define QMP_PHY_INIT_CFG(o, v)         \
-       {                               \
-               .offset = o,            \
-               .val = v,               \
-               .lane_mask = 0xff,      \
-       }
-
-#define QMP_PHY_INIT_CFG_LANE(o, v, l) \
-       {                               \
-               .offset = o,            \
-               .val = v,               \
-               .lane_mask = l,         \
-       }
+#define NUM_OVERLAY                            2
 
 /* set of registers with offsets different per-PHY */
 enum qphy_reg_layout {
@@ -754,15 +728,22 @@ static const struct qmp_phy_init_tbl sm8550_ufsphy_serdes[] = {
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_SEL_1, 0x11),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_HSCLK_HS_SWITCH_SEL_1, 0x00),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP_EN, 0x01),
-       QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x04),
-       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x0f),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_INITVAL2, 0x00),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE0, 0x41),
-       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x0a),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE0, 0x18),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_CCTRL_MODE0, 0x14),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP1_MODE0, 0x7f),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE0, 0x06),
+};
+
+static const struct qmp_phy_init_tbl sm8550_ufsphy_hs_b_serdes[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x44),
+};
+
+static const struct qmp_phy_init_tbl sm8550_ufsphy_g4_serdes[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x0a),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_DEC_START_MODE1, 0x4c),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE1, 0x0a),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_RCTRL_MODE1, 0x18),
@@ -771,19 +752,24 @@ static const struct qmp_phy_init_tbl sm8550_ufsphy_serdes[] = {
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_LOCK_CMP2_MODE1, 0x07),
 };
 
-static const struct qmp_phy_init_tbl sm8550_ufsphy_hs_b_serdes[] = {
-       QMP_PHY_INIT_CFG(QSERDES_V6_COM_VCO_TUNE_MAP, 0x44),
+static const struct qmp_phy_init_tbl sm8550_ufsphy_g5_serdes[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_PLL_IVCO, 0x1f),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_IETRIM, 0x1b),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_IPTRIM, 0x1c),
+       QMP_PHY_INIT_CFG(QSERDES_V6_COM_CP_CTRL_MODE0, 0x06),
 };
 
 static const struct qmp_phy_init_tbl sm8550_ufsphy_tx[] = {
        QMP_PHY_INIT_CFG(QSERDES_UFS_V6_TX_LANE_MODE_1, 0x05),
        QMP_PHY_INIT_CFG(QSERDES_UFS_V6_TX_RES_CODE_LANE_OFFSET_TX, 0x07),
+};
+
+static const struct qmp_phy_init_tbl sm8550_ufsphy_g4_tx[] = {
        QMP_PHY_INIT_CFG(QSERDES_UFS_V6_TX_FR_DCC_CTRL, 0x4c),
 };
 
 static const struct qmp_phy_init_tbl sm8550_ufsphy_rx[] = {
        QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_FO_GAIN_RATE2, 0x0c),
-       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_VGA_CAL_MAN_VAL, 0x0e),
 
        QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE_0_1_B0, 0xc2),
        QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE_0_1_B1, 0xc2),
@@ -799,16 +785,45 @@ static const struct qmp_phy_init_tbl sm8550_ufsphy_rx[] = {
        QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE3_B8, 0x02),
 };
 
+static const struct qmp_phy_init_tbl sm8550_ufsphy_g4_rx[] = {
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_VGA_CAL_MAN_VAL, 0x0e),
+};
+
+static const struct qmp_phy_init_tbl sm8550_ufsphy_g5_rx[] = {
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_FO_GAIN_RATE4, 0x0c),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_SO_GAIN_RATE4, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x14),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_PI_CONTROLS, 0x07),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_OFFSET_ADAPTOR_CNTRL3, 0x0e),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_FASTLOCK_COUNT_HIGH_RATE4, 0x02),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_FASTLOCK_FO_GAIN_RATE4, 0x1c),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_UCDR_FASTLOCK_SO_GAIN_RATE4, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_VGA_CAL_MAN_VAL, 0x08),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE4_B3, 0xb9),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE4_B4, 0x4f),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_MODE_RATE4_B6, 0xff),
+       QMP_PHY_INIT_CFG(QSERDES_UFS_V6_RX_DLL0_FTUNE_CTRL, 0x30),
+};
+
 static const struct qmp_phy_init_tbl sm8550_ufsphy_pcs[] = {
        QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_SIGDET_CTRL2, 0x69),
        QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_LARGE_AMP_DRV_LVL, 0x0f),
        QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_MID_TERM_CTRL1, 0x43),
-       QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x2b),
        QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_MULTI_LANE_CTRL1, 0x02),
+};
+
+static const struct qmp_phy_init_tbl sm8550_ufsphy_g4_pcs[] = {
+       QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x2b),
        QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x04),
        QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x04),
 };
 
+static const struct qmp_phy_init_tbl sm8550_ufsphy_g5_pcs[] = {
+       QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_PLL_CNTL, 0x33),
+       QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HS_G5_SYNC_LENGTH_CAPABILITY, 0x4f),
+       QMP_PHY_INIT_CFG(QPHY_V6_PCS_UFS_RX_HSG5_SYNC_WAIT_TIME, 0x9e),
+};
+
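The reshuffling above leaves sm8550_ufsphy_serdes/tx/rx/pcs holding only gear-independent programming, with the G4- and G5-specific values split into overlay tables that are layered on top. Because a later write to the same register wins, application order matters: QSERDES_V6_COM_VCO_TUNE_MAP, for instance, is set to 0x04 by the G4 serdes overlay but overwritten with 0x44 by the HS Series B table, which the reworked qmp_ufs_init_registers() (later in this diff) deliberately applies last. A tiny model of that last-write-wins layering (the 0x19c offset is assumed purely for illustration):

    #include <stdio.h>

    struct init { unsigned int off, val; };

    static unsigned int regmap[0x300];

    static void apply(const struct init *t, int n)
    {
            for (int i = 0; i < n; i++)
                    regmap[t[i].off] = t[i].val;    /* later writes win */
    }

    int main(void)
    {
            const struct init g4_serdes[] = { { 0x19c, 0x04 } };    /* VCO_TUNE_MAP */
            const struct init hs_b[]      = { { 0x19c, 0x44 } };

            apply(g4_serdes, 1);
            apply(hs_b, 1);             /* HS Series B applied last */
            printf("VCO_TUNE_MAP = 0x%02x\n", regmap[0x19c]);
            return 0;
    }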
 static const struct qmp_phy_init_tbl sm8650_ufsphy_serdes[] = {
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_SYSCLK_EN_SEL, 0xd9),
        QMP_PHY_INIT_CFG(QSERDES_V6_COM_CMN_CONFIG_1, 0x16),
@@ -889,6 +904,8 @@ struct qmp_phy_cfg_tbls {
        int rx_num;
        const struct qmp_phy_init_tbl *pcs;
        int pcs_num;
+       /* Maximum supported gear for this set of tables */
+       u32 max_gear;
 };
 
 /* struct qmp_phy_cfg - per-PHY initialization config */
@@ -896,17 +913,16 @@ struct qmp_phy_cfg {
        int lanes;
 
        const struct qmp_ufs_offsets *offsets;
+       /* Maximum supported gear for this config */
+       u32 max_supported_gear;
 
        /* Main init sequence for PHY blocks - serdes, tx, rx, pcs */
        const struct qmp_phy_cfg_tbls tbls;
        /* Additional sequence for HS Series B */
        const struct qmp_phy_cfg_tbls tbls_hs_b;
-       /* Additional sequence for HS G4 */
-       const struct qmp_phy_cfg_tbls tbls_hs_g4;
+       /* Additional sequence for different HS Gears */
+       const struct qmp_phy_cfg_tbls tbls_hs_overlay[NUM_OVERLAY];
 
-       /* clock ids to be requested */
-       const char * const *clk_list;
-       int num_clks;
        /* regulators to be requested */
        const char * const *vreg_list;
        int num_vregs;
@@ -932,6 +948,7 @@ struct qmp_ufs {
        void __iomem *rx2;
 
        struct clk_bulk_data *clks;
+       int num_clks;
        struct regulator_bulk_data *vregs;
        struct reset_control *ufs_reset;
 
@@ -964,20 +981,6 @@ static inline void qphy_clrbits(void __iomem *base, u32 offset, u32 val)
        readl(base + offset);
 }
 
-/* list of clocks required by phy */
-static const char * const msm8996_ufs_phy_clk_l[] = {
-       "ref",
-};
-
-/* the primary usb3 phy on sm8250 doesn't have a ref clock */
-static const char * const sm8450_ufs_phy_clk_l[] = {
-       "qref", "ref", "ref_aux",
-};
-
-static const char * const sdm845_ufs_phy_clk_l[] = {
-       "ref", "ref_aux",
-};
-
 /* list of regulators */
 static const char * const qmp_phy_vreg_l[] = {
        "vdda-phy", "vdda-pll",
@@ -1005,6 +1008,7 @@ static const struct qmp_phy_cfg msm8996_ufsphy_cfg = {
        .lanes                  = 1,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G3,
 
        .tbls = {
                .serdes         = msm8996_ufsphy_serdes,
@@ -1015,9 +1019,6 @@ static const struct qmp_phy_cfg msm8996_ufsphy_cfg = {
                .rx_num         = ARRAY_SIZE(msm8996_ufsphy_rx),
        },
 
-       .clk_list               = msm8996_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(msm8996_ufs_phy_clk_l),
-
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
 
@@ -1030,6 +1031,7 @@ static const struct qmp_phy_cfg sa8775p_ufsphy_cfg = {
        .lanes                  = 2,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G4,
 
        .tbls = {
                .serdes         = sm8350_ufsphy_serdes,
@@ -1045,16 +1047,15 @@ static const struct qmp_phy_cfg sa8775p_ufsphy_cfg = {
                .serdes         = sm8350_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sm8350_ufsphy_hs_b_serdes),
        },
-       .tbls_hs_g4 = {
+       .tbls_hs_overlay[0] = {
                .tx             = sm8350_ufsphy_g4_tx,
                .tx_num         = ARRAY_SIZE(sm8350_ufsphy_g4_tx),
                .rx             = sm8350_ufsphy_g4_rx,
                .rx_num         = ARRAY_SIZE(sm8350_ufsphy_g4_rx),
                .pcs            = sm8350_ufsphy_g4_pcs,
                .pcs_num        = ARRAY_SIZE(sm8350_ufsphy_g4_pcs),
+               .max_gear       = UFS_HS_G4,
        },
-       .clk_list               = sm8450_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sm8450_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v5_regs_layout,
@@ -1064,6 +1065,7 @@ static const struct qmp_phy_cfg sc7280_ufsphy_cfg = {
        .lanes                  = 2,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G4,
 
        .tbls = {
                .serdes         = sm8150_ufsphy_serdes,
@@ -1079,16 +1081,15 @@ static const struct qmp_phy_cfg sc7280_ufsphy_cfg = {
                .serdes         = sm8150_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sm8150_ufsphy_hs_b_serdes),
        },
-       .tbls_hs_g4 = {
+       .tbls_hs_overlay[0] = {
                .tx             = sm8250_ufsphy_hs_g4_tx,
                .tx_num         = ARRAY_SIZE(sm8250_ufsphy_hs_g4_tx),
                .rx             = sc7280_ufsphy_hs_g4_rx,
                .rx_num         = ARRAY_SIZE(sc7280_ufsphy_hs_g4_rx),
                .pcs            = sm8150_ufsphy_hs_g4_pcs,
                .pcs_num        = ARRAY_SIZE(sm8150_ufsphy_hs_g4_pcs),
+               .max_gear       = UFS_HS_G4,
        },
-       .clk_list               = sm8450_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sm8450_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v4_regs_layout,
@@ -1098,6 +1099,7 @@ static const struct qmp_phy_cfg sc8280xp_ufsphy_cfg = {
        .lanes                  = 2,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G4,
 
        .tbls = {
                .serdes         = sm8350_ufsphy_serdes,
@@ -1113,16 +1115,15 @@ static const struct qmp_phy_cfg sc8280xp_ufsphy_cfg = {
                .serdes         = sm8350_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sm8350_ufsphy_hs_b_serdes),
        },
-       .tbls_hs_g4 = {
+       .tbls_hs_overlay[0] = {
                .tx             = sm8350_ufsphy_g4_tx,
                .tx_num         = ARRAY_SIZE(sm8350_ufsphy_g4_tx),
                .rx             = sm8350_ufsphy_g4_rx,
                .rx_num         = ARRAY_SIZE(sm8350_ufsphy_g4_rx),
                .pcs            = sm8350_ufsphy_g4_pcs,
                .pcs_num        = ARRAY_SIZE(sm8350_ufsphy_g4_pcs),
+               .max_gear       = UFS_HS_G4,
        },
-       .clk_list               = sdm845_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v5_regs_layout,
@@ -1132,6 +1133,7 @@ static const struct qmp_phy_cfg sdm845_ufsphy_cfg = {
        .lanes                  = 2,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G3,
 
        .tbls = {
                .serdes         = sdm845_ufsphy_serdes,
@@ -1147,8 +1149,6 @@ static const struct qmp_phy_cfg sdm845_ufsphy_cfg = {
                .serdes         = sdm845_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sdm845_ufsphy_hs_b_serdes),
        },
-       .clk_list               = sdm845_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v3_regs_layout,
@@ -1160,6 +1160,7 @@ static const struct qmp_phy_cfg sm6115_ufsphy_cfg = {
        .lanes                  = 1,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G3,
 
        .tbls = {
                .serdes         = sm6115_ufsphy_serdes,
@@ -1175,8 +1176,6 @@ static const struct qmp_phy_cfg sm6115_ufsphy_cfg = {
                .serdes         = sm6115_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sm6115_ufsphy_hs_b_serdes),
        },
-       .clk_list               = sdm845_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v2_regs_layout,
@@ -1188,6 +1187,7 @@ static const struct qmp_phy_cfg sm7150_ufsphy_cfg = {
        .lanes                  = 1,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G3,
 
        .tbls = {
                .serdes         = sdm845_ufsphy_serdes,
@@ -1203,8 +1203,6 @@ static const struct qmp_phy_cfg sm7150_ufsphy_cfg = {
                .serdes         = sdm845_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sdm845_ufsphy_hs_b_serdes),
        },
-       .clk_list               = sdm845_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v3_regs_layout,
@@ -1216,6 +1214,7 @@ static const struct qmp_phy_cfg sm8150_ufsphy_cfg = {
        .lanes                  = 2,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G4,
 
        .tbls = {
                .serdes         = sm8150_ufsphy_serdes,
@@ -1231,16 +1230,15 @@ static const struct qmp_phy_cfg sm8150_ufsphy_cfg = {
                .serdes         = sm8150_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sm8150_ufsphy_hs_b_serdes),
        },
-       .tbls_hs_g4 = {
+       .tbls_hs_overlay[0] = {
                .tx             = sm8150_ufsphy_hs_g4_tx,
                .tx_num         = ARRAY_SIZE(sm8150_ufsphy_hs_g4_tx),
                .rx             = sm8150_ufsphy_hs_g4_rx,
                .rx_num         = ARRAY_SIZE(sm8150_ufsphy_hs_g4_rx),
                .pcs            = sm8150_ufsphy_hs_g4_pcs,
                .pcs_num        = ARRAY_SIZE(sm8150_ufsphy_hs_g4_pcs),
+               .max_gear       = UFS_HS_G4,
        },
-       .clk_list               = sdm845_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v4_regs_layout,
@@ -1250,6 +1248,7 @@ static const struct qmp_phy_cfg sm8250_ufsphy_cfg = {
        .lanes                  = 2,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G4,
 
        .tbls = {
                .serdes         = sm8150_ufsphy_serdes,
@@ -1265,16 +1264,15 @@ static const struct qmp_phy_cfg sm8250_ufsphy_cfg = {
                .serdes         = sm8150_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sm8150_ufsphy_hs_b_serdes),
        },
-       .tbls_hs_g4 = {
+       .tbls_hs_overlay[0] = {
                .tx             = sm8250_ufsphy_hs_g4_tx,
                .tx_num         = ARRAY_SIZE(sm8250_ufsphy_hs_g4_tx),
                .rx             = sm8250_ufsphy_hs_g4_rx,
                .rx_num         = ARRAY_SIZE(sm8250_ufsphy_hs_g4_rx),
                .pcs            = sm8150_ufsphy_hs_g4_pcs,
                .pcs_num        = ARRAY_SIZE(sm8150_ufsphy_hs_g4_pcs),
+               .max_gear       = UFS_HS_G4,
        },
-       .clk_list               = sdm845_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v4_regs_layout,
@@ -1284,6 +1282,7 @@ static const struct qmp_phy_cfg sm8350_ufsphy_cfg = {
        .lanes                  = 2,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G4,
 
        .tbls = {
                .serdes         = sm8350_ufsphy_serdes,
@@ -1299,16 +1298,15 @@ static const struct qmp_phy_cfg sm8350_ufsphy_cfg = {
                .serdes         = sm8350_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sm8350_ufsphy_hs_b_serdes),
        },
-       .tbls_hs_g4 = {
+       .tbls_hs_overlay[0] = {
                .tx             = sm8350_ufsphy_g4_tx,
                .tx_num         = ARRAY_SIZE(sm8350_ufsphy_g4_tx),
                .rx             = sm8350_ufsphy_g4_rx,
                .rx_num         = ARRAY_SIZE(sm8350_ufsphy_g4_rx),
                .pcs            = sm8350_ufsphy_g4_pcs,
                .pcs_num        = ARRAY_SIZE(sm8350_ufsphy_g4_pcs),
+               .max_gear       = UFS_HS_G4,
        },
-       .clk_list               = sdm845_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v5_regs_layout,
@@ -1318,6 +1316,7 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = {
        .lanes                  = 2,
 
        .offsets                = &qmp_ufs_offsets,
+       .max_supported_gear     = UFS_HS_G4,
 
        .tbls = {
                .serdes         = sm8350_ufsphy_serdes,
@@ -1333,16 +1332,15 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = {
                .serdes         = sm8350_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sm8350_ufsphy_hs_b_serdes),
        },
-       .tbls_hs_g4 = {
+       .tbls_hs_overlay[0] = {
                .tx             = sm8350_ufsphy_g4_tx,
                .tx_num         = ARRAY_SIZE(sm8350_ufsphy_g4_tx),
                .rx             = sm8350_ufsphy_g4_rx,
                .rx_num         = ARRAY_SIZE(sm8350_ufsphy_g4_rx),
                .pcs            = sm8350_ufsphy_g4_pcs,
                .pcs_num        = ARRAY_SIZE(sm8350_ufsphy_g4_pcs),
+               .max_gear       = UFS_HS_G4,
        },
-       .clk_list               = sm8450_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sm8450_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v5_regs_layout,
@@ -1352,6 +1350,7 @@ static const struct qmp_phy_cfg sm8550_ufsphy_cfg = {
        .lanes                  = 2,
 
        .offsets                = &qmp_ufs_offsets_v6,
+       .max_supported_gear     = UFS_HS_G5,
 
        .tbls = {
                .serdes         = sm8550_ufsphy_serdes,
@@ -1367,8 +1366,26 @@ static const struct qmp_phy_cfg sm8550_ufsphy_cfg = {
                .serdes         = sm8550_ufsphy_hs_b_serdes,
                .serdes_num     = ARRAY_SIZE(sm8550_ufsphy_hs_b_serdes),
        },
-       .clk_list               = sdm845_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
+       .tbls_hs_overlay[0] = {
+               .serdes         = sm8550_ufsphy_g4_serdes,
+               .serdes_num     = ARRAY_SIZE(sm8550_ufsphy_g4_serdes),
+               .tx             = sm8550_ufsphy_g4_tx,
+               .tx_num         = ARRAY_SIZE(sm8550_ufsphy_g4_tx),
+               .rx             = sm8550_ufsphy_g4_rx,
+               .rx_num         = ARRAY_SIZE(sm8550_ufsphy_g4_rx),
+               .pcs            = sm8550_ufsphy_g4_pcs,
+               .pcs_num        = ARRAY_SIZE(sm8550_ufsphy_g4_pcs),
+               .max_gear       = UFS_HS_G4,
+       },
+       .tbls_hs_overlay[1] = {
+               .serdes         = sm8550_ufsphy_g5_serdes,
+               .serdes_num     = ARRAY_SIZE(sm8550_ufsphy_g5_serdes),
+               .rx             = sm8550_ufsphy_g5_rx,
+               .rx_num         = ARRAY_SIZE(sm8550_ufsphy_g5_rx),
+               .pcs            = sm8550_ufsphy_g5_pcs,
+               .pcs_num        = ARRAY_SIZE(sm8550_ufsphy_g5_pcs),
+               .max_gear       = UFS_HS_G5,
+       },
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v6_regs_layout,
@@ -1378,6 +1395,7 @@ static const struct qmp_phy_cfg sm8650_ufsphy_cfg = {
        .lanes                  = 2,
 
        .offsets                = &qmp_ufs_offsets_v6,
+       .max_supported_gear     = UFS_HS_G5,
 
        .tbls = {
                .serdes         = sm8650_ufsphy_serdes,
@@ -1389,44 +1407,16 @@ static const struct qmp_phy_cfg sm8650_ufsphy_cfg = {
                .pcs            = sm8650_ufsphy_pcs,
                .pcs_num        = ARRAY_SIZE(sm8650_ufsphy_pcs),
        },
-       .clk_list               = sdm845_ufs_phy_clk_l,
-       .num_clks               = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
        .vreg_list              = qmp_phy_vreg_l,
        .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
        .regs                   = ufsphy_v6_regs_layout,
 };
 
-static void qmp_ufs_configure_lane(void __iomem *base,
-                                       const struct qmp_phy_init_tbl tbl[],
-                                       int num,
-                                       u8 lane_mask)
-{
-       int i;
-       const struct qmp_phy_init_tbl *t = tbl;
-
-       if (!t)
-               return;
-
-       for (i = 0; i < num; i++, t++) {
-               if (!(t->lane_mask & lane_mask))
-                       continue;
-
-               writel(t->val, base + t->offset);
-       }
-}
-
-static void qmp_ufs_configure(void __iomem *base,
-                                  const struct qmp_phy_init_tbl tbl[],
-                                  int num)
-{
-       qmp_ufs_configure_lane(base, tbl, num, 0xff);
-}
-
 static void qmp_ufs_serdes_init(struct qmp_ufs *qmp, const struct qmp_phy_cfg_tbls *tbls)
 {
        void __iomem *serdes = qmp->serdes;
 
-       qmp_ufs_configure(serdes, tbls->serdes, tbls->serdes_num);
+       qmp_configure(serdes, tbls->serdes, tbls->serdes_num);
 }
 
 static void qmp_ufs_lanes_init(struct qmp_ufs *qmp, const struct qmp_phy_cfg_tbls *tbls)
@@ -1435,12 +1425,12 @@ static void qmp_ufs_lanes_init(struct qmp_ufs *qmp, const struct qmp_phy_cfg_tbl
        void __iomem *tx = qmp->tx;
        void __iomem *rx = qmp->rx;
 
-       qmp_ufs_configure_lane(tx, tbls->tx, tbls->tx_num, 1);
-       qmp_ufs_configure_lane(rx, tbls->rx, tbls->rx_num, 1);
+       qmp_configure_lane(tx, tbls->tx, tbls->tx_num, 1);
+       qmp_configure_lane(rx, tbls->rx, tbls->rx_num, 1);
 
        if (cfg->lanes >= 2) {
-               qmp_ufs_configure_lane(qmp->tx2, tbls->tx, tbls->tx_num, 2);
-               qmp_ufs_configure_lane(qmp->rx2, tbls->rx, tbls->rx_num, 2);
+               qmp_configure_lane(qmp->tx2, tbls->tx, tbls->tx_num, 2);
+               qmp_configure_lane(qmp->rx2, tbls->rx, tbls->rx_num, 2);
        }
 }
 
@@ -1448,20 +1438,52 @@ static void qmp_ufs_pcs_init(struct qmp_ufs *qmp, const struct qmp_phy_cfg_tbls
 {
        void __iomem *pcs = qmp->pcs;
 
-       qmp_ufs_configure(pcs, tbls->pcs, tbls->pcs_num);
+       qmp_configure(pcs, tbls->pcs, tbls->pcs_num);
+}
+
+static int qmp_ufs_get_gear_overlay(struct qmp_ufs *qmp, const struct qmp_phy_cfg *cfg)
+{
+       u32 max_gear, floor_max_gear = cfg->max_supported_gear;
+       int idx, ret = -EINVAL;
+
+       for (idx = NUM_OVERLAY - 1; idx >= 0; idx--) {
+               max_gear = cfg->tbls_hs_overlay[idx].max_gear;
+
+               /* Skip if the table is not available */
+               if (max_gear == 0)
+                       continue;
+
+               /* Direct matching, bail */
+               if (qmp->submode == max_gear)
+                       return idx;
+
+               /* No direct match: the overlay with the lowest max_gear is the best fallback */
+               if (max_gear < floor_max_gear) {
+                       ret = idx;
+                       floor_max_gear = max_gear;
+               }
+       }
+
+       return ret;
 }
 
 static void qmp_ufs_init_registers(struct qmp_ufs *qmp, const struct qmp_phy_cfg *cfg)
 {
+       int i;
+
        qmp_ufs_serdes_init(qmp, &cfg->tbls);
-       if (qmp->mode == PHY_MODE_UFS_HS_B)
-               qmp_ufs_serdes_init(qmp, &cfg->tbls_hs_b);
        qmp_ufs_lanes_init(qmp, &cfg->tbls);
-       if (qmp->submode == UFS_HS_G4)
-               qmp_ufs_lanes_init(qmp, &cfg->tbls_hs_g4);
        qmp_ufs_pcs_init(qmp, &cfg->tbls);
-       if (qmp->submode == UFS_HS_G4)
-               qmp_ufs_pcs_init(qmp, &cfg->tbls_hs_g4);
+
+       i = qmp_ufs_get_gear_overlay(qmp, cfg);
+       if (i >= 0) {
+               qmp_ufs_serdes_init(qmp, &cfg->tbls_hs_overlay[i]);
+               qmp_ufs_lanes_init(qmp, &cfg->tbls_hs_overlay[i]);
+               qmp_ufs_pcs_init(qmp, &cfg->tbls_hs_overlay[i]);
+       }
+
+       if (qmp->mode == PHY_MODE_UFS_HS_B)
+               qmp_ufs_serdes_init(qmp, &cfg->tbls_hs_b);
 }
 
 static int qmp_ufs_com_init(struct qmp_ufs *qmp)
@@ -1476,7 +1498,7 @@ static int qmp_ufs_com_init(struct qmp_ufs *qmp)
                return ret;
        }
 
-       ret = clk_bulk_prepare_enable(cfg->num_clks, qmp->clks);
+       ret = clk_bulk_prepare_enable(qmp->num_clks, qmp->clks);
        if (ret)
                goto err_disable_regulators;
 
@@ -1496,7 +1518,7 @@ static int qmp_ufs_com_exit(struct qmp_ufs *qmp)
 
        reset_control_assert(qmp->ufs_reset);
 
-       clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks);
+       clk_bulk_disable_unprepare(qmp->num_clks, qmp->clks);
 
        regulator_bulk_disable(cfg->num_vregs, qmp->vregs);
 
@@ -1633,6 +1655,12 @@ static int qmp_ufs_disable(struct phy *phy)
 static int qmp_ufs_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 {
        struct qmp_ufs *qmp = phy_get_drvdata(phy);
+       const struct qmp_phy_cfg *cfg = qmp->cfg;
+
+       if (submode > cfg->max_supported_gear || submode == 0) {
+               dev_err(qmp->dev, "Invalid PHY submode %d\n", submode);
+               return -EINVAL;
+       }
 
        qmp->mode = mode;
        qmp->submode = submode;
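With this guard, a gear request above the hardware's ceiling now fails in set_mode instead of silently programming an incomplete table set (the UFS_HS_G* values from ufs/unipro.h are small integers starting at 1 for G1, so the submode == 0 case catches an unset gear). A minimal model of the check:

    #include <stdio.h>

    /* Models the new guard in qmp_ufs_set_mode(); -22 stands in for -EINVAL */
    static int set_mode(int submode, int max_supported_gear)
    {
            if (submode > max_supported_gear || submode == 0)
                    return -22;
            return 0;
    }

    int main(void)
    {
            printf("G4 on a G5 PHY: %d\n", set_mode(4, 5));     /* 0: accepted */
            printf("G5 on a G4 PHY: %d\n", set_mode(5, 4));     /* -22: rejected */
            return 0;
    }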
@@ -1666,19 +1694,13 @@ static int qmp_ufs_vreg_init(struct qmp_ufs *qmp)
 
 static int qmp_ufs_clk_init(struct qmp_ufs *qmp)
 {
-       const struct qmp_phy_cfg *cfg = qmp->cfg;
        struct device *dev = qmp->dev;
-       int num = cfg->num_clks;
-       int i;
 
-       qmp->clks = devm_kcalloc(dev, num, sizeof(*qmp->clks), GFP_KERNEL);
-       if (!qmp->clks)
-               return -ENOMEM;
-
-       for (i = 0; i < num; i++)
-               qmp->clks[i].id = cfg->clk_list[i];
+       qmp->num_clks = devm_clk_bulk_get_all(dev, &qmp->clks);
+       if (qmp->num_clks < 0)
+               return qmp->num_clks;
 
-       return devm_clk_bulk_get(dev, num, qmp->clks);
+       return 0;
 }
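devm_clk_bulk_get_all() is what lets every per-SoC clk_list table be deleted from this file: it looks up all clocks attached to the device's DT node, allocates the clk_bulk_data array itself, and returns the count (or a negative errno), which is why qmp_ufs grows a num_clks field and the enable/disable paths above switch from cfg->num_clks to qmp->num_clks. A short sketch of the resulting consumer pattern (real kernel APIs; the function name is illustrative):

    #include <linux/clk.h>

    static int example_clk_init(struct device *dev,
                                struct clk_bulk_data **clks, int *num_clks)
    {
            /* Fetches every clock in the DT node; allocates *clks */
            *num_clks = devm_clk_bulk_get_all(dev, clks);
            if (*num_clks < 0)
                    return *num_clks;

            /* Later, on the init path: */
            return clk_bulk_prepare_enable(*num_clks, *clks);
    }

One consequence worth noting: the driver no longer validates clock names itself, so it relies on the DT bindings to supply the right set.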
 
 static void qmp_ufs_clk_release_provider(void *res)
@@ -1880,6 +1902,9 @@ static const struct of_device_id qmp_ufs_of_match_table[] = {
        }, {
                .compatible = "qcom,sa8775p-qmp-ufs-phy",
                .data = &sa8775p_ufsphy_cfg,
+       }, {
+               .compatible = "qcom,sc7180-qmp-ufs-phy",
+               .data = &sm7150_ufsphy_cfg,
        }, {
                .compatible = "qcom,sc7280-qmp-ufs-phy",
                .data = &sc7280_ufsphy_cfg,
index cf466f6df94d7a62ab48c90beeaee4295bad677e..6d0ba39c19431e103f0c92d24b79615bdc7d1828 100644
 #include "phy-qcom-qmp-pcs-usb-v4.h"
 #include "phy-qcom-qmp-pcs-usb-v5.h"
 
-/* QPHY_SW_RESET bit */
-#define SW_RESET                               BIT(0)
-/* QPHY_POWER_DOWN_CONTROL */
-#define SW_PWRDN                               BIT(0)
-/* QPHY_START_CONTROL bits */
-#define SERDES_START                           BIT(0)
-#define PCS_START                              BIT(1)
-/* QPHY_PCS_STATUS bit */
-#define PHYSTATUS                              BIT(6)
+#include "phy-qcom-qmp-dp-com-v3.h"
 
 /* QPHY_V3_DP_COM_RESET_OVRD_CTRL register bits */
 /* DP PHY soft reset */
 #define USB3_MODE                              BIT(0) /* enables USB3 mode */
 #define DP_MODE                                        BIT(1) /* enables DP mode */
 
-/* QPHY_PCS_AUTONOMOUS_MODE_CTRL register bits */
-#define ARCVR_DTCT_EN                          BIT(0)
-#define ALFPS_DTCT_EN                          BIT(1)
-#define ARCVR_DTCT_EVENT_SEL                   BIT(4)
-
-/* QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR register bits */
-#define IRQ_CLEAR                              BIT(0)
-
-/* QPHY_V3_PCS_MISC_CLAMP_ENABLE register bits */
-#define CLAMP_EN                               BIT(0) /* enables i/o clamp_n */
-
 #define PHY_INIT_COMPLETE_TIMEOUT              10000
 
 struct qmp_phy_init_tbl {
@@ -507,8 +488,6 @@ struct qmp_usb_legacy_offsets {
 
 /* struct qmp_phy_cfg - per-PHY initialization config */
 struct qmp_phy_cfg {
-       int lanes;
-
        const struct qmp_usb_legacy_offsets *offsets;
 
        /* Init sequence for PHY blocks - serdes, tx, rx, pcs */
@@ -621,8 +600,6 @@ static const char * const qmp_phy_vreg_l[] = {
 };
 
 static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = {
-       .lanes                  = 2,
-
        .serdes_tbl             = qmp_v3_usb3_serdes_tbl,
        .serdes_tbl_num         = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl),
        .tx_tbl                 = qmp_v3_usb3_tx_tbl,
@@ -641,8 +618,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = {
 };
 
 static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
-       .lanes                  = 2,
-
        .serdes_tbl             = qmp_v3_usb3_serdes_tbl,
        .serdes_tbl_num         = ARRAY_SIZE(qmp_v3_usb3_serdes_tbl),
        .tx_tbl                 = qmp_v3_usb3_tx_tbl,
@@ -661,8 +636,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 };
 
 static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
-       .lanes                  = 2,
-
        .serdes_tbl             = sm8150_usb3_serdes_tbl,
        .serdes_tbl_num         = ARRAY_SIZE(sm8150_usb3_serdes_tbl),
        .tx_tbl                 = sm8150_usb3_tx_tbl,
@@ -684,8 +657,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 };
 
 static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
-       .lanes                  = 2,
-
        .serdes_tbl             = sm8150_usb3_serdes_tbl,
        .serdes_tbl_num         = ARRAY_SIZE(sm8150_usb3_serdes_tbl),
        .tx_tbl                 = sm8250_usb3_tx_tbl,
@@ -707,8 +678,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
 };
 
 static const struct qmp_phy_cfg sm8350_usb3phy_cfg = {
-       .lanes                  = 2,
-
        .serdes_tbl             = sm8150_usb3_serdes_tbl,
        .serdes_tbl_num         = ARRAY_SIZE(sm8150_usb3_serdes_tbl),
        .tx_tbl                 = sm8350_usb3_tx_tbl,
@@ -874,10 +843,8 @@ static int qmp_usb_legacy_power_on(struct phy *phy)
        qmp_usb_legacy_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
        qmp_usb_legacy_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
 
-       if (cfg->lanes >= 2) {
-               qmp_usb_legacy_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
-               qmp_usb_legacy_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
-       }
+       qmp_usb_legacy_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
+       qmp_usb_legacy_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
 
        qmp_usb_legacy_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
@@ -1180,27 +1147,11 @@ static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np)
        return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
 }
 
-static void __iomem *qmp_usb_legacy_iomap(struct device *dev, struct device_node *np,
-                                       int index, bool exclusive)
-{
-       struct resource res;
-
-       if (!exclusive) {
-               if (of_address_to_resource(np, index, &res))
-                       return IOMEM_ERR_PTR(-EINVAL);
-
-               return devm_ioremap(dev, res.start, resource_size(&res));
-       }
-
-       return devm_of_iomap(dev, np, index, NULL);
-}
-
 static int qmp_usb_legacy_parse_dt_legacy(struct qmp_usb *qmp, struct device_node *np)
 {
        struct platform_device *pdev = to_platform_device(qmp->dev);
        const struct qmp_phy_cfg *cfg = qmp->cfg;
        struct device *dev = qmp->dev;
-       bool exclusive = true;
 
        qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(qmp->serdes))
@@ -1224,27 +1175,22 @@ static int qmp_usb_legacy_parse_dt_legacy(struct qmp_usb *qmp, struct device_nod
        if (IS_ERR(qmp->rx))
                return PTR_ERR(qmp->rx);
 
-       qmp->pcs = qmp_usb_legacy_iomap(dev, np, 2, exclusive);
+       qmp->pcs = devm_of_iomap(dev, np, 2, NULL);
        if (IS_ERR(qmp->pcs))
                return PTR_ERR(qmp->pcs);
 
        if (cfg->pcs_usb_offset)
                qmp->pcs_usb = qmp->pcs + cfg->pcs_usb_offset;
 
-       if (cfg->lanes >= 2) {
-               qmp->tx2 = devm_of_iomap(dev, np, 3, NULL);
-               if (IS_ERR(qmp->tx2))
-                       return PTR_ERR(qmp->tx2);
-
-               qmp->rx2 = devm_of_iomap(dev, np, 4, NULL);
-               if (IS_ERR(qmp->rx2))
-                       return PTR_ERR(qmp->rx2);
+       qmp->tx2 = devm_of_iomap(dev, np, 3, NULL);
+       if (IS_ERR(qmp->tx2))
+               return PTR_ERR(qmp->tx2);
 
-               qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
-       } else {
-               qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
-       }
+       qmp->rx2 = devm_of_iomap(dev, np, 4, NULL);
+       if (IS_ERR(qmp->rx2))
+               return PTR_ERR(qmp->rx2);
 
+       qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
        if (IS_ERR(qmp->pcs_misc)) {
                dev_vdbg(dev, "PHY pcs_misc-reg not used\n");
                qmp->pcs_misc = NULL;
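The removed qmp_usb_legacy_iomap() wrapper only behaved differently when exclusive was false: devm_of_iomap() claims the register range with an exclusive resource request, while the devm_ioremap() fallback mapped it without claiming it, presumably to tolerate PHYs whose register ranges overlap a neighbouring device. Since this driver always passed exclusive = true, the call collapses to a plain devm_of_iomap(). A sketch of the distinction (res and base are hypothetical locals):

    /* exclusive: request the region, then map it; fails if already claimed */
    base = devm_of_iomap(dev, np, 2, NULL);

    /* non-exclusive (the removed fallback): map without claiming the range */
    if (of_address_to_resource(np, 2, &res))
            return IOMEM_ERR_PTR(-EINVAL);
    base = devm_ioremap(dev, res.start, resource_size(&res));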
index 5c003988c35d38cead7cc6b3e1e2af04a07bdb28..85253936fac352a5189d7b5fa89535ba264b5686 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/reset.h>
 #include <linux/slab.h>
 
+#include "phy-qcom-qmp-common.h"
+
 #include "phy-qcom-qmp.h"
 #include "phy-qcom-qmp-pcs-misc-v3.h"
 #include "phy-qcom-qmp-pcs-misc-v4.h"
 #include "phy-qcom-qmp-pcs-usb-v6.h"
 #include "phy-qcom-qmp-pcs-usb-v7.h"
 
-/* QPHY_SW_RESET bit */
-#define SW_RESET                               BIT(0)
-/* QPHY_POWER_DOWN_CONTROL */
-#define SW_PWRDN                               BIT(0)
-/* QPHY_START_CONTROL bits */
-#define SERDES_START                           BIT(0)
-#define PCS_START                              BIT(1)
-/* QPHY_PCS_STATUS bit */
-#define PHYSTATUS                              BIT(6)
-
-/* QPHY_V3_DP_COM_RESET_OVRD_CTRL register bits */
-/* DP PHY soft reset */
-#define SW_DPPHY_RESET                         BIT(0)
-/* mux to select DP PHY reset control, 0:HW control, 1: software reset */
-#define SW_DPPHY_RESET_MUX                     BIT(1)
-/* USB3 PHY soft reset */
-#define SW_USB3PHY_RESET                       BIT(2)
-/* mux to select USB3 PHY reset control, 0:HW control, 1: software reset */
-#define SW_USB3PHY_RESET_MUX                   BIT(3)
-
-/* QPHY_V3_DP_COM_PHY_MODE_CTRL register bits */
-#define USB3_MODE                              BIT(0) /* enables USB3 mode */
-#define DP_MODE                                        BIT(1) /* enables DP mode */
-
-/* QPHY_PCS_AUTONOMOUS_MODE_CTRL register bits */
-#define ARCVR_DTCT_EN                          BIT(0)
-#define ALFPS_DTCT_EN                          BIT(1)
-#define ARCVR_DTCT_EVENT_SEL                   BIT(4)
-
-/* QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR register bits */
-#define IRQ_CLEAR                              BIT(0)
-
-/* QPHY_V3_PCS_MISC_CLAMP_ENABLE register bits */
-#define CLAMP_EN                               BIT(0) /* enables i/o clamp_n */
-
 #define PHY_INIT_COMPLETE_TIMEOUT              10000
 
-struct qmp_phy_init_tbl {
-       unsigned int offset;
-       unsigned int val;
-       /*
-        * mask of lanes for which this register is written
-        * for cases when second lane needs different values
-        */
-       u8 lane_mask;
-};
-
-#define QMP_PHY_INIT_CFG(o, v)         \
-       {                               \
-               .offset = o,            \
-               .val = v,               \
-               .lane_mask = 0xff,      \
-       }
-
-#define QMP_PHY_INIT_CFG_LANE(o, v, l) \
-       {                               \
-               .offset = o,            \
-               .val = v,               \
-               .lane_mask = l,         \
-       }
-
 /* set of registers with offsets different per-PHY */
 enum qphy_reg_layout {
        /* PCS registers */
@@ -121,15 +64,6 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
        [QPHY_PCS_MISC_CLAMP_ENABLE]    = QPHY_V3_PCS_MISC_CLAMP_ENABLE,
 };
 
-static const unsigned int qmp_v3_usb3phy_regs_layout_qcm2290[QPHY_LAYOUT_SIZE] = {
-       [QPHY_SW_RESET]                 = QPHY_V3_PCS_SW_RESET,
-       [QPHY_START_CTRL]               = QPHY_V3_PCS_START_CONTROL,
-       [QPHY_PCS_STATUS]               = QPHY_V3_PCS_PCS_STATUS,
-       [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = QPHY_V3_PCS_AUTONOMOUS_MODE_CTRL,
-       [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = QPHY_V3_PCS_LFPS_RXTERM_IRQ_CLEAR,
-       [QPHY_PCS_POWER_DOWN_CONTROL]   = QPHY_V3_PCS_POWER_DOWN_CONTROL,
-};
-
 static const unsigned int qmp_v4_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
        [QPHY_SW_RESET]                 = QPHY_V4_PCS_SW_RESET,
        [QPHY_START_CTRL]               = QPHY_V4_PCS_START_CONTROL,
@@ -514,115 +448,6 @@ static const struct qmp_phy_init_tbl qmp_v3_usb3_uniphy_pcs_tbl[] = {
        QMP_PHY_INIT_CFG(QPHY_V3_PCS_REFGEN_REQ_CONFIG2, 0x60),
 };
 
-static const struct qmp_phy_init_tbl msm8998_usb3_serdes_tbl[] = {
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CLK_SELECT, 0x30),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, 0x04),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYSCLK_EN_SEL, 0x14),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYS_CLK_CTRL, 0x06),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_RESETSM_CNTRL2, 0x08),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CMN_CONFIG, 0x06),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SVS_MODE_CLK_SEL, 0x01),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_HSCLK_SEL, 0x80),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_DEC_START_MODE0, 0x82),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START1_MODE0, 0xab),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START2_MODE0, 0xea),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START3_MODE0, 0x02),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CP_CTRL_MODE0, 0x06),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_RCTRL_MODE0, 0x16),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_CCTRL_MODE0, 0x36),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_GAIN1_MODE0, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_GAIN0_MODE0, 0x3f),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE2_MODE0, 0x01),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE1_MODE0, 0xc9),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CORECLK_DIV_MODE0, 0x0a),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP3_MODE0, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP2_MODE0, 0x34),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP1_MODE0, 0x15),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP_EN, 0x04),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CORE_CLK_EN, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP_CFG, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_MAP, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_BG_TIMER, 0x0a),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_IVCO, 0x07),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_INITVAL, 0x80),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CMN_MODE, 0x01),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_EN_CENTER, 0x01),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_PER1, 0x31),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_PER2, 0x01),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_ADJ_PER1, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_ADJ_PER2, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_STEP_SIZE1, 0x85),
-       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_STEP_SIZE2, 0x07),
-};
-
-static const struct qmp_phy_init_tbl msm8998_usb3_tx_tbl[] = {
-       QMP_PHY_INIT_CFG(QSERDES_V3_TX_HIGHZ_DRVR_EN, 0x10),
-       QMP_PHY_INIT_CFG(QSERDES_V3_TX_RCV_DETECT_LVL_2, 0x12),
-       QMP_PHY_INIT_CFG(QSERDES_V3_TX_LANE_MODE_1, 0x16),
-       QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_TX, 0x00),
-};
-
-static const struct qmp_phy_init_tbl msm8998_usb3_rx_tbl[] = {
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN, 0x0b),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4e),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL4, 0x18),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x07),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_CNTRL, 0x43),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_DEGLITCH_CNTRL, 0x1c),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x75),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_LOW, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_PI_CONTROLS, 0x80),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FO_GAIN, 0x0a),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_GAIN, 0x06),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_ENABLES, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_VGA_CAL_CNTRL2, 0x03),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_MODE_00, 0x05),
-};
-
-static const struct qmp_phy_init_tbl msm8998_usb3_pcs_tbl[] = {
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL2, 0x83),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_L, 0x09),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_H_TOL, 0xa2),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_MAN_CODE, 0x40),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL1, 0x02),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG1, 0xd1),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG2, 0x1f),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG3, 0x47),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_POWER_STATE_CONFIG2, 0x1b),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V0, 0x9f),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V1, 0x9f),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V2, 0xb7),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V3, 0x4e),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V4, 0x65),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_LS, 0x6b),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V0, 0x15),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V0, 0x0d),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V1, 0x15),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V1, 0x0d),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V2, 0x15),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V2, 0x0d),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V3, 0x15),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V3, 0x0d),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V4, 0x15),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V4, 0x0d),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_LS, 0x15),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_LS, 0x0d),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RATE_SLEW_CNTRL, 0x02),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_PWRUP_RESET_DLY_TIME_AUXCLK, 0x04),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TSYNC_RSYNC_TIME, 0x44),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_L, 0x40),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_H, 0x00),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_SIGDET_LVL, 0x8a),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_WAIT_TIME, 0x75),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LFPS_TX_ECSTART_EQTLOCK, 0x86),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_RUN_TIME, 0x13),
-};
-
 static const struct qmp_phy_init_tbl sm8150_usb3_uniphy_serdes_tbl[] = {
        QMP_PHY_INIT_CFG(QSERDES_V4_COM_SYSCLK_EN_SEL, 0x1a),
        QMP_PHY_INIT_CFG(QSERDES_V4_COM_BIN_VCOCAL_HSCLK_SEL, 0x11),
@@ -1089,99 +914,6 @@ static const struct qmp_phy_init_tbl sm8350_usb3_uniphy_pcs_usb_tbl[] = {
        QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8),
 };
 
-static const struct qmp_phy_init_tbl qcm2290_usb3_serdes_tbl[] = {
-       QMP_PHY_INIT_CFG(QSERDES_COM_SYSCLK_EN_SEL, 0x14),
-       QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CLKBUFLR_EN, 0x08),
-       QMP_PHY_INIT_CFG(QSERDES_COM_CLK_SELECT, 0x30),
-       QMP_PHY_INIT_CFG(QSERDES_COM_SYS_CLK_CTRL, 0x06),
-       QMP_PHY_INIT_CFG(QSERDES_COM_RESETSM_CNTRL, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_COM_RESETSM_CNTRL2, 0x08),
-       QMP_PHY_INIT_CFG(QSERDES_COM_BG_TRIM, 0x0f),
-       QMP_PHY_INIT_CFG(QSERDES_COM_SVS_MODE_CLK_SEL, 0x01),
-       QMP_PHY_INIT_CFG(QSERDES_COM_HSCLK_SEL, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_COM_DEC_START_MODE0, 0x82),
-       QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START1_MODE0, 0x55),
-       QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START2_MODE0, 0x55),
-       QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START3_MODE0, 0x03),
-       QMP_PHY_INIT_CFG(QSERDES_COM_CP_CTRL_MODE0, 0x0b),
-       QMP_PHY_INIT_CFG(QSERDES_COM_PLL_RCTRL_MODE0, 0x16),
-       QMP_PHY_INIT_CFG(QSERDES_COM_PLL_CCTRL_MODE0, 0x28),
-       QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN0_MODE0, 0x80),
-       QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN1_MODE0, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_COM_CORECLK_DIV, 0x0a),
-       QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP1_MODE0, 0x15),
-       QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP2_MODE0, 0x34),
-       QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP3_MODE0, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP_EN, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_COM_CORE_CLK_EN, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP_CFG, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_MAP, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_COM_BG_TIMER, 0x0a),
-       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_EN_CENTER, 0x01),
-       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_PER1, 0x31),
-       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_PER2, 0x01),
-       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_ADJ_PER1, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_ADJ_PER2, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_STEP_SIZE1, 0xde),
-       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_STEP_SIZE2, 0x07),
-       QMP_PHY_INIT_CFG(QSERDES_COM_PLL_IVCO, 0x0f),
-       QMP_PHY_INIT_CFG(QSERDES_COM_CMN_CONFIG, 0x06),
-       QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_INITVAL, 0x80),
-       QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CTRL_BY_PSM, 0x01),
-};
-
-static const struct qmp_phy_init_tbl qcm2290_usb3_tx_tbl[] = {
-       QMP_PHY_INIT_CFG(QSERDES_V3_TX_HIGHZ_DRVR_EN, 0x10),
-       QMP_PHY_INIT_CFG(QSERDES_V3_TX_RCV_DETECT_LVL_2, 0x12),
-       QMP_PHY_INIT_CFG(QSERDES_V3_TX_LANE_MODE_1, 0xc6),
-       QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_TX, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_RX, 0x00),
-};
-
-static const struct qmp_phy_init_tbl qcm2290_usb3_rx_tbl[] = {
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN, 0x0b),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_PI_CONTROLS, 0x80),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_LOW, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FO_GAIN, 0x0a),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_GAIN, 0x06),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x75),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL2, 0x02),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4e),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL4, 0x18),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x77),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_VGA_CAL_CNTRL2, 0x0a),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_CNTRL, 0x03),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_DEGLITCH_CNTRL, 0x16),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_ENABLES, 0x00),
-       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_MODE_00, 0x00),
-};
-
-static const struct qmp_phy_init_tbl qcm2290_usb3_pcs_tbl[] = {
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V0, 0x9f),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V0, 0x17),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V0, 0x0f),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL2, 0x83),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL1, 0x02),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_L, 0x09),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_H_TOL, 0xa2),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_MAN_CODE, 0x85),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG1, 0xd1),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG2, 0x1f),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG3, 0x47),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_WAIT_TIME, 0x75),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_RUN_TIME, 0x13),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LFPS_TX_ECSTART_EQTLOCK, 0x86),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_PWRUP_RESET_DLY_TIME_AUXCLK, 0x04),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TSYNC_RSYNC_TIME, 0x44),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_L, 0x40),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_H, 0x00),
-       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_SIGDET_LVL, 0x88),
-};
-
 static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_serdes_tbl[] = {
        QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x1a),
        QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL, 0x11),
@@ -1448,15 +1180,10 @@ struct qmp_usb_offsets {
        u16 pcs_usb;
        u16 tx;
        u16 rx;
-       /* for PHYs with >= 2 lanes */
-       u16 tx2;
-       u16 rx2;
 };
 
 /* struct qmp_phy_cfg - per-PHY initialization config */
 struct qmp_phy_cfg {
-       int lanes;
-
        const struct qmp_usb_offsets *offsets;
 
        /* Init sequence for PHY blocks - serdes, tx, rx, pcs */
@@ -1496,8 +1223,6 @@ struct qmp_usb {
        void __iomem *pcs_usb;
        void __iomem *tx;
        void __iomem *rx;
-       void __iomem *tx2;
-       void __iomem *rx2;
 
        struct clk *pipe_clk;
        struct clk_bulk_data *clks;
@@ -1579,16 +1304,6 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v3_msm8996 = {
        .rx             = 0x400,
 };
 
-static const struct qmp_usb_offsets qmp_usb_offsets_v3_qcm2290 = {
-       .serdes         = 0x0,
-       .pcs            = 0xc00,
-       .pcs_misc       = 0xa00,
-       .tx             = 0x200,
-       .rx             = 0x400,
-       .tx2            = 0x600,
-       .rx2            = 0x800,
-};
-
 static const struct qmp_usb_offsets qmp_usb_offsets_v4 = {
        .serdes         = 0,
        .pcs            = 0x0800,
@@ -1622,8 +1337,6 @@ static const struct qmp_usb_offsets qmp_usb_offsets_v7 = {
 };
 
 static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v3,
 
        .serdes_tbl             = ipq9574_usb3_serdes_tbl,
@@ -1640,8 +1353,6 @@ static const struct qmp_phy_cfg ipq6018_usb3phy_cfg = {
 };
 
 static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v3,
 
        .serdes_tbl             = ipq8074_usb3_serdes_tbl,
@@ -1658,8 +1369,6 @@ static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = {
 };
 
 static const struct qmp_phy_cfg ipq9574_usb3phy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_ipq9574,
 
        .serdes_tbl             = ipq9574_usb3_serdes_tbl,
@@ -1676,8 +1385,6 @@ static const struct qmp_phy_cfg ipq9574_usb3phy_cfg = {
 };
 
 static const struct qmp_phy_cfg msm8996_usb3phy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v3_msm8996,
 
        .serdes_tbl             = msm8996_usb3_serdes_tbl,
@@ -1694,8 +1401,6 @@ static const struct qmp_phy_cfg msm8996_usb3phy_cfg = {
 };
 
 static const struct qmp_phy_cfg sa8775p_usb3_uniphy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v5,
 
        .serdes_tbl             = sc8280xp_usb3_uniphy_serdes_tbl,
@@ -1714,8 +1419,6 @@ static const struct qmp_phy_cfg sa8775p_usb3_uniphy_cfg = {
 };
 
 static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v5,
 
        .serdes_tbl             = sc8280xp_usb3_uniphy_serdes_tbl,
@@ -1734,8 +1437,6 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = {
 };
 
 static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v3,
 
        .serdes_tbl             = qmp_v3_usb3_uniphy_serdes_tbl,
@@ -1753,27 +1454,7 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
        .has_pwrdn_delay        = true,
 };
 
-static const struct qmp_phy_cfg msm8998_usb3phy_cfg = {
-       .lanes                  = 2,
-
-       .offsets                = &qmp_usb_offsets_v3_qcm2290,
-
-       .serdes_tbl             = msm8998_usb3_serdes_tbl,
-       .serdes_tbl_num         = ARRAY_SIZE(msm8998_usb3_serdes_tbl),
-       .tx_tbl                 = msm8998_usb3_tx_tbl,
-       .tx_tbl_num             = ARRAY_SIZE(msm8998_usb3_tx_tbl),
-       .rx_tbl                 = msm8998_usb3_rx_tbl,
-       .rx_tbl_num             = ARRAY_SIZE(msm8998_usb3_rx_tbl),
-       .pcs_tbl                = msm8998_usb3_pcs_tbl,
-       .pcs_tbl_num            = ARRAY_SIZE(msm8998_usb3_pcs_tbl),
-       .vreg_list              = qmp_phy_vreg_l,
-       .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
-       .regs                   = qmp_v3_usb3phy_regs_layout,
-};
-
 static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v4,
 
        .serdes_tbl             = sm8150_usb3_uniphy_serdes_tbl,
@@ -1795,8 +1476,6 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = {
 };
 
 static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v4,
 
        .serdes_tbl             = sm8150_usb3_uniphy_serdes_tbl,
@@ -1818,8 +1497,6 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = {
 };
 
 static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v4,
 
        .serdes_tbl             = sm8150_usb3_uniphy_serdes_tbl,
@@ -1841,8 +1518,6 @@ static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = {
 };
 
 static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v5,
 
        .serdes_tbl             = sm8150_usb3_uniphy_serdes_tbl,
@@ -1864,7 +1539,6 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = {
 };
 
 static const struct qmp_phy_cfg sdx75_usb3_uniphy_cfg = {
-       .lanes                  = 1,
        .offsets                = &qmp_usb_offsets_v6,
 
        .serdes_tbl             = sdx75_usb3_uniphy_serdes_tbl,
@@ -1886,8 +1560,6 @@ static const struct qmp_phy_cfg sdx75_usb3_uniphy_cfg = {
 };
 
 static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v5,
 
        .serdes_tbl             = sm8150_usb3_uniphy_serdes_tbl,
@@ -1908,27 +1580,7 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = {
        .has_pwrdn_delay        = true,
 };
 
-static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = {
-       .lanes                  = 2,
-
-       .offsets                = &qmp_usb_offsets_v3_qcm2290,
-
-       .serdes_tbl             = qcm2290_usb3_serdes_tbl,
-       .serdes_tbl_num         = ARRAY_SIZE(qcm2290_usb3_serdes_tbl),
-       .tx_tbl                 = qcm2290_usb3_tx_tbl,
-       .tx_tbl_num             = ARRAY_SIZE(qcm2290_usb3_tx_tbl),
-       .rx_tbl                 = qcm2290_usb3_rx_tbl,
-       .rx_tbl_num             = ARRAY_SIZE(qcm2290_usb3_rx_tbl),
-       .pcs_tbl                = qcm2290_usb3_pcs_tbl,
-       .pcs_tbl_num            = ARRAY_SIZE(qcm2290_usb3_pcs_tbl),
-       .vreg_list              = qmp_phy_vreg_l,
-       .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
-       .regs                   = qmp_v3_usb3phy_regs_layout_qcm2290,
-};
-
 static const struct qmp_phy_cfg x1e80100_usb3_uniphy_cfg = {
-       .lanes                  = 1,
-
        .offsets                = &qmp_usb_offsets_v7,
 
        .serdes_tbl             = x1e80100_usb3_uniphy_serdes_tbl,
@@ -1946,32 +1598,6 @@ static const struct qmp_phy_cfg x1e80100_usb3_uniphy_cfg = {
        .regs                   = qmp_v7_usb3phy_regs_layout,
 };
 
-static void qmp_usb_configure_lane(void __iomem *base,
-                                       const struct qmp_phy_init_tbl tbl[],
-                                       int num,
-                                       u8 lane_mask)
-{
-       int i;
-       const struct qmp_phy_init_tbl *t = tbl;
-
-       if (!t)
-               return;
-
-       for (i = 0; i < num; i++, t++) {
-               if (!(t->lane_mask & lane_mask))
-                       continue;
-
-               writel(t->val, base + t->offset);
-       }
-}
-
-static void qmp_usb_configure(void __iomem *base,
-                                  const struct qmp_phy_init_tbl tbl[],
-                                  int num)
-{
-       qmp_usb_configure_lane(base, tbl, num, 0xff);
-}
-
 static int qmp_usb_serdes_init(struct qmp_usb *qmp)
 {
        const struct qmp_phy_cfg *cfg = qmp->cfg;
@@ -1979,7 +1605,7 @@ static int qmp_usb_serdes_init(struct qmp_usb *qmp)
        const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl;
        int serdes_tbl_num = cfg->serdes_tbl_num;
 
-       qmp_usb_configure(serdes, serdes_tbl, serdes_tbl_num);
+       qmp_configure(serdes, serdes_tbl, serdes_tbl_num);
 
        return 0;
 }
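The open-coded table writers above are replaced by qmp_configure() and qmp_configure_lane() from phy-qcom-qmp-common.h. Assuming the shared helpers mirror the removed per-driver copies, they presumably look like this:

    static inline void qmp_configure_lane(void __iomem *base,
                                          const struct qmp_phy_init_tbl tbl[],
                                          int num, u8 lane_mask)
    {
            int i;

            if (!tbl)
                    return;

            for (i = 0; i < num; i++, tbl++) {
                    /* skip entries not targeted at this lane */
                    if (!(tbl->lane_mask & lane_mask))
                            continue;

                    writel(tbl->val, base + tbl->offset);
            }
    }

    static inline void qmp_configure(void __iomem *base,
                                     const struct qmp_phy_init_tbl tbl[],
                                     int num)
    {
            /* 0xff matches every lane_mask, i.e. write all entries */
            qmp_configure_lane(base, tbl, num, 0xff);
    }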
@@ -2060,18 +1686,13 @@ static int qmp_usb_power_on(struct phy *phy)
        }
 
        /* Tx, Rx, and PCS configurations */
-       qmp_usb_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
-       qmp_usb_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
-
-       if (cfg->lanes >= 2) {
-               qmp_usb_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
-               qmp_usb_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
-       }
+       qmp_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
+       qmp_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
 
-       qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
+       qmp_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
        if (pcs_usb)
-               qmp_usb_configure(pcs_usb, cfg->pcs_usb_tbl, cfg->pcs_usb_tbl_num);
+               qmp_configure(pcs_usb, cfg->pcs_usb_tbl, cfg->pcs_usb_tbl_num);
 
        if (cfg->has_pwrdn_delay)
                usleep_range(10, 20);
@@ -2414,7 +2035,6 @@ static int qmp_usb_parse_dt_legacy(struct qmp_usb *qmp, struct device_node *np)
        /*
         * Get memory resources for the PHY:
         * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
-        * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
         * For single lane PHYs: pcs_misc (optional) -> 3.
         */
        qmp->tx = devm_of_iomap(dev, np, 0, NULL);
@@ -2432,19 +2052,7 @@ static int qmp_usb_parse_dt_legacy(struct qmp_usb *qmp, struct device_node *np)
        if (cfg->pcs_usb_offset)
                qmp->pcs_usb = qmp->pcs + cfg->pcs_usb_offset;
 
-       if (cfg->lanes >= 2) {
-               qmp->tx2 = devm_of_iomap(dev, np, 3, NULL);
-               if (IS_ERR(qmp->tx2))
-                       return PTR_ERR(qmp->tx2);
-
-               qmp->rx2 = devm_of_iomap(dev, np, 4, NULL);
-               if (IS_ERR(qmp->rx2))
-                       return PTR_ERR(qmp->rx2);
-
-               qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
-       } else {
-               qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
-       }
+       qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
 
        if (IS_ERR(qmp->pcs_misc)) {
                dev_vdbg(dev, "PHY pcs_misc-reg not used\n");
@@ -2496,11 +2104,6 @@ static int qmp_usb_parse_dt(struct qmp_usb *qmp)
        qmp->tx = base + offs->tx;
        qmp->rx = base + offs->rx;
 
-       if (cfg->lanes >= 2) {
-               qmp->tx2 = base + offs->tx2;
-               qmp->rx2 = base + offs->rx2;
-       }
-
        ret = qmp_usb_clk_init(qmp);
        if (ret)
                return ret;
@@ -2599,12 +2202,6 @@ static const struct of_device_id qmp_usb_of_match_table[] = {
        }, {
                .compatible = "qcom,msm8996-qmp-usb3-phy",
                .data = &msm8996_usb3phy_cfg,
-       }, {
-               .compatible = "qcom,msm8998-qmp-usb3-phy",
-               .data = &msm8998_usb3phy_cfg,
-       }, {
-               .compatible = "qcom,qcm2290-qmp-usb3-phy",
-               .data = &qcm2290_usb3phy_cfg,
        }, {
                .compatible = "qcom,sa8775p-qmp-usb3-uni-phy",
                .data = &sa8775p_usb3_uniphy_cfg,
@@ -2623,9 +2220,6 @@ static const struct of_device_id qmp_usb_of_match_table[] = {
        }, {
                .compatible = "qcom,sdx75-qmp-usb3-uni-phy",
                .data = &sdx75_usb3_uniphy_cfg,
-       }, {
-               .compatible = "qcom,sm6115-qmp-usb3-phy",
-               .data = &qcm2290_usb3phy_cfg,
        }, {
                .compatible = "qcom,sm8150-qmp-usb3-uni-phy",
                .data = &sm8150_usb3_uniphy_cfg,
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c b/drivers/phy/qualcomm/phy-qcom-qmp-usbc.c
new file mode 100644 (file)
index 0000000..5cbc5fd
--- /dev/null
@@ -0,0 +1,1149 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/usb/typec.h>
+#include <linux/usb/typec_mux.h>
+
+#include "phy-qcom-qmp-common.h"
+
+#include "phy-qcom-qmp.h"
+#include "phy-qcom-qmp-pcs-misc-v3.h"
+
+#define PHY_INIT_COMPLETE_TIMEOUT              10000
+
+/* set of registers with offsets different per-PHY */
+enum qphy_reg_layout {
+       /* PCS registers */
+       QPHY_SW_RESET,
+       QPHY_START_CTRL,
+       QPHY_PCS_STATUS,
+       QPHY_PCS_AUTONOMOUS_MODE_CTRL,
+       QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR,
+       QPHY_PCS_POWER_DOWN_CONTROL,
+       /* Keep last to ensure regs_layout arrays are properly initialized */
+       QPHY_LAYOUT_SIZE
+};
+
+static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
+       [QPHY_SW_RESET]                 = QPHY_V3_PCS_SW_RESET,
+       [QPHY_START_CTRL]               = QPHY_V3_PCS_START_CONTROL,
+       [QPHY_PCS_STATUS]               = QPHY_V3_PCS_PCS_STATUS,
+       [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = QPHY_V3_PCS_AUTONOMOUS_MODE_CTRL,
+       [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = QPHY_V3_PCS_LFPS_RXTERM_IRQ_CLEAR,
+       [QPHY_PCS_POWER_DOWN_CONTROL]   = QPHY_V3_PCS_POWER_DOWN_CONTROL,
+};
+
+static const unsigned int qmp_v3_usb3phy_regs_layout_qcm2290[QPHY_LAYOUT_SIZE] = {
+       [QPHY_SW_RESET]                 = QPHY_V3_PCS_SW_RESET,
+       [QPHY_START_CTRL]               = QPHY_V3_PCS_START_CONTROL,
+       [QPHY_PCS_STATUS]               = QPHY_V3_PCS_PCS_STATUS,
+       [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = QPHY_V3_PCS_AUTONOMOUS_MODE_CTRL,
+       [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR] = QPHY_V3_PCS_LFPS_RXTERM_IRQ_CLEAR,
+       [QPHY_PCS_POWER_DOWN_CONTROL]   = QPHY_V3_PCS_POWER_DOWN_CONTROL,
+};
+
+static const struct qmp_phy_init_tbl msm8998_usb3_serdes_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CLK_SELECT, 0x30),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYSCLK_EN_SEL, 0x14),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYS_CLK_CTRL, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_RESETSM_CNTRL2, 0x08),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CMN_CONFIG, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SVS_MODE_CLK_SEL, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_HSCLK_SEL, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_DEC_START_MODE0, 0x82),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START1_MODE0, 0xab),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START2_MODE0, 0xea),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START3_MODE0, 0x02),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CP_CTRL_MODE0, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_RCTRL_MODE0, 0x16),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_CCTRL_MODE0, 0x36),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_GAIN1_MODE0, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_GAIN0_MODE0, 0x3f),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE2_MODE0, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE1_MODE0, 0xc9),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CORECLK_DIV_MODE0, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP3_MODE0, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP2_MODE0, 0x34),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP1_MODE0, 0x15),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP_EN, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CORE_CLK_EN, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP_CFG, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_MAP, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_BG_TIMER, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_IVCO, 0x07),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_INITVAL, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_CMN_MODE, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_EN_CENTER, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_PER1, 0x31),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_PER2, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_ADJ_PER1, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_ADJ_PER2, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_STEP_SIZE1, 0x85),
+       QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_STEP_SIZE2, 0x07),
+};
+
+static const struct qmp_phy_init_tbl msm8998_usb3_tx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V3_TX_HIGHZ_DRVR_EN, 0x10),
+       QMP_PHY_INIT_CFG(QSERDES_V3_TX_RCV_DETECT_LVL_2, 0x12),
+       QMP_PHY_INIT_CFG(QSERDES_V3_TX_LANE_MODE_1, 0x16),
+       QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_TX, 0x00),
+};
+
+static const struct qmp_phy_init_tbl msm8998_usb3_rx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN, 0x0b),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4e),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL4, 0x18),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x07),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_CNTRL, 0x43),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_DEGLITCH_CNTRL, 0x1c),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x75),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_LOW, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_PI_CONTROLS, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FO_GAIN, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_GAIN, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_ENABLES, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_VGA_CAL_CNTRL2, 0x03),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_MODE_00, 0x05),
+};
+
+static const struct qmp_phy_init_tbl msm8998_usb3_pcs_tbl[] = {
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL2, 0x83),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_L, 0x09),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_H_TOL, 0xa2),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_MAN_CODE, 0x40),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL1, 0x02),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG1, 0xd1),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG2, 0x1f),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG3, 0x47),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_POWER_STATE_CONFIG2, 0x1b),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V0, 0x9f),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V1, 0x9f),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V2, 0xb7),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V3, 0x4e),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V4, 0x65),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_LS, 0x6b),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V0, 0x15),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V0, 0x0d),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V1, 0x15),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V1, 0x0d),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V2, 0x15),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V2, 0x0d),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V3, 0x15),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V3, 0x0d),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V4, 0x15),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V4, 0x0d),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_LS, 0x15),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_LS, 0x0d),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RATE_SLEW_CNTRL, 0x02),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_PWRUP_RESET_DLY_TIME_AUXCLK, 0x04),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TSYNC_RSYNC_TIME, 0x44),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_L, 0x40),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_H, 0x00),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_SIGDET_LVL, 0x8a),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_WAIT_TIME, 0x75),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LFPS_TX_ECSTART_EQTLOCK, 0x86),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_RUN_TIME, 0x13),
+};
+
+static const struct qmp_phy_init_tbl qcm2290_usb3_serdes_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_COM_SYSCLK_EN_SEL, 0x14),
+       QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CLKBUFLR_EN, 0x08),
+       QMP_PHY_INIT_CFG(QSERDES_COM_CLK_SELECT, 0x30),
+       QMP_PHY_INIT_CFG(QSERDES_COM_SYS_CLK_CTRL, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_COM_RESETSM_CNTRL, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_COM_RESETSM_CNTRL2, 0x08),
+       QMP_PHY_INIT_CFG(QSERDES_COM_BG_TRIM, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_COM_SVS_MODE_CLK_SEL, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_COM_HSCLK_SEL, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_COM_DEC_START_MODE0, 0x82),
+       QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START1_MODE0, 0x55),
+       QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START2_MODE0, 0x55),
+       QMP_PHY_INIT_CFG(QSERDES_COM_DIV_FRAC_START3_MODE0, 0x03),
+       QMP_PHY_INIT_CFG(QSERDES_COM_CP_CTRL_MODE0, 0x0b),
+       QMP_PHY_INIT_CFG(QSERDES_COM_PLL_RCTRL_MODE0, 0x16),
+       QMP_PHY_INIT_CFG(QSERDES_COM_PLL_CCTRL_MODE0, 0x28),
+       QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN0_MODE0, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_GAIN1_MODE0, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_COM_CORECLK_DIV, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP1_MODE0, 0x15),
+       QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP2_MODE0, 0x34),
+       QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP3_MODE0, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP_EN, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_COM_CORE_CLK_EN, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_COM_LOCK_CMP_CFG, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_COM_VCO_TUNE_MAP, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_COM_BG_TIMER, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_EN_CENTER, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_PER1, 0x31),
+       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_PER2, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_ADJ_PER1, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_ADJ_PER2, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_STEP_SIZE1, 0xde),
+       QMP_PHY_INIT_CFG(QSERDES_COM_SSC_STEP_SIZE2, 0x07),
+       QMP_PHY_INIT_CFG(QSERDES_COM_PLL_IVCO, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_COM_CMN_CONFIG, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_COM_INTEGLOOP_INITVAL, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_COM_BIAS_EN_CTRL_BY_PSM, 0x01),
+};
+
+static const struct qmp_phy_init_tbl qcm2290_usb3_tx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V3_TX_HIGHZ_DRVR_EN, 0x10),
+       QMP_PHY_INIT_CFG(QSERDES_V3_TX_RCV_DETECT_LVL_2, 0x12),
+       QMP_PHY_INIT_CFG(QSERDES_V3_TX_LANE_MODE_1, 0xc6),
+       QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_TX, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_RX, 0x00),
+};
+
+static const struct qmp_phy_init_tbl qcm2290_usb3_rx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN, 0x0b),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_PI_CONTROLS, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_LOW, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FO_GAIN, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_GAIN, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x75),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL2, 0x02),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4e),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL4, 0x18),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x77),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_VGA_CAL_CNTRL2, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_CNTRL, 0x03),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_DEGLITCH_CNTRL, 0x16),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_ENABLES, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_MODE_00, 0x00),
+};
+
+/* differs from qcm2290_usb3_rx_tbl only in QSERDES_V3_RX_UCDR_PI_CONTROLS */
+static const struct qmp_phy_init_tbl sdm660_usb3_rx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN, 0x0b),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_PI_CONTROLS, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_LOW, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FO_GAIN, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_GAIN, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x75),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL2, 0x02),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4e),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL4, 0x18),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x77),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_VGA_CAL_CNTRL2, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_CNTRL, 0x03),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_DEGLITCH_CNTRL, 0x16),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_ENABLES, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_MODE_00, 0x00),
+};
+
+static const struct qmp_phy_init_tbl qcm2290_usb3_pcs_tbl[] = {
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V0, 0x9f),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V0, 0x17),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V0, 0x0f),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL2, 0x83),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL1, 0x02),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_L, 0x09),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_H_TOL, 0xa2),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_MAN_CODE, 0x85),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG1, 0xd1),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG2, 0x1f),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG3, 0x47),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_WAIT_TIME, 0x75),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_RUN_TIME, 0x13),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_LFPS_TX_ECSTART_EQTLOCK, 0x86),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_PWRUP_RESET_DLY_TIME_AUXCLK, 0x04),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_TSYNC_RSYNC_TIME, 0x44),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_L, 0x40),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_H, 0x00),
+       QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_SIGDET_LVL, 0x88),
+};
+
+struct qmp_usbc_offsets {
+       u16 serdes;
+       u16 pcs;
+       u16 pcs_misc;
+       u16 tx;
+       u16 rx;
+       /* for PHYs with >= 2 lanes */
+       u16 tx2;
+       u16 rx2;
+};
+
+/* struct qmp_phy_cfg - per-PHY initialization config */
+struct qmp_phy_cfg {
+       const struct qmp_usbc_offsets *offsets;
+
+       /* Init sequence for PHY blocks - serdes, tx, rx, pcs */
+       const struct qmp_phy_init_tbl *serdes_tbl;
+       int serdes_tbl_num;
+       const struct qmp_phy_init_tbl *tx_tbl;
+       int tx_tbl_num;
+       const struct qmp_phy_init_tbl *rx_tbl;
+       int rx_tbl_num;
+       const struct qmp_phy_init_tbl *pcs_tbl;
+       int pcs_tbl_num;
+
+       /* regulators to be requested */
+       const char * const *vreg_list;
+       int num_vregs;
+
+       /* array of registers with different offsets */
+       const unsigned int *regs;
+};
+
+struct qmp_usbc {
+       struct device *dev;
+
+       const struct qmp_phy_cfg *cfg;
+
+       void __iomem *serdes;
+       void __iomem *pcs;
+       void __iomem *pcs_misc;
+       void __iomem *tx;
+       void __iomem *rx;
+       void __iomem *tx2;
+       void __iomem *rx2;
+
+       struct regmap *tcsr_map;
+       u32 vls_clamp_reg;
+
+       struct clk *pipe_clk;
+       struct clk_bulk_data *clks;
+       int num_clks;
+       int num_resets;
+       struct reset_control_bulk_data *resets;
+       struct regulator_bulk_data *vregs;
+
+       struct mutex phy_mutex;
+
+       enum phy_mode mode;
+       unsigned int usb_init_count;
+
+       struct phy *phy;
+
+       struct clk_fixed_rate pipe_clk_fixed;
+
+       struct typec_switch_dev *sw;
+       enum typec_orientation orientation;
+};
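The per-SoC qmp_phy_cfg is selected at probe time from the driver's of_device_id table. A sketch of the usual pattern, assuming the probe function follows the other QMP drivers (names are illustrative):

    /* sketch: per-SoC config selection at probe time */
    static int qmp_usbc_probe(struct platform_device *pdev)
    {
            struct device *dev = &pdev->dev;
            struct qmp_usbc *qmp;

            qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL);
            if (!qmp)
                    return -ENOMEM;

            qmp->dev = dev;

            /* msm8998/qcm2290/sdm660 cfg comes from the compatible match */
            qmp->cfg = of_device_get_match_data(dev);
            if (!qmp->cfg)
                    return -EINVAL;

            /* clock, reset, regulator and register setup elided */
            return 0;
    }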
+
+static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val)
+{
+       u32 reg;
+
+       reg = readl(base + offset);
+       reg |= val;
+       writel(reg, base + offset);
+
+       /* ensure the above write has gone through */
+       readl(base + offset);
+}
+
+static inline void qphy_clrbits(void __iomem *base, u32 offset, u32 val)
+{
+       u32 reg;
+
+       reg = readl(base + offset);
+       reg &= ~val;
+       writel(reg, base + offset);
+
+       /* ensure the above write has gone through */
+       readl(base + offset);
+}
+
+/* list of clocks required by phy */
+static const char * const qmp_usbc_phy_clk_l[] = {
+       "aux", "cfg_ahb", "ref", "com_aux",
+};
+
+/* list of resets */
+static const char * const usb3phy_legacy_reset_l[] = {
+       "phy", "common",
+};
+
+static const char * const usb3phy_reset_l[] = {
+       "phy_phy", "phy",
+};
+
+/* list of regulators */
+static const char * const qmp_phy_vreg_l[] = {
+       "vdda-phy", "vdda-pll",
+};
+
+static const struct qmp_usbc_offsets qmp_usbc_offsets_v3_qcm2290 = {
+       .serdes         = 0x0,
+       .pcs            = 0xc00,
+       .pcs_misc       = 0xa00,
+       .tx             = 0x200,
+       .rx             = 0x400,
+       .tx2            = 0x600,
+       .rx2            = 0x800,
+};
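The offsets table lets the driver map a single "reg" region and derive the per-block pointers from it, instead of taking one DT resource per block as the legacy binding did. A minimal sketch of the mapping (base and offs are hypothetical locals):

    /* sketch: derive sub-block pointers from one mapped register region */
    base = devm_platform_ioremap_resource(pdev, 0);
    if (IS_ERR(base))
            return PTR_ERR(base);

    qmp->serdes   = base + offs->serdes;
    qmp->pcs      = base + offs->pcs;
    qmp->pcs_misc = base + offs->pcs_misc;
    qmp->tx       = base + offs->tx;
    qmp->rx       = base + offs->rx;
    qmp->tx2      = base + offs->tx2;
    qmp->rx2      = base + offs->rx2;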
+
+static const struct qmp_phy_cfg msm8998_usb3phy_cfg = {
+       .offsets                = &qmp_usbc_offsets_v3_qcm2290,
+
+       .serdes_tbl             = msm8998_usb3_serdes_tbl,
+       .serdes_tbl_num         = ARRAY_SIZE(msm8998_usb3_serdes_tbl),
+       .tx_tbl                 = msm8998_usb3_tx_tbl,
+       .tx_tbl_num             = ARRAY_SIZE(msm8998_usb3_tx_tbl),
+       .rx_tbl                 = msm8998_usb3_rx_tbl,
+       .rx_tbl_num             = ARRAY_SIZE(msm8998_usb3_rx_tbl),
+       .pcs_tbl                = msm8998_usb3_pcs_tbl,
+       .pcs_tbl_num            = ARRAY_SIZE(msm8998_usb3_pcs_tbl),
+       .vreg_list              = qmp_phy_vreg_l,
+       .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
+       .regs                   = qmp_v3_usb3phy_regs_layout,
+};
+
+static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = {
+       .offsets                = &qmp_usbc_offsets_v3_qcm2290,
+
+       .serdes_tbl             = qcm2290_usb3_serdes_tbl,
+       .serdes_tbl_num         = ARRAY_SIZE(qcm2290_usb3_serdes_tbl),
+       .tx_tbl                 = qcm2290_usb3_tx_tbl,
+       .tx_tbl_num             = ARRAY_SIZE(qcm2290_usb3_tx_tbl),
+       .rx_tbl                 = qcm2290_usb3_rx_tbl,
+       .rx_tbl_num             = ARRAY_SIZE(qcm2290_usb3_rx_tbl),
+       .pcs_tbl                = qcm2290_usb3_pcs_tbl,
+       .pcs_tbl_num            = ARRAY_SIZE(qcm2290_usb3_pcs_tbl),
+       .vreg_list              = qmp_phy_vreg_l,
+       .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
+       .regs                   = qmp_v3_usb3phy_regs_layout_qcm2290,
+};
+
+static const struct qmp_phy_cfg sdm660_usb3phy_cfg = {
+       .offsets                = &qmp_usbc_offsets_v3_qcm2290,
+
+       .serdes_tbl             = qcm2290_usb3_serdes_tbl,
+       .serdes_tbl_num         = ARRAY_SIZE(qcm2290_usb3_serdes_tbl),
+       .tx_tbl                 = qcm2290_usb3_tx_tbl,
+       .tx_tbl_num             = ARRAY_SIZE(qcm2290_usb3_tx_tbl),
+       .rx_tbl                 = sdm660_usb3_rx_tbl,
+       .rx_tbl_num             = ARRAY_SIZE(sdm660_usb3_rx_tbl),
+       .pcs_tbl                = qcm2290_usb3_pcs_tbl,
+       .pcs_tbl_num            = ARRAY_SIZE(qcm2290_usb3_pcs_tbl),
+       .vreg_list              = qmp_phy_vreg_l,
+       .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
+       .regs                   = qmp_v3_usb3phy_regs_layout_qcm2290,
+};
+
+static int qmp_usbc_init(struct phy *phy)
+{
+       struct qmp_usbc *qmp = phy_get_drvdata(phy);
+       const struct qmp_phy_cfg *cfg = qmp->cfg;
+       void __iomem *pcs = qmp->pcs;
+       u32 val = 0;
+       int ret;
+
+       ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
+       if (ret) {
+               dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret);
+               return ret;
+       }
+
+       ret = reset_control_bulk_assert(qmp->num_resets, qmp->resets);
+       if (ret) {
+               dev_err(qmp->dev, "reset assert failed\n");
+               goto err_disable_regulators;
+       }
+
+       ret = reset_control_bulk_deassert(qmp->num_resets, qmp->resets);
+       if (ret) {
+               dev_err(qmp->dev, "reset deassert failed\n");
+               goto err_disable_regulators;
+       }
+
+       ret = clk_bulk_prepare_enable(qmp->num_clks, qmp->clks);
+       if (ret)
+               goto err_assert_reset;
+
+       qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN);
+
+#define SW_PORTSELECT_VAL                      BIT(0)
+#define SW_PORTSELECT_MUX                      BIT(1)
+       /* Use software-based port select and follow the Type-C orientation */
+       val = SW_PORTSELECT_MUX;
+       if (qmp->orientation == TYPEC_ORIENTATION_REVERSE)
+               val |= SW_PORTSELECT_VAL;
+       writel(val, qmp->pcs_misc);
+
+       return 0;
+
+err_assert_reset:
+       reset_control_bulk_assert(qmp->num_resets, qmp->resets);
+err_disable_regulators:
+       regulator_bulk_disable(cfg->num_vregs, qmp->vregs);
+
+       return ret;
+}
+
+static int qmp_usbc_exit(struct phy *phy)
+{
+       struct qmp_usbc *qmp = phy_get_drvdata(phy);
+       const struct qmp_phy_cfg *cfg = qmp->cfg;
+
+       reset_control_bulk_assert(qmp->num_resets, qmp->resets);
+
+       clk_bulk_disable_unprepare(qmp->num_clks, qmp->clks);
+
+       regulator_bulk_disable(cfg->num_vregs, qmp->vregs);
+
+       return 0;
+}
+
+static int qmp_usbc_power_on(struct phy *phy)
+{
+       struct qmp_usbc *qmp = phy_get_drvdata(phy);
+       const struct qmp_phy_cfg *cfg = qmp->cfg;
+       void __iomem *status;
+       unsigned int val;
+       int ret;
+
+       qmp_configure(qmp->serdes, cfg->serdes_tbl, cfg->serdes_tbl_num);
+
+       ret = clk_prepare_enable(qmp->pipe_clk);
+       if (ret) {
+               dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret);
+               return ret;
+       }
+
+       /* Tx, Rx, and PCS configurations */
+       qmp_configure_lane(qmp->tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
+       qmp_configure_lane(qmp->rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
+
+       qmp_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
+       qmp_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
+
+       qmp_configure(qmp->pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
+
+       /* Pull PHY out of reset state */
+       qphy_clrbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
+
+       /* start SerDes and Phy-Coding-Sublayer */
+       qphy_setbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START);
+
+       status = qmp->pcs + cfg->regs[QPHY_PCS_STATUS];
+       ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200,
+                                PHY_INIT_COMPLETE_TIMEOUT);
+       if (ret) {
+               dev_err(qmp->dev, "phy initialization timed out\n");
+               goto err_disable_pipe_clk;
+       }
+
+       return 0;
+
+err_disable_pipe_clk:
+       clk_disable_unprepare(qmp->pipe_clk);
+
+       return ret;
+}
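For reference, the readl_poll_timeout() call above re-reads the PCS status register roughly every 200 us until PHYSTATUS deasserts, returning -ETIMEDOUT once PHY_INIT_COMPLETE_TIMEOUT (10000 us) elapses. A rough open-coded equivalent, as a sketch only (the real macro lives in linux/iopoll.h):

    ktime_t deadline = ktime_add_us(ktime_get(), PHY_INIT_COMPLETE_TIMEOUT);
    u32 val;

    for (;;) {
            val = readl(status);
            if (!(val & PHYSTATUS))         /* PHY reports init complete */
                    break;
            if (ktime_after(ktime_get(), deadline))
                    return -ETIMEDOUT;      /* init never completed */
            usleep_range(200, 250);
    }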
+
+static int qmp_usbc_power_off(struct phy *phy)
+{
+       struct qmp_usbc *qmp = phy_get_drvdata(phy);
+       const struct qmp_phy_cfg *cfg = qmp->cfg;
+
+       clk_disable_unprepare(qmp->pipe_clk);
+
+       /* PHY reset */
+       qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
+
+       /* stop SerDes and Phy-Coding-Sublayer */
+       qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL],
+                       SERDES_START | PCS_START);
+
+       /* Put PHY into POWER DOWN state: active low */
+       qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+                       SW_PWRDN);
+
+       return 0;
+}
+
+static int qmp_usbc_enable(struct phy *phy)
+{
+       struct qmp_usbc *qmp = phy_get_drvdata(phy);
+       int ret;
+
+       mutex_lock(&qmp->phy_mutex);
+
+       ret = qmp_usbc_init(phy);
+       if (ret)
+               goto out_unlock;
+
+       ret = qmp_usbc_power_on(phy);
+       if (ret) {
+               qmp_usbc_exit(phy);
+               goto out_unlock;
+       }
+
+       qmp->usb_init_count++;
+out_unlock:
+       mutex_unlock(&qmp->phy_mutex);
+
+       return ret;
+}
+
+static int qmp_usbc_disable(struct phy *phy)
+{
+       struct qmp_usbc *qmp = phy_get_drvdata(phy);
+       int ret;
+
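+       /*
+        * Balances the increment in qmp_usbc_enable(); the Type-C
+        * orientation handler checks this count before cycling the PHY.
+        */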
+       qmp->usb_init_count--;
+       ret = qmp_usbc_power_off(phy);
+       if (ret)
+               return ret;
+       return qmp_usbc_exit(phy);
+}
+
+static int qmp_usbc_set_mode(struct phy *phy, enum phy_mode mode, int submode)
+{
+       struct qmp_usbc *qmp = phy_get_drvdata(phy);
+
+       qmp->mode = mode;
+
+       return 0;
+}
+
+static const struct phy_ops qmp_usbc_phy_ops = {
+       .init           = qmp_usbc_enable,
+       .exit           = qmp_usbc_disable,
+       .set_mode       = qmp_usbc_set_mode,
+       .owner          = THIS_MODULE,
+};
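+
+/*
+ * Consumer-side sketch, illustrative only (not part of this driver): because
+ * .init/.exit above map to qmp_usbc_enable()/qmp_usbc_disable(), which fold
+ * in power-on/power-off, a hypothetical controller glue driver only needs
+ * the generic phy_init()/phy_exit() pair:
+ *
+ *	phy = devm_phy_get(dev, "usb3-phy");	(con-id depends on the binding)
+ *	if (IS_ERR(phy))
+ *		return PTR_ERR(phy);
+ *	ret = phy_set_mode(phy, PHY_MODE_USB_HOST_SS);
+ *	if (ret)
+ *		return ret;
+ *	return phy_init(phy);			(this also powers the PHY on)
+ */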
+
+static void qmp_usbc_enable_autonomous_mode(struct qmp_usbc *qmp)
+{
+       const struct qmp_phy_cfg *cfg = qmp->cfg;
+       void __iomem *pcs = qmp->pcs;
+       u32 intr_mask;
+
+       if (qmp->mode == PHY_MODE_USB_HOST_SS ||
+           qmp->mode == PHY_MODE_USB_DEVICE_SS)
+               intr_mask = ARCVR_DTCT_EN | ALFPS_DTCT_EN;
+       else
+               intr_mask = ARCVR_DTCT_EN | ARCVR_DTCT_EVENT_SEL;
+
+       /* Clear any pending interrupt status */
+       qphy_setbits(pcs, cfg->regs[QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR], IRQ_CLEAR);
+       /* Writing 1 followed by 0 clears the interrupt */
+       qphy_clrbits(pcs, cfg->regs[QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR], IRQ_CLEAR);
+
+       qphy_clrbits(pcs, cfg->regs[QPHY_PCS_AUTONOMOUS_MODE_CTRL],
+                    ARCVR_DTCT_EN | ALFPS_DTCT_EN | ARCVR_DTCT_EVENT_SEL);
+
+       /* Enable required PHY autonomous mode interrupts */
+       qphy_setbits(pcs, cfg->regs[QPHY_PCS_AUTONOMOUS_MODE_CTRL], intr_mask);
+
+       /* Enable i/o clamp_n for autonomous mode */
+       if (qmp->tcsr_map && qmp->vls_clamp_reg)
+               regmap_write(qmp->tcsr_map, qmp->vls_clamp_reg, 1);
+}
+
+static void qmp_usbc_disable_autonomous_mode(struct qmp_usbc *qmp)
+{
+       const struct qmp_phy_cfg *cfg = qmp->cfg;
+       void __iomem *pcs = qmp->pcs;
+
+       /* Disable i/o clamp_n on resume for normal mode */
+       if (qmp->tcsr_map && qmp->vls_clamp_reg)
+               regmap_write(qmp->tcsr_map, qmp->vls_clamp_reg, 0);
+
+       qphy_clrbits(pcs, cfg->regs[QPHY_PCS_AUTONOMOUS_MODE_CTRL],
+                    ARCVR_DTCT_EN | ARCVR_DTCT_EVENT_SEL | ALFPS_DTCT_EN);
+
+       qphy_setbits(pcs, cfg->regs[QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR], IRQ_CLEAR);
+       /* Writing 1 followed by 0 clears the interrupt */
+       qphy_clrbits(pcs, cfg->regs[QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR], IRQ_CLEAR);
+}
+
+static int __maybe_unused qmp_usbc_runtime_suspend(struct device *dev)
+{
+       struct qmp_usbc *qmp = dev_get_drvdata(dev);
+
+       dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qmp->mode);
+
+       if (!qmp->phy->init_count) {
+               dev_vdbg(dev, "PHY not initialized, bailing out\n");
+               return 0;
+       }
+
+       qmp_usbc_enable_autonomous_mode(qmp);
+
+       clk_disable_unprepare(qmp->pipe_clk);
+       clk_bulk_disable_unprepare(qmp->num_clks, qmp->clks);
+
+       return 0;
+}
+
+static int __maybe_unused qmp_usbc_runtime_resume(struct device *dev)
+{
+       struct qmp_usbc *qmp = dev_get_drvdata(dev);
+       int ret = 0;
+
+       dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qmp->mode);
+
+       if (!qmp->phy->init_count) {
+               dev_vdbg(dev, "PHY not initialized, bailing out\n");
+               return 0;
+       }
+
+       ret = clk_bulk_prepare_enable(qmp->num_clks, qmp->clks);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(qmp->pipe_clk);
+       if (ret) {
+               dev_err(dev, "pipe_clk enable failed, err=%d\n", ret);
+               clk_bulk_disable_unprepare(qmp->num_clks, qmp->clks);
+               return ret;
+       }
+
+       qmp_usbc_disable_autonomous_mode(qmp);
+
+       return 0;
+}
+
+static const struct dev_pm_ops qmp_usbc_pm_ops = {
+       SET_RUNTIME_PM_OPS(qmp_usbc_runtime_suspend,
+                          qmp_usbc_runtime_resume, NULL)
+};
+
+static int qmp_usbc_vreg_init(struct qmp_usbc *qmp)
+{
+       const struct qmp_phy_cfg *cfg = qmp->cfg;
+       struct device *dev = qmp->dev;
+       int num = cfg->num_vregs;
+       int i;
+
+       qmp->vregs = devm_kcalloc(dev, num, sizeof(*qmp->vregs), GFP_KERNEL);
+       if (!qmp->vregs)
+               return -ENOMEM;
+
+       for (i = 0; i < num; i++)
+               qmp->vregs[i].supply = cfg->vreg_list[i];
+
+       return devm_regulator_bulk_get(dev, num, qmp->vregs);
+}
+
+static int qmp_usbc_reset_init(struct qmp_usbc *qmp,
+                             const char *const *reset_list,
+                             int num_resets)
+{
+       struct device *dev = qmp->dev;
+       int i;
+       int ret;
+
+       qmp->resets = devm_kcalloc(dev, num_resets,
+                                  sizeof(*qmp->resets), GFP_KERNEL);
+       if (!qmp->resets)
+               return -ENOMEM;
+
+       for (i = 0; i < num_resets; i++)
+               qmp->resets[i].id = reset_list[i];
+
+       qmp->num_resets = num_resets;
+
+       ret = devm_reset_control_bulk_get_exclusive(dev, num_resets, qmp->resets);
+       if (ret)
+               return dev_err_probe(dev, ret, "failed to get resets\n");
+
+       return 0;
+}
+
+static int qmp_usbc_clk_init(struct qmp_usbc *qmp)
+{
+       struct device *dev = qmp->dev;
+       int num = ARRAY_SIZE(qmp_usbc_phy_clk_l);
+       int i;
+
+       qmp->clks = devm_kcalloc(dev, num, sizeof(*qmp->clks), GFP_KERNEL);
+       if (!qmp->clks)
+               return -ENOMEM;
+
+       for (i = 0; i < num; i++)
+               qmp->clks[i].id = qmp_usbc_phy_clk_l[i];
+
+       qmp->num_clks = num;
+
+       return devm_clk_bulk_get_optional(dev, num, qmp->clks);
+}
+
+static void phy_clk_release_provider(void *res)
+{
+       of_clk_del_provider(res);
+}
+
+/*
+ * Register a fixed-rate pipe clock.
+ *
+ * The <s>_pipe_clksrc generated by the PHY goes to the GCC, which gate
+ * controls it. The <s>_pipe_clk coming out of the GCC is requested by
+ * the PHY driver for its operations.
+ * We register the <s>_pipe_clksrc here. The GCC driver takes care of
+ * assigning this <s>_pipe_clksrc as the parent of <s>_pipe_clk.
+ * The picture below shows this relationship.
+ *
+ *         +---------------+
+ *         |   PHY block   |<<---------------------------------------+
+ *         |               |                                         |
+ *         |   +-------+   |                   +-----+               |
+ *   I/P---^-->|  PLL  |---^--->pipe_clksrc--->| GCC |--->pipe_clk---+
+ *    clk  |   +-------+   |                   +-----+
+ *         +---------------+
+ */
+static int phy_pipe_clk_register(struct qmp_usbc *qmp, struct device_node *np)
+{
+       struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed;
+       struct clk_init_data init = { };
+       int ret;
+
+       ret = of_property_read_string(np, "clock-output-names", &init.name);
+       if (ret) {
+               dev_err(qmp->dev, "%pOFn: No clock-output-names\n", np);
+               return ret;
+       }
+
+       init.ops = &clk_fixed_rate_ops;
+
+       /* controllers using QMP PHYs use a 125 MHz pipe clock interface */
+       fixed->fixed_rate = 125000000;
+       fixed->hw.init = &init;
+
+       ret = devm_clk_hw_register(qmp->dev, &fixed->hw);
+       if (ret)
+               return ret;
+
+       ret = of_clk_add_hw_provider(np, of_clk_hw_simple_get, &fixed->hw);
+       if (ret)
+               return ret;
+
+       /*
+        * Roll a devm action because the clock provider is the child node, but
+        * the child node is not actually a device.
+        */
+       return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
+}
+
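+/*
+ * Illustrative DT shape for the pipe clock registered above (node name and
+ * property values are assumptions based on common QMP bindings):
+ *
+ *	usb-phy@88e8000 {
+ *		...
+ *		#clock-cells = <0>;
+ *		clock-output-names = "usb3_phy_pipe_clk_src";
+ *	};
+ */
+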
+#if IS_ENABLED(CONFIG_TYPEC)
+static int qmp_usbc_typec_switch_set(struct typec_switch_dev *sw,
+                                     enum typec_orientation orientation)
+{
+       struct qmp_usbc *qmp = typec_switch_get_drvdata(sw);
+
+       if (orientation == qmp->orientation || orientation == TYPEC_ORIENTATION_NONE)
+               return 0;
+
+       mutex_lock(&qmp->phy_mutex);
+       qmp->orientation = orientation;
+
+       if (qmp->usb_init_count) {
+               qmp_usbc_power_off(qmp->phy);
+               qmp_usbc_exit(qmp->phy);
+
+               qmp_usbc_init(qmp->phy);
+               qmp_usbc_power_on(qmp->phy);
+       }
+
+       mutex_unlock(&qmp->phy_mutex);
+
+       return 0;
+}
+
+static void qmp_usbc_typec_unregister(void *data)
+{
+       struct qmp_usbc *qmp = data;
+
+       typec_switch_unregister(qmp->sw);
+}
+
+static int qmp_usbc_typec_switch_register(struct qmp_usbc *qmp)
+{
+       struct typec_switch_desc sw_desc = {};
+       struct device *dev = qmp->dev;
+
+       sw_desc.drvdata = qmp;
+       sw_desc.fwnode = dev->fwnode;
+       sw_desc.set = qmp_usbc_typec_switch_set;
+       qmp->sw = typec_switch_register(dev, &sw_desc);
+       if (IS_ERR(qmp->sw)) {
+               dev_err(dev, "Unable to register typec switch: %pe\n", qmp->sw);
+               return PTR_ERR(qmp->sw);
+       }
+
+       return devm_add_action_or_reset(dev, qmp_usbc_typec_unregister, qmp);
+}
+#else
+static int qmp_usbc_typec_switch_register(struct qmp_usbc *qmp)
+{
+       return 0;
+}
+#endif
+
+static int qmp_usbc_parse_dt_legacy(struct qmp_usbc *qmp, struct device_node *np)
+{
+       struct platform_device *pdev = to_platform_device(qmp->dev);
+       struct device *dev = qmp->dev;
+       int ret;
+
+       qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(qmp->serdes))
+               return PTR_ERR(qmp->serdes);
+
+       /*
+        * Get memory resources for the PHY:
+        * Resources are indexed as: tx -> 0, rx -> 1, pcs -> 2.
+        * For dual-lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5.
+        * For single-lane PHYs: pcs_misc (optional) -> 3.
+        */
+       qmp->tx = devm_of_iomap(dev, np, 0, NULL);
+       if (IS_ERR(qmp->tx))
+               return PTR_ERR(qmp->tx);
+
+       qmp->rx = devm_of_iomap(dev, np, 1, NULL);
+       if (IS_ERR(qmp->rx))
+               return PTR_ERR(qmp->rx);
+
+       qmp->pcs = devm_of_iomap(dev, np, 2, NULL);
+       if (IS_ERR(qmp->pcs))
+               return PTR_ERR(qmp->pcs);
+
+       qmp->tx2 = devm_of_iomap(dev, np, 3, NULL);
+       if (IS_ERR(qmp->tx2))
+               return PTR_ERR(qmp->tx2);
+
+       qmp->rx2 = devm_of_iomap(dev, np, 4, NULL);
+       if (IS_ERR(qmp->rx2))
+               return PTR_ERR(qmp->rx2);
+
+       qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
+       if (IS_ERR(qmp->pcs_misc)) {
+               dev_vdbg(dev, "PHY pcs_misc-reg not used\n");
+               qmp->pcs_misc = NULL;
+       }
+
+       qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL);
+       if (IS_ERR(qmp->pipe_clk)) {
+               return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk),
+                                    "failed to get pipe clock\n");
+       }
+
+       ret = devm_clk_bulk_get_all(qmp->dev, &qmp->clks);
+       if (ret < 0)
+               return ret;
+
+       qmp->num_clks = ret;
+
+       ret = qmp_usbc_reset_init(qmp, usb3phy_legacy_reset_l,
+                                ARRAY_SIZE(usb3phy_legacy_reset_l));
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int qmp_usbc_parse_dt(struct qmp_usbc *qmp)
+{
+       struct platform_device *pdev = to_platform_device(qmp->dev);
+       const struct qmp_phy_cfg *cfg = qmp->cfg;
+       const struct qmp_usbc_offsets *offs = cfg->offsets;
+       struct device *dev = qmp->dev;
+       void __iomem *base;
+       int ret;
+
+       if (!offs)
+               return -EINVAL;
+
+       base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
+
+       qmp->serdes = base + offs->serdes;
+       qmp->pcs = base + offs->pcs;
+       if (offs->pcs_misc)
+               qmp->pcs_misc = base + offs->pcs_misc;
+       qmp->tx = base + offs->tx;
+       qmp->rx = base + offs->rx;
+
+       qmp->tx2 = base + offs->tx2;
+       qmp->rx2 = base + offs->rx2;
+
+       ret = qmp_usbc_clk_init(qmp);
+       if (ret)
+               return ret;
+
+       qmp->pipe_clk = devm_clk_get(dev, "pipe");
+       if (IS_ERR(qmp->pipe_clk)) {
+               return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk),
+                                    "failed to get pipe clock\n");
+       }
+
+       ret = qmp_usbc_reset_init(qmp, usb3phy_reset_l,
+                                ARRAY_SIZE(usb3phy_reset_l));
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+static int qmp_usbc_parse_vls_clamp(struct qmp_usbc *qmp)
+{
+       struct of_phandle_args tcsr_args;
+       struct device *dev = qmp->dev;
+       int ret;
+
+       /* For backwards compatibility, ignore a missing property */
+       ret = of_parse_phandle_with_fixed_args(dev->of_node, "qcom,tcsr-reg", 1, 0,
+                                              &tcsr_args);
+       if (ret == -ENOENT)
+               return 0;
+       else if (ret < 0)
+               return dev_err_probe(dev, ret, "Failed to parse qcom,tcsr-reg\n");
+
+       qmp->tcsr_map = syscon_node_to_regmap(tcsr_args.np);
+       of_node_put(tcsr_args.np);
+       if (IS_ERR(qmp->tcsr_map))
+               return PTR_ERR(qmp->tcsr_map);
+
+       qmp->vls_clamp_reg = tcsr_args.args[0];
+
+       return 0;
+}
+
+static int qmp_usbc_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct phy_provider *phy_provider;
+       struct device_node *np;
+       struct qmp_usbc *qmp;
+       int ret;
+
+       qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL);
+       if (!qmp)
+               return -ENOMEM;
+
+       qmp->dev = dev;
+
+       qmp->orientation = TYPEC_ORIENTATION_NORMAL;
+
+       qmp->cfg = of_device_get_match_data(dev);
+       if (!qmp->cfg)
+               return -EINVAL;
+
+       mutex_init(&qmp->phy_mutex);
+
+       ret = qmp_usbc_vreg_init(qmp);
+       if (ret)
+               return ret;
+
+       ret = qmp_usbc_typec_switch_register(qmp);
+       if (ret)
+               return ret;
+
+       ret = qmp_usbc_parse_vls_clamp(qmp);
+       if (ret)
+               return ret;
+
+       /* Check for legacy binding with child node. */
+       np = of_get_child_by_name(dev->of_node, "phy");
+       if (np) {
+               ret = qmp_usbc_parse_dt_legacy(qmp, np);
+       } else {
+               np = of_node_get(dev->of_node);
+               ret = qmp_usbc_parse_dt(qmp);
+       }
+       if (ret)
+               goto err_node_put;
+
+       pm_runtime_set_active(dev);
+       ret = devm_pm_runtime_enable(dev);
+       if (ret)
+               goto err_node_put;
+       /*
+        * Prevent runtime PM from being enabled by default. Users can
+        * enable it via power/control in sysfs.
+        */
+       pm_runtime_forbid(dev);
+
+       ret = phy_pipe_clk_register(qmp, np);
+       if (ret)
+               goto err_node_put;
+
+       qmp->phy = devm_phy_create(dev, np, &qmp_usbc_phy_ops);
+       if (IS_ERR(qmp->phy)) {
+               ret = PTR_ERR(qmp->phy);
+               dev_err(dev, "failed to create PHY: %d\n", ret);
+               goto err_node_put;
+       }
+
+       phy_set_drvdata(qmp->phy, qmp);
+
+       of_node_put(np);
+
+       phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+
+       return PTR_ERR_OR_ZERO(phy_provider);
+
+err_node_put:
+       of_node_put(np);
+       return ret;
+}
+
+static const struct of_device_id qmp_usbc_of_match_table[] = {
+       {
+               .compatible = "qcom,msm8998-qmp-usb3-phy",
+               .data = &msm8998_usb3phy_cfg,
+       }, {
+               .compatible = "qcom,qcm2290-qmp-usb3-phy",
+               .data = &qcm2290_usb3phy_cfg,
+       }, {
+               .compatible = "qcom,sdm660-qmp-usb3-phy",
+               .data = &sdm660_usb3phy_cfg,
+       }, {
+               .compatible = "qcom,sm6115-qmp-usb3-phy",
+               .data = &qcm2290_usb3phy_cfg,
+       },
+       { },
+};
+MODULE_DEVICE_TABLE(of, qmp_usbc_of_match_table);
+
+static struct platform_driver qmp_usbc_driver = {
+       .probe          = qmp_usbc_probe,
+       .driver = {
+               .name   = "qcom-qmp-usbc-phy",
+               .pm     = &qmp_usbc_pm_ops,
+               .of_match_table = qmp_usbc_of_match_table,
+       },
+};
+
+module_platform_driver(qmp_usbc_driver);
+
+MODULE_AUTHOR("Vivek Gautam <vivek.gautam@codeaurora.org>");
+MODULE_DESCRIPTION("Qualcomm QMP USB-C PHY driver");
+MODULE_LICENSE("GPL");
index 6923496cbfee21c2bef6d1fa342254f806dc26fd..d10b8f653c4b2395250917ce301c3aba003b71ad 100644 (file)
 
 #include "phy-qcom-qmp-pcs-v7.h"
 
-/* Only for QMP V3 & V4 PHY - DP COM registers */
-#define QPHY_V3_DP_COM_PHY_MODE_CTRL                   0x00
-#define QPHY_V3_DP_COM_SW_RESET                                0x04
-#define QPHY_V3_DP_COM_POWER_DOWN_CTRL                 0x08
-#define QPHY_V3_DP_COM_SWI_CTRL                                0x0c
-#define QPHY_V3_DP_COM_TYPEC_CTRL                      0x10
-#define QPHY_V3_DP_COM_TYPEC_PWRDN_CTRL                        0x14
-#define QPHY_V3_DP_COM_RESET_OVRD_CTRL                 0x1c
-
-/* QSERDES V3 COM bits */
-# define QSERDES_V3_COM_BIAS_EN                                0x0001
-# define QSERDES_V3_COM_BIAS_EN_MUX                    0x0002
-# define QSERDES_V3_COM_CLKBUF_R_EN                    0x0004
-# define QSERDES_V3_COM_CLKBUF_L_EN                    0x0008
-# define QSERDES_V3_COM_EN_SYSCLK_TX_SEL               0x0010
-# define QSERDES_V3_COM_CLKBUF_RX_DRIVE_L              0x0020
-# define QSERDES_V3_COM_CLKBUF_RX_DRIVE_R              0x0040
-
-/* QSERDES V3 TX bits */
-# define DP_PHY_TXn_TX_EMP_POST1_LVL_MASK              0x001f
-# define DP_PHY_TXn_TX_EMP_POST1_LVL_MUX_EN            0x0020
-# define DP_PHY_TXn_TX_DRV_LVL_MASK                    0x001f
-# define DP_PHY_TXn_TX_DRV_LVL_MUX_EN                  0x0020
-
-/* QMP PHY - DP PHY registers */
-#define QSERDES_DP_PHY_REVISION_ID0                    0x000
-#define QSERDES_DP_PHY_REVISION_ID1                    0x004
-#define QSERDES_DP_PHY_REVISION_ID2                    0x008
-#define QSERDES_DP_PHY_REVISION_ID3                    0x00c
-#define QSERDES_DP_PHY_CFG                             0x010
-#define QSERDES_DP_PHY_PD_CTL                          0x018
-# define DP_PHY_PD_CTL_PWRDN                           0x001
-# define DP_PHY_PD_CTL_PSR_PWRDN                       0x002
-# define DP_PHY_PD_CTL_AUX_PWRDN                       0x004
-# define DP_PHY_PD_CTL_LANE_0_1_PWRDN                  0x008
-# define DP_PHY_PD_CTL_LANE_2_3_PWRDN                  0x010
-# define DP_PHY_PD_CTL_PLL_PWRDN                       0x020
-# define DP_PHY_PD_CTL_DP_CLAMP_EN                     0x040
-#define QSERDES_DP_PHY_MODE                            0x01c
-#define QSERDES_DP_PHY_AUX_CFG0                                0x020
-#define QSERDES_DP_PHY_AUX_CFG1                                0x024
-#define QSERDES_DP_PHY_AUX_CFG2                                0x028
-#define QSERDES_DP_PHY_AUX_CFG3                                0x02c
-#define QSERDES_DP_PHY_AUX_CFG4                                0x030
-#define QSERDES_DP_PHY_AUX_CFG5                                0x034
-#define QSERDES_DP_PHY_AUX_CFG6                                0x038
-#define QSERDES_DP_PHY_AUX_CFG7                                0x03c
-#define QSERDES_DP_PHY_AUX_CFG8                                0x040
-#define QSERDES_DP_PHY_AUX_CFG9                                0x044
-
-/* Only for QMP V3 PHY - DP PHY registers */
-#define QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK           0x048
-# define PHY_AUX_STOP_ERR_MASK                         0x01
-# define PHY_AUX_DEC_ERR_MASK                          0x02
-# define PHY_AUX_SYNC_ERR_MASK                         0x04
-# define PHY_AUX_ALIGN_ERR_MASK                                0x08
-# define PHY_AUX_REQ_ERR_MASK                          0x10
-
-#define QSERDES_V3_DP_PHY_AUX_INTERRUPT_CLEAR          0x04c
-#define QSERDES_V3_DP_PHY_AUX_BIST_CFG                 0x050
-
-#define QSERDES_V3_DP_PHY_VCO_DIV                      0x064
-#define QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL             0x06c
-#define QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL             0x088
-
-#define QSERDES_V3_DP_PHY_SPARE0                       0x0ac
-#define DP_PHY_SPARE0_MASK                             0x0f
-#define DP_PHY_SPARE0_ORIENTATION_INFO_SHIFT           0x04(0x0004)
-
-#define QSERDES_V3_DP_PHY_STATUS                       0x0c0
-
-/* Only for QMP V4 PHY - DP PHY registers */
-#define QSERDES_V4_DP_PHY_CFG_1                                0x014
-#define QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK           0x054
-#define QSERDES_V4_DP_PHY_AUX_INTERRUPT_CLEAR          0x058
-#define QSERDES_V4_DP_PHY_VCO_DIV                      0x070
-#define QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL             0x078
-#define QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL             0x09c
-#define QSERDES_V4_DP_PHY_SPARE0                       0x0c8
-#define QSERDES_V4_DP_PHY_AUX_INTERRUPT_STATUS         0x0d8
-#define QSERDES_V4_DP_PHY_STATUS                       0x0dc
-
-#define QSERDES_V5_DP_PHY_STATUS                       0x0dc
-
-/* Only for QMP V6 PHY - DP PHY registers */
-#define QSERDES_V6_DP_PHY_AUX_INTERRUPT_STATUS         0x0e0
-#define QSERDES_V6_DP_PHY_STATUS                       0x0e4
+/* QPHY_SW_RESET bit */
+#define SW_RESET                               BIT(0)
+/* QPHY_POWER_DOWN_CONTROL */
+#define SW_PWRDN                               BIT(0)
+#define REFCLK_DRV_DSBL                                BIT(1) /* PCIe */
+
+/* QPHY_START_CONTROL bits */
+#define SERDES_START                           BIT(0)
+#define PCS_START                              BIT(1)
+
+/* QPHY_PCS_STATUS bit */
+#define PHYSTATUS                              BIT(6)
+#define PHYSTATUS_4_20                         BIT(7)
+
+/* QPHY_PCS_AUTONOMOUS_MODE_CTRL register bits */
+#define ARCVR_DTCT_EN                          BIT(0)
+#define ALFPS_DTCT_EN                          BIT(1)
+#define ARCVR_DTCT_EVENT_SEL                   BIT(4)
+
+/* QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR register bits */
+#define IRQ_CLEAR                              BIT(0)
+
+/* QPHY_PCS_MISC_CLAMP_ENABLE register bits */
+#define CLAMP_EN                               BIT(0) /* enables i/o clamp_n */
 
 #endif
index 03dc753f0de1f9c3898ba8a7bcefd4ee0b7b0e21..5b1c82459c126fe3a046a89601483d8c73090fd3 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
-#define QSERDES_QMP_PLL                                        0x0
-#define QSERDES_COM_BIN_VCOCAL_CMP_CODE1_MODE0         (QSERDES_QMP_PLL + 0x1ac)
-#define QSERDES_COM_BIN_VCOCAL_CMP_CODE2_MODE0         (QSERDES_QMP_PLL + 0x1b0)
-#define QSERDES_COM_BIN_VCOCAL_HSCLK_SEL               (QSERDES_QMP_PLL + 0x1bc)
-#define QSERDES_COM_CORE_CLK_EN                                (QSERDES_QMP_PLL + 0x174)
-#define QSERDES_COM_CORECLK_DIV_MODE0                  (QSERDES_QMP_PLL + 0x168)
-#define QSERDES_COM_CP_CTRL_MODE0                      (QSERDES_QMP_PLL + 0x74)
-#define QSERDES_COM_DEC_START_MODE0                    (QSERDES_QMP_PLL + 0xbc)
-#define QSERDES_COM_DIV_FRAC_START1_MODE0              (QSERDES_QMP_PLL + 0xcc)
-#define QSERDES_COM_DIV_FRAC_START2_MODE0              (QSERDES_QMP_PLL + 0xd0)
-#define QSERDES_COM_DIV_FRAC_START3_MODE0              (QSERDES_QMP_PLL + 0xd4)
-#define QSERDES_COM_HSCLK_HS_SWITCH_SEL                        (QSERDES_QMP_PLL + 0x15c)
-#define QSERDES_COM_HSCLK_SEL                          (QSERDES_QMP_PLL + 0x158)
-#define QSERDES_COM_LOCK_CMP1_MODE0                    (QSERDES_QMP_PLL + 0xac)
-#define QSERDES_COM_LOCK_CMP2_MODE0                    (QSERDES_QMP_PLL + 0xb0)
-#define QSERDES_COM_PLL_CCTRL_MODE0                    (QSERDES_QMP_PLL + 0x84)
-#define QSERDES_COM_PLL_IVCO                           (QSERDES_QMP_PLL + 0x58)
-#define QSERDES_COM_PLL_RCTRL_MODE0                    (QSERDES_QMP_PLL + 0x7c)
-#define QSERDES_COM_SYSCLK_EN_SEL                      (QSERDES_QMP_PLL + 0x94)
-#define QSERDES_COM_VCO_TUNE1_MODE0                    (QSERDES_QMP_PLL + 0x110)
-#define QSERDES_COM_VCO_TUNE2_MODE0                    (QSERDES_QMP_PLL + 0x114)
-#define QSERDES_COM_VCO_TUNE_INITVAL2                  (QSERDES_QMP_PLL + 0x124)
-#define QSERDES_COM_C_READY_STATUS                     (QSERDES_QMP_PLL + 0x178)
-#define QSERDES_COM_CMN_STATUS                         (QSERDES_QMP_PLL + 0x140)
+#include "phy-qcom-qmp-pcs-sgmii.h"
+#include "phy-qcom-qmp-qserdes-com-v5.h"
+#include "phy-qcom-qmp-qserdes-txrx-v5.h"
 
+#define QSERDES_QMP_PLL                                        0x0
 #define QSERDES_RX                                     0x600
-#define QSERDES_RX_UCDR_FO_GAIN                                (QSERDES_RX + 0x8)
-#define QSERDES_RX_UCDR_SO_GAIN                                (QSERDES_RX + 0x14)
-#define QSERDES_RX_UCDR_FASTLOCK_FO_GAIN               (QSERDES_RX + 0x30)
-#define QSERDES_RX_UCDR_SO_SATURATION_AND_ENABLE       (QSERDES_RX + 0x34)
-#define QSERDES_RX_UCDR_FASTLOCK_COUNT_LOW             (QSERDES_RX + 0x3c)
-#define QSERDES_RX_UCDR_FASTLOCK_COUNT_HIGH            (QSERDES_RX + 0x40)
-#define QSERDES_RX_UCDR_PI_CONTROLS                    (QSERDES_RX + 0x44)
-#define QSERDES_RX_UCDR_PI_CTRL2                       (QSERDES_RX + 0x48)
-#define QSERDES_RX_RX_TERM_BW                          (QSERDES_RX + 0x80)
-#define QSERDES_RX_VGA_CAL_CNTRL2                      (QSERDES_RX + 0xd8)
-#define QSERDES_RX_GM_CAL                              (QSERDES_RX + 0xdc)
-#define QSERDES_RX_RX_EQU_ADAPTOR_CNTRL1               (QSERDES_RX + 0xe8)
-#define QSERDES_RX_RX_EQU_ADAPTOR_CNTRL2               (QSERDES_RX + 0xec)
-#define QSERDES_RX_RX_EQU_ADAPTOR_CNTRL3               (QSERDES_RX + 0xf0)
-#define QSERDES_RX_RX_EQU_ADAPTOR_CNTRL4               (QSERDES_RX + 0xf4)
-#define QSERDES_RX_RX_IDAC_TSETTLE_LOW                 (QSERDES_RX + 0xf8)
-#define QSERDES_RX_RX_IDAC_TSETTLE_HIGH                        (QSERDES_RX + 0xfc)
-#define QSERDES_RX_RX_IDAC_MEASURE_TIME                        (QSERDES_RX + 0x100)
-#define QSERDES_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1         (QSERDES_RX + 0x110)
-#define QSERDES_RX_RX_OFFSET_ADAPTOR_CNTRL2            (QSERDES_RX + 0x114)
-#define QSERDES_RX_SIGDET_CNTRL                                (QSERDES_RX + 0x11c)
-#define QSERDES_RX_SIGDET_DEGLITCH_CNTRL               (QSERDES_RX + 0x124)
-#define QSERDES_RX_RX_BAND                             (QSERDES_RX + 0x128)
-#define QSERDES_RX_RX_MODE_00_LOW                      (QSERDES_RX + 0x15c)
-#define QSERDES_RX_RX_MODE_00_HIGH                     (QSERDES_RX + 0x160)
-#define QSERDES_RX_RX_MODE_00_HIGH2                    (QSERDES_RX + 0x164)
-#define QSERDES_RX_RX_MODE_00_HIGH3                    (QSERDES_RX + 0x168)
-#define QSERDES_RX_RX_MODE_00_HIGH4                    (QSERDES_RX + 0x16c)
-#define QSERDES_RX_RX_MODE_01_LOW                      (QSERDES_RX + 0x170)
-#define QSERDES_RX_RX_MODE_01_HIGH                     (QSERDES_RX + 0x174)
-#define QSERDES_RX_RX_MODE_01_HIGH2                    (QSERDES_RX + 0x178)
-#define QSERDES_RX_RX_MODE_01_HIGH3                    (QSERDES_RX + 0x17c)
-#define QSERDES_RX_RX_MODE_01_HIGH4                    (QSERDES_RX + 0x180)
-#define QSERDES_RX_RX_MODE_10_LOW                      (QSERDES_RX + 0x184)
-#define QSERDES_RX_RX_MODE_10_HIGH                     (QSERDES_RX + 0x188)
-#define QSERDES_RX_RX_MODE_10_HIGH2                    (QSERDES_RX + 0x18c)
-#define QSERDES_RX_RX_MODE_10_HIGH3                    (QSERDES_RX + 0x190)
-#define QSERDES_RX_RX_MODE_10_HIGH4                    (QSERDES_RX + 0x194)
-#define QSERDES_RX_DCC_CTRL1                           (QSERDES_RX + 0x1a8)
-
 #define QSERDES_TX                                     0x400
-#define QSERDES_TX_TX_BAND                             (QSERDES_TX + 0x24)
-#define QSERDES_TX_SLEW_CNTL                           (QSERDES_TX + 0x28)
-#define QSERDES_TX_RES_CODE_LANE_OFFSET_TX             (QSERDES_TX + 0x3c)
-#define QSERDES_TX_RES_CODE_LANE_OFFSET_RX             (QSERDES_TX + 0x40)
-#define QSERDES_TX_LANE_MODE_1                         (QSERDES_TX + 0x84)
-#define QSERDES_TX_LANE_MODE_3                         (QSERDES_TX + 0x8c)
-#define QSERDES_TX_RCV_DETECT_LVL_2                    (QSERDES_TX + 0xa4)
-#define QSERDES_TX_TRAN_DRVR_EMP_EN                    (QSERDES_TX + 0xc0)
-
-#define QSERDES_PCS                                    0xC00
-#define QSERDES_PCS_PHY_START                          (QSERDES_PCS + 0x0)
-#define QSERDES_PCS_POWER_DOWN_CONTROL                 (QSERDES_PCS + 0x4)
-#define QSERDES_PCS_SW_RESET                           (QSERDES_PCS + 0x8)
-#define QSERDES_PCS_LINE_RESET_TIME                    (QSERDES_PCS + 0xc)
-#define QSERDES_PCS_TX_LARGE_AMP_DRV_LVL               (QSERDES_PCS + 0x20)
-#define QSERDES_PCS_TX_SMALL_AMP_DRV_LVL               (QSERDES_PCS + 0x28)
-#define QSERDES_PCS_TX_MID_TERM_CTRL1                  (QSERDES_PCS + 0xd8)
-#define QSERDES_PCS_TX_MID_TERM_CTRL2                  (QSERDES_PCS + 0xdc)
-#define QSERDES_PCS_SGMII_MISC_CTRL8                   (QSERDES_PCS + 0x118)
-#define QSERDES_PCS_PCS_READY_STATUS                   (QSERDES_PCS + 0x94)
+#define QSERDES_PCS                                    0xc00
 
 #define QSERDES_COM_C_READY                            BIT(0)
 #define QSERDES_PCS_READY                              BIT(0)
@@ -112,178 +33,178 @@ struct qcom_dwmac_sgmii_phy_data {
 
 static void qcom_dwmac_sgmii_phy_init_1g(struct regmap *regmap)
 {
-       regmap_write(regmap, QSERDES_PCS_SW_RESET, 0x01);
-       regmap_write(regmap, QSERDES_PCS_POWER_DOWN_CONTROL, 0x01);
-
-       regmap_write(regmap, QSERDES_COM_PLL_IVCO, 0x0F);
-       regmap_write(regmap, QSERDES_COM_CP_CTRL_MODE0, 0x06);
-       regmap_write(regmap, QSERDES_COM_PLL_RCTRL_MODE0, 0x16);
-       regmap_write(regmap, QSERDES_COM_PLL_CCTRL_MODE0, 0x36);
-       regmap_write(regmap, QSERDES_COM_SYSCLK_EN_SEL, 0x1A);
-       regmap_write(regmap, QSERDES_COM_LOCK_CMP1_MODE0, 0x0A);
-       regmap_write(regmap, QSERDES_COM_LOCK_CMP2_MODE0, 0x1A);
-       regmap_write(regmap, QSERDES_COM_DEC_START_MODE0, 0x82);
-       regmap_write(regmap, QSERDES_COM_DIV_FRAC_START1_MODE0, 0x55);
-       regmap_write(regmap, QSERDES_COM_DIV_FRAC_START2_MODE0, 0x55);
-       regmap_write(regmap, QSERDES_COM_DIV_FRAC_START3_MODE0, 0x03);
-       regmap_write(regmap, QSERDES_COM_VCO_TUNE1_MODE0, 0x24);
-
-       regmap_write(regmap, QSERDES_COM_VCO_TUNE2_MODE0, 0x02);
-       regmap_write(regmap, QSERDES_COM_VCO_TUNE_INITVAL2, 0x00);
-       regmap_write(regmap, QSERDES_COM_HSCLK_SEL, 0x04);
-       regmap_write(regmap, QSERDES_COM_HSCLK_HS_SWITCH_SEL, 0x00);
-       regmap_write(regmap, QSERDES_COM_CORECLK_DIV_MODE0, 0x0A);
-       regmap_write(regmap, QSERDES_COM_CORE_CLK_EN, 0x00);
-       regmap_write(regmap, QSERDES_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xB9);
-       regmap_write(regmap, QSERDES_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1E);
-       regmap_write(regmap, QSERDES_COM_BIN_VCOCAL_HSCLK_SEL, 0x11);
-
-       regmap_write(regmap, QSERDES_TX_TX_BAND, 0x05);
-       regmap_write(regmap, QSERDES_TX_SLEW_CNTL, 0x0A);
-       regmap_write(regmap, QSERDES_TX_RES_CODE_LANE_OFFSET_TX, 0x09);
-       regmap_write(regmap, QSERDES_TX_RES_CODE_LANE_OFFSET_RX, 0x09);
-       regmap_write(regmap, QSERDES_TX_LANE_MODE_1, 0x05);
-       regmap_write(regmap, QSERDES_TX_LANE_MODE_3, 0x00);
-       regmap_write(regmap, QSERDES_TX_RCV_DETECT_LVL_2, 0x12);
-       regmap_write(regmap, QSERDES_TX_TRAN_DRVR_EMP_EN, 0x0C);
-
-       regmap_write(regmap, QSERDES_RX_UCDR_FO_GAIN, 0x0A);
-       regmap_write(regmap, QSERDES_RX_UCDR_SO_GAIN, 0x06);
-       regmap_write(regmap, QSERDES_RX_UCDR_FASTLOCK_FO_GAIN, 0x0A);
-       regmap_write(regmap, QSERDES_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7F);
-       regmap_write(regmap, QSERDES_RX_UCDR_FASTLOCK_COUNT_LOW, 0x00);
-       regmap_write(regmap, QSERDES_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x01);
-       regmap_write(regmap, QSERDES_RX_UCDR_PI_CONTROLS, 0x81);
-       regmap_write(regmap, QSERDES_RX_UCDR_PI_CTRL2, 0x80);
-       regmap_write(regmap, QSERDES_RX_RX_TERM_BW, 0x04);
-       regmap_write(regmap, QSERDES_RX_VGA_CAL_CNTRL2, 0x08);
-       regmap_write(regmap, QSERDES_RX_GM_CAL, 0x0F);
-       regmap_write(regmap, QSERDES_RX_RX_EQU_ADAPTOR_CNTRL1, 0x04);
-       regmap_write(regmap, QSERDES_RX_RX_EQU_ADAPTOR_CNTRL2, 0x00);
-       regmap_write(regmap, QSERDES_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4A);
-       regmap_write(regmap, QSERDES_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0A);
-       regmap_write(regmap, QSERDES_RX_RX_IDAC_TSETTLE_LOW, 0x80);
-       regmap_write(regmap, QSERDES_RX_RX_IDAC_TSETTLE_HIGH, 0x01);
-       regmap_write(regmap, QSERDES_RX_RX_IDAC_MEASURE_TIME, 0x20);
-       regmap_write(regmap, QSERDES_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x17);
-       regmap_write(regmap, QSERDES_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x00);
-       regmap_write(regmap, QSERDES_RX_SIGDET_CNTRL, 0x0F);
-       regmap_write(regmap, QSERDES_RX_SIGDET_DEGLITCH_CNTRL, 0x1E);
-       regmap_write(regmap, QSERDES_RX_RX_BAND, 0x05);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_00_LOW, 0xE0);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_00_HIGH, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_00_HIGH2, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_00_HIGH3, 0x09);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_00_HIGH4, 0xB1);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_01_LOW, 0xE0);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_01_HIGH, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_01_HIGH2, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_01_HIGH3, 0x09);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_01_HIGH4, 0xB1);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_10_LOW, 0xE0);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_10_HIGH, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_10_HIGH2, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_10_HIGH3, 0x3B);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_10_HIGH4, 0xB7);
-       regmap_write(regmap, QSERDES_RX_DCC_CTRL1, 0x0C);
-
-       regmap_write(regmap, QSERDES_PCS_LINE_RESET_TIME, 0x0C);
-       regmap_write(regmap, QSERDES_PCS_TX_LARGE_AMP_DRV_LVL, 0x1F);
-       regmap_write(regmap, QSERDES_PCS_TX_SMALL_AMP_DRV_LVL, 0x03);
-       regmap_write(regmap, QSERDES_PCS_TX_MID_TERM_CTRL1, 0x83);
-       regmap_write(regmap, QSERDES_PCS_TX_MID_TERM_CTRL2, 0x08);
-       regmap_write(regmap, QSERDES_PCS_SGMII_MISC_CTRL8, 0x0C);
-       regmap_write(regmap, QSERDES_PCS_SW_RESET, 0x00);
-
-       regmap_write(regmap, QSERDES_PCS_PHY_START, 0x01);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_SW_RESET, 0x01);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_POWER_DOWN_CONTROL, 0x01);
+
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_PLL_IVCO, 0x0F);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_CP_CTRL_MODE0, 0x06);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x16);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x36);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_SYSCLK_EN_SEL, 0x1A);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_LOCK_CMP1_MODE0, 0x0A);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x1A);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_DEC_START_MODE0, 0x82);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_DIV_FRAC_START1_MODE0, 0x55);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_DIV_FRAC_START2_MODE0, 0x55);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_DIV_FRAC_START3_MODE0, 0x03);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_VCO_TUNE1_MODE0, 0x24);
+
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_VCO_TUNE2_MODE0, 0x02);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_VCO_TUNE_INITVAL2, 0x00);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_HSCLK_SEL, 0x04);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_CORECLK_DIV_MODE0, 0x0A);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_CORE_CLK_EN, 0x00);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xB9);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1E);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL, 0x11);
+
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_TX_BAND, 0x05);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_SLEW_CNTL, 0x0A);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x09);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x09);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_LANE_MODE_1, 0x05);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_LANE_MODE_3, 0x00);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_RCV_DETECT_LVL_2, 0x12);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_TRAN_DRVR_EMP_EN, 0x0C);
+
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_FO_GAIN, 0x0A);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_SO_GAIN, 0x06);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN, 0x0A);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7F);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW, 0x00);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x01);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_PI_CONTROLS, 0x81);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_PI_CTRL2, 0x80);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_TERM_BW, 0x04);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x08);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_GM_CAL, 0x0F);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL1, 0x04);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2, 0x00);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4A);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0A);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_IDAC_TSETTLE_LOW, 0x80);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_IDAC_TSETTLE_HIGH, 0x01);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_IDAC_MEASURE_TIME, 0x20);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x17);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x00);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_SIGDET_CNTRL, 0x0F);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_SIGDET_DEGLITCH_CNTRL, 0x1E);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_BAND, 0x05);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_00_LOW, 0xE0);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_00_HIGH, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x09);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xB1);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_01_LOW, 0xE0);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_01_HIGH, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_01_HIGH2, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x09);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xB1);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_10_LOW, 0xE0);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_10_HIGH, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_10_HIGH2, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x3B);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_10_HIGH4, 0xB7);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_DCC_CTRL1, 0x0C);
+
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_LINE_RESET_TIME, 0x0C);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_TX_LARGE_AMP_DRV_LVL, 0x1F);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_TX_SMALL_AMP_DRV_LVL, 0x03);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_TX_MID_TERM_CTRL1, 0x83);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_TX_MID_TERM_CTRL2, 0x08);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_SGMII_MISC_CTRL8, 0x0C);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_SW_RESET, 0x00);
+
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_PHY_START, 0x01);
 }
 
 static void qcom_dwmac_sgmii_phy_init_2p5g(struct regmap *regmap)
 {
-       regmap_write(regmap, QSERDES_PCS_SW_RESET, 0x01);
-       regmap_write(regmap, QSERDES_PCS_POWER_DOWN_CONTROL, 0x01);
-
-       regmap_write(regmap, QSERDES_COM_PLL_IVCO, 0x0F);
-       regmap_write(regmap, QSERDES_COM_CP_CTRL_MODE0, 0x06);
-       regmap_write(regmap, QSERDES_COM_PLL_RCTRL_MODE0, 0x16);
-       regmap_write(regmap, QSERDES_COM_PLL_CCTRL_MODE0, 0x36);
-       regmap_write(regmap, QSERDES_COM_SYSCLK_EN_SEL, 0x1A);
-       regmap_write(regmap, QSERDES_COM_LOCK_CMP1_MODE0, 0x1A);
-       regmap_write(regmap, QSERDES_COM_LOCK_CMP2_MODE0, 0x41);
-       regmap_write(regmap, QSERDES_COM_DEC_START_MODE0, 0x7A);
-       regmap_write(regmap, QSERDES_COM_DIV_FRAC_START1_MODE0, 0x00);
-       regmap_write(regmap, QSERDES_COM_DIV_FRAC_START2_MODE0, 0x20);
-       regmap_write(regmap, QSERDES_COM_DIV_FRAC_START3_MODE0, 0x01);
-       regmap_write(regmap, QSERDES_COM_VCO_TUNE1_MODE0, 0xA1);
-
-       regmap_write(regmap, QSERDES_COM_VCO_TUNE2_MODE0, 0x02);
-       regmap_write(regmap, QSERDES_COM_VCO_TUNE_INITVAL2, 0x00);
-       regmap_write(regmap, QSERDES_COM_HSCLK_SEL, 0x03);
-       regmap_write(regmap, QSERDES_COM_HSCLK_HS_SWITCH_SEL, 0x00);
-       regmap_write(regmap, QSERDES_COM_CORECLK_DIV_MODE0, 0x05);
-       regmap_write(regmap, QSERDES_COM_CORE_CLK_EN, 0x00);
-       regmap_write(regmap, QSERDES_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xCD);
-       regmap_write(regmap, QSERDES_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1C);
-       regmap_write(regmap, QSERDES_COM_BIN_VCOCAL_HSCLK_SEL, 0x11);
-
-       regmap_write(regmap, QSERDES_TX_TX_BAND, 0x04);
-       regmap_write(regmap, QSERDES_TX_SLEW_CNTL, 0x0A);
-       regmap_write(regmap, QSERDES_TX_RES_CODE_LANE_OFFSET_TX, 0x09);
-       regmap_write(regmap, QSERDES_TX_RES_CODE_LANE_OFFSET_RX, 0x02);
-       regmap_write(regmap, QSERDES_TX_LANE_MODE_1, 0x05);
-       regmap_write(regmap, QSERDES_TX_LANE_MODE_3, 0x00);
-       regmap_write(regmap, QSERDES_TX_RCV_DETECT_LVL_2, 0x12);
-       regmap_write(regmap, QSERDES_TX_TRAN_DRVR_EMP_EN, 0x0C);
-
-       regmap_write(regmap, QSERDES_RX_UCDR_FO_GAIN, 0x0A);
-       regmap_write(regmap, QSERDES_RX_UCDR_SO_GAIN, 0x06);
-       regmap_write(regmap, QSERDES_RX_UCDR_FASTLOCK_FO_GAIN, 0x0A);
-       regmap_write(regmap, QSERDES_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7F);
-       regmap_write(regmap, QSERDES_RX_UCDR_FASTLOCK_COUNT_LOW, 0x00);
-       regmap_write(regmap, QSERDES_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x01);
-       regmap_write(regmap, QSERDES_RX_UCDR_PI_CONTROLS, 0x81);
-       regmap_write(regmap, QSERDES_RX_UCDR_PI_CTRL2, 0x80);
-       regmap_write(regmap, QSERDES_RX_RX_TERM_BW, 0x00);
-       regmap_write(regmap, QSERDES_RX_VGA_CAL_CNTRL2, 0x08);
-       regmap_write(regmap, QSERDES_RX_GM_CAL, 0x0F);
-       regmap_write(regmap, QSERDES_RX_RX_EQU_ADAPTOR_CNTRL1, 0x04);
-       regmap_write(regmap, QSERDES_RX_RX_EQU_ADAPTOR_CNTRL2, 0x00);
-       regmap_write(regmap, QSERDES_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4A);
-       regmap_write(regmap, QSERDES_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0A);
-       regmap_write(regmap, QSERDES_RX_RX_IDAC_TSETTLE_LOW, 0x80);
-       regmap_write(regmap, QSERDES_RX_RX_IDAC_TSETTLE_HIGH, 0x01);
-       regmap_write(regmap, QSERDES_RX_RX_IDAC_MEASURE_TIME, 0x20);
-       regmap_write(regmap, QSERDES_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x17);
-       regmap_write(regmap, QSERDES_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x00);
-       regmap_write(regmap, QSERDES_RX_SIGDET_CNTRL, 0x0F);
-       regmap_write(regmap, QSERDES_RX_SIGDET_DEGLITCH_CNTRL, 0x1E);
-       regmap_write(regmap, QSERDES_RX_RX_BAND, 0x18);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_00_LOW, 0x18);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_00_HIGH, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_00_HIGH2, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_00_HIGH3, 0x0C);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_00_HIGH4, 0xB8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_01_LOW, 0xE0);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_01_HIGH, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_01_HIGH2, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_01_HIGH3, 0x09);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_01_HIGH4, 0xB1);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_10_LOW, 0xE0);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_10_HIGH, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_10_HIGH2, 0xC8);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_10_HIGH3, 0x3B);
-       regmap_write(regmap, QSERDES_RX_RX_MODE_10_HIGH4, 0xB7);
-       regmap_write(regmap, QSERDES_RX_DCC_CTRL1, 0x0C);
-
-       regmap_write(regmap, QSERDES_PCS_LINE_RESET_TIME, 0x0C);
-       regmap_write(regmap, QSERDES_PCS_TX_LARGE_AMP_DRV_LVL, 0x1F);
-       regmap_write(regmap, QSERDES_PCS_TX_SMALL_AMP_DRV_LVL, 0x03);
-       regmap_write(regmap, QSERDES_PCS_TX_MID_TERM_CTRL1, 0x83);
-       regmap_write(regmap, QSERDES_PCS_TX_MID_TERM_CTRL2, 0x08);
-       regmap_write(regmap, QSERDES_PCS_SGMII_MISC_CTRL8, 0x8C);
-       regmap_write(regmap, QSERDES_PCS_SW_RESET, 0x00);
-
-       regmap_write(regmap, QSERDES_PCS_PHY_START, 0x01);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_SW_RESET, 0x01);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_POWER_DOWN_CONTROL, 0x01);
+
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_PLL_IVCO, 0x0F);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_CP_CTRL_MODE0, 0x06);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x16);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x36);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_SYSCLK_EN_SEL, 0x1A);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_LOCK_CMP1_MODE0, 0x1A);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x41);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_DEC_START_MODE0, 0x7A);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_DIV_FRAC_START1_MODE0, 0x00);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_DIV_FRAC_START2_MODE0, 0x20);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_DIV_FRAC_START3_MODE0, 0x01);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_VCO_TUNE1_MODE0, 0xA1);
+
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_VCO_TUNE2_MODE0, 0x02);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_VCO_TUNE_INITVAL2, 0x00);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_HSCLK_SEL, 0x03);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_CORECLK_DIV_MODE0, 0x05);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_CORE_CLK_EN, 0x00);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xCD);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1C);
+       regmap_write(regmap, QSERDES_QMP_PLL + QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL, 0x11);
+
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_TX_BAND, 0x04);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_SLEW_CNTL, 0x0A);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x09);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x02);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_LANE_MODE_1, 0x05);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_LANE_MODE_3, 0x00);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_RCV_DETECT_LVL_2, 0x12);
+       regmap_write(regmap, QSERDES_TX + QSERDES_V5_TX_TRAN_DRVR_EMP_EN, 0x0C);
+
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_FO_GAIN, 0x0A);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_SO_GAIN, 0x06);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN, 0x0A);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7F);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW, 0x00);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x01);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_PI_CONTROLS, 0x81);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_UCDR_PI_CTRL2, 0x80);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_TERM_BW, 0x00);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x08);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_GM_CAL, 0x0F);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL1, 0x04);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2, 0x00);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4A);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0A);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_IDAC_TSETTLE_LOW, 0x80);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_IDAC_TSETTLE_HIGH, 0x01);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_IDAC_MEASURE_TIME, 0x20);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x17);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x00);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_SIGDET_CNTRL, 0x0F);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_SIGDET_DEGLITCH_CNTRL, 0x1E);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_BAND, 0x18);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_00_LOW, 0x18);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_00_HIGH, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x0C);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xB8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_01_LOW, 0xE0);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_01_HIGH, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_01_HIGH2, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x09);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xB1);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_10_LOW, 0xE0);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_10_HIGH, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_10_HIGH2, 0xC8);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x3B);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_RX_MODE_10_HIGH4, 0xB7);
+       regmap_write(regmap, QSERDES_RX + QSERDES_V5_RX_DCC_CTRL1, 0x0C);
+
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_LINE_RESET_TIME, 0x0C);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_TX_LARGE_AMP_DRV_LVL, 0x1F);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_TX_SMALL_AMP_DRV_LVL, 0x03);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_TX_MID_TERM_CTRL1, 0x83);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_TX_MID_TERM_CTRL2, 0x08);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_SGMII_MISC_CTRL8, 0x8C);
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_SW_RESET, 0x00);
+
+       regmap_write(regmap, QSERDES_PCS + QPHY_PCS_PHY_START, 0x01);
 }
 
 static inline int
@@ -313,28 +234,28 @@ static int qcom_dwmac_sgmii_phy_calibrate(struct phy *phy)
        }
 
        if (qcom_dwmac_sgmii_phy_poll_status(data->regmap,
-                                            QSERDES_COM_C_READY_STATUS,
+                                            QSERDES_QMP_PLL + QSERDES_V5_COM_C_READY_STATUS,
                                             QSERDES_COM_C_READY)) {
                dev_err(dev, "QSERDES_COM_C_READY_STATUS timed-out");
                return -ETIMEDOUT;
        }
 
        if (qcom_dwmac_sgmii_phy_poll_status(data->regmap,
-                                            QSERDES_PCS_PCS_READY_STATUS,
+                                            QSERDES_PCS + QPHY_PCS_PCS_READY_STATUS,
                                             QSERDES_PCS_READY)) {
                dev_err(dev, "PCS_READY timed-out");
                return -ETIMEDOUT;
        }
 
        if (qcom_dwmac_sgmii_phy_poll_status(data->regmap,
-                                            QSERDES_PCS_PCS_READY_STATUS,
+                                            QSERDES_PCS + QPHY_PCS_PCS_READY_STATUS,
                                             QSERDES_PCS_SGMIIPHY_READY)) {
                dev_err(dev, "SGMIIPHY_READY timed-out");
                return -ETIMEDOUT;
        }
 
        if (qcom_dwmac_sgmii_phy_poll_status(data->regmap,
-                                            QSERDES_COM_CMN_STATUS,
+                                            QSERDES_QMP_PLL + QSERDES_V5_COM_CMN_STATUS,
                                             QSERDES_COM_C_PLL_LOCKED)) {
                dev_err(dev, "PLL Lock Status timed-out");
                return -ETIMEDOUT;
@@ -354,11 +275,11 @@ static int qcom_dwmac_sgmii_phy_power_off(struct phy *phy)
 {
        struct qcom_dwmac_sgmii_phy_data *data = phy_get_drvdata(phy);
 
-       regmap_write(data->regmap, QSERDES_PCS_TX_MID_TERM_CTRL2, 0x08);
-       regmap_write(data->regmap, QSERDES_PCS_SW_RESET, 0x01);
+       regmap_write(data->regmap, QSERDES_PCS + QPHY_PCS_TX_MID_TERM_CTRL2, 0x08);
+       regmap_write(data->regmap, QSERDES_PCS + QPHY_PCS_SW_RESET, 0x01);
        udelay(100);
-       regmap_write(data->regmap, QSERDES_PCS_SW_RESET, 0x00);
-       regmap_write(data->regmap, QSERDES_PCS_PHY_START, 0x01);
+       regmap_write(data->regmap, QSERDES_PCS + QPHY_PCS_SW_RESET, 0x00);
+       regmap_write(data->regmap, QSERDES_PCS + QPHY_PCS_PHY_START, 0x01);
 
        clk_disable_unprepare(data->refclk);
 
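The qcom_dwmac_sgmii_phy_poll_status() helper called throughout the calibrate
path above is elided from this hunk. A minimal sketch of what such a regmap
poll typically looks like (the sleep and timeout values here are assumptions,
not necessarily the driver's actual numbers):

	static inline int
	qcom_dwmac_sgmii_phy_poll_status(struct regmap *regmap, unsigned int reg,
					 unsigned int bit)
	{
		unsigned int val;

		/* poll until the status bit is set, or give up after ~750 ms */
		return regmap_read_poll_timeout(regmap, reg, val,
						val & bit, 1500, 750000);
	}
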
index 2f876f158e1df6acd51d20bda470fcc783efcdf8..a591ad95347ccf69a747b337bcbf01cb8eb7dc73 100644 (file)
@@ -263,7 +263,7 @@ static const struct phy_ops mt7621_pci_phy_ops = {
 };
 
 static struct phy *mt7621_pcie_phy_of_xlate(struct device *dev,
-                                           struct of_phandle_args *args)
+                                           const struct of_phandle_args *args)
 {
        struct mt7621_pci_phy *mt7621_phy = dev_get_drvdata(dev);
 
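Several hunks in this pull are the same mechanical change repeated across PHY
providers: the of_xlate callback now takes a const struct of_phandle_args *,
matching the constified provider API. A minimal sketch of the updated callback
shape (the example_* names and fields are hypothetical):

	static struct phy *example_phy_xlate(struct device *dev,
					     const struct of_phandle_args *args)
	{
		struct example_phy_priv *priv = dev_get_drvdata(dev);

		if (args->args_count != 1)
			return ERR_PTR(-EINVAL);

		/* args is read-only; the provider must not modify it */
		return priv->phys[args->args[0]];
	}
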
index 507435af26567f992540e41dd38068815bd69863..c0221e7258c0321005171bdab9c57d5ae784a8d6 100644 (file)
@@ -306,7 +306,7 @@ static const struct of_device_id rcar_gen2_phy_match_table[] = {
 MODULE_DEVICE_TABLE(of, rcar_gen2_phy_match_table);
 
 static struct phy *rcar_gen2_phy_xlate(struct device *dev,
-                                      struct of_phandle_args *args)
+                                      const struct of_phandle_args *args)
 {
        struct rcar_gen2_phy_driver *drv;
        struct device_node *np = args->np;
index 6387c0d34c551c0e4e28e09af0792cee69eb2952..fbab6ac0f0d10c89a275b68dd8d0094c1943f357 100644 (file)
@@ -608,7 +608,7 @@ static const unsigned int rcar_gen3_phy_cable[] = {
 };
 
 static struct phy *rcar_gen3_phy_usb2_xlate(struct device *dev,
-                                           struct of_phandle_args *args)
+                                           const struct of_phandle_args *args)
 {
        struct rcar_gen3_chan *ch = dev_get_drvdata(dev);
 
index fc6e398fa3bfb65c5d92f9e87e98fc593bf69ae5..f1f1da4a0b1fe5091da22845522ae7623f187e16 100644
@@ -334,7 +334,7 @@ static const struct phy_ops r8a779f0_eth_serdes_ops = {
 };
 
 static struct phy *r8a779f0_eth_serdes_xlate(struct device *dev,
-                                            struct of_phandle_args *args)
+                                            const struct of_phandle_args *args)
 {
        struct r8a779f0_eth_serdes_drv_data *dd = dev_get_drvdata(dev);
 
index 94360fc96a6fb2e16cce53e44c0222d719e6e1ab..a34f67bb7e61ad7b23cfc2d510054bb4348b8a36 100644
@@ -83,6 +83,14 @@ config PHY_ROCKCHIP_PCIE
        help
          Enable this to support the Rockchip PCIe PHY.
 
+config PHY_ROCKCHIP_SAMSUNG_HDPTX
+       tristate "Rockchip Samsung HDMI/eDP Combo PHY driver"
+       depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF
+       select GENERIC_PHY
+       help
+         Enable this to support the Rockchip HDMI/eDP Combo PHY
+         with Samsung IP block.
+
 config PHY_ROCKCHIP_SNPS_PCIE3
        tristate "Rockchip Snps PCIe3 PHY Driver"
        depends on (ARCH_ROCKCHIP && OF) || COMPILE_TEST
index 7eab129230d175bca9d439336ca56e2e5fe1cb35..3d911304e65433b961c777a8e773db85c6452516 100644
@@ -8,6 +8,7 @@ obj-$(CONFIG_PHY_ROCKCHIP_INNO_HDMI)    += phy-rockchip-inno-hdmi.o
 obj-$(CONFIG_PHY_ROCKCHIP_INNO_USB2)   += phy-rockchip-inno-usb2.o
 obj-$(CONFIG_PHY_ROCKCHIP_NANENG_COMBO_PHY)    += phy-rockchip-naneng-combphy.o
 obj-$(CONFIG_PHY_ROCKCHIP_PCIE)                += phy-rockchip-pcie.o
+obj-$(CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX)       += phy-rockchip-samsung-hdptx.o
 obj-$(CONFIG_PHY_ROCKCHIP_SNPS_PCIE3)  += phy-rockchip-snps-pcie3.o
 obj-$(CONFIG_PHY_ROCKCHIP_TYPEC)       += phy-rockchip-typec.o
 obj-$(CONFIG_PHY_ROCKCHIP_USB)         += phy-rockchip-usb.o
index 5de5e2e97ffa0f17d1997d49661f3699ef5fe3c1..76b9cf417591de0b7d9f5887dc78b075bd71e6bf 100644
@@ -251,7 +251,7 @@ static const struct phy_ops rochchip_combphy_ops = {
        .owner = THIS_MODULE,
 };
 
-static struct phy *rockchip_combphy_xlate(struct device *dev, struct of_phandle_args *args)
+static struct phy *rockchip_combphy_xlate(struct device *dev, const struct of_phandle_args *args)
 {
        struct rockchip_combphy_priv *priv = dev_get_drvdata(dev);
 
index 1bbd6be2a58473db756e85631505bf81bdc29b77..51cc5ece0e63724d1ce6f9d3ece2f378a28786f0 100644
@@ -82,7 +82,7 @@ static struct rockchip_pcie_phy *to_pcie_phy(struct phy_pcie_instance *inst)
 }
 
 static struct phy *rockchip_pcie_phy_of_xlate(struct device *dev,
-                                             struct of_phandle_args *args)
+                                             const struct of_phandle_args *args)
 {
        struct rockchip_pcie_phy *rk_phy = dev_get_drvdata(dev);
 
diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
new file mode 100644
index 0000000..946c012
--- /dev/null
@@ -0,0 +1,1028 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2021-2022 Rockchip Electronics Co., Ltd.
+ * Copyright (c) 2024 Collabora Ltd.
+ *
+ * Author: Algea Cao <algea.cao@rock-chips.com>
+ * Author: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
+ */
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/rational.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#define GRF_HDPTX_CON0                 0x00
+#define HDPTX_I_PLL_EN                 BIT(7)
+#define HDPTX_I_BIAS_EN                        BIT(6)
+#define HDPTX_I_BGR_EN                 BIT(5)
+#define GRF_HDPTX_STATUS               0x80
+#define HDPTX_O_PLL_LOCK_DONE          BIT(3)
+#define HDPTX_O_PHY_CLK_RDY            BIT(2)
+#define HDPTX_O_PHY_RDY                        BIT(1)
+#define HDPTX_O_SB_RDY                 BIT(0)
+
+#define HDTPX_REG(_n, _min, _max)                              \
+       (                                                       \
+               BUILD_BUG_ON_ZERO((0x##_n) < (0x##_min)) +      \
+               BUILD_BUG_ON_ZERO((0x##_n) > (0x##_max)) +      \
+               ((0x##_n) * 4)                                  \
+       )
+
+#define CMN_REG(n)                     HDTPX_REG(n, 0000, 00a7)
+#define SB_REG(n)                      HDTPX_REG(n, 0100, 0129)
+#define LNTOP_REG(n)                   HDTPX_REG(n, 0200, 0229)
+#define LANE_REG(n)                    HDTPX_REG(n, 0300, 062d)
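+/*
+ * Worked example: CMN_REG(0086) expands to 0x86 * 4 = 0x218, the byte
+ * offset of common-block register 0x86; an out-of-range index fails the
+ * build via BUILD_BUG_ON_ZERO().
+ */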
+
+/* CMN_REG(0008) */
+#define LCPLL_EN_MASK                  BIT(6)
+#define LCPLL_LCVCO_MODE_EN_MASK       BIT(4)
+/* CMN_REG(001e) */
+#define LCPLL_PI_EN_MASK               BIT(5)
+#define LCPLL_100M_CLK_EN_MASK         BIT(0)
+/* CMN_REG(0025) */
+#define LCPLL_PMS_IQDIV_RSTN           BIT(4)
+/* CMN_REG(0028) */
+#define LCPLL_SDC_FRAC_EN              BIT(2)
+#define LCPLL_SDC_FRAC_RSTN            BIT(0)
+/* CMN_REG(002d) */
+#define LCPLL_SDC_N_MASK               GENMASK(3, 1)
+/* CMN_REG(002e) */
+#define LCPLL_SDC_NUMBERATOR_MASK      GENMASK(5, 0)
+/* CMN_REG(002f) */
+#define LCPLL_SDC_DENOMINATOR_MASK     GENMASK(7, 2)
+#define LCPLL_SDC_NDIV_RSTN            BIT(0)
+/* CMN_REG(003d) */
+#define ROPLL_LCVCO_EN                 BIT(4)
+/* CMN_REG(004e) */
+#define ROPLL_PI_EN                    BIT(5)
+/* CMN_REG(005c) */
+#define ROPLL_PMS_IQDIV_RSTN           BIT(5)
+/* CMN_REG(005e) */
+#define ROPLL_SDM_EN_MASK              BIT(6)
+#define ROPLL_SDM_FRAC_EN_RBR          BIT(3)
+#define ROPLL_SDM_FRAC_EN_HBR          BIT(2)
+#define ROPLL_SDM_FRAC_EN_HBR2         BIT(1)
+#define ROPLL_SDM_FRAC_EN_HBR3         BIT(0)
+/* CMN_REG(0064) */
+#define ROPLL_SDM_NUM_SIGN_RBR_MASK    BIT(3)
+/* CMN_REG(0069) */
+#define ROPLL_SDC_N_RBR_MASK           GENMASK(2, 0)
+/* CMN_REG(0074) */
+#define ROPLL_SDC_NDIV_RSTN            BIT(2)
+#define ROPLL_SSC_EN                   BIT(0)
+/* CMN_REG(0081) */
+#define OVRD_PLL_CD_CLK_EN             BIT(8)
+#define PLL_CD_HSCLK_EAST_EN           BIT(0)
+/* CMN_REG(0086) */
+#define PLL_PCG_POSTDIV_SEL_MASK       GENMASK(7, 4)
+#define PLL_PCG_CLK_SEL_MASK           GENMASK(3, 1)
+#define PLL_PCG_CLK_EN                 BIT(0)
+/* CMN_REG(0087) */
+#define PLL_FRL_MODE_EN                        BIT(3)
+#define PLL_TX_HS_CLK_EN               BIT(2)
+/* CMN_REG(0089) */
+#define LCPLL_ALONE_MODE               BIT(1)
+/* CMN_REG(0097) */
+#define DIG_CLK_SEL                    BIT(1)
+#define ROPLL_REF                      BIT(1)
+#define LCPLL_REF                      0
+/* CMN_REG(0099) */
+#define CMN_ROPLL_ALONE_MODE           BIT(2)
+#define ROPLL_ALONE_MODE               BIT(2)
+/* CMN_REG(009a) */
+#define HS_SPEED_SEL                   BIT(0)
+#define DIV_10_CLOCK                   BIT(0)
+/* CMN_REG(009b) */
+#define IS_SPEED_SEL                   BIT(4)
+#define LINK_SYMBOL_CLOCK              BIT(4)
+#define LINK_SYMBOL_CLOCK1_2           0
+
+/* SB_REG(0102) */
+#define OVRD_SB_RXTERM_EN_MASK         BIT(5)
+#define SB_RXTERM_EN_MASK              BIT(4)
+#define ANA_SB_RXTERM_OFFSP_MASK       GENMASK(3, 0)
+/* SB_REG(0103) */
+#define ANA_SB_RXTERM_OFFSN_MASK       GENMASK(6, 3)
+#define OVRD_SB_RX_RESCAL_DONE_MASK    BIT(1)
+#define SB_RX_RESCAL_DONE_MASK         BIT(0)
+/* SB_REG(0104) */
+#define OVRD_SB_EN_MASK                        BIT(5)
+#define SB_EN_MASK                     BIT(4)
+/* SB_REG(0105) */
+#define OVRD_SB_EARC_CMDC_EN_MASK      BIT(6)
+#define SB_EARC_CMDC_EN_MASK           BIT(5)
+#define ANA_SB_TX_HLVL_PROG_MASK       GENMASK(2, 0)
+/* SB_REG(0106) */
+#define ANA_SB_TX_LLVL_PROG_MASK       GENMASK(6, 4)
+/* SB_REG(0109) */
+#define ANA_SB_DMRX_AFC_DIV_RATIO_MASK GENMASK(2, 0)
+/* SB_REG(010f) */
+#define OVRD_SB_VREG_EN_MASK           BIT(7)
+#define SB_VREG_EN_MASK                        BIT(6)
+#define OVRD_SB_VREG_LPF_BYPASS_MASK   BIT(5)
+#define SB_VREG_LPF_BYPASS_MASK                BIT(4)
+#define ANA_SB_VREG_GAIN_CTRL_MASK     GENMASK(3, 0)
+/* SB_REG(0110) */
+#define ANA_SB_VREG_REF_SEL_MASK       BIT(0)
+/* SB_REG(0113) */
+#define SB_RX_RCAL_OPT_CODE_MASK       GENMASK(5, 4)
+#define SB_RX_RTERM_CTRL_MASK          GENMASK(3, 0)
+/* SB_REG(0114) */
+#define SB_TG_SB_EN_DELAY_TIME_MASK    GENMASK(5, 3)
+#define SB_TG_RXTERM_EN_DELAY_TIME_MASK        GENMASK(2, 0)
+/* SB_REG(0115) */
+#define SB_READY_DELAY_TIME_MASK       GENMASK(5, 3)
+#define SB_TG_OSC_EN_DELAY_TIME_MASK   GENMASK(2, 0)
+/* SB_REG(0116) */
+#define AFC_RSTN_DELAY_TIME_MASK       GENMASK(6, 4)
+/* SB_REG(0117) */
+#define FAST_PULSE_TIME_MASK           GENMASK(3, 0)
+/* SB_REG(011b) */
+#define SB_EARC_SIG_DET_BYPASS_MASK    BIT(4)
+#define SB_AFC_TOL_MASK                        GENMASK(3, 0)
+/* SB_REG(011f) */
+#define SB_PWM_AFC_CTRL_MASK           GENMASK(7, 2)
+#define SB_RCAL_RSTN_MASK              BIT(1)
+/* SB_REG(0120) */
+#define SB_EARC_EN_MASK                        BIT(1)
+#define SB_EARC_AFC_EN_MASK            BIT(2)
+/* SB_REG(0123) */
+#define OVRD_SB_READY_MASK             BIT(5)
+#define SB_READY_MASK                  BIT(4)
+
+/* LNTOP_REG(0200) */
+#define PROTOCOL_SEL                   BIT(2)
+#define HDMI_MODE                      BIT(2)
+#define HDMI_TMDS_FRL_SEL              BIT(1)
+/* LNTOP_REG(0206) */
+#define DATA_BUS_SEL                   BIT(0)
+#define DATA_BUS_36_40                 BIT(0)
+/* LNTOP_REG(0207) */
+#define LANE_EN                                0xf
+#define ALL_LANE_EN                    0xf
+
+/* LANE_REG(0312) */
+#define LN0_TX_SER_RATE_SEL_RBR                BIT(5)
+#define LN0_TX_SER_RATE_SEL_HBR                BIT(4)
+#define LN0_TX_SER_RATE_SEL_HBR2       BIT(3)
+#define LN0_TX_SER_RATE_SEL_HBR3       BIT(2)
+/* LANE_REG(0412) */
+#define LN1_TX_SER_RATE_SEL_RBR                BIT(5)
+#define LN1_TX_SER_RATE_SEL_HBR                BIT(4)
+#define LN1_TX_SER_RATE_SEL_HBR2       BIT(3)
+#define LN1_TX_SER_RATE_SEL_HBR3       BIT(2)
+/* LANE_REG(0512) */
+#define LN2_TX_SER_RATE_SEL_RBR                BIT(5)
+#define LN2_TX_SER_RATE_SEL_HBR                BIT(4)
+#define LN2_TX_SER_RATE_SEL_HBR2       BIT(3)
+#define LN2_TX_SER_RATE_SEL_HBR3       BIT(2)
+/* LANE_REG(0612) */
+#define LN3_TX_SER_RATE_SEL_RBR                BIT(5)
+#define LN3_TX_SER_RATE_SEL_HBR                BIT(4)
+#define LN3_TX_SER_RATE_SEL_HBR2       BIT(3)
+#define LN3_TX_SER_RATE_SEL_HBR3       BIT(2)
+
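+/*
+ * LCPLL counterpart of the ROPLL settings below; not yet exercised by the
+ * TMDS-only code in this file.
+ */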
+struct lcpll_config {
+       u32 bit_rate;
+       u8 lcvco_mode_en;
+       u8 pi_en;
+       u8 clk_en_100m;
+       u8 pms_mdiv;
+       u8 pms_mdiv_afc;
+       u8 pms_pdiv;
+       u8 pms_refdiv;
+       u8 pms_sdiv;
+       u8 pi_cdiv_rstn;
+       u8 pi_cdiv_sel;
+       u8 sdm_en;
+       u8 sdm_rstn;
+       u8 sdc_frac_en;
+       u8 sdc_rstn;
+       u8 sdm_deno;
+       u8 sdm_num_sign;
+       u8 sdm_num;
+       u8 sdc_n;
+       u8 sdc_n2;
+       u8 sdc_num;
+       u8 sdc_deno;
+       u8 sdc_ndiv_rstn;
+       u8 ssc_en;
+       u8 ssc_fm_dev;
+       u8 ssc_fm_freq;
+       u8 ssc_clk_div_sel;
+       u8 cd_tx_ser_rate_sel;
+};
+
+struct ropll_config {
+       u32 bit_rate;
+       u8 pms_mdiv;
+       u8 pms_mdiv_afc;
+       u8 pms_pdiv;
+       u8 pms_refdiv;
+       u8 pms_sdiv;
+       u8 pms_iqdiv_rstn;
+       u8 ref_clk_sel;
+       u8 sdm_en;
+       u8 sdm_rstn;
+       u8 sdc_frac_en;
+       u8 sdc_rstn;
+       u8 sdm_clk_div;
+       u8 sdm_deno;
+       u8 sdm_num_sign;
+       u8 sdm_num;
+       u8 sdc_n;
+       u8 sdc_num;
+       u8 sdc_deno;
+       u8 sdc_ndiv_rstn;
+       u8 ssc_en;
+       u8 ssc_fm_dev;
+       u8 ssc_fm_freq;
+       u8 ssc_clk_div_sel;
+       u8 ana_cpp_ctrl;
+       u8 ana_lpf_c_sel;
+       u8 cd_tx_ser_rate_sel;
+};
+
+enum rk_hdptx_reset {
+       RST_PHY = 0,
+       RST_APB,
+       RST_INIT,
+       RST_CMN,
+       RST_LANE,
+       RST_ROPLL,
+       RST_LCPLL,
+       RST_MAX
+};
+
+struct rk_hdptx_phy {
+       struct device *dev;
+       struct regmap *regmap;
+       struct regmap *grf;
+
+       struct phy *phy;
+       struct phy_config *phy_cfg;
+       struct clk_bulk_data *clks;
+       int nr_clks;
+       struct reset_control_bulk_data rsts[RST_MAX];
+};
+
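+/*
+ * Pre-computed ROPLL settings, one entry per supported TMDS bit rate
+ * (apparently in kHz, as handed over through the bus_width workaround in
+ * rk_hdptx_phy_power_on()); initializers follow the field order of
+ * struct ropll_config.
+ */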
+static const struct ropll_config ropll_tmds_cfg[] = {
+       { 5940000, 124, 124, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+         1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 3712500, 155, 155, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+         1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 2970000, 124, 124, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+         1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 1620000, 135, 135, 1, 1, 3, 1, 1, 0, 1, 1, 1, 1, 4, 0, 3, 5, 5, 0x10,
+         1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 1856250, 155, 155, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+         1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 1540000, 193, 193, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 193, 1, 32, 2, 1,
+         1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 1485000, 0x7b, 0x7b, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 0, 3, 5, 5,
+         0x10, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 1462500, 122, 122, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 244, 1, 16, 2, 1, 1,
+         1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 1190000, 149, 149, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 149, 1, 16, 2, 1, 1,
+         1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 1065000, 89, 89, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 89, 1, 16, 1, 0, 1,
+         1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 1080000, 135, 135, 1, 1, 5, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0,
+         0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 855000, 214, 214, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 214, 1, 16, 2, 1,
+         1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 835000, 105, 105, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 42, 1, 16, 1, 0,
+         1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 928125, 155, 155, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+         1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 742500, 124, 124, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0,
+         1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1,
+         1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5,
+         1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 400000, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0,
+         0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 270000, 0x5a, 0x5a, 1, 1, 0xf, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0,
+         0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+       { 251750, 84, 84, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 168, 1, 16, 4, 1, 1,
+         1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+};
+
+static const struct reg_sequence rk_hdtpx_common_cmn_init_seq[] = {
+       REG_SEQ0(CMN_REG(0009), 0x0c),
+       REG_SEQ0(CMN_REG(000a), 0x83),
+       REG_SEQ0(CMN_REG(000b), 0x06),
+       REG_SEQ0(CMN_REG(000c), 0x20),
+       REG_SEQ0(CMN_REG(000d), 0xb8),
+       REG_SEQ0(CMN_REG(000e), 0x0f),
+       REG_SEQ0(CMN_REG(000f), 0x0f),
+       REG_SEQ0(CMN_REG(0010), 0x04),
+       REG_SEQ0(CMN_REG(0011), 0x00),
+       REG_SEQ0(CMN_REG(0012), 0x26),
+       REG_SEQ0(CMN_REG(0013), 0x22),
+       REG_SEQ0(CMN_REG(0014), 0x24),
+       REG_SEQ0(CMN_REG(0015), 0x77),
+       REG_SEQ0(CMN_REG(0016), 0x08),
+       REG_SEQ0(CMN_REG(0017), 0x00),
+       REG_SEQ0(CMN_REG(0018), 0x04),
+       REG_SEQ0(CMN_REG(0019), 0x48),
+       REG_SEQ0(CMN_REG(001a), 0x01),
+       REG_SEQ0(CMN_REG(001b), 0x00),
+       REG_SEQ0(CMN_REG(001c), 0x01),
+       REG_SEQ0(CMN_REG(001d), 0x64),
+       REG_SEQ0(CMN_REG(001f), 0x00),
+       REG_SEQ0(CMN_REG(0026), 0x53),
+       REG_SEQ0(CMN_REG(0029), 0x01),
+       REG_SEQ0(CMN_REG(0030), 0x00),
+       REG_SEQ0(CMN_REG(0031), 0x20),
+       REG_SEQ0(CMN_REG(0032), 0x30),
+       REG_SEQ0(CMN_REG(0033), 0x0b),
+       REG_SEQ0(CMN_REG(0034), 0x23),
+       REG_SEQ0(CMN_REG(0035), 0x00),
+       REG_SEQ0(CMN_REG(0038), 0x00),
+       REG_SEQ0(CMN_REG(0039), 0x00),
+       REG_SEQ0(CMN_REG(003a), 0x00),
+       REG_SEQ0(CMN_REG(003b), 0x00),
+       REG_SEQ0(CMN_REG(003c), 0x80),
+       REG_SEQ0(CMN_REG(003e), 0x0c),
+       REG_SEQ0(CMN_REG(003f), 0x83),
+       REG_SEQ0(CMN_REG(0040), 0x06),
+       REG_SEQ0(CMN_REG(0041), 0x20),
+       REG_SEQ0(CMN_REG(0042), 0xb8),
+       REG_SEQ0(CMN_REG(0043), 0x00),
+       REG_SEQ0(CMN_REG(0044), 0x46),
+       REG_SEQ0(CMN_REG(0045), 0x24),
+       REG_SEQ0(CMN_REG(0046), 0xff),
+       REG_SEQ0(CMN_REG(0047), 0x00),
+       REG_SEQ0(CMN_REG(0048), 0x44),
+       REG_SEQ0(CMN_REG(0049), 0xfa),
+       REG_SEQ0(CMN_REG(004a), 0x08),
+       REG_SEQ0(CMN_REG(004b), 0x00),
+       REG_SEQ0(CMN_REG(004c), 0x01),
+       REG_SEQ0(CMN_REG(004d), 0x64),
+       REG_SEQ0(CMN_REG(004e), 0x14),
+       REG_SEQ0(CMN_REG(004f), 0x00),
+       REG_SEQ0(CMN_REG(0050), 0x00),
+       REG_SEQ0(CMN_REG(005d), 0x0c),
+       REG_SEQ0(CMN_REG(005f), 0x01),
+       REG_SEQ0(CMN_REG(006b), 0x04),
+       REG_SEQ0(CMN_REG(0073), 0x30),
+       REG_SEQ0(CMN_REG(0074), 0x00),
+       REG_SEQ0(CMN_REG(0075), 0x20),
+       REG_SEQ0(CMN_REG(0076), 0x30),
+       REG_SEQ0(CMN_REG(0077), 0x08),
+       REG_SEQ0(CMN_REG(0078), 0x0c),
+       REG_SEQ0(CMN_REG(0079), 0x00),
+       REG_SEQ0(CMN_REG(007b), 0x00),
+       REG_SEQ0(CMN_REG(007c), 0x00),
+       REG_SEQ0(CMN_REG(007d), 0x00),
+       REG_SEQ0(CMN_REG(007e), 0x00),
+       REG_SEQ0(CMN_REG(007f), 0x00),
+       REG_SEQ0(CMN_REG(0080), 0x00),
+       REG_SEQ0(CMN_REG(0081), 0x09),
+       REG_SEQ0(CMN_REG(0082), 0x04),
+       REG_SEQ0(CMN_REG(0083), 0x24),
+       REG_SEQ0(CMN_REG(0084), 0x20),
+       REG_SEQ0(CMN_REG(0085), 0x03),
+       REG_SEQ0(CMN_REG(0086), 0x01),
+       REG_SEQ0(CMN_REG(0087), 0x0c),
+       REG_SEQ0(CMN_REG(008a), 0x55),
+       REG_SEQ0(CMN_REG(008b), 0x25),
+       REG_SEQ0(CMN_REG(008c), 0x2c),
+       REG_SEQ0(CMN_REG(008d), 0x22),
+       REG_SEQ0(CMN_REG(008e), 0x14),
+       REG_SEQ0(CMN_REG(008f), 0x20),
+       REG_SEQ0(CMN_REG(0090), 0x00),
+       REG_SEQ0(CMN_REG(0091), 0x00),
+       REG_SEQ0(CMN_REG(0092), 0x00),
+       REG_SEQ0(CMN_REG(0093), 0x00),
+       REG_SEQ0(CMN_REG(009a), 0x11),
+       REG_SEQ0(CMN_REG(009b), 0x10),
+};
+
+static const struct reg_sequence rk_hdtpx_tmds_cmn_init_seq[] = {
+       REG_SEQ0(CMN_REG(0008), 0x00),
+       REG_SEQ0(CMN_REG(0011), 0x01),
+       REG_SEQ0(CMN_REG(0017), 0x20),
+       REG_SEQ0(CMN_REG(001e), 0x14),
+       REG_SEQ0(CMN_REG(0020), 0x00),
+       REG_SEQ0(CMN_REG(0021), 0x00),
+       REG_SEQ0(CMN_REG(0022), 0x11),
+       REG_SEQ0(CMN_REG(0023), 0x00),
+       REG_SEQ0(CMN_REG(0024), 0x00),
+       REG_SEQ0(CMN_REG(0025), 0x53),
+       REG_SEQ0(CMN_REG(0026), 0x00),
+       REG_SEQ0(CMN_REG(0027), 0x00),
+       REG_SEQ0(CMN_REG(0028), 0x01),
+       REG_SEQ0(CMN_REG(002a), 0x00),
+       REG_SEQ0(CMN_REG(002b), 0x00),
+       REG_SEQ0(CMN_REG(002c), 0x00),
+       REG_SEQ0(CMN_REG(002d), 0x00),
+       REG_SEQ0(CMN_REG(002e), 0x04),
+       REG_SEQ0(CMN_REG(002f), 0x00),
+       REG_SEQ0(CMN_REG(0030), 0x20),
+       REG_SEQ0(CMN_REG(0031), 0x30),
+       REG_SEQ0(CMN_REG(0032), 0x0b),
+       REG_SEQ0(CMN_REG(0033), 0x23),
+       REG_SEQ0(CMN_REG(0034), 0x00),
+       REG_SEQ0(CMN_REG(003d), 0x40),
+       REG_SEQ0(CMN_REG(0042), 0x78),
+       REG_SEQ0(CMN_REG(004e), 0x34),
+       REG_SEQ0(CMN_REG(005c), 0x25),
+       REG_SEQ0(CMN_REG(005e), 0x4f),
+       REG_SEQ0(CMN_REG(0074), 0x04),
+       REG_SEQ0(CMN_REG(0081), 0x01),
+       REG_SEQ0(CMN_REG(0087), 0x04),
+       REG_SEQ0(CMN_REG(0089), 0x00),
+       REG_SEQ0(CMN_REG(0095), 0x00),
+       REG_SEQ0(CMN_REG(0097), 0x02),
+       REG_SEQ0(CMN_REG(0099), 0x04),
+       REG_SEQ0(CMN_REG(009b), 0x00),
+};
+
+static const struct reg_sequence rk_hdtpx_common_sb_init_seq[] = {
+       REG_SEQ0(SB_REG(0114), 0x00),
+       REG_SEQ0(SB_REG(0115), 0x00),
+       REG_SEQ0(SB_REG(0116), 0x00),
+       REG_SEQ0(SB_REG(0117), 0x00),
+};
+
+static const struct reg_sequence rk_hdtpx_tmds_lntop_highbr_seq[] = {
+       REG_SEQ0(LNTOP_REG(0201), 0x00),
+       REG_SEQ0(LNTOP_REG(0202), 0x00),
+       REG_SEQ0(LNTOP_REG(0203), 0x0f),
+       REG_SEQ0(LNTOP_REG(0204), 0xff),
+       REG_SEQ0(LNTOP_REG(0205), 0xff),
+};
+
+static const struct reg_sequence rk_hdtpx_tmds_lntop_lowbr_seq[] = {
+       REG_SEQ0(LNTOP_REG(0201), 0x07),
+       REG_SEQ0(LNTOP_REG(0202), 0xc1),
+       REG_SEQ0(LNTOP_REG(0203), 0xf0),
+       REG_SEQ0(LNTOP_REG(0204), 0x7c),
+       REG_SEQ0(LNTOP_REG(0205), 0x1f),
+};
+
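+/*
+ * The same 13-register program, replayed for each of the four lanes
+ * (register blocks 0x03xx, 0x04xx, 0x05xx and 0x06xx).
+ */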
+static const struct reg_sequence rk_hdtpx_common_lane_init_seq[] = {
+       REG_SEQ0(LANE_REG(0303), 0x0c),
+       REG_SEQ0(LANE_REG(0307), 0x20),
+       REG_SEQ0(LANE_REG(030a), 0x17),
+       REG_SEQ0(LANE_REG(030b), 0x77),
+       REG_SEQ0(LANE_REG(030c), 0x77),
+       REG_SEQ0(LANE_REG(030d), 0x77),
+       REG_SEQ0(LANE_REG(030e), 0x38),
+       REG_SEQ0(LANE_REG(0310), 0x03),
+       REG_SEQ0(LANE_REG(0311), 0x0f),
+       REG_SEQ0(LANE_REG(0316), 0x02),
+       REG_SEQ0(LANE_REG(031b), 0x01),
+       REG_SEQ0(LANE_REG(031f), 0x15),
+       REG_SEQ0(LANE_REG(0320), 0xa0),
+       REG_SEQ0(LANE_REG(0403), 0x0c),
+       REG_SEQ0(LANE_REG(0407), 0x20),
+       REG_SEQ0(LANE_REG(040a), 0x17),
+       REG_SEQ0(LANE_REG(040b), 0x77),
+       REG_SEQ0(LANE_REG(040c), 0x77),
+       REG_SEQ0(LANE_REG(040d), 0x77),
+       REG_SEQ0(LANE_REG(040e), 0x38),
+       REG_SEQ0(LANE_REG(0410), 0x03),
+       REG_SEQ0(LANE_REG(0411), 0x0f),
+       REG_SEQ0(LANE_REG(0416), 0x02),
+       REG_SEQ0(LANE_REG(041b), 0x01),
+       REG_SEQ0(LANE_REG(041f), 0x15),
+       REG_SEQ0(LANE_REG(0420), 0xa0),
+       REG_SEQ0(LANE_REG(0503), 0x0c),
+       REG_SEQ0(LANE_REG(0507), 0x20),
+       REG_SEQ0(LANE_REG(050a), 0x17),
+       REG_SEQ0(LANE_REG(050b), 0x77),
+       REG_SEQ0(LANE_REG(050c), 0x77),
+       REG_SEQ0(LANE_REG(050d), 0x77),
+       REG_SEQ0(LANE_REG(050e), 0x38),
+       REG_SEQ0(LANE_REG(0510), 0x03),
+       REG_SEQ0(LANE_REG(0511), 0x0f),
+       REG_SEQ0(LANE_REG(0516), 0x02),
+       REG_SEQ0(LANE_REG(051b), 0x01),
+       REG_SEQ0(LANE_REG(051f), 0x15),
+       REG_SEQ0(LANE_REG(0520), 0xa0),
+       REG_SEQ0(LANE_REG(0603), 0x0c),
+       REG_SEQ0(LANE_REG(0607), 0x20),
+       REG_SEQ0(LANE_REG(060a), 0x17),
+       REG_SEQ0(LANE_REG(060b), 0x77),
+       REG_SEQ0(LANE_REG(060c), 0x77),
+       REG_SEQ0(LANE_REG(060d), 0x77),
+       REG_SEQ0(LANE_REG(060e), 0x38),
+       REG_SEQ0(LANE_REG(0610), 0x03),
+       REG_SEQ0(LANE_REG(0611), 0x0f),
+       REG_SEQ0(LANE_REG(0616), 0x02),
+       REG_SEQ0(LANE_REG(061b), 0x01),
+       REG_SEQ0(LANE_REG(061f), 0x15),
+       REG_SEQ0(LANE_REG(0620), 0xa0),
+};
+
+static const struct reg_sequence rk_hdtpx_tmds_lane_init_seq[] = {
+       REG_SEQ0(LANE_REG(0312), 0x00),
+       REG_SEQ0(LANE_REG(031e), 0x00),
+       REG_SEQ0(LANE_REG(0412), 0x00),
+       REG_SEQ0(LANE_REG(041e), 0x00),
+       REG_SEQ0(LANE_REG(0512), 0x00),
+       REG_SEQ0(LANE_REG(051e), 0x00),
+       REG_SEQ0(LANE_REG(0612), 0x00),
+       REG_SEQ0(LANE_REG(061e), 0x08),
+       REG_SEQ0(LANE_REG(0303), 0x2f),
+       REG_SEQ0(LANE_REG(0403), 0x2f),
+       REG_SEQ0(LANE_REG(0503), 0x2f),
+       REG_SEQ0(LANE_REG(0603), 0x2f),
+       REG_SEQ0(LANE_REG(0305), 0x03),
+       REG_SEQ0(LANE_REG(0405), 0x03),
+       REG_SEQ0(LANE_REG(0505), 0x03),
+       REG_SEQ0(LANE_REG(0605), 0x03),
+       REG_SEQ0(LANE_REG(0306), 0x1c),
+       REG_SEQ0(LANE_REG(0406), 0x1c),
+       REG_SEQ0(LANE_REG(0506), 0x1c),
+       REG_SEQ0(LANE_REG(0606), 0x1c),
+};
+
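+/*
+ * The accessible windows map 1:1 onto the register blocks above (index * 4):
+ * CMN 0x0000..0x029c, SB 0x0400..0x04a4, LNTOP 0x0800..0x08a4, plus one
+ * window per lane starting at 0x0c00, each 0x400 bytes apart.
+ */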
+static bool rk_hdptx_phy_is_rw_reg(struct device *dev, unsigned int reg)
+{
+       switch (reg) {
+       case 0x0000 ... 0x029c:
+       case 0x0400 ... 0x04a4:
+       case 0x0800 ... 0x08a4:
+       case 0x0c00 ... 0x0cb4:
+       case 0x1000 ... 0x10b4:
+       case 0x1400 ... 0x14b4:
+       case 0x1800 ... 0x18b4:
+               return true;
+       }
+
+       return false;
+}
+
+static const struct regmap_config rk_hdptx_phy_regmap_config = {
+       .reg_bits = 32,
+       .reg_stride = 4,
+       .val_bits = 32,
+       .writeable_reg = rk_hdptx_phy_is_rw_reg,
+       .readable_reg = rk_hdptx_phy_is_rw_reg,
+       .fast_io = true,
+       .max_register = 0x18b4,
+};
+
+#define rk_hdptx_multi_reg_write(hdptx, seq) \
+       regmap_multi_reg_write((hdptx)->regmap, seq, ARRAY_SIZE(seq))
+
+static void rk_hdptx_pre_power_up(struct rk_hdptx_phy *hdptx)
+{
+       u32 val;
+
+       reset_control_assert(hdptx->rsts[RST_APB].rstc);
+       usleep_range(20, 25);
+       reset_control_deassert(hdptx->rsts[RST_APB].rstc);
+
+       reset_control_assert(hdptx->rsts[RST_LANE].rstc);
+       reset_control_assert(hdptx->rsts[RST_CMN].rstc);
+       reset_control_assert(hdptx->rsts[RST_INIT].rstc);
+
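+       /* The upper 16 bits of a GRF write act as a write-enable mask */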
+       val = (HDPTX_I_PLL_EN | HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16;
+       regmap_write(hdptx->grf, GRF_HDPTX_CON0, val);
+}
+
+static int rk_hdptx_post_enable_lane(struct rk_hdptx_phy *hdptx)
+{
+       u32 val;
+       int ret;
+
+       reset_control_deassert(hdptx->rsts[RST_LANE].rstc);
+
+       val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 |
+              HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN;
+       regmap_write(hdptx->grf, GRF_HDPTX_CON0, val);
+
+       ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val,
+                                      (val & HDPTX_O_PHY_RDY) &&
+                                      (val & HDPTX_O_PLL_LOCK_DONE),
+                                      100, 5000);
+       if (ret) {
+               dev_err(hdptx->dev, "Failed to get PHY lane lock: %d\n", ret);
+               return ret;
+       }
+
+       dev_dbg(hdptx->dev, "PHY lane locked\n");
+
+       return 0;
+}
+
+static int rk_hdptx_post_enable_pll(struct rk_hdptx_phy *hdptx)
+{
+       u32 val;
+       int ret;
+
+       val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 |
+              HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN;
+       regmap_write(hdptx->grf, GRF_HDPTX_CON0, val);
+
+       usleep_range(10, 15);
+       reset_control_deassert(hdptx->rsts[RST_INIT].rstc);
+
+       usleep_range(10, 15);
+       val = HDPTX_I_PLL_EN << 16 | HDPTX_I_PLL_EN;
+       regmap_write(hdptx->grf, GRF_HDPTX_CON0, val);
+
+       usleep_range(10, 15);
+       reset_control_deassert(hdptx->rsts[RST_CMN].rstc);
+
+       ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val,
+                                      val & HDPTX_O_PHY_CLK_RDY, 20, 400);
+       if (ret) {
+               dev_err(hdptx->dev, "Failed to get PHY clk ready: %d\n", ret);
+               return ret;
+       }
+
+       dev_dbg(hdptx->dev, "PHY clk ready\n");
+
+       return 0;
+}
+
+static void rk_hdptx_phy_disable(struct rk_hdptx_phy *hdptx)
+{
+       u32 val;
+
+       /* Reset the PHY and APB, otherwise the PHY locked flag may stay set */
+       reset_control_assert(hdptx->rsts[RST_PHY].rstc);
+       usleep_range(20, 30);
+       reset_control_deassert(hdptx->rsts[RST_PHY].rstc);
+
+       reset_control_assert(hdptx->rsts[RST_APB].rstc);
+       usleep_range(20, 30);
+       reset_control_deassert(hdptx->rsts[RST_APB].rstc);
+
+       regmap_write(hdptx->regmap, LANE_REG(0300), 0x82);
+       regmap_write(hdptx->regmap, SB_REG(010f), 0xc1);
+       regmap_write(hdptx->regmap, SB_REG(0110), 0x1);
+       regmap_write(hdptx->regmap, LANE_REG(0301), 0x80);
+       regmap_write(hdptx->regmap, LANE_REG(0401), 0x80);
+       regmap_write(hdptx->regmap, LANE_REG(0501), 0x80);
+       regmap_write(hdptx->regmap, LANE_REG(0601), 0x80);
+
+       reset_control_assert(hdptx->rsts[RST_LANE].rstc);
+       reset_control_assert(hdptx->rsts[RST_CMN].rstc);
+       reset_control_assert(hdptx->rsts[RST_INIT].rstc);
+
+       val = (HDPTX_I_PLL_EN | HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16;
+       regmap_write(hdptx->grf, GRF_HDPTX_CON0, val);
+}
+
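+/*
+ * Sketch of the divider search below (frequencies in kHz): with
+ * fout = data_rate / 2 and fref = 24 MHz, scan sdiv from 16 down to 1
+ * (even values plus 1) and take the first for which fvco = fout * sdiv
+ * lands in 2..4 GHz; mdiv = DIV_ROUND_UP(fvco, fref), and any remainder
+ * is pushed into the SDM fractional dividers via
+ * rational_best_approximation().
+ */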
+static bool rk_hdptx_phy_clk_pll_calc(unsigned int data_rate,
+                                     struct ropll_config *cfg)
+{
+       const unsigned int fout = data_rate / 2, fref = 24000;
+       unsigned long k = 0, lc, k_sub, lc_sub;
+       unsigned int fvco, sdc;
+       u32 mdiv, sdiv, n = 8;
+
+       if (fout > 0xfffffff)
+               return false;
+
+       for (sdiv = 16; sdiv >= 1; sdiv--) {
+               if (sdiv % 2 && sdiv != 1)
+                       continue;
+
+               fvco = fout * sdiv;
+
+               if (fvco < 2000000 || fvco > 4000000)
+                       continue;
+
+               mdiv = DIV_ROUND_UP(fvco, fref);
+               if (mdiv < 20 || mdiv > 255)
+                       continue;
+
+               if (fref * mdiv - fvco) {
+                       for (sdc = 264000; sdc <= 750000; sdc += fref)
+                               if (sdc * n > fref * mdiv)
+                                       break;
+
+                       if (sdc > 750000)
+                               continue;
+
+                       rational_best_approximation(fref * mdiv - fvco,
+                                                   sdc / 16,
+                                                   GENMASK(6, 0),
+                                                   GENMASK(7, 0),
+                                                   &k, &lc);
+
+                       rational_best_approximation(sdc * n - fref * mdiv,
+                                                   sdc,
+                                                   GENMASK(6, 0),
+                                                   GENMASK(7, 0),
+                                                   &k_sub, &lc_sub);
+               }
+
+               break;
+       }
+
+       if (sdiv < 1)
+               return false;
+
+       if (cfg) {
+               cfg->pms_mdiv = mdiv;
+               cfg->pms_mdiv_afc = mdiv;
+               cfg->pms_pdiv = 1;
+               cfg->pms_refdiv = 1;
+               cfg->pms_sdiv = sdiv - 1;
+
+               cfg->sdm_en = k > 0 ? 1 : 0;
+               if (cfg->sdm_en) {
+                       cfg->sdm_deno = lc;
+                       cfg->sdm_num_sign = 1;
+                       cfg->sdm_num = k;
+                       cfg->sdc_n = n - 3;
+                       cfg->sdc_num = k_sub;
+                       cfg->sdc_deno = lc_sub;
+               }
+       }
+
+       return true;
+}
+
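+/*
+ * Bring-up order: global pre-power-up, pulse the ROPLL reset, replay the
+ * common and TMDS init sequences, program the PMS/SDM dividers, then gate
+ * the post-divided clock on before polling for PHY clock ready.
+ */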
+static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx,
+                                         unsigned int rate)
+{
+       const struct ropll_config *cfg = NULL;
+       struct ropll_config rc = {0};
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++)
+               if (rate == ropll_tmds_cfg[i].bit_rate) {
+                       cfg = &ropll_tmds_cfg[i];
+                       break;
+               }
+
+       if (!cfg) {
+               if (rk_hdptx_phy_clk_pll_calc(rate, &rc)) {
+                       cfg = &rc;
+               } else {
+                       dev_err(hdptx->dev, "%s cannot find pll cfg\n", __func__);
+                       return -EINVAL;
+               }
+       }
+
+       dev_dbg(hdptx->dev, "mdiv=%u, sdiv=%u, sdm_en=%u, k_sign=%u, k=%u, lc=%u\n",
+               cfg->pms_mdiv, cfg->pms_sdiv + 1, cfg->sdm_en,
+               cfg->sdm_num_sign, cfg->sdm_num, cfg->sdm_deno);
+
+       rk_hdptx_pre_power_up(hdptx);
+
+       reset_control_assert(hdptx->rsts[RST_ROPLL].rstc);
+       usleep_range(20, 30);
+       reset_control_deassert(hdptx->rsts[RST_ROPLL].rstc);
+
+       rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_cmn_init_seq);
+       rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_cmn_init_seq);
+
+       regmap_write(hdptx->regmap, CMN_REG(0051), cfg->pms_mdiv);
+       regmap_write(hdptx->regmap, CMN_REG(0055), cfg->pms_mdiv_afc);
+       regmap_write(hdptx->regmap, CMN_REG(0059),
+                    (cfg->pms_pdiv << 4) | cfg->pms_refdiv);
+       regmap_write(hdptx->regmap, CMN_REG(005a), cfg->pms_sdiv << 4);
+
+       regmap_update_bits(hdptx->regmap, CMN_REG(005e), ROPLL_SDM_EN_MASK,
+                          FIELD_PREP(ROPLL_SDM_EN_MASK, cfg->sdm_en));
+       if (!cfg->sdm_en)
+               regmap_update_bits(hdptx->regmap, CMN_REG(005e), 0xf, 0);
+
+       regmap_update_bits(hdptx->regmap, CMN_REG(0064), ROPLL_SDM_NUM_SIGN_RBR_MASK,
+                          FIELD_PREP(ROPLL_SDM_NUM_SIGN_RBR_MASK, cfg->sdm_num_sign));
+
+       regmap_write(hdptx->regmap, CMN_REG(0060), cfg->sdm_deno);
+       regmap_write(hdptx->regmap, CMN_REG(0065), cfg->sdm_num);
+
+       regmap_update_bits(hdptx->regmap, CMN_REG(0069), ROPLL_SDC_N_RBR_MASK,
+                          FIELD_PREP(ROPLL_SDC_N_RBR_MASK, cfg->sdc_n));
+
+       regmap_write(hdptx->regmap, CMN_REG(006c), cfg->sdc_num);
+       regmap_write(hdptx->regmap, CMN_REG(0070), cfg->sdc_deno);
+
+       regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_POSTDIV_SEL_MASK,
+                          FIELD_PREP(PLL_PCG_POSTDIV_SEL_MASK, cfg->pms_sdiv));
+
+       regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_EN,
+                          PLL_PCG_CLK_EN);
+
+       return rk_hdptx_post_enable_pll(hdptx);
+}
+
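+/*
+ * TMDS lane bring-up: configure the PLL only if it is not already locked,
+ * init the sideband block, then select the 1/40 (rates >= 3.4 Gbps, 40-bit
+ * bus) or 1/10 serializer clock layout before enabling all four lanes.
+ */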
+static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx,
+                                          unsigned int rate)
+{
+       u32 val;
+       int ret;
+
+       ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val);
+       if (ret)
+               return ret;
+
+       if (!(val & HDPTX_O_PLL_LOCK_DONE)) {
+               ret = rk_hdptx_ropll_tmds_cmn_config(hdptx, rate);
+               if (ret)
+                       return ret;
+       }
+
+       rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_sb_init_seq);
+
+       regmap_write(hdptx->regmap, LNTOP_REG(0200), 0x06);
+
+       if (rate >= 3400000) {
+               /* For 1/40 bitrate clk */
+               rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_highbr_seq);
+       } else {
+               /* For 1/10 bitrate clk */
+               rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_lowbr_seq);
+       }
+
+       regmap_write(hdptx->regmap, LNTOP_REG(0206), 0x07);
+       regmap_write(hdptx->regmap, LNTOP_REG(0207), 0x0f);
+
+       rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_lane_init_seq);
+       rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lane_init_seq);
+
+       return rk_hdptx_post_enable_lane(hdptx);
+}
+
+static int rk_hdptx_phy_power_on(struct phy *phy)
+{
+       struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy);
+       int ret, bus_width = phy_get_bus_width(hdptx->phy);
+       /*
+        * FIXME: Temporary workaround to pass pixel_clk_rate
+        * from the HDMI bridge driver until phy_configure_opts_hdmi
+        * becomes available in the PHY API.
+        */
+       unsigned int rate = bus_width & 0xfffffff;
+
+       dev_dbg(hdptx->dev, "%s bus_width=%x rate=%u\n",
+               __func__, bus_width, rate);
+
+       ret = pm_runtime_resume_and_get(hdptx->dev);
+       if (ret) {
+               dev_err(hdptx->dev, "Failed to resume phy: %d\n", ret);
+               return ret;
+       }
+
+       ret = rk_hdptx_ropll_tmds_mode_config(hdptx, rate);
+       if (ret)
+               pm_runtime_put(hdptx->dev);
+
+       return ret;
+}
+
+static int rk_hdptx_phy_power_off(struct phy *phy)
+{
+       struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy);
+       u32 val;
+       int ret;
+
+       ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val);
+       if (ret == 0 && (val & HDPTX_O_PLL_LOCK_DONE))
+               rk_hdptx_phy_disable(hdptx);
+
+       pm_runtime_put(hdptx->dev);
+
+       return ret;
+}
+
+static const struct phy_ops rk_hdptx_phy_ops = {
+       .power_on  = rk_hdptx_phy_power_on,
+       .power_off = rk_hdptx_phy_power_off,
+       .owner     = THIS_MODULE,
+};
+
+static int rk_hdptx_phy_runtime_suspend(struct device *dev)
+{
+       struct rk_hdptx_phy *hdptx = dev_get_drvdata(dev);
+
+       clk_bulk_disable_unprepare(hdptx->nr_clks, hdptx->clks);
+
+       return 0;
+}
+
+static int rk_hdptx_phy_runtime_resume(struct device *dev)
+{
+       struct rk_hdptx_phy *hdptx = dev_get_drvdata(dev);
+       int ret;
+
+       ret = clk_bulk_prepare_enable(hdptx->nr_clks, hdptx->clks);
+       if (ret)
+               dev_err(hdptx->dev, "Failed to enable clocks: %d\n", ret);
+
+       return ret;
+}
+
+static int rk_hdptx_phy_probe(struct platform_device *pdev)
+{
+       struct phy_provider *phy_provider;
+       struct device *dev = &pdev->dev;
+       struct rk_hdptx_phy *hdptx;
+       void __iomem *regs;
+       int ret;
+
+       hdptx = devm_kzalloc(dev, sizeof(*hdptx), GFP_KERNEL);
+       if (!hdptx)
+               return -ENOMEM;
+
+       hdptx->dev = dev;
+
+       regs = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(regs))
+               return dev_err_probe(dev, PTR_ERR(regs),
+                                    "Failed to ioremap resource\n");
+
+       ret = devm_clk_bulk_get_all(dev, &hdptx->clks);
+       if (ret < 0)
+               return dev_err_probe(dev, ret, "Failed to get clocks\n");
+       if (ret == 0)
+               return dev_err_probe(dev, -EINVAL, "Missing clocks\n");
+
+       hdptx->nr_clks = ret;
+
+       hdptx->regmap = devm_regmap_init_mmio(dev, regs,
+                                             &rk_hdptx_phy_regmap_config);
+       if (IS_ERR(hdptx->regmap))
+               return dev_err_probe(dev, PTR_ERR(hdptx->regmap),
+                                    "Failed to init regmap\n");
+
+       hdptx->rsts[RST_PHY].id = "phy";
+       hdptx->rsts[RST_APB].id = "apb";
+       hdptx->rsts[RST_INIT].id = "init";
+       hdptx->rsts[RST_CMN].id = "cmn";
+       hdptx->rsts[RST_LANE].id = "lane";
+       hdptx->rsts[RST_ROPLL].id = "ropll";
+       hdptx->rsts[RST_LCPLL].id = "lcpll";
+
+       ret = devm_reset_control_bulk_get_exclusive(dev, RST_MAX, hdptx->rsts);
+       if (ret)
+               return dev_err_probe(dev, ret, "Failed to get resets\n");
+
+       hdptx->grf = syscon_regmap_lookup_by_phandle(dev->of_node,
+                                                    "rockchip,grf");
+       if (IS_ERR(hdptx->grf))
+               return dev_err_probe(dev, PTR_ERR(hdptx->grf),
+                                    "Could not get GRF syscon\n");
+
+       hdptx->phy = devm_phy_create(dev, NULL, &rk_hdptx_phy_ops);
+       if (IS_ERR(hdptx->phy))
+               return dev_err_probe(dev, PTR_ERR(hdptx->phy),
+                                    "Failed to create HDMI PHY\n");
+
+       platform_set_drvdata(pdev, hdptx);
+       phy_set_drvdata(hdptx->phy, hdptx);
+       phy_set_bus_width(hdptx->phy, 8);
+
+       ret = devm_pm_runtime_enable(dev);
+       if (ret)
+               return dev_err_probe(dev, ret, "Failed to enable runtime PM\n");
+
+       phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+       if (IS_ERR(phy_provider))
+               return dev_err_probe(dev, PTR_ERR(phy_provider),
+                                    "Failed to register PHY provider\n");
+
+       reset_control_deassert(hdptx->rsts[RST_APB].rstc);
+       reset_control_deassert(hdptx->rsts[RST_CMN].rstc);
+       reset_control_deassert(hdptx->rsts[RST_INIT].rstc);
+
+       return 0;
+}
+
+static const struct dev_pm_ops rk_hdptx_phy_pm_ops = {
+       RUNTIME_PM_OPS(rk_hdptx_phy_runtime_suspend,
+                      rk_hdptx_phy_runtime_resume, NULL)
+};
+
+static const struct of_device_id rk_hdptx_phy_of_match[] = {
+       { .compatible = "rockchip,rk3588-hdptx-phy", },
+       {}
+};
+MODULE_DEVICE_TABLE(of, rk_hdptx_phy_of_match);
+
+static struct platform_driver rk_hdptx_phy_driver = {
+       .probe  = rk_hdptx_phy_probe,
+       .driver = {
+               .name = "rockchip-hdptx-phy",
+               .pm = &rk_hdptx_phy_pm_ops,
+               .of_match_table = rk_hdptx_phy_of_match,
+       },
+};
+module_platform_driver(rk_hdptx_phy_driver);
+
+MODULE_AUTHOR("Algea Cao <algea.cao@rock-chips.com>");
+MODULE_AUTHOR("Cristian Ciocaltea <cristian.ciocaltea@collabora.com>");
+MODULE_DESCRIPTION("Samsung HDMI/eDP Transmitter Combo PHY Driver");
+MODULE_LICENSE("GPL");
index 592d8067e848e280bb6185542c26123059bdbd17..f6756a609a9a0774ecb6e27cf96726891683636c 100644
@@ -274,7 +274,7 @@ static int exynos_mipi_video_phy_power_off(struct phy *phy)
 }
 
 static struct phy *exynos_mipi_video_phy_xlate(struct device *dev,
-                                       struct of_phandle_args *args)
+                                       const struct of_phandle_args *args)
 {
        struct exynos_mipi_video_phy *state = dev_get_drvdata(dev);
 
index 3f310b28bfff799cbe9d687bc6c1af3cc3941159..04171eed5b16fbd930a1b3a7a65ac96a4de5c12d 100644
@@ -715,7 +715,7 @@ static int exynos5420_usbdrd_phy_calibrate(struct exynos5_usbdrd_phy *phy_drd)
 }
 
 static struct phy *exynos5_usbdrd_phy_xlate(struct device *dev,
-                                       struct of_phandle_args *args)
+                                       const struct of_phandle_args *args)
 {
        struct exynos5_usbdrd_phy *phy_drd = dev_get_drvdata(dev);
 
index 68a174eca0ba8a01ae22199c7939b7fa26f53794..9de744cd6f39286d2b84f8421a26c6600c1360a0 100644
@@ -87,7 +87,7 @@ static const struct phy_ops samsung_usb2_phy_ops = {
 };
 
 static struct phy *samsung_usb2_phy_xlate(struct device *dev,
-                                       struct of_phandle_args *args)
+                                       const struct of_phandle_args *args)
 {
        struct samsung_usb2_phy_driver *drv;
 
index 3f2086ed4fe4f9fc90b0bc221dc84ffb606906d4..21c201717d952934e1de893530f418bc92b4476b 100644
@@ -81,7 +81,7 @@ static int uniphier_u2phy_init(struct phy *phy)
 }
 
 static struct phy *uniphier_u2phy_xlate(struct device *dev,
-                                       struct of_phandle_args *args)
+                                       const struct of_phandle_args *args)
 {
        struct uniphier_u2phy_priv *priv = dev_get_drvdata(dev);
 
index e30305b77f0d168e27e069340c85d670095c7be3..063fc38788ed403894390f022fa1150b2f7c689b 100644
@@ -1074,7 +1074,7 @@ static int miphy28lp_get_addr(struct miphy28lp_phy *miphy_phy)
 }
 
 static struct phy *miphy28lp_xlate(struct device *dev,
-                                  struct of_phandle_args *args)
+                                  const struct of_phandle_args *args)
 {
        struct miphy28lp_dev *miphy_dev = dev_get_drvdata(dev);
        struct miphy28lp_phy *miphy_phy = NULL;
index 35a9831b51610cad2b0b3fe22b0c2d8aba58bf63..c661ab63505f9b1731c324296fa552dc748d7825 100644
@@ -183,7 +183,7 @@ static const struct phy_ops spear1310_miphy_ops = {
 };
 
 static struct phy *spear1310_miphy_xlate(struct device *dev,
-                                        struct of_phandle_args *args)
+                                        const struct of_phandle_args *args)
 {
        struct spear1310_miphy_priv *priv = dev_get_drvdata(dev);
 
index 34a1cf21015f59d89de002ae154e01173b25f6dc..85a60d64ebb7dbadda62d530dfb6d0179a11bc2b 100644
@@ -220,7 +220,7 @@ static SIMPLE_DEV_PM_OPS(spear1340_miphy_pm_ops, spear1340_miphy_suspend,
                         spear1340_miphy_resume);
 
 static struct phy *spear1340_miphy_xlate(struct device *dev,
-                                        struct of_phandle_args *args)
+                                        const struct of_phandle_args *args)
 {
        struct spear1340_miphy_priv *priv = dev_get_drvdata(dev);
 
index d5e7e44000b56ec0fe16703a52e47b05926ac866..9dbe60dcf3190d0e96b0fa915f784d1a91ec7701 100644
@@ -574,7 +574,7 @@ static void stm32_usbphyc_switch_setup(struct stm32_usbphyc *usbphyc,
 }
 
 static struct phy *stm32_usbphyc_of_xlate(struct device *dev,
-                                         struct of_phandle_args *args)
+                                         const struct of_phandle_args *args)
 {
        struct stm32_usbphyc *usbphyc = dev_get_drvdata(dev);
        struct stm32_usbphyc_phy *usbphyc_phy = NULL;
index 142ebe0247cc00f82080303269835ad9565e584f..0dc86a7740e382f2dada87af3d1964bc92ddb52e 100644
@@ -22,7 +22,7 @@
 #include "xusb.h"
 
 static struct phy *tegra_xusb_pad_of_xlate(struct device *dev,
-                                          struct of_phandle_args *args)
+                                          const struct of_phandle_args *args)
 {
        struct tegra_xusb_pad *pad = dev_get_drvdata(dev);
        struct phy *phy = NULL;
index 3f1d43e8b7ad1fbcfd3399db0150c59eb6e96ad4..8b3b937de624838c8f57265dbf5ffe14f4b9d6c1 100644
@@ -495,7 +495,7 @@ static void serdes_am654_release(struct phy *x)
 }
 
 static struct phy *serdes_am654_xlate(struct device *dev,
-                                     struct of_phandle_args *args)
+                                     const struct of_phandle_args *args)
 {
        struct serdes_am654 *am654_phy;
        struct phy *phy;
index b7a9ef3f465440008933fa3c7a20c44599baa150..0fe577f0d6c1e66dbb3803e115f954546cd9687d 100644
@@ -119,7 +119,7 @@ static const struct phy_ops da8xx_usb20_phy_ops = {
 };
 
 static struct phy *da8xx_usb_phy_of_xlate(struct device *dev,
-                                        struct of_phandle_args *args)
+                                        const struct of_phandle_args *args)
 {
        struct da8xx_usb_phy *d_phy = dev_get_drvdata(dev);
 
index 0f4818adb440022d0d9b21a723e36ca062091268..b30bf740e2e0df81f1c03caa9624e08808147d03 100644
@@ -297,7 +297,7 @@ static const struct phy_ops phy_gmii_sel_ops = {
 };
 
 static struct phy *phy_gmii_sel_of_xlate(struct device *dev,
-                                        struct of_phandle_args *args)
+                                        const struct of_phandle_args *args)
 {
        struct phy_gmii_sel_priv *priv = dev_get_drvdata(dev);
        int phy_id = args->args[0];
@@ -494,11 +494,35 @@ static int phy_gmii_sel_probe(struct platform_device *pdev)
        return 0;
 }
 
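+/*
+ * The GMII_SEL register state is assumed not to survive suspend, so
+ * re-apply each port's recorded PHY interface mode on resume.
+ */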
+static int phy_gmii_sel_resume_noirq(struct device *dev)
+{
+       struct phy_gmii_sel_priv *priv = dev_get_drvdata(dev);
+       struct phy_gmii_sel_phy_priv *if_phys = priv->if_phys;
+       int ret, i;
+
+       for (i = 0; i < priv->num_ports; i++) {
+               if (if_phys[i].phy_if_mode) {
+                       ret = phy_gmii_sel_mode(if_phys[i].if_phy,
+                                               PHY_MODE_ETHERNET, if_phys[i].phy_if_mode);
+                       if (ret) {
+                               dev_err(dev, "port%u: restore mode fail %d\n",
+                                       if_phys[i].if_phy->id, ret);
+                               return ret;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static DEFINE_NOIRQ_DEV_PM_OPS(phy_gmii_sel_pm_ops, NULL, phy_gmii_sel_resume_noirq);
+
 static struct platform_driver phy_gmii_sel_driver = {
        .probe          = phy_gmii_sel_probe,
        .driver         = {
                .name   = "phy-gmii-sel",
                .of_match_table = phy_gmii_sel_id_table,
+               .pm = pm_sleep_ptr(&phy_gmii_sel_pm_ops),
        },
 };
 module_platform_driver(phy_gmii_sel_driver);
index b4881cb344759de87a9ae6fbbb3a10c96fe55833..13cd614e12a1d3c8ad98de07a40f63091409d228 100644
 #include <linux/property.h>
 #include <linux/workqueue.h>
 
+#define TI_VENDOR_ID           0x0451
+#define TI_DEVICE_TUSB1210     0x1507
+#define TI_DEVICE_TUSB1211     0x1508
+
 #define TUSB1211_POWER_CONTROL                         0x3d
 #define TUSB1211_POWER_CONTROL_SET                     0x3e
 #define TUSB1211_POWER_CONTROL_CLEAR                   0x3f
@@ -52,7 +56,7 @@ enum tusb1210_chg_det_state {
 };
 
 struct tusb1210 {
-       struct ulpi *ulpi;
+       struct device *dev;
        struct phy *phy;
        struct gpio_desc *gpio_reset;
        struct gpio_desc *gpio_cs;
@@ -71,26 +75,27 @@ struct tusb1210 {
 
 static int tusb1210_ulpi_write(struct tusb1210 *tusb, u8 reg, u8 val)
 {
+       struct device *dev = tusb->dev;
        int ret;
 
-       ret = ulpi_write(tusb->ulpi, reg, val);
+       ret = ulpi_write(to_ulpi_dev(dev), reg, val);
        if (ret)
-               dev_err(&tusb->ulpi->dev, "error %d writing val 0x%02x to reg 0x%02x\n",
-                       ret, val, reg);
+               dev_err(dev, "error %d writing val 0x%02x to reg 0x%02x\n", ret, val, reg);
 
        return ret;
 }
 
 static int tusb1210_ulpi_read(struct tusb1210 *tusb, u8 reg, u8 *val)
 {
+       struct device *dev = tusb->dev;
        int ret;
 
-       ret = ulpi_read(tusb->ulpi, reg);
+       ret = ulpi_read(to_ulpi_dev(dev), reg);
        if (ret >= 0) {
                *val = ret;
                ret = 0;
        } else {
-               dev_err(&tusb->ulpi->dev, "error %d reading reg 0x%02x\n", ret, reg);
+               dev_err(dev, "error %d reading reg 0x%02x\n", ret, reg);
        }
 
        return ret;
@@ -178,7 +183,7 @@ static void tusb1210_reset(struct tusb1210 *tusb)
 static void tusb1210_chg_det_set_type(struct tusb1210 *tusb,
                                      enum power_supply_usb_type type)
 {
-       dev_dbg(&tusb->ulpi->dev, "charger type: %d\n", type);
+       dev_dbg(tusb->dev, "charger type: %d\n", type);
        tusb->chg_type = type;
        tusb->chg_det_retries = 0;
        power_supply_changed(tusb->psy);
@@ -189,7 +194,7 @@ static void tusb1210_chg_det_set_state(struct tusb1210 *tusb,
                                       int delay_ms)
 {
        if (delay_ms)
-               dev_dbg(&tusb->ulpi->dev, "chg_det new state %s in %d ms\n",
+               dev_dbg(tusb->dev, "chg_det new state %s in %d ms\n",
                        tusb1210_chg_det_states[new_state], delay_ms);
 
        tusb->chg_det_state = new_state;
@@ -253,7 +258,7 @@ static void tusb1210_chg_det_work(struct work_struct *work)
        int ret;
        u8 val;
 
-       dev_dbg(&tusb->ulpi->dev, "chg_det state %s vbus_present %d\n",
+       dev_dbg(tusb->dev, "chg_det state %s vbus_present %d\n",
                tusb1210_chg_det_states[tusb->chg_det_state], vbus_present);
 
        switch (tusb->chg_det_state) {
@@ -261,9 +266,9 @@ static void tusb1210_chg_det_work(struct work_struct *work)
                tusb->chg_type = POWER_SUPPLY_USB_TYPE_UNKNOWN;
                tusb->chg_det_retries = 0;
                /* Power on USB controller for ulpi_read()/_write() */
-               ret = pm_runtime_resume_and_get(tusb->ulpi->dev.parent);
+               ret = pm_runtime_resume_and_get(tusb->dev->parent);
                if (ret < 0) {
-                       dev_err(&tusb->ulpi->dev, "error %d runtime-resuming\n", ret);
+                       dev_err(tusb->dev, "error %d runtime-resuming\n", ret);
                        /* Should never happen, skip charger detection */
                        tusb1210_chg_det_set_state(tusb, TUSB1210_CHG_DET_CONNECTED, 0);
                        return;
@@ -332,7 +337,7 @@ static void tusb1210_chg_det_work(struct work_struct *work)
 
                mutex_unlock(&tusb->phy->mutex);
 
-               pm_runtime_put(tusb->ulpi->dev.parent);
+               pm_runtime_put(tusb->dev->parent);
                tusb1210_chg_det_set_state(tusb, TUSB1210_CHG_DET_CONNECTED, 0);
                break;
        case TUSB1210_CHG_DET_CONNECTED:
@@ -428,13 +433,14 @@ static const struct power_supply_desc tusb1210_psy_desc = {
 static void tusb1210_probe_charger_detect(struct tusb1210 *tusb)
 {
        struct power_supply_config psy_cfg = { .drv_data = tusb };
-       struct device *dev = &tusb->ulpi->dev;
+       struct device *dev = tusb->dev;
+       struct ulpi *ulpi = to_ulpi_dev(dev);
        int ret;
 
        if (!device_property_read_bool(dev->parent, "linux,phy_charger_detect"))
                return;
 
-       if (tusb->ulpi->id.product != 0x1508) {
+       if (ulpi->id.product != TI_DEVICE_TUSB1211) {
                dev_err(dev, "error charger detection is only supported on the TUSB1211\n");
                return;
        }
@@ -485,25 +491,24 @@ static const struct phy_ops phy_ops = {
 
 static int tusb1210_probe(struct ulpi *ulpi)
 {
+       struct device *dev = &ulpi->dev;
        struct tusb1210 *tusb;
        u8 val, reg;
        int ret;
 
-       tusb = devm_kzalloc(&ulpi->dev, sizeof(*tusb), GFP_KERNEL);
+       tusb = devm_kzalloc(dev, sizeof(*tusb), GFP_KERNEL);
        if (!tusb)
                return -ENOMEM;
 
-       tusb->ulpi = ulpi;
+       tusb->dev = dev;
 
-       tusb->gpio_reset = devm_gpiod_get_optional(&ulpi->dev, "reset",
-                                                  GPIOD_OUT_LOW);
+       tusb->gpio_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
        if (IS_ERR(tusb->gpio_reset))
                return PTR_ERR(tusb->gpio_reset);
 
        gpiod_set_value_cansleep(tusb->gpio_reset, 1);
 
-       tusb->gpio_cs = devm_gpiod_get_optional(&ulpi->dev, "cs",
-                                               GPIOD_OUT_LOW);
+       tusb->gpio_cs = devm_gpiod_get_optional(dev, "cs", GPIOD_OUT_LOW);
        if (IS_ERR(tusb->gpio_cs))
                return PTR_ERR(tusb->gpio_cs);
 
@@ -519,15 +524,15 @@ static int tusb1210_probe(struct ulpi *ulpi)
                return ret;
 
        /* High speed output drive strength configuration */
-       if (!device_property_read_u8(&ulpi->dev, "ihstx", &val))
+       if (!device_property_read_u8(dev, "ihstx", &val))
                u8p_replace_bits(&reg, val, (u8)TUSB1210_VENDOR_SPECIFIC2_IHSTX_MASK);
 
        /* High speed output impedance configuration */
-       if (!device_property_read_u8(&ulpi->dev, "zhsdrv", &val))
+       if (!device_property_read_u8(dev, "zhsdrv", &val))
                u8p_replace_bits(&reg, val, (u8)TUSB1210_VENDOR_SPECIFIC2_ZHSDRV_MASK);
 
        /* DP/DM swap control */
-       if (!device_property_read_u8(&ulpi->dev, "datapolarity", &val))
+       if (!device_property_read_u8(dev, "datapolarity", &val))
                u8p_replace_bits(&reg, val, (u8)TUSB1210_VENDOR_SPECIFIC2_DP_MASK);
 
        ret = tusb1210_ulpi_write(tusb, TUSB1210_VENDOR_SPECIFIC2, reg);
@@ -561,11 +566,9 @@ static void tusb1210_remove(struct ulpi *ulpi)
        tusb1210_remove_charger_detect(tusb);
 }
 
-#define TI_VENDOR_ID 0x0451
-
 static const struct ulpi_device_id tusb1210_ulpi_id[] = {
-       { TI_VENDOR_ID, 0x1507, },  /* TUSB1210 */
-       { TI_VENDOR_ID, 0x1508, },  /* TUSB1211 */
+       { TI_VENDOR_ID, TI_DEVICE_TUSB1210 },
+       { TI_VENDOR_ID, TI_DEVICE_TUSB1211 },
        { },
 };
 MODULE_DEVICE_TABLE(ulpi, tusb1210_ulpi_id);
index 2559c6594cea2bcdde2a06af52bcc40eebf246d0..f72c5257d712771b6aad09bb766732faffa54fd3 100644
@@ -768,7 +768,7 @@ static const unsigned int icm_matrix[NUM_LANES][CONTROLLERS_PER_LANE] = {
 
 /* Translate OF phandle and args to PHY instance. */
 static struct phy *xpsgtr_xlate(struct device *dev,
-                               struct of_phandle_args *args)
+                               const struct of_phandle_args *args)
 {
        struct xpsgtr_dev *gtr_dev = dev_get_drvdata(dev);
        struct xpsgtr_phy *gtr_phy;
index 7641848be4defd903691d61932521a07c02fdb79..96ef57a7d385c1abc45c6ea1fb2390c225dec0f6 100644
@@ -685,7 +685,7 @@ static const struct phy_ops sata_phy_ops = {
 };
 
 static struct phy *tegra_xusb_padctl_xlate(struct device *dev,
-                                          struct of_phandle_args *args)
+                                          const struct of_phandle_args *args)
 {
        struct tegra_xusb_padctl *padctl = dev_get_drvdata(dev);
        unsigned int index = args->args[0];
index 61c745490d714d2952fdb5e65be28397b7a61d6a..5546fb1894913066f6618a856f081c11a7b70e9c 100644
@@ -1107,7 +1107,7 @@ module_init(compal_init);
 module_exit(compal_cleanup);
 
 MODULE_AUTHOR("Cezary Jackiewicz");
-MODULE_AUTHOR("Roald Frederickx (roald.frederickx@gmail.com)");
+MODULE_AUTHOR("Roald Frederickx <roald.frederickx@gmail.com>");
 MODULE_DESCRIPTION("Compal Laptop Support");
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL");
index fa720967e69bfb68a9e86481e3540428f2a3984f..217630f40c3f8b5cb1a6d3860a18d5640e49c2d5 100644
@@ -365,7 +365,7 @@ static void __exit oaktrail_cleanup(void)
 module_init(oaktrail_init);
 module_exit(oaktrail_cleanup);
 
-MODULE_AUTHOR("Yin Kangkai (kangkai.yin@intel.com)");
+MODULE_AUTHOR("Yin Kangkai <kangkai.yin@intel.com>");
 MODULE_DESCRIPTION("Intel Oaktrail Platform ACPI Extras");
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL");
index 32981e2ad3b390217bca086980e1f19097dc6596..9d70146fd7420a1f6bb07e42fdc65bf694ed9851 100644 (file)
@@ -6659,6 +6659,6 @@ static void __exit mlxplat_exit(void)
 }
 module_exit(mlxplat_exit);
 
-MODULE_AUTHOR("Vadim Pasternak (vadimp@mellanox.com)");
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox platform driver");
 MODULE_LICENSE("Dual BSD/GPL");
index 550145f82726e95dd856d12a4f7d660f61a14500..7db0a29b5b8dcd04dd71b328e364688083640311 100644 (file)
@@ -288,7 +288,7 @@ config REGULATOR_CROS_EC
 config REGULATOR_DA903X
        tristate "Dialog Semiconductor DA9030/DA9034 regulators"
        depends on PMIC_DA903X
-       depends on !CC_IS_CLANG # https://bugs.llvm.org/show_bug.cgi?id=38789
+       depends on !CC_IS_CLANG # https://llvm.org/pr38789
        help
          Say y here to support the BUCKs and LDOs regulators found on
          Dialog Semiconductor DA9030/DA9034 PMIC.
index 9c8f529b827cb3556e07bb0c3ed7ce51e5873cff..ba66aa6a83c6c1f9605a26a4341211c96f7ba2bb 100644 (file)
@@ -552,6 +552,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
        int rc, i, j, num_of_segments;
        struct dcssblk_dev_info *dev_info;
        struct segment_info *seg_info, *temp;
+       struct dax_device *dax_dev;
        char *local_buf;
        unsigned long seg_byte_size;
 
@@ -679,13 +680,13 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
        if (rc)
                goto put_dev;
 
-       dev_info->dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops);
-       if (IS_ERR(dev_info->dax_dev)) {
-               rc = PTR_ERR(dev_info->dax_dev);
-               dev_info->dax_dev = NULL;
+       dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops);
+       if (IS_ERR(dax_dev)) {
+               rc = PTR_ERR(dax_dev);
                goto put_dev;
        }
-       set_dax_synchronous(dev_info->dax_dev);
+       set_dax_synchronous(dax_dev);
+       dev_info->dax_dev = dax_dev;
        rc = dax_add_host(dev_info->dax_dev, dev_info->gd);
        if (rc)
                goto out_dax;
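
The rework above follows a common error-handling pattern: build the object in a local variable and publish it into the long-lived structure only once it is known valid, so no other path can ever observe an ERR_PTR there (the old code briefly stored one and then had to NULL it back out). Condensed, with the function name hypothetical and the driver's own types and ops as shown in the hunk:

static int dcssblk_alloc_dax_sketch(struct dcssblk_dev_info *dev_info)
{
	struct dax_device *dax_dev;

	dax_dev = alloc_dax(dev_info, &dcssblk_dax_ops);
	if (IS_ERR(dax_dev))
		return PTR_ERR(dax_dev);	/* nothing published, nothing to undo */
	set_dax_synchronous(dax_dev);
	dev_info->dax_dev = dax_dev;		/* publish only once valid */
	return dax_add_host(dax_dev, dev_info->gd);	/* unwind on failure elided */
}
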
index 11c428f4c7cf9c9183810479f50aa63466fc8ada..7815e9bea69a1349a5b73a1aa556577cdf72b47c 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/memory.h>
+#include <linux/memory_hotplug.h>
 #include <linux/module.h>
 #include <asm/ctlreg.h>
 #include <asm/chpid.h>
@@ -26,6 +27,7 @@
 #include <asm/sclp.h>
 #include <asm/numa.h>
 #include <asm/facility.h>
+#include <asm/page-states.h>
 
 #include "sclp.h"
 
@@ -340,16 +342,38 @@ static int sclp_mem_notifier(struct notifier_block *nb,
                if (contains_standby_increment(start, start + size))
                        rc = -EPERM;
                break;
-       case MEM_ONLINE:
-       case MEM_CANCEL_OFFLINE:
-               break;
-       case MEM_GOING_ONLINE:
+       case MEM_PREPARE_ONLINE:
+               /*
+                * The altmap_start_pfn and altmap_nr_pages fields of
+                * struct memory_notify are only valid for the
+                * MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers.
+                *
+                * When altmap is in use, take the specified memory range
+                * online, which includes the altmap.
+                */
+               if (arg->altmap_nr_pages) {
+                       start = PFN_PHYS(arg->altmap_start_pfn);
+                       size += PFN_PHYS(arg->altmap_nr_pages);
+               }
                rc = sclp_mem_change_state(start, size, 1);
+               if (rc || !arg->altmap_nr_pages)
+                       break;
+               /*
+                * Set CMMA state to nodat here, since the struct page memory
+                * at the beginning of the memory block will not go through the
+                * buddy allocator later.
+                */
+               __arch_set_page_nodat((void *)__va(start), arg->altmap_nr_pages);
                break;
-       case MEM_CANCEL_ONLINE:
-               sclp_mem_change_state(start, size, 0);
-               break;
-       case MEM_OFFLINE:
+       case MEM_FINISH_OFFLINE:
+               /*
+                * When altmap is in use, take the specified memory range
+                * offline, which includes the altmap.
+                */
+               if (arg->altmap_nr_pages) {
+                       start = PFN_PHYS(arg->altmap_start_pfn);
+                       size += PFN_PHYS(arg->altmap_nr_pages);
+               }
                sclp_mem_change_state(start, size, 0);
                break;
        default:
@@ -400,7 +424,9 @@ static void __init add_memory_merged(u16 rn)
        if (!size)
                goto skip_add;
        for (addr = start; addr < start + size; addr += block_size)
-               add_memory(0, addr, block_size, MHP_NONE);
+               add_memory(0, addr, block_size,
+                          MACHINE_HAS_EDAT1 ?
+                          MHP_MEMMAP_ON_MEMORY | MHP_OFFLINE_INACCESSIBLE : MHP_NONE);
 skip_add:
        first_rn = rn;
        num = 1;
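
A brief gloss on the altmap arithmetic above: with MHP_MEMMAP_ON_MEMORY the struct pages for a block live in the block itself (the "altmap"), ahead of the usable pages, so the standby range that SCLP must assign or unassign has to be widened to cover them. A sketch of just that adjustment (helper name hypothetical):

#include <linux/memory.h>
#include <linux/pfn.h>

static void sclp_altmap_adjust_sketch(const struct memory_notify *arg,
				      unsigned long long *start,
				      unsigned long long *size)
{
	/* Fields only valid for MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE. */
	if (arg->altmap_nr_pages) {
		*start = PFN_PHYS(arg->altmap_start_pfn);
		*size += PFN_PHYS(arg->altmap_nr_pages);
	}
}
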
index 0ff61d00feb19c7d6520d14d07a99aac5a4d3454..8672d225ba77fe88371f744db7f918d0064a2ae4 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/slab.h>
 #include <linux/timer.h>
 
-MODULE_AUTHOR("(C) 2000 IBM Corp. by Fritz Elfert (felfert@millenux.com)");
+MODULE_AUTHOR("(C) 2000 IBM Corp. by Fritz Elfert <felfert@millenux.com>");
 MODULE_DESCRIPTION("Finite state machine helper functions");
 MODULE_LICENSE("GPL");
 
index 1c76e27d527a7ddd44751fd014b48955e3fbda2a..3192dcb83b86df9912bf36d317278736793fc24c 100644 (file)
@@ -358,9 +358,6 @@ fail:
        return NULL;
 }
 
-extern int bbc_envctrl_init(struct bbc_i2c_bus *bp);
-extern void bbc_envctrl_cleanup(struct bbc_i2c_bus *bp);
-
 static int bbc_i2c_probe(struct platform_device *op)
 {
        struct bbc_i2c_bus *bp;
@@ -385,7 +382,7 @@ static int bbc_i2c_probe(struct platform_device *op)
        return err;
 }
 
-static int bbc_i2c_remove(struct platform_device *op)
+static void bbc_i2c_remove(struct platform_device *op)
 {
        struct bbc_i2c_bus *bp = dev_get_drvdata(&op->dev);
 
@@ -399,8 +396,6 @@ static int bbc_i2c_remove(struct platform_device *op)
                of_iounmap(&op->resource[1], bp->i2c_control_regs, 2);
 
        kfree(bp);
-
-       return 0;
 }
 
 static const struct of_device_id bbc_i2c_match[] = {
@@ -418,7 +413,7 @@ static struct platform_driver bbc_i2c_driver = {
                .of_match_table = bbc_i2c_match,
        },
        .probe          = bbc_i2c_probe,
-       .remove         = bbc_i2c_remove,
+       .remove_new     = bbc_i2c_remove,
 };
 
 module_platform_driver(bbc_i2c_driver);
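
The sparc drivers below all get the same conversion: platform .remove callbacks historically returned int, but the core cannot do anything useful with an error at remove time, so drivers are being migrated to the void-returning .remove_new hook. The shape of a converted driver (names hypothetical):

#include <linux/module.h>
#include <linux/platform_device.h>

static int example_probe(struct platform_device *op)
{
	return 0;
}

static void example_remove(struct platform_device *op)
{
	/* teardown only; nothing to return */
}

static struct platform_driver example_driver = {
	.driver = {
		.name = "example",
	},
	.probe		= example_probe,
	.remove_new	= example_remove,	/* void return */
};
module_platform_driver(example_driver);

MODULE_LICENSE("GPL");
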
index 7ffe908c62dcf7087329273b9676bef66b19cbec..6c748836754bf4c846393afaafc8f0225c94c28e 100644 (file)
@@ -82,4 +82,7 @@ extern int bbc_i2c_readb(struct bbc_i2c_client *, unsigned char *byte, int off);
 extern int bbc_i2c_write_buf(struct bbc_i2c_client *, char *buf, int len, int off);
 extern int bbc_i2c_read_buf(struct bbc_i2c_client *, char *buf, int len, int off);
 
+extern int bbc_envctrl_init(struct bbc_i2c_bus *bp);
+extern void bbc_envctrl_cleanup(struct bbc_i2c_bus *bp);
+
 #endif /* _BBC_I2C_H */
index 18e6f84e754f279151069259b9a50a52561b9a88..521cf8affe65a1b4ff100c51b41e87ba79859157 100644 (file)
@@ -229,7 +229,7 @@ out_iounmap:
        goto out;
 }
 
-static int d7s_remove(struct platform_device *op)
+static void d7s_remove(struct platform_device *op)
 {
        struct d7s *p = dev_get_drvdata(&op->dev);
        u8 regs = readb(p->regs);
@@ -245,8 +245,6 @@ static int d7s_remove(struct platform_device *op)
 
        misc_deregister(&d7s_miscdev);
        of_iounmap(&op->resource[0], p->regs, sizeof(u8));
-
-       return 0;
 }
 
 static const struct of_device_id d7s_match[] = {
@@ -263,7 +261,7 @@ static struct platform_driver d7s_driver = {
                .of_match_table = d7s_match,
        },
        .probe          = d7s_probe,
-       .remove         = d7s_remove,
+       .remove_new     = d7s_remove,
 };
 
 module_platform_driver(d7s_driver);
index 3dd7274cb0a3e8522945af59730a7a339cda7a79..491cc6c0b3f91a5441a9027900fce031f119662e 100644 (file)
@@ -1097,7 +1097,7 @@ out_iounmap:
        return err;
 }
 
-static int envctrl_remove(struct platform_device *op)
+static void envctrl_remove(struct platform_device *op)
 {
        int index;
 
@@ -1108,8 +1108,6 @@ static int envctrl_remove(struct platform_device *op)
 
        for (index = 0; index < ENVCTRL_MAX_CPU * 2; index++)
                kfree(i2c_childlist[index].tables);
-
-       return 0;
 }
 
 static const struct of_device_id envctrl_match[] = {
@@ -1127,7 +1125,7 @@ static struct platform_driver envctrl_driver = {
                .of_match_table = envctrl_match,
        },
        .probe          = envctrl_probe,
-       .remove         = envctrl_remove,
+       .remove_new     = envctrl_remove,
 };
 
 module_platform_driver(envctrl_driver);
index ea2d903ba673afeedf9b383ef93a26e3d797ea61..05d37d31c3b8fbf34a52a6f4a62043899a8e2a70 100644 (file)
@@ -187,11 +187,9 @@ static int flash_probe(struct platform_device *op)
        return misc_register(&flash_dev);
 }
 
-static int flash_remove(struct platform_device *op)
+static void flash_remove(struct platform_device *op)
 {
        misc_deregister(&flash_dev);
-
-       return 0;
 }
 
 static const struct of_device_id flash_match[] = {
@@ -208,7 +206,7 @@ static struct platform_driver flash_driver = {
                .of_match_table = flash_match,
        },
        .probe          = flash_probe,
-       .remove         = flash_remove,
+       .remove_new     = flash_remove,
 };
 
 module_platform_driver(flash_driver);
index 30b9751aad302b21a509d85af4f1714f27470042..cc178874c4a66229cbdebad974af6081756c7635 100644 (file)
@@ -33,7 +33,7 @@
 #include <linux/pci.h>
 #endif
 
-MODULE_AUTHOR("Thomas K. Dyas (tdyas@noc.rutgers.edu) and Eddie C. Dost  (ecd@skynet.be)");
+MODULE_AUTHOR("Thomas K. Dyas <tdyas@noc.rutgers.edu> and Eddie C. Dost <ecd@skynet.be>");
 MODULE_DESCRIPTION("OPENPROM Configuration Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1.0");
index 0660425e3a5a073aa2e19618d23b9c720721b240..cf15a4186d037da4a334bbb49358f02b9ee65486 100644 (file)
@@ -399,7 +399,7 @@ out_free:
        goto out;
 }
 
-static int uctrl_remove(struct platform_device *op)
+static void uctrl_remove(struct platform_device *op)
 {
        struct uctrl_driver *p = dev_get_drvdata(&op->dev);
 
@@ -409,7 +409,6 @@ static int uctrl_remove(struct platform_device *op)
                of_iounmap(&op->resource[0], p->regs, resource_size(&op->resource[0]));
                kfree(p);
        }
-       return 0;
 }
 
 static const struct of_device_id uctrl_match[] = {
@@ -426,7 +425,7 @@ static struct platform_driver uctrl_driver = {
                .of_match_table = uctrl_match,
        },
        .probe          = uctrl_probe,
-       .remove         = uctrl_remove,
+       .remove_new     = uctrl_remove,
 };
 
 
index f925f8664c2c198ed67f9f26da437ba4733594ee..6fb61c88ea119628684ce717cc539de67c353294 100644 (file)
@@ -161,28 +161,28 @@ static ssize_t twa_show_stats(struct device *dev,
        ssize_t len;
 
        spin_lock_irqsave(tw_dev->host->host_lock, flags);
-       len = snprintf(buf, PAGE_SIZE, "3w-9xxx Driver version: %s\n"
-                      "Current commands posted:   %4d\n"
-                      "Max commands posted:       %4d\n"
-                      "Current pending commands:  %4d\n"
-                      "Max pending commands:      %4d\n"
-                      "Last sgl length:           %4d\n"
-                      "Max sgl length:            %4d\n"
-                      "Last sector count:         %4d\n"
-                      "Max sector count:          %4d\n"
-                      "SCSI Host Resets:          %4d\n"
-                      "AEN's:                     %4d\n",
-                      TW_DRIVER_VERSION,
-                      tw_dev->posted_request_count,
-                      tw_dev->max_posted_request_count,
-                      tw_dev->pending_request_count,
-                      tw_dev->max_pending_request_count,
-                      tw_dev->sgl_entries,
-                      tw_dev->max_sgl_entries,
-                      tw_dev->sector_count,
-                      tw_dev->max_sector_count,
-                      tw_dev->num_resets,
-                      tw_dev->aen_count);
+       len = sysfs_emit(buf, "3w-9xxx Driver version: %s\n"
+                        "Current commands posted:   %4d\n"
+                        "Max commands posted:       %4d\n"
+                        "Current pending commands:  %4d\n"
+                        "Max pending commands:      %4d\n"
+                        "Last sgl length:           %4d\n"
+                        "Max sgl length:            %4d\n"
+                        "Last sector count:         %4d\n"
+                        "Max sector count:          %4d\n"
+                        "SCSI Host Resets:          %4d\n"
+                        "AEN's:                     %4d\n",
+                        TW_DRIVER_VERSION,
+                        tw_dev->posted_request_count,
+                        tw_dev->max_posted_request_count,
+                        tw_dev->pending_request_count,
+                        tw_dev->max_pending_request_count,
+                        tw_dev->sgl_entries,
+                        tw_dev->max_sgl_entries,
+                        tw_dev->sector_count,
+                        tw_dev->max_sector_count,
+                        tw_dev->num_resets,
+                        tw_dev->aen_count);
        spin_unlock_irqrestore(tw_dev->host->host_lock, flags);
        return len;
 } /* End twa_show_stats() */
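
Two sibling 3ware drivers below repeat this conversion, and the NCR_700 hunk further down drops an arbitrary 20-byte cap the same way. sysfs_emit() encodes the sysfs contract, that show() buffers are exactly one page, and returns the bytes actually emitted, so callers stop hand-rolling the PAGE_SIZE bound. Minimal form (attribute hypothetical):

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	/* sysfs_emit() checks buf is page-aligned and caps at PAGE_SIZE */
	return sysfs_emit(buf, "%d\n", 42);
}
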
index 9bdb75dfdcd7c0a35c9880eabf140e145b842a41..caa6713a62a44a72c7cfa5128c01fe54788cf708 100644 (file)
@@ -166,24 +166,24 @@ static ssize_t twl_show_stats(struct device *dev,
        ssize_t len;
 
        spin_lock_irqsave(tw_dev->host->host_lock, flags);
-       len = snprintf(buf, PAGE_SIZE, "3w-sas Driver version: %s\n"
-                      "Current commands posted:   %4d\n"
-                      "Max commands posted:       %4d\n"
-                      "Last sgl length:           %4d\n"
-                      "Max sgl length:            %4d\n"
-                      "Last sector count:         %4d\n"
-                      "Max sector count:          %4d\n"
-                      "SCSI Host Resets:          %4d\n"
-                      "AEN's:                     %4d\n",
-                      TW_DRIVER_VERSION,
-                      tw_dev->posted_request_count,
-                      tw_dev->max_posted_request_count,
-                      tw_dev->sgl_entries,
-                      tw_dev->max_sgl_entries,
-                      tw_dev->sector_count,
-                      tw_dev->max_sector_count,
-                      tw_dev->num_resets,
-                      tw_dev->aen_count);
+       len = sysfs_emit(buf, "3w-sas Driver version: %s\n"
+                        "Current commands posted:   %4d\n"
+                        "Max commands posted:       %4d\n"
+                        "Last sgl length:           %4d\n"
+                        "Max sgl length:            %4d\n"
+                        "Last sector count:         %4d\n"
+                        "Max sector count:          %4d\n"
+                        "SCSI Host Resets:          %4d\n"
+                        "AEN's:                     %4d\n",
+                        TW_DRIVER_VERSION,
+                        tw_dev->posted_request_count,
+                        tw_dev->max_posted_request_count,
+                        tw_dev->sgl_entries,
+                        tw_dev->max_sgl_entries,
+                        tw_dev->sector_count,
+                        tw_dev->max_sector_count,
+                        tw_dev->num_resets,
+                        tw_dev->aen_count);
        spin_unlock_irqrestore(tw_dev->host->host_lock, flags);
        return len;
 } /* End twl_show_stats() */
index f39c9ec2e781095abecd4e5e2b359aae83fbddbd..2c0fb6da0e608d2bfbad1138fb538bde8a481054 100644 (file)
@@ -496,28 +496,28 @@ static ssize_t tw_show_stats(struct device *dev, struct device_attribute *attr,
        ssize_t len;
 
        spin_lock_irqsave(tw_dev->host->host_lock, flags);
-       len = snprintf(buf, PAGE_SIZE, "3w-xxxx Driver version: %s\n"
-                      "Current commands posted:   %4d\n"
-                      "Max commands posted:       %4d\n"
-                      "Current pending commands:  %4d\n"
-                      "Max pending commands:      %4d\n"
-                      "Last sgl length:           %4d\n"
-                      "Max sgl length:            %4d\n"
-                      "Last sector count:         %4d\n"
-                      "Max sector count:          %4d\n"
-                      "SCSI Host Resets:          %4d\n"
-                      "AEN's:                     %4d\n", 
-                      TW_DRIVER_VERSION,
-                      tw_dev->posted_request_count,
-                      tw_dev->max_posted_request_count,
-                      tw_dev->pending_request_count,
-                      tw_dev->max_pending_request_count,
-                      tw_dev->sgl_entries,
-                      tw_dev->max_sgl_entries,
-                      tw_dev->sector_count,
-                      tw_dev->max_sector_count,
-                      tw_dev->num_resets,
-                      tw_dev->aen_count);
+       len = sysfs_emit(buf, "3w-xxxx Driver version: %s\n"
+                        "Current commands posted:   %4d\n"
+                        "Max commands posted:       %4d\n"
+                        "Current pending commands:  %4d\n"
+                        "Max pending commands:      %4d\n"
+                        "Last sgl length:           %4d\n"
+                        "Max sgl length:            %4d\n"
+                        "Last sector count:         %4d\n"
+                        "Max sector count:          %4d\n"
+                        "SCSI Host Resets:          %4d\n"
+                        "AEN's:                     %4d\n",
+                        TW_DRIVER_VERSION,
+                        tw_dev->posted_request_count,
+                        tw_dev->max_posted_request_count,
+                        tw_dev->pending_request_count,
+                        tw_dev->max_pending_request_count,
+                        tw_dev->sgl_entries,
+                        tw_dev->max_sgl_entries,
+                        tw_dev->sector_count,
+                        tw_dev->max_sector_count,
+                        tw_dev->num_resets,
+                        tw_dev->aen_count);
        spin_unlock_irqrestore(tw_dev->host->host_lock, flags);
        return len;
 } /* End tw_show_stats() */
index 857be0f3ae5b9809b4ed048a05bf0868cfc9a4f7..85439e976143b9c619ee7c778ca0ebf89d4024f5 100644 (file)
@@ -2071,7 +2071,7 @@ NCR_700_show_active_tags(struct device *dev, struct device_attribute *attr, char
 {
        struct scsi_device *SDp = to_scsi_device(dev);
 
-       return snprintf(buf, 20, "%d\n", NCR_700_get_depth(SDp));
+       return sysfs_emit(buf, "%d\n", NCR_700_get_depth(SDp));
 }
 
 static struct device_attribute NCR_700_active_tags_attr = {
index 9ce27092729c30a2791b329c117fa9314b268352..8b40f75fc9d7c6b20fa9b5a8664aa26cd49027f3 100644 (file)
@@ -67,6 +67,15 @@ config SCSI_PROC_FS
 
          If unsure say Y.
 
+config SCSI_LIB_KUNIT_TEST
+       tristate "KUnit tests for SCSI Mid Layer's scsi_lib" if !KUNIT_ALL_TESTS
+       depends on KUNIT
+       default KUNIT_ALL_TESTS
+       help
+         Run SCSI Mid Layer's KUnit tests for scsi_lib.
+
+         If unsure say N.
+
 comment "SCSI support type (disk, tape, CD-ROM)"
        depends on SCSI
 
index 70e1cac1975ebedb6608890f756775a0d13b5ffc..b22857c6f3f4f93b92f3e1929a12ffb1084e8b64 100644 (file)
@@ -1099,7 +1099,7 @@ static void get_container_serial_callback(void *context, struct fib * fibptr)
                        sp[0] = INQD_PDT_DA;
                        sp[1] = scsicmd->cmnd[2];
                        sp[2] = 0;
-                       sp[3] = snprintf(sp+4, sizeof(sp)-4, "%08X",
+                       sp[3] = scnprintf(sp+4, sizeof(sp)-4, "%08X",
                                le32_to_cpu(get_serial_reply->uid));
                        scsi_sg_copy_from_buffer(scsicmd, sp,
                                                 sizeof(sp));
@@ -1169,8 +1169,8 @@ static int setinqserial(struct aac_dev *dev, void *data, int cid)
        /*
         *      This breaks array migration.
         */
-       return snprintf((char *)(data), sizeof(struct scsi_inq) - 4, "%08X%02X",
-                       le32_to_cpu(dev->adapter_info.serial[0]), cid);
+       return scnprintf((char *)(data), sizeof(struct scsi_inq) - 4, "%08X%02X",
+                        le32_to_cpu(dev->adapter_info.serial[0]), cid);
 }
 
 static inline void set_sense(struct sense_data *sense_data, u8 sense_key,
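
The snprintf to scnprintf swaps matter because the two differ in what they return: snprintf() reports the length the output would have had, which can exceed the buffer, while scnprintf() reports what was actually stored. When the return value is then used as a length, as in the INQUIRY code above, the former is a latent overflow. Demonstration:

#include <linux/kernel.h>
#include <linux/printk.h>

static void scnprintf_demo(void)
{
	char buf[8];
	int would_be, stored;

	would_be = snprintf(buf, sizeof(buf), "0123456789");  /* returns 10 */
	stored   = scnprintf(buf, sizeof(buf), "0123456789"); /* returns 7  */
	pr_info("snprintf=%d scnprintf=%d\n", would_be, stored);
}
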
index 7bd2ba1ad4d1184feac315d6e2909e861a58f46b..4cb9249e583ccccee2bae5763b0f847c1e3c810e 100644 (file)
@@ -20,7 +20,6 @@
 struct bfa_s;
 
 typedef void (*bfa_isr_func_t) (struct bfa_s *bfa, struct bfi_msg_s *m);
-typedef void (*bfa_cb_cbfn_status_t) (void *cbarg, bfa_status_t status);
 
 /*
  * Interrupt message handlers
@@ -216,8 +215,27 @@ struct bfa_faa_args_s {
        bfa_boolean_t           busy;
 };
 
+/*
+ * IOCFC state machine definitions/declarations
+ */
+enum iocfc_event {
+       IOCFC_E_INIT            = 1,    /* IOCFC init request           */
+       IOCFC_E_START           = 2,    /* IOCFC mod start request      */
+       IOCFC_E_STOP            = 3,    /* IOCFC stop request           */
+       IOCFC_E_ENABLE          = 4,    /* IOCFC enable request         */
+       IOCFC_E_DISABLE         = 5,    /* IOCFC disable request        */
+       IOCFC_E_IOC_ENABLED     = 6,    /* IOC enabled message          */
+       IOCFC_E_IOC_DISABLED    = 7,    /* IOC disabled message         */
+       IOCFC_E_IOC_FAILED      = 8,    /* failure notice by IOC sm     */
+       IOCFC_E_DCONF_DONE      = 9,    /* dconf read/write done        */
+       IOCFC_E_CFG_DONE        = 10,   /* IOCFC config complete        */
+};
+
+struct bfa_iocfc_s;
+typedef void (*bfa_iocfs_fsm_t)(struct bfa_iocfc_s *, enum iocfc_event);
+
 struct bfa_iocfc_s {
-       bfa_fsm_t               fsm;
+       bfa_iocfs_fsm_t         fsm;
        struct bfa_s            *bfa;
        struct bfa_iocfc_cfg_s  cfg;
        u32             req_cq_pi[BFI_IOC_MAX_CQS];
@@ -437,4 +455,12 @@ struct bfa_cb_pending_q_s {
        (__qe)->data = (__data);                                \
 } while (0)
 
+#define bfa_pending_q_init_status(__qe, __cbfn, __cbarg, __data) do {  \
+       bfa_q_qe_init(&((__qe)->hcb_qe.qe));                    \
+       (__qe)->hcb_qe.cbfn_status = (__cbfn);                  \
+       (__qe)->hcb_qe.cbarg = (__cbarg);                       \
+       (__qe)->hcb_qe.pre_rmv = BFA_TRUE;                      \
+       (__qe)->data = (__data);                                \
+} while (0)
+
 #endif /* __BFA_H__ */
index 6846ca8f7313c379b03983f4c532f631bc0b2a40..3438d0b8ba0624020a4c33e29656c0a55ef06070 100644 (file)
@@ -1907,15 +1907,13 @@ bfa_comp_process(struct bfa_s *bfa, struct list_head *comp_q)
        struct list_head                *qe;
        struct list_head                *qen;
        struct bfa_cb_qe_s      *hcb_qe;
-       bfa_cb_cbfn_status_t    cbfn;
 
        list_for_each_safe(qe, qen, comp_q) {
                hcb_qe = (struct bfa_cb_qe_s *) qe;
                if (hcb_qe->pre_rmv) {
                        /* qe is invalid after return, dequeue before cbfn() */
                        list_del(qe);
-                       cbfn = (bfa_cb_cbfn_status_t)(hcb_qe->cbfn);
-                       cbfn(hcb_qe->cbarg, hcb_qe->fw_status);
+                       hcb_qe->cbfn_status(hcb_qe->cbarg, hcb_qe->fw_status);
                } else
                        hcb_qe->cbfn(hcb_qe->cbarg, BFA_TRUE);
        }
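
The bfa changes in this run share one theme: function pointers that used to be stored as a generic type and cast at the call site now get their own correctly typed members, presumably so every indirect call's prototype matches its target, which is what kernel control-flow-integrity checking enforces. Here the status-style completion moves into a dedicated cbfn_status field; a sketch with the driver's struct abridged and its scalar types stood in by int:

typedef int bfa_status_t;	/* stand-ins for the driver's real types */
typedef int bfa_boolean_t;

typedef void (*bfa_cb_cbfn_status_t)(void *cbarg, bfa_status_t status);

struct bfa_cb_qe_example {
	bfa_boolean_t		pre_rmv;
	bfa_cb_cbfn_status_t	cbfn_status;	/* typed: no cast at call site */
	void			*cbarg;
	bfa_status_t		fw_status;
};
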
index 6b606bf589b4227a6116288b3f4923ebc4d2fef6..6650b1dbb1ed0a9c91d96aef0f86003916c746c7 100644 (file)
@@ -187,10 +187,10 @@ typedef void (*bfa_sm_t)(void *sm, int event);
 #define bfa_sm_state_decl(oc, st, otype, etype)                \
        static void oc ## _sm_ ## st(otype * fsm, etype event)
 
-#define bfa_sm_set_state(_sm, _state)  ((_sm)->sm = (bfa_sm_t)(_state))
+#define bfa_sm_set_state(_sm, _state)  ((_sm)->sm = (_state))
 #define bfa_sm_send_event(_sm, _event) ((_sm)->sm((_sm), (_event)))
 #define bfa_sm_get_state(_sm)          ((_sm)->sm)
-#define bfa_sm_cmp_state(_sm, _state)  ((_sm)->sm == (bfa_sm_t)(_state))
+#define bfa_sm_cmp_state(_sm, _state)  ((_sm)->sm == (_state))
 
 /*
  * For converting from state machine function to state encoding.
@@ -200,7 +200,7 @@ struct bfa_sm_table_s {
        int             state;  /*  state machine encoding      */
        char            *name;  /*  state name for display      */
 };
-#define BFA_SM(_sm)    ((bfa_sm_t)(_sm))
+#define BFA_SM(_sm)    (_sm)
 
 /*
  * State machine with entry actions.
@@ -218,24 +218,13 @@ typedef void (*bfa_fsm_t)(void *fsm, int event);
        static void oc ## _sm_ ## st ## _entry(otype * fsm)
 
 #define bfa_fsm_set_state(_fsm, _state) do {   \
-       (_fsm)->fsm = (bfa_fsm_t)(_state);      \
+       (_fsm)->fsm = (_state);      \
        _state ## _entry(_fsm);      \
 } while (0)
 
 #define bfa_fsm_send_event(_fsm, _event)       ((_fsm)->fsm((_fsm), (_event)))
 #define bfa_fsm_get_state(_fsm)                        ((_fsm)->fsm)
-#define bfa_fsm_cmp_state(_fsm, _state)                \
-       ((_fsm)->fsm == (bfa_fsm_t)(_state))
-
-static inline int
-bfa_sm_to_state(struct bfa_sm_table_s *smt, bfa_sm_t sm)
-{
-       int     i = 0;
-
-       while (smt[i].sm && smt[i].sm != sm)
-               i++;
-       return smt[i].state;
-}
+#define bfa_fsm_cmp_state(_fsm, _state)                ((_fsm)->fsm == (_state))
 
 /*
  * @ Generic wait counter.
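
The same idea applied to the state machines themselves: bfa_sm_set_state()/bfa_fsm_set_state() no longer cast through the generic bfa_sm_t/bfa_fsm_t, so each state machine declares a pointer type matching its own handler signature, which is what the many typedefs added in the headers below do. A generic sketch of the pattern:

struct example_sm;

enum example_event {
	EXAMPLE_E_START	= 1,
	EXAMPLE_E_STOP	= 2,
};

typedef void (*example_sm_t)(struct example_sm *, enum example_event);

struct example_sm {
	example_sm_t sm;	/* fully typed; no bfa_sm_t cast on store */
};

static void example_sm_stopped(struct example_sm *fsm, enum example_event event)
{
}

static void example_sm_demo(struct example_sm *fsm)
{
	fsm->sm = example_sm_stopped;	/* bfa_sm_set_state() equivalent */
	fsm->sm(fsm, EXAMPLE_E_START);	/* bfa_sm_send_event() equivalent */
}
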
index 7ad22288071b130486de2b667f1a6f6dd15b12c0..28ae4dc14dc9cedecea0bbc409be2b7e96cd6f6d 100644 (file)
@@ -64,21 +64,6 @@ enum bfa_ioim_lm_ua_status {
        BFA_IOIM_LM_UA_SET = 1,
 };
 
-/*
- *  itnim state machine event
- */
-enum bfa_itnim_event {
-       BFA_ITNIM_SM_CREATE = 1,        /*  itnim is created */
-       BFA_ITNIM_SM_ONLINE = 2,        /*  itnim is online */
-       BFA_ITNIM_SM_OFFLINE = 3,       /*  itnim is offline */
-       BFA_ITNIM_SM_FWRSP = 4,         /*  firmware response */
-       BFA_ITNIM_SM_DELETE = 5,        /*  deleting an existing itnim */
-       BFA_ITNIM_SM_CLEANUP = 6,       /*  IO cleanup completion */
-       BFA_ITNIM_SM_SLER = 7,          /*  second level error recovery */
-       BFA_ITNIM_SM_HWFAIL = 8,        /*  IOC h/w failure event */
-       BFA_ITNIM_SM_QRESUME = 9,       /*  queue space available */
-};
-
 /*
  *  BFA IOIM related definitions
  */
@@ -98,30 +83,6 @@ enum bfa_itnim_event {
                (__fcpim)->profile_start(__ioim);                       \
 } while (0)
 
-/*
- * IO state machine events
- */
-enum bfa_ioim_event {
-       BFA_IOIM_SM_START       = 1,    /*  io start request from host */
-       BFA_IOIM_SM_COMP_GOOD   = 2,    /*  io good comp, resource free */
-       BFA_IOIM_SM_COMP        = 3,    /*  io comp, resource is free */
-       BFA_IOIM_SM_COMP_UTAG   = 4,    /*  io comp, resource is free */
-       BFA_IOIM_SM_DONE        = 5,    /*  io comp, resource not free */
-       BFA_IOIM_SM_FREE        = 6,    /*  io resource is freed */
-       BFA_IOIM_SM_ABORT       = 7,    /*  abort request from scsi stack */
-       BFA_IOIM_SM_ABORT_COMP  = 8,    /*  abort from f/w */
-       BFA_IOIM_SM_ABORT_DONE  = 9,    /*  abort completion from f/w */
-       BFA_IOIM_SM_QRESUME     = 10,   /*  CQ space available to queue IO */
-       BFA_IOIM_SM_SGALLOCED   = 11,   /*  SG page allocation successful */
-       BFA_IOIM_SM_SQRETRY     = 12,   /*  sequence recovery retry */
-       BFA_IOIM_SM_HCB         = 13,   /*  bfa callback complete */
-       BFA_IOIM_SM_CLEANUP     = 14,   /*  IO cleanup from itnim */
-       BFA_IOIM_SM_TMSTART     = 15,   /*  IO cleanup from tskim */
-       BFA_IOIM_SM_TMDONE      = 16,   /*  IO cleanup from tskim */
-       BFA_IOIM_SM_HWFAIL      = 17,   /*  IOC h/w failure event */
-       BFA_IOIM_SM_IOTOV       = 18,   /*  ITN offline TOV */
-};
-
 
 /*
  *  BFA TSKIM related definitions
@@ -141,18 +102,6 @@ enum bfa_ioim_event {
 } while (0)
 
 
-enum bfa_tskim_event {
-       BFA_TSKIM_SM_START      = 1,    /*  TM command start            */
-       BFA_TSKIM_SM_DONE       = 2,    /*  TM completion               */
-       BFA_TSKIM_SM_QRESUME    = 3,    /*  resume after qfull          */
-       BFA_TSKIM_SM_HWFAIL     = 5,    /*  IOC h/w failure event       */
-       BFA_TSKIM_SM_HCB        = 6,    /*  BFA callback completion     */
-       BFA_TSKIM_SM_IOS_DONE   = 7,    /*  IO and sub TM completions   */
-       BFA_TSKIM_SM_CLEANUP    = 8,    /*  TM cleanup on ITN offline   */
-       BFA_TSKIM_SM_CLEANUP_DONE = 9,  /*  TM abort completion */
-       BFA_TSKIM_SM_UTAG       = 10,   /*  TM completion unknown tag  */
-};
-
 /*
  * forward declaration for BFA ITNIM functions
  */
index 8bf09433549b9a53c88425403b2e0a65b138d1b5..4499f84c2d8121ea3bbf218c43e8a3ed9df8c6d2 100644 (file)
@@ -154,12 +154,39 @@ struct bfa_fcp_mod_s {
        int                     throttle_update_required;
 };
 
+/*
+ * IO state machine events
+ */
+enum bfa_ioim_event {
+       BFA_IOIM_SM_START       = 1,    /*  io start request from host */
+       BFA_IOIM_SM_COMP_GOOD   = 2,    /*  io good comp, resource free */
+       BFA_IOIM_SM_COMP        = 3,    /*  io comp, resource is free */
+       BFA_IOIM_SM_COMP_UTAG   = 4,    /*  io comp, resource is free */
+       BFA_IOIM_SM_DONE        = 5,    /*  io comp, resource not free */
+       BFA_IOIM_SM_FREE        = 6,    /*  io resource is freed */
+       BFA_IOIM_SM_ABORT       = 7,    /*  abort request from scsi stack */
+       BFA_IOIM_SM_ABORT_COMP  = 8,    /*  abort from f/w */
+       BFA_IOIM_SM_ABORT_DONE  = 9,    /*  abort completion from f/w */
+       BFA_IOIM_SM_QRESUME     = 10,   /*  CQ space available to queue IO */
+       BFA_IOIM_SM_SGALLOCED   = 11,   /*  SG page allocation successful */
+       BFA_IOIM_SM_SQRETRY     = 12,   /*  sequence recovery retry */
+       BFA_IOIM_SM_HCB         = 13,   /*  bfa callback complete */
+       BFA_IOIM_SM_CLEANUP     = 14,   /*  IO cleanup from itnim */
+       BFA_IOIM_SM_TMSTART     = 15,   /*  IO cleanup from tskim */
+       BFA_IOIM_SM_TMDONE      = 16,   /*  IO cleanup from tskim */
+       BFA_IOIM_SM_HWFAIL      = 17,   /*  IOC h/w failure event */
+       BFA_IOIM_SM_IOTOV       = 18,   /*  ITN offline TOV */
+};
+
+struct bfa_ioim_s;
+typedef void (*bfa_ioim_sm_t)(struct bfa_ioim_s *, enum bfa_ioim_event);
+
 /*
  * BFA IO (initiator mode)
  */
 struct bfa_ioim_s {
        struct list_head        qe;             /*  queue element       */
-       bfa_sm_t                sm;             /*  BFA ioim state machine */
+       bfa_ioim_sm_t           sm;             /*  BFA ioim state machine */
        struct bfa_s            *bfa;           /*  BFA module  */
        struct bfa_fcpim_s      *fcpim;         /*  parent fcpim module */
        struct bfa_itnim_s      *itnim;         /*  i-t-n nexus for this IO  */
@@ -186,12 +213,27 @@ struct bfa_ioim_sp_s {
        struct bfa_tskim_s      *tskim;         /*  Relevant TM cmd     */
 };
 
+enum bfa_tskim_event {
+       BFA_TSKIM_SM_START      = 1,    /*  TM command start            */
+       BFA_TSKIM_SM_DONE       = 2,    /*  TM completion               */
+       BFA_TSKIM_SM_QRESUME    = 3,    /*  resume after qfull          */
+       BFA_TSKIM_SM_HWFAIL     = 5,    /*  IOC h/w failure event       */
+       BFA_TSKIM_SM_HCB        = 6,    /*  BFA callback completion     */
+       BFA_TSKIM_SM_IOS_DONE   = 7,    /*  IO and sub TM completions   */
+       BFA_TSKIM_SM_CLEANUP    = 8,    /*  TM cleanup on ITN offline   */
+       BFA_TSKIM_SM_CLEANUP_DONE = 9,  /*  TM abort completion */
+       BFA_TSKIM_SM_UTAG       = 10,   /*  TM completion unknown tag  */
+};
+
+struct bfa_tskim_s;
+typedef void (*bfa_tskim_sm_t)(struct bfa_tskim_s *, enum bfa_tskim_event);
+
 /*
  * BFA Task management command (initiator mode)
  */
 struct bfa_tskim_s {
        struct list_head        qe;
-       bfa_sm_t                sm;
+       bfa_tskim_sm_t          sm;
        struct bfa_s            *bfa;   /*  BFA module  */
        struct bfa_fcpim_s      *fcpim; /*  parent fcpim module */
        struct bfa_itnim_s      *itnim; /*  i-t-n nexus for this IO  */
@@ -208,12 +250,30 @@ struct bfa_tskim_s {
        enum bfi_tskim_status   tsk_status;  /*  TM status      */
 };
 
+/*
+ *  itnim state machine event
+ */
+enum bfa_itnim_event {
+       BFA_ITNIM_SM_CREATE = 1,        /*  itnim is created */
+       BFA_ITNIM_SM_ONLINE = 2,        /*  itnim is online */
+       BFA_ITNIM_SM_OFFLINE = 3,       /*  itnim is offline */
+       BFA_ITNIM_SM_FWRSP = 4,         /*  firmware response */
+       BFA_ITNIM_SM_DELETE = 5,        /*  deleting an existing itnim */
+       BFA_ITNIM_SM_CLEANUP = 6,       /*  IO cleanup completion */
+       BFA_ITNIM_SM_SLER = 7,          /*  second level error recovery */
+       BFA_ITNIM_SM_HWFAIL = 8,        /*  IOC h/w failure event */
+       BFA_ITNIM_SM_QRESUME = 9,       /*  queue space available */
+};
+
+struct bfa_itnim_s;
+typedef void (*bfa_itnim_sm_t)(struct bfa_itnim_s *, enum bfa_itnim_event);
+
 /*
  * BFA i-t-n (initiator mode)
  */
 struct bfa_itnim_s {
        struct list_head        qe;     /*  queue element       */
-       bfa_sm_t                sm;     /*  i-t-n im BFA state machine  */
+       bfa_itnim_sm_t          sm;     /*  i-t-n im BFA state machine  */
        struct bfa_s            *bfa;   /*  bfa instance        */
        struct bfa_rport_s      *rport; /*  bfa rport   */
        void                    *ditn;  /*  driver i-t-n structure      */
index c1baf5cd0d3e8470c8d71fca588008a3ef26bc1e..9788354b90da75221307c4fab479772a5700eb5e 100644 (file)
 
 #define BFA_FCS_OS_STR_LEN             64
 
-/*
- *  lps_pvt BFA LPS private functions
- */
-
-enum bfa_lps_event {
-       BFA_LPS_SM_LOGIN        = 1,    /* login request from user      */
-       BFA_LPS_SM_LOGOUT       = 2,    /* logout request from user     */
-       BFA_LPS_SM_FWRSP        = 3,    /* f/w response to login/logout */
-       BFA_LPS_SM_RESUME       = 4,    /* space present in reqq queue  */
-       BFA_LPS_SM_DELETE       = 5,    /* lps delete from user         */
-       BFA_LPS_SM_OFFLINE      = 6,    /* Link is offline              */
-       BFA_LPS_SM_RX_CVL       = 7,    /* Rx clear virtual link        */
-       BFA_LPS_SM_SET_N2N_PID  = 8,    /* Set assigned PID for n2n */
-};
-
-
 /*
  * !!! Only append to the enums defined here to avoid any versioning
  * !!! needed between trace utility and driver version
@@ -59,8 +43,30 @@ struct bfa_fcs_s;
 #define BFA_FCS_PID_IS_WKA(pid)  ((bfa_ntoh3b(pid) > 0xFFF000) ?  1 : 0)
 #define BFA_FCS_MAX_RPORT_LOGINS 1024
 
+/*
+ * VPort NS State Machine events
+ */
+enum vport_ns_event {
+       NSSM_EVENT_PORT_ONLINE = 1,
+       NSSM_EVENT_PORT_OFFLINE = 2,
+       NSSM_EVENT_PLOGI_SENT = 3,
+       NSSM_EVENT_RSP_OK = 4,
+       NSSM_EVENT_RSP_ERROR = 5,
+       NSSM_EVENT_TIMEOUT = 6,
+       NSSM_EVENT_NS_QUERY = 7,
+       NSSM_EVENT_RSPNID_SENT = 8,
+       NSSM_EVENT_RFTID_SENT = 9,
+       NSSM_EVENT_RFFID_SENT = 10,
+       NSSM_EVENT_GIDFT_SENT = 11,
+       NSSM_EVENT_RNNID_SENT = 12,
+       NSSM_EVENT_RSNN_NN_SENT = 13,
+};
+
+struct bfa_fcs_lport_ns_s;
+typedef void (*bfa_fcs_lport_ns_sm_t)(struct bfa_fcs_lport_ns_s *fsm, enum vport_ns_event);
+
 struct bfa_fcs_lport_ns_s {
-       bfa_sm_t        sm;             /*  state machine */
+       bfa_fcs_lport_ns_sm_t sm;       /*  state machine */
        struct bfa_timer_s timer;
        struct bfa_fcs_lport_s *port;   /*  parent port */
        struct bfa_fcxp_s *fcxp;
@@ -69,9 +75,23 @@ struct bfa_fcs_lport_ns_s {
        u8      num_rsnn_nn_retries;
 };
 
+/*
+ * VPort SCN State Machine events
+ */
+enum port_scn_event {
+       SCNSM_EVENT_PORT_ONLINE = 1,
+       SCNSM_EVENT_PORT_OFFLINE = 2,
+       SCNSM_EVENT_RSP_OK = 3,
+       SCNSM_EVENT_RSP_ERROR = 4,
+       SCNSM_EVENT_TIMEOUT = 5,
+       SCNSM_EVENT_SCR_SENT = 6,
+};
+
+struct bfa_fcs_lport_scn_s;
+typedef void (*bfa_fcs_lport_scn_sm_t)(struct bfa_fcs_lport_scn_s *fsm, enum port_scn_event);
 
 struct bfa_fcs_lport_scn_s {
-       bfa_sm_t        sm;             /*  state machine */
+       bfa_fcs_lport_scn_sm_t sm;      /*  state machine */
        struct bfa_timer_s timer;
        struct bfa_fcs_lport_s *port;   /*  parent port */
        struct bfa_fcxp_s *fcxp;
@@ -79,8 +99,25 @@ struct bfa_fcs_lport_scn_s {
 };
 
 
+/*
+ *  FDMI State Machine events
+ */
+enum port_fdmi_event {
+       FDMISM_EVENT_PORT_ONLINE = 1,
+       FDMISM_EVENT_PORT_OFFLINE = 2,
+       FDMISM_EVENT_RSP_OK = 4,
+       FDMISM_EVENT_RSP_ERROR = 5,
+       FDMISM_EVENT_TIMEOUT = 6,
+       FDMISM_EVENT_RHBA_SENT = 7,
+       FDMISM_EVENT_RPRT_SENT = 8,
+       FDMISM_EVENT_RPA_SENT = 9,
+};
+
+struct bfa_fcs_lport_fdmi_s;
+typedef void (*bfa_fcs_lport_fdmi_sm_t)(struct bfa_fcs_lport_fdmi_s *fsm, enum port_fdmi_event);
+
 struct bfa_fcs_lport_fdmi_s {
-       bfa_sm_t        sm;             /*  state machine */
+       bfa_fcs_lport_fdmi_sm_t sm;             /*  state machine */
        struct bfa_timer_s timer;
        struct bfa_fcs_lport_ms_s *ms;  /*  parent ms */
        struct bfa_fcxp_s *fcxp;
@@ -88,10 +125,24 @@ struct bfa_fcs_lport_fdmi_s {
        u8      retry_cnt;      /*  retry count */
        u8      rsvd[3];
 };
+/*
+ *  MS State Machine events
+ */
+enum port_ms_event {
+       MSSM_EVENT_PORT_ONLINE = 1,
+       MSSM_EVENT_PORT_OFFLINE = 2,
+       MSSM_EVENT_RSP_OK = 3,
+       MSSM_EVENT_RSP_ERROR = 4,
+       MSSM_EVENT_TIMEOUT = 5,
+       MSSM_EVENT_FCXP_SENT = 6,
+       MSSM_EVENT_PORT_FABRIC_RSCN = 7
+};
 
+struct bfa_fcs_lport_ms_s;
+typedef void (*bfa_fcs_lport_ms_sm_t)(struct bfa_fcs_lport_ms_s *fsm, enum port_ms_event);
 
 struct bfa_fcs_lport_ms_s {
-       bfa_sm_t        sm;             /*  state machine */
+       bfa_fcs_lport_ms_sm_t        sm;                /*  state machine */
        struct bfa_timer_s timer;
        struct bfa_fcs_lport_s *port;   /*  parent port */
        struct bfa_fcxp_s *fcxp;
@@ -131,10 +182,25 @@ union bfa_fcs_lport_topo_u {
        struct bfa_fcs_lport_n2n_s pn2n;
 };
 
+/*
+ *  fcs_port_sm FCS logical port state machine
+ */
+
+enum bfa_fcs_lport_event {
+       BFA_FCS_PORT_SM_CREATE = 1,
+       BFA_FCS_PORT_SM_ONLINE = 2,
+       BFA_FCS_PORT_SM_OFFLINE = 3,
+       BFA_FCS_PORT_SM_DELETE = 4,
+       BFA_FCS_PORT_SM_DELRPORT = 5,
+       BFA_FCS_PORT_SM_STOP = 6,
+};
+
+struct bfa_fcs_lport_s;
+typedef void (*bfa_fcs_lport_sm_t)(struct bfa_fcs_lport_s *fsm, enum bfa_fcs_lport_event);
 
 struct bfa_fcs_lport_s {
        struct list_head         qe;    /*  used by port/vport */
-       bfa_sm_t               sm;      /*  state machine */
+       bfa_fcs_lport_sm_t       sm;    /*  state machine */
        struct bfa_fcs_fabric_s *fabric;        /*  parent fabric */
        struct bfa_lport_cfg_s  port_cfg;       /*  port configuration */
        struct bfa_timer_s link_timer;  /*  timer for link offline */
@@ -171,10 +237,37 @@ enum bfa_fcs_fabric_type {
        BFA_FCS_FABRIC_LOOP = 3,
 };
 
+/*
+ * Fabric state machine events
+ */
+enum bfa_fcs_fabric_event {
+       BFA_FCS_FABRIC_SM_CREATE        = 1,    /*  create from driver        */
+       BFA_FCS_FABRIC_SM_DELETE        = 2,    /*  delete from driver        */
+       BFA_FCS_FABRIC_SM_LINK_DOWN     = 3,    /*  link down from port      */
+       BFA_FCS_FABRIC_SM_LINK_UP       = 4,    /*  link up from port         */
+       BFA_FCS_FABRIC_SM_CONT_OP       = 5,    /*  flogi/auth continue op   */
+       BFA_FCS_FABRIC_SM_RETRY_OP      = 6,    /*  flogi/auth retry op      */
+       BFA_FCS_FABRIC_SM_NO_FABRIC     = 7,    /*  from flogi/auth           */
+       BFA_FCS_FABRIC_SM_PERF_EVFP     = 8,    /*  from flogi/auth           */
+       BFA_FCS_FABRIC_SM_ISOLATE       = 9,    /*  from EVFP processing     */
+       BFA_FCS_FABRIC_SM_NO_TAGGING    = 10,   /*  no VFT tagging from EVFP */
+       BFA_FCS_FABRIC_SM_DELAYED       = 11,   /*  timeout delay event      */
+       BFA_FCS_FABRIC_SM_AUTH_FAILED   = 12,   /*  auth failed       */
+       BFA_FCS_FABRIC_SM_AUTH_SUCCESS  = 13,   /*  auth successful           */
+       BFA_FCS_FABRIC_SM_DELCOMP       = 14,   /*  all vports deleted event */
+       BFA_FCS_FABRIC_SM_LOOPBACK      = 15,   /*  Received our own FLOGI   */
+       BFA_FCS_FABRIC_SM_START         = 16,   /*  from driver       */
+       BFA_FCS_FABRIC_SM_STOP          = 17,   /*  Stop from driver    */
+       BFA_FCS_FABRIC_SM_STOPCOMP      = 18,   /*  Stop completion     */
+       BFA_FCS_FABRIC_SM_LOGOCOMP      = 19,   /*  FLOGO completion    */
+};
+
+struct bfa_fcs_fabric_s;
+typedef void (*bfa_fcs_fabric_sm_t)(struct bfa_fcs_fabric_s *fsm, enum bfa_fcs_fabric_event);
 
 struct bfa_fcs_fabric_s {
        struct list_head   qe;          /*  queue element */
-       bfa_sm_t         sm;            /*  state machine */
+       bfa_fcs_fabric_sm_t      sm;    /*  state machine */
        struct bfa_fcs_s *fcs;          /*  FCS instance */
        struct bfa_fcs_lport_s  bport;  /*  base logical port */
        enum bfa_fcs_fabric_type fab_type; /*  fabric type */
@@ -344,9 +437,33 @@ void            bfa_fcs_lport_scn_process_rscn(struct bfa_fcs_lport_s *port,
                                              struct fchs_s *rx_frame, u32 len);
 void           bfa_fcs_lport_lip_scn_online(bfa_fcs_lport_t *port);
 
+/*
+ * VPort State Machine events
+ */
+enum bfa_fcs_vport_event {
+       BFA_FCS_VPORT_SM_CREATE = 1,    /*  vport create event */
+       BFA_FCS_VPORT_SM_DELETE = 2,    /*  vport delete event */
+       BFA_FCS_VPORT_SM_START = 3,     /*  vport start request */
+       BFA_FCS_VPORT_SM_STOP = 4,      /*  stop: unsupported */
+       BFA_FCS_VPORT_SM_ONLINE = 5,    /*  fabric online */
+       BFA_FCS_VPORT_SM_OFFLINE = 6,   /*  fabric offline event */
+       BFA_FCS_VPORT_SM_FRMSENT = 7,   /*  fdisc/logo sent events */
+       BFA_FCS_VPORT_SM_RSP_OK = 8,    /*  good response */
+       BFA_FCS_VPORT_SM_RSP_ERROR = 9, /*  error/bad response */
+       BFA_FCS_VPORT_SM_TIMEOUT = 10,  /*  delay timer event */
+       BFA_FCS_VPORT_SM_DELCOMP = 11,  /*  lport delete completion */
+       BFA_FCS_VPORT_SM_RSP_DUP_WWN = 12,      /*  Dup wwn error */
+       BFA_FCS_VPORT_SM_RSP_FAILED = 13,       /*  non-retryable failure */
+       BFA_FCS_VPORT_SM_STOPCOMP = 14, /* vport delete completion */
+       BFA_FCS_VPORT_SM_FABRIC_MAX = 15, /* max vports on fabric */
+};
+
+struct bfa_fcs_vport_s;
+typedef void (*bfa_fcs_vport_sm_t)(struct bfa_fcs_vport_s *fsm, enum bfa_fcs_vport_event);
+
 struct bfa_fcs_vport_s {
        struct list_head                qe;             /*  queue elem  */
-       bfa_sm_t                sm;             /*  state machine       */
+       bfa_fcs_vport_sm_t              sm;             /*  state machine       */
        bfa_fcs_lport_t         lport;          /*  logical port        */
        struct bfa_timer_s      timer;
        struct bfad_vport_s     *vport_drv;     /*  Driver private      */
@@ -397,9 +514,26 @@ struct bfa_fcs_itnim_s;
 struct bfa_fcs_tin_s;
 struct bfa_fcs_iprp_s;
 
+/*
+ *  fcs_rport_ftrs_sm FCS rport state machine events
+ */
+
+enum rpf_event {
+       RPFSM_EVENT_RPORT_OFFLINE  = 1, /* Rport offline                */
+       RPFSM_EVENT_RPORT_ONLINE   = 2, /* Rport online                 */
+       RPFSM_EVENT_FCXP_SENT      = 3, /* Frame has been sent          */
+       RPFSM_EVENT_TIMEOUT        = 4, /* Rport SM timeout event       */
+       RPFSM_EVENT_RPSC_COMP      = 5,
+       RPFSM_EVENT_RPSC_FAIL      = 6,
+       RPFSM_EVENT_RPSC_ERROR     = 7,
+};
+
+struct bfa_fcs_rpf_s;
+typedef void (*bfa_fcs_rpf_sm_t)(struct bfa_fcs_rpf_s *, enum rpf_event);
+
 /* Rport Features (RPF) */
 struct bfa_fcs_rpf_s {
-       bfa_sm_t        sm;     /*  state machine */
+       bfa_fcs_rpf_sm_t        sm;     /*  state machine */
        struct bfa_fcs_rport_s *rport;  /*  parent rport */
        struct bfa_timer_s      timer;  /*  general purpose timer */
        struct bfa_fcxp_s       *fcxp;  /*  FCXP needed for discarding */
@@ -414,6 +548,36 @@ struct bfa_fcs_rpf_s {
         */
 };
 
+/*
+ *  fcs_rport_sm FCS rport state machine events
+ */
+enum rport_event {
+       RPSM_EVENT_PLOGI_SEND   = 1,    /*  new rport; start with PLOGI */
+       RPSM_EVENT_PLOGI_RCVD   = 2,    /*  Inbound PLOGI from remote port */
+       RPSM_EVENT_PLOGI_COMP   = 3,    /*  PLOGI completed to rport    */
+       RPSM_EVENT_LOGO_RCVD    = 4,    /*  LOGO from remote device     */
+       RPSM_EVENT_LOGO_IMP     = 5,    /*  implicit logo for SLER      */
+       RPSM_EVENT_FCXP_SENT    = 6,    /*  Frame has been sent         */
+       RPSM_EVENT_DELETE       = 7,    /*  RPORT delete request        */
+       RPSM_EVENT_FAB_SCN      = 8,    /*  state change notification   */
+       RPSM_EVENT_ACCEPTED     = 9,    /*  Good response from remote device */
+       RPSM_EVENT_FAILED       = 10,   /*  Request to rport failed.    */
+       RPSM_EVENT_TIMEOUT      = 11,   /*  Rport SM timeout event      */
+       RPSM_EVENT_HCB_ONLINE  = 12,    /*  BFA rport online callback   */
+       RPSM_EVENT_HCB_OFFLINE = 13,    /*  BFA rport offline callback  */
+       RPSM_EVENT_FC4_OFFLINE = 14,    /*  FC-4 offline complete       */
+       RPSM_EVENT_ADDRESS_CHANGE = 15, /*  Rport's PID has changed     */
+       RPSM_EVENT_ADDRESS_DISC = 16,   /*  Need to Discover rport's PID */
+       RPSM_EVENT_PRLO_RCVD   = 17,    /*  PRLO from remote device     */
+       RPSM_EVENT_PLOGI_RETRY = 18,    /*  Retry PLOGI continuously */
+       RPSM_EVENT_SCN_OFFLINE = 19,    /* loop scn offline             */
+       RPSM_EVENT_SCN_ONLINE   = 20,   /* loop scn online              */
+       RPSM_EVENT_FC4_FCS_ONLINE = 21, /* FC-4 FCS online complete */
+};
+
+struct bfa_fcs_rport_s;
+typedef void (*bfa_fcs_rport_sm_t)(struct bfa_fcs_rport_s *, enum rport_event);
+
 struct bfa_fcs_rport_s {
        struct list_head        qe;     /*  used by port/vport */
        struct bfa_fcs_lport_s *port;   /*  parent FCS port */
@@ -430,7 +594,7 @@ struct bfa_fcs_rport_s {
        wwn_t   pwwn;   /*  port wwn of rport */
        wwn_t   nwwn;   /*  node wwn of rport */
        struct bfa_rport_symname_s psym_name; /*  port symbolic name  */
-       bfa_sm_t        sm;             /*  state machine */
+       bfa_fcs_rport_sm_t      sm;     /*  state machine */
        struct bfa_timer_s timer;       /*  general purpose timer */
        struct bfa_fcs_itnim_s *itnim;  /*  ITN initiator mode role */
        struct bfa_fcs_tin_s *tin;      /*  ITN initiator mode role */
@@ -487,13 +651,35 @@ void  bfa_fcs_rpf_init(struct bfa_fcs_rport_s *rport);
 void  bfa_fcs_rpf_rport_online(struct bfa_fcs_rport_s *rport);
 void  bfa_fcs_rpf_rport_offline(struct bfa_fcs_rport_s *rport);
 
+/*
+ * fcs_itnim_sm FCS itnim state machine events
+ */
+enum bfa_fcs_itnim_event {
+       BFA_FCS_ITNIM_SM_FCS_ONLINE = 1,        /*  rport online event */
+       BFA_FCS_ITNIM_SM_OFFLINE = 2,   /*  rport offline */
+       BFA_FCS_ITNIM_SM_FRMSENT = 3,   /*  prli frame is sent */
+       BFA_FCS_ITNIM_SM_RSP_OK = 4,    /*  good response */
+       BFA_FCS_ITNIM_SM_RSP_ERROR = 5, /*  error response */
+       BFA_FCS_ITNIM_SM_TIMEOUT = 6,   /*  delay timeout */
+       BFA_FCS_ITNIM_SM_HCB_OFFLINE = 7, /*  BFA online callback */
+       BFA_FCS_ITNIM_SM_HCB_ONLINE = 8, /*  BFA offline callback */
+       BFA_FCS_ITNIM_SM_INITIATOR = 9, /*  rport is initiator */
+       BFA_FCS_ITNIM_SM_DELETE = 10,   /*  delete event from rport */
+       BFA_FCS_ITNIM_SM_PRLO = 11,     /*  delete event from rport */
+       BFA_FCS_ITNIM_SM_RSP_NOT_SUPP = 12, /* cmd not supported rsp */
+       BFA_FCS_ITNIM_SM_HAL_ONLINE = 13, /* bfa rport online event */
+};
+
+struct bfa_fcs_itnim_s;
+typedef void (*bfa_fcs_itnim_sm_t)(struct bfa_fcs_itnim_s *, enum bfa_fcs_itnim_event);
+
 /*
  * forward declarations
  */
 struct bfad_itnim_s;
 
 struct bfa_fcs_itnim_s {
-       bfa_sm_t                sm;             /*  state machine */
+       bfa_fcs_itnim_sm_t      sm;             /*  state machine */
        struct bfa_fcs_rport_s  *rport;         /*  parent remote rport  */
        struct bfad_itnim_s     *itnim_drv;     /*  driver peer instance */
        struct bfa_fcs_s        *fcs;           /*  fcs instance        */
@@ -702,78 +888,6 @@ struct bfa_fcs_s {
  *  fcs_fabric_sm fabric state machine functions
  */
 
-/*
- * Fabric state machine events
- */
-enum bfa_fcs_fabric_event {
-       BFA_FCS_FABRIC_SM_CREATE        = 1,    /*  create from driver        */
-       BFA_FCS_FABRIC_SM_DELETE        = 2,    /*  delete from driver        */
-       BFA_FCS_FABRIC_SM_LINK_DOWN     = 3,    /*  link down from port      */
-       BFA_FCS_FABRIC_SM_LINK_UP       = 4,    /*  link up from port         */
-       BFA_FCS_FABRIC_SM_CONT_OP       = 5,    /*  flogi/auth continue op   */
-       BFA_FCS_FABRIC_SM_RETRY_OP      = 6,    /*  flogi/auth retry op      */
-       BFA_FCS_FABRIC_SM_NO_FABRIC     = 7,    /*  from flogi/auth           */
-       BFA_FCS_FABRIC_SM_PERF_EVFP     = 8,    /*  from flogi/auth           */
-       BFA_FCS_FABRIC_SM_ISOLATE       = 9,    /*  from EVFP processing     */
-       BFA_FCS_FABRIC_SM_NO_TAGGING    = 10,   /*  no VFT tagging from EVFP */
-       BFA_FCS_FABRIC_SM_DELAYED       = 11,   /*  timeout delay event      */
-       BFA_FCS_FABRIC_SM_AUTH_FAILED   = 12,   /*  auth failed       */
-       BFA_FCS_FABRIC_SM_AUTH_SUCCESS  = 13,   /*  auth successful           */
-       BFA_FCS_FABRIC_SM_DELCOMP       = 14,   /*  all vports deleted event */
-       BFA_FCS_FABRIC_SM_LOOPBACK      = 15,   /*  Received our own FLOGI   */
-       BFA_FCS_FABRIC_SM_START         = 16,   /*  from driver       */
-       BFA_FCS_FABRIC_SM_STOP          = 17,   /*  Stop from driver    */
-       BFA_FCS_FABRIC_SM_STOPCOMP      = 18,   /*  Stop completion     */
-       BFA_FCS_FABRIC_SM_LOGOCOMP      = 19,   /*  FLOGO completion    */
-};
-
-/*
- *  fcs_rport_sm FCS rport state machine events
- */
-
-enum rport_event {
-       RPSM_EVENT_PLOGI_SEND   = 1,    /*  new rport; start with PLOGI */
-       RPSM_EVENT_PLOGI_RCVD   = 2,    /*  Inbound PLOGI from remote port */
-       RPSM_EVENT_PLOGI_COMP   = 3,    /*  PLOGI completed to rport    */
-       RPSM_EVENT_LOGO_RCVD    = 4,    /*  LOGO from remote device     */
-       RPSM_EVENT_LOGO_IMP     = 5,    /*  implicit logo for SLER      */
-       RPSM_EVENT_FCXP_SENT    = 6,    /*  Frame from has been sent    */
-       RPSM_EVENT_DELETE       = 7,    /*  RPORT delete request        */
-       RPSM_EVENT_FAB_SCN      = 8,    /*  state change notification   */
-       RPSM_EVENT_ACCEPTED     = 9,    /*  Good response from remote device */
-       RPSM_EVENT_FAILED       = 10,   /*  Request to rport failed.    */
-       RPSM_EVENT_TIMEOUT      = 11,   /*  Rport SM timeout event      */
-       RPSM_EVENT_HCB_ONLINE  = 12,    /*  BFA rport online callback   */
-       RPSM_EVENT_HCB_OFFLINE = 13,    /*  BFA rport offline callback  */
-       RPSM_EVENT_FC4_OFFLINE = 14,    /*  FC-4 offline complete       */
-       RPSM_EVENT_ADDRESS_CHANGE = 15, /*  Rport's PID has changed     */
-       RPSM_EVENT_ADDRESS_DISC = 16,   /*  Need to Discover rport's PID */
-       RPSM_EVENT_PRLO_RCVD   = 17,    /*  PRLO from remote device     */
-       RPSM_EVENT_PLOGI_RETRY = 18,    /*  Retry PLOGI continuously */
-       RPSM_EVENT_SCN_OFFLINE = 19,    /* loop scn offline             */
-       RPSM_EVENT_SCN_ONLINE   = 20,   /* loop scn online              */
-       RPSM_EVENT_FC4_FCS_ONLINE = 21, /* FC-4 FCS online complete */
-};
-
-/*
- * fcs_itnim_sm FCS itnim state machine events
- */
-enum bfa_fcs_itnim_event {
-       BFA_FCS_ITNIM_SM_FCS_ONLINE = 1,        /*  rport online event */
-       BFA_FCS_ITNIM_SM_OFFLINE = 2,   /*  rport offline */
-       BFA_FCS_ITNIM_SM_FRMSENT = 3,   /*  prli frame is sent */
-       BFA_FCS_ITNIM_SM_RSP_OK = 4,    /*  good response */
-       BFA_FCS_ITNIM_SM_RSP_ERROR = 5, /*  error response */
-       BFA_FCS_ITNIM_SM_TIMEOUT = 6,   /*  delay timeout */
-       BFA_FCS_ITNIM_SM_HCB_OFFLINE = 7, /*  BFA online callback */
-       BFA_FCS_ITNIM_SM_HCB_ONLINE = 8, /*  BFA offline callback */
-       BFA_FCS_ITNIM_SM_INITIATOR = 9, /*  rport is initiator */
-       BFA_FCS_ITNIM_SM_DELETE = 10,   /*  delete event from rport */
-       BFA_FCS_ITNIM_SM_PRLO = 11,     /*  delete event from rport */
-       BFA_FCS_ITNIM_SM_RSP_NOT_SUPP = 12, /* cmd not supported rsp */
-       BFA_FCS_ITNIM_SM_HAL_ONLINE = 13, /* bfa rport online event */
-};
-
 /*
  * bfa fcs API functions
  */
@@ -831,9 +945,7 @@ void bfa_fcs_fabric_sm_auth_failed(struct bfa_fcs_fabric_s *fabric,
  */
 
 struct bfad_port_s;
-struct bfad_vf_s;
 struct bfad_vport_s;
-struct bfad_rport_s;
 
 /*
  * lport callbacks
index c7de62baeec99adaf8f8dd28b8292e3566e2e290..40e65ab285040c1bdb6c69e52eb15d7450a93de0 100644 (file)
@@ -16,6 +16,7 @@
 #include "bfa_fcs.h"
 #include "bfa_fcbuild.h"
 #include "bfad_im.h"
+#include "bfa_fcpim.h"
 
 BFA_TRC_FILE(FCS, FCPIM);
 
@@ -52,7 +53,23 @@ static void  bfa_fcs_itnim_sm_hcb_offline(struct bfa_fcs_itnim_s *itnim,
 static void    bfa_fcs_itnim_sm_initiator(struct bfa_fcs_itnim_s *itnim,
                                           enum bfa_fcs_itnim_event event);
 
-static struct bfa_sm_table_s itnim_sm_table[] = {
+struct bfa_fcs_itnim_sm_table_s {
+       bfa_fcs_itnim_sm_t sm;          /*  state machine function      */
+       enum bfa_itnim_state state;     /*  state machine encoding      */
+       char            *name;          /*  state name for display      */
+};
+
+static inline enum bfa_itnim_state
+bfa_fcs_itnim_sm_to_state(struct bfa_fcs_itnim_sm_table_s *smt, bfa_fcs_itnim_sm_t sm)
+{
+       int i = 0;
+
+       while (smt[i].sm && smt[i].sm != sm)
+               i++;
+       return smt[i].state;
+}
+
+static struct bfa_fcs_itnim_sm_table_s itnim_sm_table[] = {
        {BFA_SM(bfa_fcs_itnim_sm_offline), BFA_ITNIM_OFFLINE},
        {BFA_SM(bfa_fcs_itnim_sm_prli_send), BFA_ITNIM_PRLI_SEND},
        {BFA_SM(bfa_fcs_itnim_sm_prli), BFA_ITNIM_PRLI_SENT},
@@ -665,7 +682,7 @@ bfa_status_t
 bfa_fcs_itnim_get_online_state(struct bfa_fcs_itnim_s *itnim)
 {
        bfa_trc(itnim->fcs, itnim->rport->pid);
-       switch (bfa_sm_to_state(itnim_sm_table, itnim->sm)) {
+       switch (bfa_fcs_itnim_sm_to_state(itnim_sm_table, itnim->sm)) {
        case BFA_ITNIM_ONLINE:
        case BFA_ITNIM_INITIATIOR:
                return BFA_STATUS_OK;
@@ -765,7 +782,7 @@ bfa_fcs_itnim_attr_get(struct bfa_fcs_lport_s *port, wwn_t rpwwn,
        if (itnim == NULL)
                return BFA_STATUS_NO_FCPIM_NEXUS;
 
-       attr->state         = bfa_sm_to_state(itnim_sm_table, itnim->sm);
+       attr->state         = bfa_fcs_itnim_sm_to_state(itnim_sm_table, itnim->sm);
        attr->retry         = itnim->seq_rec;
        attr->rec_support   = itnim->rec_support;
        attr->conf_comp     = itnim->conf_comp;
index 008afd81708713a7999bde6ba95d20588ed80b29..966bf6cc6dd90b63119e737c542c3f180bdaeedd 100644 (file)
@@ -103,19 +103,6 @@ static struct {
        },
 };
 
-/*
- *  fcs_port_sm FCS logical port state machine
- */
-
-enum bfa_fcs_lport_event {
-       BFA_FCS_PORT_SM_CREATE = 1,
-       BFA_FCS_PORT_SM_ONLINE = 2,
-       BFA_FCS_PORT_SM_OFFLINE = 3,
-       BFA_FCS_PORT_SM_DELETE = 4,
-       BFA_FCS_PORT_SM_DELRPORT = 5,
-       BFA_FCS_PORT_SM_STOP = 6,
-};
-
 static void     bfa_fcs_lport_sm_uninit(struct bfa_fcs_lport_s *port,
                                        enum bfa_fcs_lport_event event);
 static void     bfa_fcs_lport_sm_init(struct bfa_fcs_lport_s *port,
@@ -1426,20 +1413,6 @@ u32      bfa_fcs_fdmi_convert_speed(enum bfa_port_speed pport_speed);
  *  fcs_fdmi_sm FCS FDMI state machine
  */
 
-/*
- *  FDMI State Machine events
- */
-enum port_fdmi_event {
-       FDMISM_EVENT_PORT_ONLINE = 1,
-       FDMISM_EVENT_PORT_OFFLINE = 2,
-       FDMISM_EVENT_RSP_OK = 4,
-       FDMISM_EVENT_RSP_ERROR = 5,
-       FDMISM_EVENT_TIMEOUT = 6,
-       FDMISM_EVENT_RHBA_SENT = 7,
-       FDMISM_EVENT_RPRT_SENT = 8,
-       FDMISM_EVENT_RPA_SENT = 9,
-};
-
 static void     bfa_fcs_lport_fdmi_sm_offline(struct bfa_fcs_lport_fdmi_s *fdmi,
                                             enum port_fdmi_event event);
 static void     bfa_fcs_lport_fdmi_sm_sending_rhba(
@@ -2863,19 +2836,6 @@ static void     bfa_fcs_lport_ms_gfn_response(void *fcsarg,
  *  fcs_ms_sm FCS MS state machine
  */
 
-/*
- *  MS State Machine events
- */
-enum port_ms_event {
-       MSSM_EVENT_PORT_ONLINE = 1,
-       MSSM_EVENT_PORT_OFFLINE = 2,
-       MSSM_EVENT_RSP_OK = 3,
-       MSSM_EVENT_RSP_ERROR = 4,
-       MSSM_EVENT_TIMEOUT = 5,
-       MSSM_EVENT_FCXP_SENT = 6,
-       MSSM_EVENT_PORT_FABRIC_RSCN = 7
-};
-
 static void     bfa_fcs_lport_ms_sm_offline(struct bfa_fcs_lport_ms_s *ms,
                                           enum port_ms_event event);
 static void     bfa_fcs_lport_ms_sm_plogi_sending(struct bfa_fcs_lport_ms_s *ms,
@@ -3644,25 +3604,6 @@ static void bfa_fcs_lport_ns_boot_target_disc(bfa_fcs_lport_t *port);
  *  fcs_ns_sm FCS nameserver interface state machine
  */
 
-/*
- * VPort NS State Machine events
- */
-enum vport_ns_event {
-       NSSM_EVENT_PORT_ONLINE = 1,
-       NSSM_EVENT_PORT_OFFLINE = 2,
-       NSSM_EVENT_PLOGI_SENT = 3,
-       NSSM_EVENT_RSP_OK = 4,
-       NSSM_EVENT_RSP_ERROR = 5,
-       NSSM_EVENT_TIMEOUT = 6,
-       NSSM_EVENT_NS_QUERY = 7,
-       NSSM_EVENT_RSPNID_SENT = 8,
-       NSSM_EVENT_RFTID_SENT = 9,
-       NSSM_EVENT_RFFID_SENT = 10,
-       NSSM_EVENT_GIDFT_SENT = 11,
-       NSSM_EVENT_RNNID_SENT = 12,
-       NSSM_EVENT_RSNN_NN_SENT = 13,
-};
-
 static void     bfa_fcs_lport_ns_sm_offline(struct bfa_fcs_lport_ns_s *ns,
                                           enum vport_ns_event event);
 static void     bfa_fcs_lport_ns_sm_plogi_sending(struct bfa_fcs_lport_ns_s *ns,
@@ -5239,18 +5180,6 @@ static void     bfa_fcs_lport_scn_timeout(void *arg);
  *  fcs_scm_sm FCS SCN state machine
  */
 
-/*
- * VPort SCN State Machine events
- */
-enum port_scn_event {
-       SCNSM_EVENT_PORT_ONLINE = 1,
-       SCNSM_EVENT_PORT_OFFLINE = 2,
-       SCNSM_EVENT_RSP_OK = 3,
-       SCNSM_EVENT_RSP_ERROR = 4,
-       SCNSM_EVENT_TIMEOUT = 5,
-       SCNSM_EVENT_SCR_SENT = 6,
-};
-
 static void     bfa_fcs_lport_scn_sm_offline(struct bfa_fcs_lport_scn_s *scn,
                                            enum port_scn_event event);
 static void     bfa_fcs_lport_scn_sm_sending_scr(
@@ -5989,27 +5918,6 @@ static void     bfa_fcs_vport_free(struct bfa_fcs_vport_s *vport);
  *  fcs_vport_sm FCS virtual port state machine
  */
 
-/*
- * VPort State Machine events
- */
-enum bfa_fcs_vport_event {
-       BFA_FCS_VPORT_SM_CREATE = 1,    /*  vport create event */
-       BFA_FCS_VPORT_SM_DELETE = 2,    /*  vport delete event */
-       BFA_FCS_VPORT_SM_START = 3,     /*  vport start request */
-       BFA_FCS_VPORT_SM_STOP = 4,      /*  stop: unsupported */
-       BFA_FCS_VPORT_SM_ONLINE = 5,    /*  fabric online */
-       BFA_FCS_VPORT_SM_OFFLINE = 6,   /*  fabric offline event */
-       BFA_FCS_VPORT_SM_FRMSENT = 7,   /*  fdisc/logo sent events */
-       BFA_FCS_VPORT_SM_RSP_OK = 8,    /*  good response */
-       BFA_FCS_VPORT_SM_RSP_ERROR = 9, /*  error/bad response */
-       BFA_FCS_VPORT_SM_TIMEOUT = 10,  /*  delay timer event */
-       BFA_FCS_VPORT_SM_DELCOMP = 11,  /*  lport delete completion */
-       BFA_FCS_VPORT_SM_RSP_DUP_WWN = 12,      /*  Dup wnn error*/
-       BFA_FCS_VPORT_SM_RSP_FAILED = 13,       /*  non-retryable failure */
-       BFA_FCS_VPORT_SM_STOPCOMP = 14, /* vport delete completion */
-       BFA_FCS_VPORT_SM_FABRIC_MAX = 15, /* max vports on fabric */
-};
-
 static void     bfa_fcs_vport_sm_uninit(struct bfa_fcs_vport_s *vport,
                                        enum bfa_fcs_vport_event event);
 static void     bfa_fcs_vport_sm_created(struct bfa_fcs_vport_s *vport,
@@ -6037,7 +5945,23 @@ static void      bfa_fcs_vport_sm_stopping(struct bfa_fcs_vport_s *vport,
 static void    bfa_fcs_vport_sm_logo_for_stop(struct bfa_fcs_vport_s *vport,
                                        enum bfa_fcs_vport_event event);
 
-static struct bfa_sm_table_s  vport_sm_table[] = {
+struct bfa_fcs_vport_sm_table_s {
+       bfa_fcs_vport_sm_t sm;          /*  state machine function      */
+       enum bfa_vport_state state;     /*  state machine encoding      */
+       char            *name;          /*  state name for display      */
+};
+
+static inline enum bfa_vport_state
+bfa_vport_sm_to_state(struct bfa_fcs_vport_sm_table_s *smt, bfa_fcs_vport_sm_t sm)
+{
+       int i = 0;
+
+       while (smt[i].sm && smt[i].sm != sm)
+               i++;
+       return smt[i].state;
+}
+
+static struct bfa_fcs_vport_sm_table_s  vport_sm_table[] = {
        {BFA_SM(bfa_fcs_vport_sm_uninit), BFA_FCS_VPORT_UNINIT},
        {BFA_SM(bfa_fcs_vport_sm_created), BFA_FCS_VPORT_CREATED},
        {BFA_SM(bfa_fcs_vport_sm_offline), BFA_FCS_VPORT_OFFLINE},
@@ -6864,7 +6788,7 @@ bfa_fcs_vport_get_attr(struct bfa_fcs_vport_s *vport,
        memset(attr, 0, sizeof(struct bfa_vport_attr_s));
 
        bfa_fcs_lport_get_attr(&vport->lport, &attr->port_attr);
-       attr->vport_state = bfa_sm_to_state(vport_sm_table, vport->sm);
+       attr->vport_state = bfa_vport_sm_to_state(vport_sm_table, vport->sm);
 }
 
 
index c21aa37b8adbe483c22f56eee3589d3b9cbc7ea9..ce52a9c88ae63b3a20d63aaf8bce522d2616c14f 100644 (file)
@@ -136,7 +136,23 @@ static void        bfa_fcs_rport_sm_fc4_off_delete(struct bfa_fcs_rport_s *rport,
 static void    bfa_fcs_rport_sm_delete_pending(struct bfa_fcs_rport_s *rport,
                                                enum rport_event event);
 
-static struct bfa_sm_table_s rport_sm_table[] = {
+struct bfa_fcs_rport_sm_table_s {
+       bfa_fcs_rport_sm_t sm;          /*  state machine function      */
+       enum bfa_rport_state state;     /*  state machine encoding      */
+       char            *name;          /*  state name for display      */
+};
+
+static inline enum bfa_rport_state
+bfa_rport_sm_to_state(struct bfa_fcs_rport_sm_table_s *smt, bfa_fcs_rport_sm_t sm)
+{
+       int i = 0;
+
+       while (smt[i].sm && smt[i].sm != sm)
+               i++;
+       return smt[i].state;
+}
+
+static struct bfa_fcs_rport_sm_table_s rport_sm_table[] = {
        {BFA_SM(bfa_fcs_rport_sm_uninit), BFA_RPORT_UNINIT},
        {BFA_SM(bfa_fcs_rport_sm_plogi_sending), BFA_RPORT_PLOGI},
        {BFA_SM(bfa_fcs_rport_sm_plogiacc_sending), BFA_RPORT_ONLINE},
@@ -2964,7 +2980,7 @@ bfa_fcs_rport_send_ls_rjt(struct bfa_fcs_rport_s *rport, struct fchs_s *rx_fchs,
 int
 bfa_fcs_rport_get_state(struct bfa_fcs_rport_s *rport)
 {
-       return bfa_sm_to_state(rport_sm_table, rport->sm);
+       return bfa_rport_sm_to_state(rport_sm_table, rport->sm);
 }
 
 
@@ -3107,20 +3123,6 @@ static void     bfa_fcs_rpf_rpsc2_response(void *fcsarg,
 
 static void     bfa_fcs_rpf_timeout(void *arg);
 
-/*
- *  fcs_rport_ftrs_sm FCS rport state machine events
- */
-
-enum rpf_event {
-       RPFSM_EVENT_RPORT_OFFLINE  = 1, /* Rport offline                */
-       RPFSM_EVENT_RPORT_ONLINE   = 2, /* Rport online                 */
-       RPFSM_EVENT_FCXP_SENT      = 3, /* Frame from has been sent     */
-       RPFSM_EVENT_TIMEOUT        = 4, /* Rport SM timeout event       */
-       RPFSM_EVENT_RPSC_COMP      = 5,
-       RPFSM_EVENT_RPSC_FAIL      = 6,
-       RPFSM_EVENT_RPSC_ERROR     = 7,
-};
-
 static void    bfa_fcs_rpf_sm_uninit(struct bfa_fcs_rpf_s *rpf,
                                        enum rpf_event event);
 static void     bfa_fcs_rpf_sm_rpsc_sending(struct bfa_fcs_rpf_s *rpf,
index e1ed1424fddb2621ff09d6a1313a3e87f4c428dd..ea2f107f564cd0469cd64bee0ab2b848a39d74a9 100644 (file)
@@ -114,21 +114,6 @@ static enum bfi_ioc_img_ver_cmp_e bfa_ioc_flash_fwver_cmp(
 /*
  * IOC state machine definitions/declarations
  */
-enum ioc_event {
-       IOC_E_RESET             = 1,    /*  IOC reset request           */
-       IOC_E_ENABLE            = 2,    /*  IOC enable request          */
-       IOC_E_DISABLE           = 3,    /*  IOC disable request */
-       IOC_E_DETACH            = 4,    /*  driver detach cleanup       */
-       IOC_E_ENABLED           = 5,    /*  f/w enabled         */
-       IOC_E_FWRSP_GETATTR     = 6,    /*  IOC get attribute response  */
-       IOC_E_DISABLED          = 7,    /*  f/w disabled                */
-       IOC_E_PFFAILED          = 8,    /*  failure notice by iocpf sm  */
-       IOC_E_HBFAIL            = 9,    /*  heartbeat failure           */
-       IOC_E_HWERROR           = 10,   /*  hardware error interrupt    */
-       IOC_E_TIMEOUT           = 11,   /*  timeout                     */
-       IOC_E_HWFAILED          = 12,   /*  PCI mapping failure notice  */
-};
-
 bfa_fsm_state_decl(bfa_ioc, uninit, struct bfa_ioc_s, enum ioc_event);
 bfa_fsm_state_decl(bfa_ioc, reset, struct bfa_ioc_s, enum ioc_event);
 bfa_fsm_state_decl(bfa_ioc, enabling, struct bfa_ioc_s, enum ioc_event);
@@ -140,7 +125,13 @@ bfa_fsm_state_decl(bfa_ioc, disabling, struct bfa_ioc_s, enum ioc_event);
 bfa_fsm_state_decl(bfa_ioc, disabled, struct bfa_ioc_s, enum ioc_event);
 bfa_fsm_state_decl(bfa_ioc, hwfail, struct bfa_ioc_s, enum ioc_event);
 
-static struct bfa_sm_table_s ioc_sm_table[] = {
+struct bfa_ioc_sm_table {
+       bfa_ioc_sm_t    sm;             /*  state machine function      */
+       enum bfa_ioc_state state;       /*  state machine encoding      */
+       char            *name;          /*  state name for display      */
+};
+
+static struct bfa_ioc_sm_table ioc_sm_table[] = {
        {BFA_SM(bfa_ioc_sm_uninit), BFA_IOC_UNINIT},
        {BFA_SM(bfa_ioc_sm_reset), BFA_IOC_RESET},
        {BFA_SM(bfa_ioc_sm_enabling), BFA_IOC_ENABLING},
@@ -153,6 +144,16 @@ static struct bfa_sm_table_s ioc_sm_table[] = {
        {BFA_SM(bfa_ioc_sm_hwfail), BFA_IOC_HWFAIL},
 };
 
+static inline enum bfa_ioc_state
+bfa_ioc_sm_to_state(struct bfa_ioc_sm_table *smt, bfa_ioc_sm_t sm)
+{
+       int     i = 0;
+
+       while (smt[i].sm && smt[i].sm != sm)
+               i++;
+       return smt[i].state;
+}
+
 /*
  * IOCPF state machine definitions/declarations
  */
@@ -178,24 +179,6 @@ static void bfa_iocpf_timeout(void *ioc_arg);
 static void bfa_iocpf_sem_timeout(void *ioc_arg);
 static void bfa_iocpf_poll_timeout(void *ioc_arg);
 
-/*
- * IOCPF state machine events
- */
-enum iocpf_event {
-       IOCPF_E_ENABLE          = 1,    /*  IOCPF enable request        */
-       IOCPF_E_DISABLE         = 2,    /*  IOCPF disable request       */
-       IOCPF_E_STOP            = 3,    /*  stop on driver detach       */
-       IOCPF_E_FWREADY         = 4,    /*  f/w initialization done     */
-       IOCPF_E_FWRSP_ENABLE    = 5,    /*  enable f/w response */
-       IOCPF_E_FWRSP_DISABLE   = 6,    /*  disable f/w response        */
-       IOCPF_E_FAIL            = 7,    /*  failure notice by ioc sm    */
-       IOCPF_E_INITFAIL        = 8,    /*  init fail notice by ioc sm  */
-       IOCPF_E_GETATTRFAIL     = 9,    /*  init fail notice by ioc sm  */
-       IOCPF_E_SEMLOCKED       = 10,   /*  h/w semaphore is locked     */
-       IOCPF_E_TIMEOUT         = 11,   /*  f/w response timeout        */
-       IOCPF_E_SEM_ERROR       = 12,   /*  h/w sem mapping error       */
-};
-
 /*
  * IOCPF states
  */
@@ -228,7 +211,23 @@ bfa_fsm_state_decl(bfa_iocpf, disabling_sync, struct bfa_iocpf_s,
                                                enum iocpf_event);
 bfa_fsm_state_decl(bfa_iocpf, disabled, struct bfa_iocpf_s, enum iocpf_event);
 
-static struct bfa_sm_table_s iocpf_sm_table[] = {
+struct bfa_iocpf_sm_table {
+       bfa_iocpf_sm_t  sm;             /*  state machine function      */
+       enum bfa_iocpf_state state;     /*  state machine encoding      */
+       char            *name;          /*  state name for display      */
+};
+
+static inline enum bfa_iocpf_state
+bfa_iocpf_sm_to_state(struct bfa_iocpf_sm_table *smt, bfa_iocpf_sm_t sm)
+{
+       int     i = 0;
+
+       while (smt[i].sm && smt[i].sm != sm)
+               i++;
+       return smt[i].state;
+}
+
+static struct bfa_iocpf_sm_table iocpf_sm_table[] = {
        {BFA_SM(bfa_iocpf_sm_reset), BFA_IOCPF_RESET},
        {BFA_SM(bfa_iocpf_sm_fwcheck), BFA_IOCPF_FWMISMATCH},
        {BFA_SM(bfa_iocpf_sm_mismatch), BFA_IOCPF_FWMISMATCH},
@@ -2815,12 +2814,12 @@ enum bfa_ioc_state
 bfa_ioc_get_state(struct bfa_ioc_s *ioc)
 {
        enum bfa_iocpf_state iocpf_st;
-       enum bfa_ioc_state ioc_st = bfa_sm_to_state(ioc_sm_table, ioc->fsm);
+       enum bfa_ioc_state ioc_st = bfa_ioc_sm_to_state(ioc_sm_table, ioc->fsm);
 
        if (ioc_st == BFA_IOC_ENABLING ||
                ioc_st == BFA_IOC_FAIL || ioc_st == BFA_IOC_INITFAIL) {
 
-               iocpf_st = bfa_sm_to_state(iocpf_sm_table, ioc->iocpf.fsm);
+               iocpf_st = bfa_iocpf_sm_to_state(iocpf_sm_table, ioc->iocpf.fsm);
 
                switch (iocpf_st) {
                case BFA_IOCPF_SEMWAIT:
@@ -5805,18 +5804,6 @@ bfa_phy_intr(void *phyarg, struct bfi_mbmsg_s *msg)
        }
 }
 
-/*
- * DCONF state machine events
- */
-enum bfa_dconf_event {
-       BFA_DCONF_SM_INIT               = 1,    /* dconf Init */
-       BFA_DCONF_SM_FLASH_COMP         = 2,    /* read/write to flash */
-       BFA_DCONF_SM_WR                 = 3,    /* binding change, map */
-       BFA_DCONF_SM_TIMEOUT            = 4,    /* Start timer */
-       BFA_DCONF_SM_EXIT               = 5,    /* exit dconf module */
-       BFA_DCONF_SM_IOCDISABLE         = 6,    /* IOC disable event */
-};
-
 /* forward declaration of DCONF state machine */
 static void bfa_dconf_sm_uninit(struct bfa_dconf_mod_s *dconf,
                                enum bfa_dconf_event event);
index 933a1c3890ff50bd5ee9da57d5e4137116b401d2..3ec10503caff92e2307d65ff8c4499f17ee4c5d0 100644 (file)
@@ -260,6 +260,24 @@ struct bfa_ioc_cbfn_s {
 /*
  * IOC event notification mechanism.
  */
+enum ioc_event {
+       IOC_E_RESET             = 1,    /*  IOC reset request           */
+       IOC_E_ENABLE            = 2,    /*  IOC enable request          */
+       IOC_E_DISABLE           = 3,    /*  IOC disable request */
+       IOC_E_DETACH            = 4,    /*  driver detach cleanup       */
+       IOC_E_ENABLED           = 5,    /*  f/w enabled         */
+       IOC_E_FWRSP_GETATTR     = 6,    /*  IOC get attribute response  */
+       IOC_E_DISABLED          = 7,    /*  f/w disabled                */
+       IOC_E_PFFAILED          = 8,    /*  failure notice by iocpf sm  */
+       IOC_E_HBFAIL            = 9,    /*  heartbeat failure           */
+       IOC_E_HWERROR           = 10,   /*  hardware error interrupt    */
+       IOC_E_TIMEOUT           = 11,   /*  timeout                     */
+       IOC_E_HWFAILED          = 12,   /*  PCI mapping failure notice  */
+};
+
+struct bfa_ioc_s;
+typedef void (*bfa_ioc_sm_t)(struct bfa_ioc_s *fsm, enum ioc_event);
+
 enum bfa_ioc_event_e {
        BFA_IOC_E_ENABLED       = 1,
        BFA_IOC_E_DISABLED      = 2,
@@ -282,8 +300,29 @@ struct bfa_ioc_notify_s {
        (__notify)->cbarg = (__cbarg);      \
 } while (0)
 
+/*
+ * IOCPF state machine events
+ */
+enum iocpf_event {
+       IOCPF_E_ENABLE          = 1,    /*  IOCPF enable request        */
+       IOCPF_E_DISABLE         = 2,    /*  IOCPF disable request       */
+       IOCPF_E_STOP            = 3,    /*  stop on driver detach       */
+       IOCPF_E_FWREADY         = 4,    /*  f/w initialization done     */
+       IOCPF_E_FWRSP_ENABLE    = 5,    /*  enable f/w response */
+       IOCPF_E_FWRSP_DISABLE   = 6,    /*  disable f/w response        */
+       IOCPF_E_FAIL            = 7,    /*  failure notice by ioc sm    */
+       IOCPF_E_INITFAIL        = 8,    /*  init fail notice by ioc sm  */
+       IOCPF_E_GETATTRFAIL     = 9,    /*  init fail notice by ioc sm  */
+       IOCPF_E_SEMLOCKED       = 10,   /*  h/w semaphore is locked     */
+       IOCPF_E_TIMEOUT         = 11,   /*  f/w response timeout        */
+       IOCPF_E_SEM_ERROR       = 12,   /*  h/w sem mapping error       */
+};
+
+struct bfa_iocpf_s;
+typedef void (*bfa_iocpf_sm_t)(struct bfa_iocpf_s *fsm, enum iocpf_event);
+
 struct bfa_iocpf_s {
-       bfa_fsm_t               fsm;
+       bfa_iocpf_sm_t          fsm;
        struct bfa_ioc_s        *ioc;
        bfa_boolean_t           fw_mismatch_notified;
        bfa_boolean_t           auto_recover;
@@ -291,7 +330,7 @@ struct bfa_iocpf_s {
 };
 
 struct bfa_ioc_s {
-       bfa_fsm_t               fsm;
+       bfa_ioc_sm_t            fsm;
        struct bfa_s            *bfa;
        struct bfa_pcidev_s     pcidev;
        struct bfa_timer_mod_s  *timer_mod;
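
The header side of the same conversion: the event enums move ahead of the structures they drive, and a forward declaration of the context struct lets the header define a fully typed pointer before the struct body exists. A short sketch with hypothetical names:

	struct demo_fsm_ctx;    /* forward declaration is enough */
	typedef void (*demo_fsm_t)(struct demo_fsm_ctx *fsm, int event);

	struct demo_fsm_ctx {
		demo_fsm_t sm;  /* exact prototype, no generic bfa_fsm_t */
	};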
@@ -361,36 +400,24 @@ struct bfa_reqq_wait_s {
        void    *cbarg;
 };
 
-typedef void   (*bfa_cb_cbfn_t) (void *cbarg, bfa_boolean_t complete);
+typedef void (*bfa_cb_cbfn_t) (void *cbarg, bfa_boolean_t complete);
+typedef void (*bfa_cb_cbfn_status_t) (void *cbarg, bfa_status_t status);
 
 /*
  * Generic BFA callback element.
  */
 struct bfa_cb_qe_s {
        struct list_head        qe;
-       bfa_cb_cbfn_t   cbfn;
+       union {
+               bfa_cb_cbfn_status_t    cbfn_status;
+               bfa_cb_cbfn_t           cbfn;
+       };
        bfa_boolean_t   once;
        bfa_boolean_t   pre_rmv;        /* set for stack based qe(s) */
        bfa_status_t    fw_status;      /* to access fw status in comp proc */
        void            *cbarg;
 };
 
-/*
- * IOCFC state machine definitions/declarations
- */
-enum iocfc_event {
-       IOCFC_E_INIT            = 1,    /* IOCFC init request           */
-       IOCFC_E_START           = 2,    /* IOCFC mod start request      */
-       IOCFC_E_STOP            = 3,    /* IOCFC stop request           */
-       IOCFC_E_ENABLE          = 4,    /* IOCFC enable request         */
-       IOCFC_E_DISABLE         = 5,    /* IOCFC disable request        */
-       IOCFC_E_IOC_ENABLED     = 6,    /* IOC enabled message          */
-       IOCFC_E_IOC_DISABLED    = 7,    /* IOC disabled message         */
-       IOCFC_E_IOC_FAILED      = 8,    /* failure notice by IOC sm     */
-       IOCFC_E_DCONF_DONE      = 9,    /* dconf read/write done        */
-       IOCFC_E_CFG_DONE        = 10,   /* IOCFC config complete        */
-};
-
 /*
  * ASIC block configuration related
  */
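
Within that hunk, struct bfa_cb_qe_s also gains an anonymous union so a queue element can carry either callback flavor without the old (bfa_cb_cbfn_t) casts. A sketch of the idea, with illustrative names:

	typedef void (*demo_cb_t)(void *cbarg, int complete);
	typedef void (*demo_cb_status_t)(void *cbarg, int status);

	struct demo_cb_qe {
		union {
			demo_cb_status_t cbfn_status;  /* status flavor */
			demo_cb_t        cbfn;         /* plain flavor  */
		};
		void *cbarg;
	};

The producer stores exactly one member and the completion path invokes that same member, so the indirect call always goes through a correctly typed pointer.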
@@ -775,8 +802,23 @@ struct bfa_dconf_s {
 };
 #pragma pack()
 
+/*
+ * DCONF state machine events
+ */
+enum bfa_dconf_event {
+       BFA_DCONF_SM_INIT               = 1,    /* dconf Init */
+       BFA_DCONF_SM_FLASH_COMP         = 2,    /* read/write to flash */
+       BFA_DCONF_SM_WR                 = 3,    /* binding change, map */
+       BFA_DCONF_SM_TIMEOUT            = 4,    /* Start timer */
+       BFA_DCONF_SM_EXIT               = 5,    /* exit dconf module */
+       BFA_DCONF_SM_IOCDISABLE         = 6,    /* IOC disable event */
+};
+
+struct bfa_dconf_mod_s;
+typedef void (*bfa_dconf_sm_t)(struct bfa_dconf_mod_s *fsm, enum bfa_dconf_event);
+
 struct bfa_dconf_mod_s {
-       bfa_sm_t                sm;
+       bfa_dconf_sm_t          sm;
        u8                      instance;
        bfa_boolean_t           read_data_valid;
        bfa_boolean_t           min_cfg;
index c9745c0b4eee3ad9a8538a67ea7bafa6706b0955..9f33aa303b189609070019687e75abb22b22a956 100644 (file)
@@ -40,36 +40,6 @@ BFA_TRC_FILE(HAL, FCXP);
        ((bfa_fcport_is_disabled(bfa) == BFA_TRUE) || \
        (bfa_ioc_is_disabled(&bfa->ioc) == BFA_TRUE))
 
-/*
- * BFA port state machine events
- */
-enum bfa_fcport_sm_event {
-       BFA_FCPORT_SM_START     = 1,    /*  start port state machine    */
-       BFA_FCPORT_SM_STOP      = 2,    /*  stop port state machine     */
-       BFA_FCPORT_SM_ENABLE    = 3,    /*  enable port         */
-       BFA_FCPORT_SM_DISABLE   = 4,    /*  disable port state machine */
-       BFA_FCPORT_SM_FWRSP     = 5,    /*  firmware enable/disable rsp */
-       BFA_FCPORT_SM_LINKUP    = 6,    /*  firmware linkup event       */
-       BFA_FCPORT_SM_LINKDOWN  = 7,    /*  firmware linkup down        */
-       BFA_FCPORT_SM_QRESUME   = 8,    /*  CQ space available  */
-       BFA_FCPORT_SM_HWFAIL    = 9,    /*  IOC h/w failure             */
-       BFA_FCPORT_SM_DPORTENABLE = 10, /*  enable dport      */
-       BFA_FCPORT_SM_DPORTDISABLE = 11,/*  disable dport     */
-       BFA_FCPORT_SM_FAA_MISCONFIG = 12,       /* FAA misconfiguratin */
-       BFA_FCPORT_SM_DDPORTENABLE  = 13,       /* enable ddport        */
-       BFA_FCPORT_SM_DDPORTDISABLE = 14,       /* disable ddport       */
-};
-
-/*
- * BFA port link notification state machine events
- */
-
-enum bfa_fcport_ln_sm_event {
-       BFA_FCPORT_LN_SM_LINKUP         = 1,    /*  linkup event        */
-       BFA_FCPORT_LN_SM_LINKDOWN       = 2,    /*  linkdown event      */
-       BFA_FCPORT_LN_SM_NOTIFICATION   = 3     /*  done notification   */
-};
-
 /*
  * RPORT related definitions
  */
@@ -201,7 +171,23 @@ static void     bfa_fcport_ln_sm_up_dn_nf(struct bfa_fcport_ln_s *ln,
 static void     bfa_fcport_ln_sm_up_dn_up_nf(struct bfa_fcport_ln_s *ln,
                                        enum bfa_fcport_ln_sm_event event);
 
-static struct bfa_sm_table_s hal_port_sm_table[] = {
+struct bfa_fcport_sm_table_s {
+       bfa_fcport_sm_t sm;             /*  state machine function      */
+       enum bfa_port_states state;     /*  state machine encoding      */
+       char            *name;          /*  state name for display      */
+};
+
+static inline enum bfa_port_states
+bfa_fcport_sm_to_state(struct bfa_fcport_sm_table_s *smt, bfa_fcport_sm_t sm)
+{
+       int i = 0;
+
+       while (smt[i].sm && smt[i].sm != sm)
+               i++;
+       return smt[i].state;
+}
+
+static struct bfa_fcport_sm_table_s hal_port_sm_table[] = {
        {BFA_SM(bfa_fcport_sm_uninit), BFA_PORT_ST_UNINIT},
        {BFA_SM(bfa_fcport_sm_enabling_qwait), BFA_PORT_ST_ENABLING_QWAIT},
        {BFA_SM(bfa_fcport_sm_enabling), BFA_PORT_ST_ENABLING},
@@ -3545,7 +3531,7 @@ bfa_fcport_isr(struct bfa_s *bfa, struct bfi_msg_s *msg)
        fcport->event_arg.i2hmsg = i2hmsg;
 
        bfa_trc(bfa, msg->mhdr.msg_id);
-       bfa_trc(bfa, bfa_sm_to_state(hal_port_sm_table, fcport->sm));
+       bfa_trc(bfa, bfa_fcport_sm_to_state(hal_port_sm_table, fcport->sm));
 
        switch (msg->mhdr.msg_id) {
        case BFI_FCPORT_I2H_ENABLE_RSP:
@@ -3980,7 +3966,7 @@ bfa_fcport_get_attr(struct bfa_s *bfa, struct bfa_port_attr_s *attr)
 
        attr->pport_cfg.path_tov  = bfa_fcpim_path_tov_get(bfa);
        attr->pport_cfg.q_depth  = bfa_fcpim_qdepth_get(bfa);
-       attr->port_state = bfa_sm_to_state(hal_port_sm_table, fcport->sm);
+       attr->port_state = bfa_fcport_sm_to_state(hal_port_sm_table, fcport->sm);
 
        attr->fec_state = fcport->fec_state;
 
@@ -4062,7 +4048,7 @@ bfa_fcport_is_disabled(struct bfa_s *bfa)
 {
        struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(bfa);
 
-       return bfa_sm_to_state(hal_port_sm_table, fcport->sm) ==
+       return bfa_fcport_sm_to_state(hal_port_sm_table, fcport->sm) ==
                BFA_PORT_ST_DISABLED;
 
 }
@@ -4072,7 +4058,7 @@ bfa_fcport_is_dport(struct bfa_s *bfa)
 {
        struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(bfa);
 
-       return (bfa_sm_to_state(hal_port_sm_table, fcport->sm) ==
+       return (bfa_fcport_sm_to_state(hal_port_sm_table, fcport->sm) ==
                BFA_PORT_ST_DPORT);
 }
 
@@ -4081,7 +4067,7 @@ bfa_fcport_is_ddport(struct bfa_s *bfa)
 {
        struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(bfa);
 
-       return (bfa_sm_to_state(hal_port_sm_table, fcport->sm) ==
+       return (bfa_fcport_sm_to_state(hal_port_sm_table, fcport->sm) ==
                BFA_PORT_ST_DDPORT);
 }
 
@@ -5641,20 +5627,6 @@ enum bfa_dport_test_state_e {
        BFA_DPORT_ST_NOTSTART   = 4,    /*!< test not start dport is enabled */
 };
 
-/*
- * BFA DPORT state machine events
- */
-enum bfa_dport_sm_event {
-       BFA_DPORT_SM_ENABLE     = 1,    /* dport enable event         */
-       BFA_DPORT_SM_DISABLE    = 2,    /* dport disable event        */
-       BFA_DPORT_SM_FWRSP      = 3,    /* fw enable/disable rsp      */
-       BFA_DPORT_SM_QRESUME    = 4,    /* CQ space available         */
-       BFA_DPORT_SM_HWFAIL     = 5,    /* IOC h/w failure            */
-       BFA_DPORT_SM_START      = 6,    /* re-start dport test        */
-       BFA_DPORT_SM_REQFAIL    = 7,    /* request failure            */
-       BFA_DPORT_SM_SCN        = 8,    /* state change notify frm fw */
-};
-
 static void bfa_dport_sm_disabled(struct bfa_dport_s *dport,
                                  enum bfa_dport_sm_event event);
 static void bfa_dport_sm_enabling_qwait(struct bfa_dport_s *dport,
index 9c83109574e91d7105874600b1652ed9983a1391..26eeee82bedc68ef5eba5278959f0a0dcf54264c 100644 (file)
@@ -226,22 +226,6 @@ struct bfa_fcxp_wqe_s {
 
 void   bfa_fcxp_isr(struct bfa_s *bfa, struct bfi_msg_s *msg);
 
-
-/*
- * RPORT related defines
- */
-enum bfa_rport_event {
-       BFA_RPORT_SM_CREATE     = 1,    /*  rport create event          */
-       BFA_RPORT_SM_DELETE     = 2,    /*  deleting an existing rport  */
-       BFA_RPORT_SM_ONLINE     = 3,    /*  rport is online             */
-       BFA_RPORT_SM_OFFLINE    = 4,    /*  rport is offline            */
-       BFA_RPORT_SM_FWRSP      = 5,    /*  firmware response           */
-       BFA_RPORT_SM_HWFAIL     = 6,    /*  IOC h/w failure             */
-       BFA_RPORT_SM_QOS_SCN    = 7,    /*  QoS SCN from firmware       */
-       BFA_RPORT_SM_SET_SPEED  = 8,    /*  Set Rport Speed             */
-       BFA_RPORT_SM_QRESUME    = 9,    /*  space in requeue queue      */
-};
-
 #define BFA_RPORT_MIN  4
 
 struct bfa_rport_mod_s {
@@ -284,12 +268,30 @@ struct bfa_rport_info_s {
        enum bfa_port_speed speed;      /*  Rport's current speed           */
 };
 
+/*
+ * RPORT related defines
+ */
+enum bfa_rport_event {
+       BFA_RPORT_SM_CREATE     = 1,    /*  rport create event          */
+       BFA_RPORT_SM_DELETE     = 2,    /*  deleting an existing rport  */
+       BFA_RPORT_SM_ONLINE     = 3,    /*  rport is online             */
+       BFA_RPORT_SM_OFFLINE    = 4,    /*  rport is offline            */
+       BFA_RPORT_SM_FWRSP      = 5,    /*  firmware response           */
+       BFA_RPORT_SM_HWFAIL     = 6,    /*  IOC h/w failure             */
+       BFA_RPORT_SM_QOS_SCN    = 7,    /*  QoS SCN from firmware       */
+       BFA_RPORT_SM_SET_SPEED  = 8,    /*  Set Rport Speed             */
+       BFA_RPORT_SM_QRESUME    = 9,    /*  space in requeue queue      */
+};
+
+struct bfa_rport_s;
+typedef void (*bfa_rport_sm_t)(struct bfa_rport_s *, enum bfa_rport_event);
+
 /*
  * BFA rport data structure
  */
 struct bfa_rport_s {
        struct list_head        qe;     /*  queue element                   */
-       bfa_sm_t        sm;             /*  state machine                   */
+       bfa_rport_sm_t  sm;             /*  state machine                   */
        struct bfa_s    *bfa;           /*  backpointer to BFA              */
        void            *rport_drv;     /*  fcs/driver rport object         */
        u16     fw_handle;      /*  firmware rport handle           */
@@ -377,13 +379,31 @@ struct bfa_uf_mod_s {
 void   bfa_uf_isr(struct bfa_s *bfa, struct bfi_msg_s *msg);
 void   bfa_uf_res_recfg(struct bfa_s *bfa, u16 num_uf_fw);
 
+/*
+ *  lps_pvt BFA LPS private functions
+ */
+
+enum bfa_lps_event {
+       BFA_LPS_SM_LOGIN        = 1,    /* login request from user      */
+       BFA_LPS_SM_LOGOUT       = 2,    /* logout request from user     */
+       BFA_LPS_SM_FWRSP        = 3,    /* f/w response to login/logout */
+       BFA_LPS_SM_RESUME       = 4,    /* space present in reqq queue  */
+       BFA_LPS_SM_DELETE       = 5,    /* lps delete from user         */
+       BFA_LPS_SM_OFFLINE      = 6,    /* Link is offline              */
+       BFA_LPS_SM_RX_CVL       = 7,    /* Rx clear virtual link        */
+       BFA_LPS_SM_SET_N2N_PID  = 8,    /* Set assigned PID for n2n */
+};
+
+struct bfa_lps_s;
+typedef void (*bfa_lps_sm_t)(struct bfa_lps_s *, enum bfa_lps_event);
+
 /*
  * LPS - bfa lport login/logout service interface
  */
 struct bfa_lps_s {
        struct list_head        qe;     /*  queue element               */
        struct bfa_s    *bfa;           /*  parent bfa instance */
-       bfa_sm_t        sm;             /*  finite state machine        */
+       bfa_lps_sm_t    sm;             /*  finite state machine        */
        u8              bfa_tag;        /*  lport tag           */
        u8              fw_tag;         /*  lport fw tag                */
        u8              reqq;           /*  lport request queue */
@@ -439,12 +459,25 @@ void      bfa_lps_isr(struct bfa_s *bfa, struct bfi_msg_s *msg);
 
 #define BFA_FCPORT(_bfa)       (&((_bfa)->modules.port))
 
+/*
+ * BFA port link notification state machine events
+ */
+
+enum bfa_fcport_ln_sm_event {
+       BFA_FCPORT_LN_SM_LINKUP         = 1,    /*  linkup event        */
+       BFA_FCPORT_LN_SM_LINKDOWN       = 2,    /*  linkdown event      */
+       BFA_FCPORT_LN_SM_NOTIFICATION   = 3     /*  done notification   */
+};
+
+struct bfa_fcport_ln_s;
+typedef void (*bfa_fcport_ln_sm_t)(struct bfa_fcport_ln_s *, enum bfa_fcport_ln_sm_event);
+
 /*
  * Link notification data structure
  */
 struct bfa_fcport_ln_s {
        struct bfa_fcport_s     *fcport;
-       bfa_sm_t                sm;
+       bfa_fcport_ln_sm_t      sm;
        struct bfa_cb_qe_s      ln_qe;  /*  BFA callback queue elem for ln */
        enum bfa_port_linkstate ln_event; /*  ln event for callback */
 };
@@ -453,12 +486,35 @@ struct bfa_fcport_trunk_s {
        struct bfa_trunk_attr_s attr;
 };
 
+/*
+ * BFA port state machine events
+ */
+enum bfa_fcport_sm_event {
+       BFA_FCPORT_SM_START     = 1,    /*  start port state machine    */
+       BFA_FCPORT_SM_STOP      = 2,    /*  stop port state machine     */
+       BFA_FCPORT_SM_ENABLE    = 3,    /*  enable port         */
+       BFA_FCPORT_SM_DISABLE   = 4,    /*  disable port state machine */
+       BFA_FCPORT_SM_FWRSP     = 5,    /*  firmware enable/disable rsp */
+       BFA_FCPORT_SM_LINKUP    = 6,    /*  firmware linkup event       */
+       BFA_FCPORT_SM_LINKDOWN  = 7,    /*  firmware linkdown event     */
+       BFA_FCPORT_SM_QRESUME   = 8,    /*  CQ space available  */
+       BFA_FCPORT_SM_HWFAIL    = 9,    /*  IOC h/w failure             */
+       BFA_FCPORT_SM_DPORTENABLE = 10, /*  enable dport      */
+       BFA_FCPORT_SM_DPORTDISABLE = 11,/*  disable dport     */
+       BFA_FCPORT_SM_FAA_MISCONFIG = 12,       /* FAA misconfiguration */
+       BFA_FCPORT_SM_DDPORTENABLE  = 13,       /* enable ddport        */
+       BFA_FCPORT_SM_DDPORTDISABLE = 14,       /* disable ddport       */
+};
+
+struct bfa_fcport_s;
+typedef void (*bfa_fcport_sm_t)(struct bfa_fcport_s *, enum bfa_fcport_sm_event);
+
 /*
  * BFA FC port data structure
  */
 struct bfa_fcport_s {
        struct bfa_s            *bfa;   /*  parent BFA instance */
-       bfa_sm_t                sm;     /*  port state machine */
+       bfa_fcport_sm_t         sm;     /*  port state machine */
        wwn_t                   nwwn;   /*  node wwn of physical port */
        wwn_t                   pwwn;   /*  port wwn of physical oprt */
        enum bfa_port_speed speed_sup;
@@ -706,9 +762,26 @@ struct bfa_fcdiag_lb_s {
        u32        status;
 };
 
+/*
+ * BFA DPORT state machine events
+ */
+enum bfa_dport_sm_event {
+       BFA_DPORT_SM_ENABLE     = 1,    /* dport enable event         */
+       BFA_DPORT_SM_DISABLE    = 2,    /* dport disable event        */
+       BFA_DPORT_SM_FWRSP      = 3,    /* fw enable/disable rsp      */
+       BFA_DPORT_SM_QRESUME    = 4,    /* CQ space available         */
+       BFA_DPORT_SM_HWFAIL     = 5,    /* IOC h/w failure            */
+       BFA_DPORT_SM_START      = 6,    /* re-start dport test        */
+       BFA_DPORT_SM_REQFAIL    = 7,    /* request failure            */
+       BFA_DPORT_SM_SCN        = 8,    /* state change notify from fw */
+};
+
+struct bfa_dport_s;
+typedef void (*bfa_dport_sm_t)(struct bfa_dport_s *, enum bfa_dport_sm_event);
+
 struct bfa_dport_s {
        struct bfa_s    *bfa;           /* Back pointer to BFA  */
-       bfa_sm_t        sm;             /* finite state machine */
+       bfa_dport_sm_t  sm;             /* finite state machine */
        struct bfa_reqq_wait_s reqq_wait;
        bfa_cb_diag_t   cbfn;
        void            *cbarg;
index d4ceca2d435ee1a3af6c6424cf2eb6648d8a6887..54bd11e6d593350f632b55af7deb3599c59adc9f 100644 (file)
@@ -2135,8 +2135,7 @@ bfad_iocmd_fcport_get_stats(struct bfad_s *bfad, void *cmd)
        struct bfa_cb_pending_q_s cb_qe;
 
        init_completion(&fcomp.comp);
-       bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp,
-                          &fcomp, &iocmd->stats);
+       bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, &iocmd->stats);
        spin_lock_irqsave(&bfad->bfad_lock, flags);
        iocmd->status = bfa_fcport_get_stats(&bfad->bfa, &cb_qe);
        spin_unlock_irqrestore(&bfad->bfad_lock, flags);
@@ -2159,7 +2158,7 @@ bfad_iocmd_fcport_reset_stats(struct bfad_s *bfad, void *cmd)
        struct bfa_cb_pending_q_s cb_qe;
 
        init_completion(&fcomp.comp);
-       bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp, &fcomp, NULL);
+       bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, NULL);
 
        spin_lock_irqsave(&bfad->bfad_lock, flags);
        iocmd->status = bfa_fcport_clear_stats(&bfad->bfa, &cb_qe);
@@ -2443,8 +2442,7 @@ bfad_iocmd_qos_get_stats(struct bfad_s *bfad, void *cmd)
        struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(&bfad->bfa);
 
        init_completion(&fcomp.comp);
-       bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp,
-                          &fcomp, &iocmd->stats);
+       bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, &iocmd->stats);
 
        spin_lock_irqsave(&bfad->bfad_lock, flags);
        WARN_ON(!bfa_ioc_get_fcmode(&bfad->bfa.ioc));
@@ -2474,8 +2472,7 @@ bfad_iocmd_qos_reset_stats(struct bfad_s *bfad, void *cmd)
        struct bfa_fcport_s *fcport = BFA_FCPORT_MOD(&bfad->bfa);
 
        init_completion(&fcomp.comp);
-       bfa_pending_q_init(&cb_qe, (bfa_cb_cbfn_t)bfad_hcb_comp,
-                          &fcomp, NULL);
+       bfa_pending_q_init_status(&cb_qe, bfad_hcb_comp, &fcomp, NULL);
 
        spin_lock_irqsave(&bfad->bfad_lock, flags);
        WARN_ON(!bfa_ioc_get_fcmode(&bfad->bfa.ioc));
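
These bfad_bsg.c callers previously cast bfad_hcb_comp() to bfa_cb_cbfn_t; the new bfa_pending_q_init_status() helper presumably stores the callback in the union's cbfn_status member instead (its body is in a header not shown in this hunk), so the cast disappears and the completion is invoked through the pointer type bfad_hcb_comp() was actually declared with.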
index 7682cfa34265d3184aa7d84428f0d572b61305b8..da42e3261237ec9a8e5248f500dbc2390df3bbc7 100644 (file)
@@ -175,11 +175,27 @@ union bfad_tmp_buf {
        wwn_t           wwn[BFA_FCS_MAX_LPORTS];
 };
 
+/* BFAD state machine events */
+enum bfad_sm_event {
+       BFAD_E_CREATE                   = 1,
+       BFAD_E_KTHREAD_CREATE_FAILED    = 2,
+       BFAD_E_INIT                     = 3,
+       BFAD_E_INIT_SUCCESS             = 4,
+       BFAD_E_HAL_INIT_FAILED          = 5,
+       BFAD_E_INIT_FAILED              = 6,
+       BFAD_E_FCS_EXIT_COMP            = 7,
+       BFAD_E_EXIT_COMP                = 8,
+       BFAD_E_STOP                     = 9
+};
+
+struct bfad_s;
+typedef void (*bfad_sm_t)(struct bfad_s *, enum bfad_sm_event);
+
 /*
  * BFAD (PCI function) data structure
  */
 struct bfad_s {
-       bfa_sm_t        sm;     /* state machine */
+       bfad_sm_t       sm;     /* state machine */
        struct list_head list_entry;
        struct bfa_s    bfa;
        struct bfa_fcs_s bfa_fcs;
@@ -226,19 +242,6 @@ struct bfad_s {
        struct list_head        vport_list;
 };
 
-/* BFAD state machine events */
-enum bfad_sm_event {
-       BFAD_E_CREATE                   = 1,
-       BFAD_E_KTHREAD_CREATE_FAILED    = 2,
-       BFAD_E_INIT                     = 3,
-       BFAD_E_INIT_SUCCESS             = 4,
-       BFAD_E_HAL_INIT_FAILED          = 5,
-       BFAD_E_INIT_FAILED              = 6,
-       BFAD_E_FCS_EXIT_COMP            = 7,
-       BFAD_E_EXIT_COMP                = 8,
-       BFAD_E_STOP                     = 9
-};
-
 /*
  * RPORT data structure
  */
index 2b864061e0730b98b6ee23c38ebd9f502aaf47c5..1befcd5b2a0f935afa86935f619bc2cb8ca331ae 100644 (file)
@@ -113,7 +113,6 @@ typedef struct {
        struct scsi_device  **dt;        /* ptrs to data transfer elements */
        u_int               firsts[CH_TYPES];
        u_int               counts[CH_TYPES];
-       u_int               unit_attention;
        u_int               voltags;
        struct mutex        lock;
 } scsi_changer;
@@ -186,17 +185,29 @@ static int
 ch_do_scsi(scsi_changer *ch, unsigned char *cmd, int cmd_len,
           void *buffer, unsigned int buflength, enum req_op op)
 {
-       int errno, retries = 0, timeout, result;
+       int errno = 0, timeout, result;
        struct scsi_sense_hdr sshdr;
+       struct scsi_failure failure_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = 3,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
                .sshdr = &sshdr,
+               .failures = &failures,
        };
 
        timeout = (cmd[0] == INITIALIZE_ELEMENT_STATUS)
                ? timeout_init : timeout_move;
 
- retry:
-       errno = 0;
        result = scsi_execute_cmd(ch->device, cmd, op, buffer, buflength,
                                  timeout * HZ, MAX_RETRIES, &exec_args);
        if (result < 0)
@@ -205,14 +216,6 @@ ch_do_scsi(scsi_changer *ch, unsigned char *cmd, int cmd_len,
                if (debug)
                        scsi_print_sense_hdr(ch->device, ch->name, &sshdr);
                errno = ch_find_errno(&sshdr);
-
-               switch(sshdr.sense_key) {
-               case UNIT_ATTENTION:
-                       ch->unit_attention = 1;
-                       if (retries++ < 3)
-                               goto retry;
-                       break;
-               }
        }
        return errno;
 }
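
The ch(4) change above is one instance of another conversion running through this series: open-coded "goto retry" loops around scsi_execute_cmd() become declarative scsi_failure tables that the midlayer consults when deciding whether to reissue the command. A condensed sketch of the mechanism, with TEST UNIT READY standing in for a real command and demo_tur() as a hypothetical caller:

	#include <scsi/scsi_cmnd.h>
	#include <scsi/scsi_device.h>
	#include <scsi/scsi_proto.h>

	static int demo_tur(struct scsi_device *sdev)
	{
		unsigned char cmd[6] = { TEST_UNIT_READY };
		struct scsi_sense_hdr sshdr;
		struct scsi_failure failure_defs[] = {
			{
				/* retry UNIT ATTENTION, any ASC/ASCQ, 3 times */
				.sense = UNIT_ATTENTION,
				.asc = SCMD_FAILURE_ASC_ANY,
				.ascq = SCMD_FAILURE_ASCQ_ANY,
				.allowed = 3,
				.result = SAM_STAT_CHECK_CONDITION,
			},
			{}      /* zeroed entry terminates the table */
		};
		struct scsi_failures failures = {
			.failure_definitions = failure_defs,
		};
		const struct scsi_exec_args exec_args = {
			.sshdr = &sshdr,
			.failures = &failures,
		};

		/* Matching failures are retried inside the midlayer, so the
		 * caller only ever sees the final result. */
		return scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, NULL, 0,
					10 * HZ, 3, &exec_args);
	}

Beyond removing the retry label, this also lets ch drop its unit_attention bookkeeping field, since the retry state now lives entirely inside the midlayer.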
index c38017b4af9826871bec6205f26ee73b115b20d2..e50e93e7fe5a1c3c79225d92f660333a22344db6 100644 (file)
@@ -73,7 +73,21 @@ csio_list_deleted(struct list_head *list)
 #define csio_list_prev(elem)   (((struct list_head *)(elem))->prev)
 
 /* State machine */
-typedef void (*csio_sm_state_t)(void *, uint32_t);
+struct csio_lnode;
+
+/* State machine events */
+enum csio_ln_ev {
+       CSIO_LNE_NONE = (uint32_t)0,
+       CSIO_LNE_LINKUP,
+       CSIO_LNE_FAB_INIT_DONE,
+       CSIO_LNE_LINK_DOWN,
+       CSIO_LNE_DOWN_LINK,
+       CSIO_LNE_LOGO,
+       CSIO_LNE_CLOSE,
+       CSIO_LNE_MAX_EVENT,
+};
+
+typedef void (*csio_sm_state_t)(struct csio_lnode *ln, enum csio_ln_ev evt);
 
 struct csio_sm {
        struct list_head        sm_list;
@@ -83,7 +97,7 @@ struct csio_sm {
 static inline void
 csio_set_state(void *smp, void *state)
 {
-       ((struct csio_sm *)smp)->sm_state = (csio_sm_state_t)state;
+       ((struct csio_sm *)smp)->sm_state = state;
 }
 
 static inline void
index d5ac9389702327fdf6d6767324ec7f8bdaa642cb..5b3ffefae476d011999a9ac66913381080b34920 100644 (file)
@@ -1095,7 +1095,7 @@ csio_handle_link_down(struct csio_hw *hw, uint8_t portid, uint32_t fcfi,
 int
 csio_is_lnode_ready(struct csio_lnode *ln)
 {
-       return (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready));
+       return (csio_get_state(ln) == csio_lns_ready);
 }
 
 /*****************************************************************************/
@@ -1366,15 +1366,15 @@ csio_free_fcfinfo(struct kref *kref)
 void
 csio_lnode_state_to_str(struct csio_lnode *ln, int8_t *str)
 {
-       if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_uninit)) {
+       if (csio_get_state(ln) == csio_lns_uninit) {
                strcpy(str, "UNINIT");
                return;
        }
-       if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)) {
+       if (csio_get_state(ln) == csio_lns_ready) {
                strcpy(str, "READY");
                return;
        }
-       if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_offline)) {
+       if (csio_get_state(ln) == csio_lns_offline) {
                strcpy(str, "OFFLINE");
                return;
        }
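
With csio_sm_state_t now carrying the real lnode prototype, these comparisons type-check directly against csio_lns_uninit, csio_lns_ready and csio_lns_offline, so every (csio_sm_state_t) cast in csio_lnode.c can simply be dropped.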
index 372a67d122d38fc161743c105db229c534d26a89..607698a0f06315935a169633a5f2e50038d37f48 100644 (file)
 extern int csio_fcoe_rnodes;
 extern int csio_fdmi_enable;
 
-/* State machine evets */
-enum csio_ln_ev {
-       CSIO_LNE_NONE = (uint32_t)0,
-       CSIO_LNE_LINKUP,
-       CSIO_LNE_FAB_INIT_DONE,
-       CSIO_LNE_LINK_DOWN,
-       CSIO_LNE_DOWN_LINK,
-       CSIO_LNE_LOGO,
-       CSIO_LNE_CLOSE,
-       CSIO_LNE_MAX_EVENT,
-};
-
-
 struct csio_fcf_info {
        struct list_head        list;
        uint8_t                 priority;
index 944ea4e0cc4551745cf96b9e0576fea5195cf13b..b6eaf49dfb0041910f7c524a6c0c163ace9c8e4c 100644 (file)
@@ -46,9 +46,6 @@ static int tur_done(struct scsi_device *sdev, struct hp_sw_dh_data *h,
        int ret = SCSI_DH_IO;
 
        switch (sshdr->sense_key) {
-       case UNIT_ATTENTION:
-               ret = SCSI_DH_IMM_RETRY;
-               break;
        case NOT_READY:
                if (sshdr->asc == 0x04 && sshdr->ascq == 2) {
                        /*
@@ -85,11 +82,24 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h)
        int ret, res;
        blk_opf_t opf = REQ_OP_DRV_IN | REQ_FAILFAST_DEV |
                                REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
+       struct scsi_failure failure_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = SCMD_FAILURE_NO_LIMIT,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
                .sshdr = &sshdr,
+               .failures = &failures,
        };
 
-retry:
        res = scsi_execute_cmd(sdev, cmd, opf, NULL, 0, HP_SW_TIMEOUT,
                               HP_SW_RETRIES, &exec_args);
        if (res > 0 && scsi_sense_valid(&sshdr)) {
@@ -104,9 +114,6 @@ retry:
                ret = SCSI_DH_IO;
        }
 
-       if (ret == SCSI_DH_IMM_RETRY)
-               goto retry;
-
        return ret;
 }
 
@@ -122,14 +129,31 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h)
        struct scsi_sense_hdr sshdr;
        struct scsi_device *sdev = h->sdev;
        int res, rc;
-       int retry_cnt = HP_SW_RETRIES;
        blk_opf_t opf = REQ_OP_DRV_IN | REQ_FAILFAST_DEV |
                                REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
+       struct scsi_failure failure_defs[] = {
+               {
+                       /*
+                        * LUN not ready - manual intervention required
+                        *
+                        * Switch-over in progress, retry.
+                        */
+                       .sense = NOT_READY,
+                       .asc = 0x04,
+                       .ascq = 0x03,
+                       .allowed = HP_SW_RETRIES,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
                .sshdr = &sshdr,
+               .failures = &failures,
        };
 
-retry:
        res = scsi_execute_cmd(sdev, cmd, opf, NULL, 0, HP_SW_TIMEOUT,
                               HP_SW_RETRIES, &exec_args);
        if (!res) {
@@ -144,13 +168,6 @@ retry:
        switch (sshdr.sense_key) {
        case NOT_READY:
                if (sshdr.asc == 0x04 && sshdr.ascq == 3) {
-                       /*
-                        * LUN not ready - manual intervention required
-                        *
-                        * Switch-over in progress, retry.
-                        */
-                       if (--retry_cnt)
-                               goto retry;
                        rc = SCSI_DH_RETRY;
                        break;
                }
index 1ac2ae17e8be3ce37d8dd554ba6c9039cf4cd434..f8a09e3eba582c8bf50ebab9d352db718d7b1ef8 100644 (file)
@@ -485,43 +485,17 @@ static int set_mode_select(struct scsi_device *sdev, struct rdac_dh_data *h)
 static int mode_select_handle_sense(struct scsi_device *sdev,
                                    struct scsi_sense_hdr *sense_hdr)
 {
-       int err = SCSI_DH_IO;
        struct rdac_dh_data *h = sdev->handler_data;
 
        if (!scsi_sense_valid(sense_hdr))
-               goto done;
-
-       switch (sense_hdr->sense_key) {
-       case NO_SENSE:
-       case ABORTED_COMMAND:
-       case UNIT_ATTENTION:
-               err = SCSI_DH_RETRY;
-               break;
-       case NOT_READY:
-               if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x01)
-                       /* LUN Not Ready and is in the Process of Becoming
-                        * Ready
-                        */
-                       err = SCSI_DH_RETRY;
-               break;
-       case ILLEGAL_REQUEST:
-               if (sense_hdr->asc == 0x91 && sense_hdr->ascq == 0x36)
-                       /*
-                        * Command Lock contention
-                        */
-                       err = SCSI_DH_IMM_RETRY;
-               break;
-       default:
-               break;
-       }
+               return SCSI_DH_IO;
 
        RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
                "MODE_SELECT returned with sense %02x/%02x/%02x",
                (char *) h->ctlr->array_name, h->ctlr->index,
                sense_hdr->sense_key, sense_hdr->asc, sense_hdr->ascq);
 
-done:
-       return err;
+       return SCSI_DH_IO;
 }
 
 static void send_mode_select(struct work_struct *work)
@@ -530,7 +504,7 @@ static void send_mode_select(struct work_struct *work)
                container_of(work, struct rdac_controller, ms_work);
        struct scsi_device *sdev = ctlr->ms_sdev;
        struct rdac_dh_data *h = sdev->handler_data;
-       int rc, err, retry_cnt = RDAC_RETRY_COUNT;
+       int rc, err;
        struct rdac_queue_data *tmp, *qdata;
        LIST_HEAD(list);
        unsigned char cdb[MAX_COMMAND_SIZE];
@@ -538,8 +512,49 @@ static void send_mode_select(struct work_struct *work)
        unsigned int data_size;
        blk_opf_t opf = REQ_OP_DRV_OUT | REQ_FAILFAST_DEV |
                                REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER;
+       struct scsi_failure failure_defs[] = {
+               {
+                       .sense = NO_SENSE,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = ABORTED_COMMAND,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               /* LUN Not Ready and is in the Process of Becoming Ready */
+               {
+                       .sense = NOT_READY,
+                       .asc = 0x04,
+                       .ascq = 0x01,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               /* Command Lock contention */
+               {
+                       .sense = ILLEGAL_REQUEST,
+                       .asc = 0x91,
+                       .ascq = 0x36,
+                       .allowed = SCMD_FAILURE_NO_LIMIT,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .total_allowed = RDAC_RETRY_COUNT,
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
                .sshdr = &sshdr,
+               .failures = &failures,
        };
 
        spin_lock(&ctlr->ms_lock);
@@ -548,15 +563,12 @@ static void send_mode_select(struct work_struct *work)
        ctlr->ms_sdev = NULL;
        spin_unlock(&ctlr->ms_lock);
 
- retry:
        memset(cdb, 0, sizeof(cdb));
 
        data_size = rdac_failover_get(ctlr, &list, cdb);
 
-       RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, "
-               "%s MODE_SELECT command",
-               (char *) h->ctlr->array_name, h->ctlr->index,
-               (retry_cnt == RDAC_RETRY_COUNT) ? "queueing" : "retrying");
+       RDAC_LOG(RDAC_LOG_FAILOVER, sdev, "array %s, ctlr %d, queueing MODE_SELECT command",
+               (char *)h->ctlr->array_name, h->ctlr->index);
 
        rc = scsi_execute_cmd(sdev, cdb, opf, &h->ctlr->mode_select, data_size,
                              RDAC_TIMEOUT * HZ, RDAC_RETRIES, &exec_args);
@@ -570,10 +582,6 @@ static void send_mode_select(struct work_struct *work)
                err = SCSI_DH_IO;
        } else {
                err = mode_select_handle_sense(sdev, &sshdr);
-               if (err == SCSI_DH_RETRY && retry_cnt--)
-                       goto retry;
-               if (err == SCSI_DH_IMM_RETRY)
-                       goto retry;
        }
 
        list_for_each_entry_safe(qdata, tmp, &list, entry) {
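
Two retry knobs show up across these device-handler conversions: the per-definition .allowed count, with SCMD_FAILURE_NO_LIMIT for conditions that should always be retried (the Command Lock contention entry above), and the table-wide .total_allowed, which rdac sets to RDAC_RETRY_COUNT to preserve the old overall retry budget across all matching senses combined.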
index 97816a0e6240a955ea7c6aa0f7ddef35430fa346..0175d2282b4581c535d95747d1bd09f02486d460 100644 (file)
@@ -2753,7 +2753,7 @@ static void __exit esp_exit(void)
 }
 
 MODULE_DESCRIPTION("ESP SCSI driver core");
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
index c64a085a7ee2f9f29c0e9fe1e00bed7e8eedf5a1..453665ac6020b2d41513901a9a8d261f66cc5352 100644 (file)
@@ -597,7 +597,7 @@ static const struct attribute_group *fcoe_fcf_attr_groups[] = {
        NULL,
 };
 
-static struct bus_type fcoe_bus_type;
+static const struct bus_type fcoe_bus_type;
 
 static int fcoe_bus_match(struct device *dev,
                          struct device_driver *drv)
@@ -664,7 +664,7 @@ static struct attribute *fcoe_bus_attrs[] = {
 };
 ATTRIBUTE_GROUPS(fcoe_bus);
 
-static struct bus_type fcoe_bus_type = {
+static const struct bus_type fcoe_bus_type = {
        .name = "fcoe",
        .match = &fcoe_bus_match,
        .bus_groups = fcoe_bus_groups,
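
The driver core now accepts const bus pointers, so a bus definition that is fully known at build time can live in read-only memory. A minimal sketch with hypothetical names:

	#include <linux/device.h>

	static int demo_bus_match(struct device *dev, struct device_driver *drv)
	{
		return 1;       /* real buses compare IDs here */
	}

	static const struct bus_type demo_bus_type = {
		.name  = "demo",
		.match = demo_bus_match,
	};

	/* bus_register(&demo_bus_type) takes the const pointer directly. */

The forward declaration earlier in the file becomes const for the same reason: both declarations must agree on the qualifier.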
index a61e0c5e650667ec18987eec76532cfaaafdabcb..0c5e57c7e3229cc4304eba1dc372ce504d273fde 100644 (file)
@@ -14,13 +14,13 @@ static ssize_t fnic_show_state(struct device *dev,
        struct fc_lport *lp = shost_priv(class_to_shost(dev));
        struct fnic *fnic = lport_priv(lp);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n", fnic_state_str[fnic->state]);
+       return sysfs_emit(buf, "%s\n", fnic_state_str[fnic->state]);
 }
 
 static ssize_t fnic_show_drv_version(struct device *dev,
                                     struct device_attribute *attr, char *buf)
 {
-       return snprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION);
+       return sysfs_emit(buf, "%s\n", DRV_VERSION);
 }
 
 static ssize_t fnic_show_link_state(struct device *dev,
@@ -28,8 +28,7 @@ static ssize_t fnic_show_link_state(struct device *dev,
 {
        struct fc_lport *lp = shost_priv(class_to_shost(dev));
 
-       return snprintf(buf, PAGE_SIZE, "%s\n", (lp->link_up)
-                       ? "Link Up" : "Link Down");
+       return sysfs_emit(buf, "%s\n", (lp->link_up) ? "Link Up" : "Link Down");
 }
 
 static DEVICE_ATTR(fnic_state, S_IRUGO, fnic_show_state, NULL);
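
These show() conversions, here and in the ibmvfc hunks further down, follow the documented sysfs rule that attribute callbacks should use sysfs_emit(): it knows the buffer is a full page, caps output at PAGE_SIZE, and warns on misuse that snprintf() would silently accept. A sketch of the resulting shape, with a hypothetical attribute:

	#include <linux/device.h>
	#include <linux/sysfs.h>

	static ssize_t demo_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
	{
		/* no explicit PAGE_SIZE bookkeeping needed */
		return sysfs_emit(buf, "%s\n", "demo-value");
	}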
index fc4cee91b175c14d0950337ac8928b659cbc8cef..2ba61dba4569b827decb67fcca9354cc107c755c 100644 (file)
@@ -1961,8 +1961,8 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
 
        if (!(fnic_priv(sc)->flags & (FNIC_IO_ABORTED | FNIC_IO_DONE))) {
                spin_unlock_irqrestore(&fnic->wq_copy_lock[hwq], flags);
-           FNIC_SCSI_DBG(KERN_ERR, fnic->lport->host, fnic->fnic_num,
-                       "Issuing host reset due to out of order IO\n");
+               FNIC_SCSI_DBG(KERN_ERR, fnic->lport->host, fnic->fnic_num,
+                             "Issuing host reset due to out of order IO\n");
 
                ret = FAILED;
                goto fnic_abort_cmd_end;
index bbb7b2d9ffcfb36085e8a3fa3a631f01b798fc6c..097dfe4b620dce85736b8a0d5cf7f4b3c4842e9b 100644 (file)
@@ -1507,7 +1507,12 @@ void hisi_sas_controller_reset_prepare(struct hisi_hba *hisi_hba)
        scsi_block_requests(shost);
        hisi_hba->hw->wait_cmds_complete_timeout(hisi_hba, 100, 5000);
 
-       del_timer_sync(&hisi_hba->timer);
+       /*
+        * hisi_hba->timer is only used by v1/v2 hw; hw->sht is likewise
+        * only set for v1/v2 hw, so check it to skip the timer on v3 hw.
+        */
+       if (hisi_hba->hw->sht)
+               del_timer_sync(&hisi_hba->timer);
 
        set_bit(HISI_SAS_REJECT_CMD_BIT, &hisi_hba->flags);
 }
@@ -1573,7 +1578,7 @@ static int hisi_sas_controller_prereset(struct hisi_hba *hisi_hba)
                return -EPERM;
        }
 
-       if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct)
+       if (hisi_sas_debugfs_enable)
                hisi_hba->hw->debugfs_snapshot_regs(hisi_hba);
 
        return 0;
@@ -1961,10 +1966,18 @@ static bool hisi_sas_internal_abort_timeout(struct sas_task *task,
        struct hisi_hba *hisi_hba = dev_to_hisi_hba(device);
        struct hisi_sas_internal_abort_data *timeout = data;
 
-       if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct) {
-               down(&hisi_hba->sem);
+       if (hisi_sas_debugfs_enable) {
+               /*
+                * If timeout occurs in device gone scenario, to avoid
+                * circular dependency like:
+                * hisi_sas_dev_gone() -> down() -> ... ->
+                * hisi_sas_internal_abort_timeout() -> down().
+                */
+               if (!timeout->rst_ha_timeout)
+                       down(&hisi_hba->sem);
                hisi_hba->hw->debugfs_snapshot_regs(hisi_hba);
-               up(&hisi_hba->sem);
+               if (!timeout->rst_ha_timeout)
+                       up(&hisi_hba->sem);
        }
 
        if (task->task_state_flags & SAS_TASK_STATE_DONE) {
@@ -2617,7 +2630,8 @@ static __exit void hisi_sas_exit(void)
 {
        sas_release_transport(hisi_sas_stt);
 
-       debugfs_remove(hisi_sas_debugfs_dir);
+       if (hisi_sas_debugfs_enable)
+               debugfs_remove(hisi_sas_debugfs_dir);
 }
 
 module_init(hisi_sas_init);
index b56fbc61a15ae825147d5d285da1be228e2a19ec..7d2a33514538c2cd8083733d8303f4dc5934de7d 100644 (file)
@@ -4902,7 +4902,8 @@ err_out_unregister_ha:
 err_out_remove_host:
        scsi_remove_host(shost);
 err_out_undo_debugfs:
-       debugfs_exit_v3_hw(hisi_hba);
+       if (hisi_sas_debugfs_enable)
+               debugfs_exit_v3_hw(hisi_hba);
 err_out_free_host:
        hisi_sas_free(hisi_hba);
        scsi_host_put(shost);
@@ -4934,7 +4935,6 @@ static void hisi_sas_v3_remove(struct pci_dev *pdev)
        struct Scsi_Host *shost = sha->shost;
 
        pm_runtime_get_noresume(dev);
-       del_timer_sync(&hisi_hba->timer);
 
        sas_unregister_ha(sha);
        flush_workqueue(hisi_hba->wq);
@@ -4942,7 +4942,9 @@ static void hisi_sas_v3_remove(struct pci_dev *pdev)
 
        hisi_sas_v3_destroy_irqs(pdev, hisi_hba);
        hisi_sas_free(hisi_hba);
-       debugfs_exit_v3_hw(hisi_hba);
+       if (hisi_sas_debugfs_enable)
+               debugfs_exit_v3_hw(hisi_hba);
+
        scsi_host_put(shost);
 }
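
Both hisi_sas teardown paths now mirror their setup paths: the debugfs directory is only created when hisi_sas_debugfs_enable is set, and hisi_hba->timer is only armed on v1/v2 hardware (where hw->sht is set), so the exit, remove and reset-prepare paths check those same conditions instead of tearing down state that was never set up.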
 
index d7f51b84f3c788f0175648655673ca97b4c3b896..4f495a41ec4aaed4c52d01ee84fc274c30236a09 100644 (file)
@@ -371,7 +371,7 @@ static void scsi_host_dev_release(struct device *dev)
        kfree(shost);
 }
 
-static struct device_type scsi_host_type = {
+static const struct device_type scsi_host_type = {
        .name =         "scsi_host",
        .release =      scsi_host_dev_release,
 };
index 46d0b3a0e12fb71388334044f236aeed34c963ca..05b126bfd18b5524870de3d8845f16a515e84e3f 100644 (file)
@@ -3482,8 +3482,7 @@ static ssize_t ibmvfc_show_host_partition_name(struct device *dev,
        struct Scsi_Host *shost = class_to_shost(dev);
        struct ibmvfc_host *vhost = shost_priv(shost);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       vhost->login_buf->resp.partition_name);
+       return sysfs_emit(buf, "%s\n", vhost->login_buf->resp.partition_name);
 }
 
 static ssize_t ibmvfc_show_host_device_name(struct device *dev,
@@ -3492,8 +3491,7 @@ static ssize_t ibmvfc_show_host_device_name(struct device *dev,
        struct Scsi_Host *shost = class_to_shost(dev);
        struct ibmvfc_host *vhost = shost_priv(shost);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       vhost->login_buf->resp.device_name);
+       return sysfs_emit(buf, "%s\n", vhost->login_buf->resp.device_name);
 }
 
 static ssize_t ibmvfc_show_host_loc_code(struct device *dev,
@@ -3502,8 +3500,7 @@ static ssize_t ibmvfc_show_host_loc_code(struct device *dev,
        struct Scsi_Host *shost = class_to_shost(dev);
        struct ibmvfc_host *vhost = shost_priv(shost);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       vhost->login_buf->resp.port_loc_code);
+       return sysfs_emit(buf, "%s\n", vhost->login_buf->resp.port_loc_code);
 }
 
 static ssize_t ibmvfc_show_host_drc_name(struct device *dev,
@@ -3512,8 +3509,7 @@ static ssize_t ibmvfc_show_host_drc_name(struct device *dev,
        struct Scsi_Host *shost = class_to_shost(dev);
        struct ibmvfc_host *vhost = shost_priv(shost);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       vhost->login_buf->resp.drc_name);
+       return sysfs_emit(buf, "%s\n", vhost->login_buf->resp.drc_name);
 }
 
 static ssize_t ibmvfc_show_host_npiv_version(struct device *dev,
@@ -3521,7 +3517,8 @@ static ssize_t ibmvfc_show_host_npiv_version(struct device *dev,
 {
        struct Scsi_Host *shost = class_to_shost(dev);
        struct ibmvfc_host *vhost = shost_priv(shost);
-       return snprintf(buf, PAGE_SIZE, "%d\n", be32_to_cpu(vhost->login_buf->resp.version));
+       return sysfs_emit(buf, "%d\n",
+                         be32_to_cpu(vhost->login_buf->resp.version));
 }
 
 static ssize_t ibmvfc_show_host_capabilities(struct device *dev,
@@ -3529,7 +3526,8 @@ static ssize_t ibmvfc_show_host_capabilities(struct device *dev,
 {
        struct Scsi_Host *shost = class_to_shost(dev);
        struct ibmvfc_host *vhost = shost_priv(shost);
-       return snprintf(buf, PAGE_SIZE, "%llx\n", be64_to_cpu(vhost->login_buf->resp.capabilities));
+       return sysfs_emit(buf, "%llx\n",
+                         be64_to_cpu(vhost->login_buf->resp.capabilities));
 }
 
 /**
@@ -3550,7 +3548,7 @@ static ssize_t ibmvfc_show_log_level(struct device *dev,
        int len;
 
        spin_lock_irqsave(shost->host_lock, flags);
-       len = snprintf(buf, PAGE_SIZE, "%d\n", vhost->log_level);
+       len = sysfs_emit(buf, "%d\n", vhost->log_level);
        spin_unlock_irqrestore(shost->host_lock, flags);
        return len;
 }
@@ -3589,7 +3587,7 @@ static ssize_t ibmvfc_show_scsi_channels(struct device *dev,
        int len;
 
        spin_lock_irqsave(shost->host_lock, flags);
-       len = snprintf(buf, PAGE_SIZE, "%d\n", scsi->desired_queues);
+       len = sysfs_emit(buf, "%d\n", scsi->desired_queues);
        spin_unlock_irqrestore(shost->host_lock, flags);
        return len;
 }
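
Every snprintf(buf, PAGE_SIZE, ...) conversion above is the same
mechanical recipe: sysfs_emit() is specified for exactly this case,
bounding the write to the PAGE_SIZE buffer that sysfs show() callbacks
receive and warning if handed anything else. A minimal sketch with a
hypothetical attribute:

        #include <linux/device.h>
        #include <linux/sysfs.h>

        static ssize_t example_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
        {
                /* bounded to PAGE_SIZE; returns the byte count */
                return sysfs_emit(buf, "%d\n", 42);
        }
        static DEVICE_ATTR_RO(example);
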
index 4dc411a58107010dca5ddb73e54ebc231c679631..68b99924ee4ffc9117f3663b4f362b56d4844fb5 100644 (file)
@@ -1551,18 +1551,18 @@ static long ibmvscsis_adapter_info(struct scsi_info *vscsi,
        if (vscsi->client_data.partition_number == 0)
                vscsi->client_data.partition_number =
                        be32_to_cpu(info->partition_number);
-       strncpy(vscsi->client_data.srp_version, info->srp_version,
+       strscpy(vscsi->client_data.srp_version, info->srp_version,
                sizeof(vscsi->client_data.srp_version));
-       strncpy(vscsi->client_data.partition_name, info->partition_name,
+       strscpy(vscsi->client_data.partition_name, info->partition_name,
                sizeof(vscsi->client_data.partition_name));
        vscsi->client_data.mad_version = be32_to_cpu(info->mad_version);
        vscsi->client_data.os_type = be32_to_cpu(info->os_type);
 
        /* Copy our info */
-       strncpy(info->srp_version, SRP_VERSION,
-               sizeof(info->srp_version));
-       strncpy(info->partition_name, vscsi->dds.partition_name,
-               sizeof(info->partition_name));
+       strscpy_pad(info->srp_version, SRP_VERSION,
+                   sizeof(info->srp_version));
+       strscpy_pad(info->partition_name, vscsi->dds.partition_name,
+                   sizeof(info->partition_name));
        info->partition_number = cpu_to_be32(vscsi->dds.partition_num);
        info->mad_version = cpu_to_be32(MAD_VERSION_1);
        info->os_type = cpu_to_be32(LINUX);
@@ -1645,8 +1645,8 @@ static int ibmvscsis_cap_mad(struct scsi_info *vscsi, struct iu_entry *iue)
                         be64_to_cpu(mad->buffer),
                         vscsi->dds.window[LOCAL].liobn, token);
        if (rc == H_SUCCESS) {
-               strncpy(cap->name, dev_name(&vscsi->dma_dev->dev),
-                       SRP_MAX_LOC_LEN);
+               strscpy_pad(cap->name, dev_name(&vscsi->dma_dev->dev),
+                       sizeof(cap->name));
 
                len = olen - min_len;
                status = VIOSRP_MAD_SUCCESS;
@@ -3616,13 +3616,13 @@ static void ibmvscsis_remove(struct vio_dev *vdev)
 static ssize_t system_id_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
-       return snprintf(buf, PAGE_SIZE, "%s\n", system_id);
+       return sysfs_emit(buf, "%s\n", system_id);
 }
 
 static ssize_t partition_number_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
 {
-       return snprintf(buf, PAGE_SIZE, "%x\n", partition_number);
+       return sysfs_emit(buf, "%x\n", partition_number);
 }
 
 static ssize_t unit_address_show(struct device *dev,
@@ -3630,7 +3630,7 @@ static ssize_t unit_address_show(struct device *dev,
 {
        struct scsi_info *vscsi = container_of(dev, struct scsi_info, dev);
 
-       return snprintf(buf, PAGE_SIZE, "%x\n", vscsi->dma_dev->unit_address);
+       return sysfs_emit(buf, "%x\n", vscsi->dma_dev->unit_address);
 }
 
 static int ibmvscsis_get_system_info(void)
@@ -3650,7 +3650,7 @@ static int ibmvscsis_get_system_info(void)
 
        name = of_get_property(rootdn, "ibm,partition-name", NULL);
        if (name)
-               strncpy(partition_name, name, sizeof(partition_name));
+               strscpy(partition_name, name, sizeof(partition_name));
 
        num = of_get_property(rootdn, "ibm,partition-no", NULL);
        if (num)
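
The strncpy() replacements in this file split into two intents. Where
an ordinary C string is wanted, strscpy() is used: it always
NUL-terminates and returns -E2BIG on truncation, where strncpy() could
leave the buffer unterminated. Where the destination is a fixed-width
field handed to the partner, strscpy_pad() additionally zero-fills the
tail so no stack garbage leaks out. A sketch of the two, assuming an
8-byte destination and some source string src:

        char dst[8];

        if (strscpy(dst, src, sizeof(dst)) < 0)
                ;       /* truncated, but dst is still NUL-terminated */

        strscpy_pad(dst, src, sizeof(dst)); /* as above, plus the bytes
                                               after the NUL are zeroed */
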
index 6277162a028bb98378d66c34e7a9f47a20323027..c582a3932ceab74a6cda37cc6c243b4d943f529b 100644 (file)
@@ -137,7 +137,7 @@ static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, c
        struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost);
        struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha);
 
-       return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id);
+       return sysfs_emit(buf, "%d\n", ihost->id);
 }
 
 static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL);
index 494a671fb5564d4c7324ecbd4d16e399b6233de9..fb04b0b515ab1fa76a8e6d76878ff091fd701ec4 100644 (file)
@@ -204,6 +204,6 @@ static struct platform_driver esp_jazz_driver = {
 module_platform_driver(esp_jazz_driver);
 
 MODULE_DESCRIPTION("JAZZ ESP SCSI driver");
-MODULE_AUTHOR("Thomas Bogendoerfer (tsbogend@alpha.franken.de)");
+MODULE_AUTHOR("Thomas Bogendoerfer <tsbogend@alpha.franken.de>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
index 7dcac3b6baa7eef0c1a4dddd7937feaa0a76831b..6b7e4ca6b7b5ee98d4c876fced7b0d1c9e0687aa 100644 (file)
@@ -136,22 +136,24 @@ static inline int fc_ct_ns_fill(struct fc_lport *lport,
                break;
 
        case FC_NS_RSPN_ID:
-               len = strnlen(fc_host_symbolic_name(lport->host), 255);
+               len = strnlen(fc_host_symbolic_name(lport->host),
+                             FC_SYMBOLIC_NAME_SIZE);
                ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rspn) + len,
                                    FC_FST_DIR, FC_NS_SUBTYPE);
                hton24(ct->payload.spn.fr_fid.fp_fid, lport->port_id);
-               strncpy(ct->payload.spn.fr_name,
-                       fc_host_symbolic_name(lport->host), len);
+               memcpy(ct->payload.spn.fr_name,
+                      fc_host_symbolic_name(lport->host), len);
                ct->payload.spn.fr_name_len = len;
                break;
 
        case FC_NS_RSNN_NN:
-               len = strnlen(fc_host_symbolic_name(lport->host), 255);
+               len = strnlen(fc_host_symbolic_name(lport->host),
+                             FC_SYMBOLIC_NAME_SIZE);
                ct = fc_ct_hdr_fill(fp, op, sizeof(struct fc_ns_rsnn) + len,
                                    FC_FST_DIR, FC_NS_SUBTYPE);
                put_unaligned_be64(lport->wwnn, &ct->payload.snn.fr_wwn);
-               strncpy(ct->payload.snn.fr_name,
-                       fc_host_symbolic_name(lport->host), len);
+               memcpy(ct->payload.snn.fr_name,
+                      fc_host_symbolic_name(lport->host), len);
                ct->payload.snn.fr_name_len = len;
                break;
 
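The libfc hunk is the opposite case: fr_name is a counted wire field,
not a C string, so the NUL-related semantics of strncpy() were never
wanted, only its bound. Clamping with strnlen() and copying with
memcpy() says exactly that; sketched with hypothetical field/src names:

        /* counted field: length travels separately, no NUL on the wire */
        len = strnlen(src, FIELD_MAX);          /* FIELD_MAX = field width */
        memcpy(field->name, src, len);
        field->name_len = len;

Bounding strnlen() with FC_SYMBOLIC_NAME_SIZE rather than a bare 255
also ties the copy to the actual size of the symbolic-name buffer.
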
index 04d608ea91060ba32cba525323ccd30be340bc6b..30d20d37554f6deb28ce816a7307c41db4ae264a 100644 (file)
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -535,6 +535,44 @@ struct lpfc_cgn_acqe_stat {
        atomic64_t warn;
 };
 
+enum lpfc_fc_flag {
+       /* Several of these flags are HBA centric and should be moved to
+        * phba->link_flag (e.g. FC_PTP, FC_PUBLIC_LOOP)
+        */
+       FC_PT2PT,                       /* pt2pt with no fabric */
+       FC_PT2PT_PLOGI,                 /* pt2pt initiate PLOGI */
+       FC_DISC_TMO,                    /* Discovery timer running */
+       FC_PUBLIC_LOOP,                 /* Public loop */
+       FC_LBIT,                        /* LOGIN bit in loopinit set */
+       FC_RSCN_MODE,                   /* RSCN cmd rcv'ed */
+       FC_NLP_MORE,                    /* More node to process in node tbl */
+       FC_OFFLINE_MODE,                /* Interface is offline for diag */
+       FC_FABRIC,                      /* We are fabric attached */
+       FC_VPORT_LOGO_RCVD,             /* LOGO received on vport */
+       FC_RSCN_DISCOVERY,              /* Auth all devices after RSCN */
+       FC_LOGO_RCVD_DID_CHNG,          /* FDISC on phys port detect DID chng */
+       FC_PT2PT_NO_NVME,               /* Don't send NVME PRLI */
+       FC_SCSI_SCAN_TMO,               /* scsi scan timer running */
+       FC_ABORT_DISCOVERY,             /* we want to abort discovery */
+       FC_NDISC_ACTIVE,                /* NPort discovery active */
+       FC_BYPASSED_MODE,               /* NPort is in bypassed mode */
+       FC_VPORT_NEEDS_REG_VPI,         /* Needs to have its vpi registered */
+       FC_RSCN_DEFERRED,               /* A deferred RSCN being processed */
+       FC_VPORT_NEEDS_INIT_VPI,        /* Need to INIT_VPI before FDISC */
+       FC_VPORT_CVL_RCVD,              /* VLink failed due to CVL */
+       FC_VFI_REGISTERED,              /* VFI is registered */
+       FC_FDISC_COMPLETED,             /* FDISC completed */
+       FC_DISC_DELAYED,                /* Delay NPort discovery */
+};
+
+enum lpfc_load_flag {
+       FC_LOADING,                     /* HBA in process of loading drvr */
+       FC_UNLOADING,                   /* HBA in process of unloading drvr */
+       FC_ALLOW_FDMI,                  /* port is ready for FDMI requests */
+       FC_ALLOW_VMID,                  /* Allow VMID I/Os */
+       FC_DEREGISTER_ALL_APP_ID        /* Deregister all VMIDs */
+};
+
 struct lpfc_vport {
        struct lpfc_hba *phba;
        struct list_head listentry;
@@ -549,34 +587,7 @@ struct lpfc_vport {
        uint8_t vpi_state;
 #define LPFC_VPI_REGISTERED    0x1
 
-       uint32_t fc_flag;       /* FC flags */
-/* Several of these flags are HBA centric and should be moved to
- * phba->link_flag (e.g. FC_PTP, FC_PUBLIC_LOOP)
- */
-#define FC_PT2PT                0x1     /* pt2pt with no fabric */
-#define FC_PT2PT_PLOGI          0x2     /* pt2pt initiate PLOGI */
-#define FC_DISC_TMO             0x4     /* Discovery timer running */
-#define FC_PUBLIC_LOOP          0x8     /* Public loop */
-#define FC_LBIT                 0x10    /* LOGIN bit in loopinit set */
-#define FC_RSCN_MODE            0x20    /* RSCN cmd rcv'ed */
-#define FC_NLP_MORE             0x40    /* More node to process in node tbl */
-#define FC_OFFLINE_MODE         0x80    /* Interface is offline for diag */
-#define FC_FABRIC               0x100   /* We are fabric attached */
-#define FC_VPORT_LOGO_RCVD      0x200    /* LOGO received on vport */
-#define FC_RSCN_DISCOVERY       0x400   /* Auth all devices after RSCN */
-#define FC_LOGO_RCVD_DID_CHNG   0x800    /* FDISC on phys port detect DID chng*/
-#define FC_PT2PT_NO_NVME        0x1000   /* Don't send NVME PRLI */
-#define FC_SCSI_SCAN_TMO        0x4000  /* scsi scan timer running */
-#define FC_ABORT_DISCOVERY      0x8000  /* we want to abort discovery */
-#define FC_NDISC_ACTIVE         0x10000         /* NPort discovery active */
-#define FC_BYPASSED_MODE        0x20000         /* NPort is in bypassed mode */
-#define FC_VPORT_NEEDS_REG_VPI 0x80000  /* Needs to have its vpi registered */
-#define FC_RSCN_DEFERRED       0x100000 /* A deferred RSCN being processed */
-#define FC_VPORT_NEEDS_INIT_VPI 0x200000 /* Need to INIT_VPI before FDISC */
-#define FC_VPORT_CVL_RCVD      0x400000 /* VLink failed due to CVL      */
-#define FC_VFI_REGISTERED      0x800000 /* VFI is registered */
-#define FC_FDISC_COMPLETED     0x1000000/* FDISC completed */
-#define FC_DISC_DELAYED                0x2000000/* Delay NPort discovery */
+       unsigned long fc_flag;  /* FC flags */
 
        uint32_t ct_flags;
 #define FC_CT_RFF_ID           0x1      /* RFF_ID accepted by switch */
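
This is the core rework of the lpfc series: fc_flag moves from a
uint32_t of #define masks to an unsigned long (the type the bitops API
operates on) indexed by enum bit numbers, so 0x1/0x2/0x4/... become
0, 1, 2, ..., and every test/set/clear turns into an atomic bitop that
needs no host_lock. Format strings change from x%x to x%lx to match.
A self-contained sketch of the pattern, with hypothetical names:

        enum example_flag {     /* bit numbers, not masks */
                EX_FABRIC,      /* was: #define EX_FABRIC    0x1 */
                EX_RSCN_MODE,   /* was: #define EX_RSCN_MODE 0x2 */
        };

        static unsigned long ex_flags;

        set_bit(EX_FABRIC, &ex_flags);          /* atomic RMW, no lock */
        if (test_bit(EX_RSCN_MODE, &ex_flags))  /* atomic read */
                clear_bit(EX_RSCN_MODE, &ex_flags);

This is also why several lock/unlock pairs around pure flag reads
disappear later in the diff: a single test_bit() needs no serialization.
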
@@ -587,16 +598,18 @@ struct lpfc_vport {
 #define FC_CT_RPRT_DEFER       0x20     /* Defer issuing FDMI RPRT */
 
        struct list_head fc_nodes;
+       spinlock_t fc_nodes_list_lock; /* spinlock for fc_nodes list */
 
        /* Keep counters for the number of entries in each list. */
-       uint16_t fc_plogi_cnt;
-       uint16_t fc_adisc_cnt;
-       uint16_t fc_reglogin_cnt;
-       uint16_t fc_prli_cnt;
-       uint16_t fc_unmap_cnt;
-       uint16_t fc_map_cnt;
-       uint16_t fc_npr_cnt;
-       uint16_t fc_unused_cnt;
+       atomic_t fc_plogi_cnt;
+       atomic_t fc_adisc_cnt;
+       atomic_t fc_reglogin_cnt;
+       atomic_t fc_prli_cnt;
+       atomic_t fc_unmap_cnt;
+       atomic_t fc_map_cnt;
+       atomic_t fc_npr_cnt;
+       atomic_t fc_unused_cnt;
+
        struct serv_parm fc_sparam;     /* buffer for our service parameters */
 
        uint32_t fc_myDID;      /* fibre channel S_ID */
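
The per-state node counters get the same lock-free treatment via
atomic_t. One caveat, visible in lpfc_num_discovered_ports_show()
below: each atomic_read() is atomic on its own, but the sum of two
reads is only a snapshot, which is acceptable for an informational
sysfs value. Sketch:

        int n;

        atomic_inc(&vport->fc_map_cnt);         /* node enters the state */
        atomic_dec(&vport->fc_map_cnt);         /* node leaves it */

        /* each read is atomic; the sum is a best-effort snapshot */
        n = atomic_read(&vport->fc_map_cnt) +
            atomic_read(&vport->fc_unmap_cnt);
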
@@ -642,12 +655,7 @@ struct lpfc_vport {
        struct timer_list els_tmofunc;
        struct timer_list delayed_disc_tmo;
 
-       uint8_t load_flag;
-#define FC_LOADING             0x1     /* HBA in process of loading drvr */
-#define FC_UNLOADING           0x2     /* HBA in process of unloading drvr */
-#define FC_ALLOW_FDMI          0x4     /* port is ready for FDMI requests */
-#define FC_ALLOW_VMID          0x8     /* Allow VMID I/Os */
-#define FC_DEREGISTER_ALL_APP_ID       0x10    /* Deregister all VMIDs */
+       unsigned long load_flag;
        /* Vport Config Parameters */
        uint32_t cfg_scan_down;
        uint32_t cfg_lun_queue_depth;
index d3a5d6ecdf7d2a23d0ee94bc83d5ffc295947c82..365c7e96070bb7aec4cad7fa09f17eb0aa8b6345 100644 (file)
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -344,6 +344,7 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
        struct lpfc_fc4_ctrl_stat *cstat;
        uint64_t data1, data2, data3;
        uint64_t totin, totout, tot;
+       unsigned long iflags;
        char *statep;
        int i;
        int len = 0;
@@ -543,7 +544,7 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
        if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
                goto buffer_done;
 
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
 
        list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
                nrport = NULL;
@@ -617,7 +618,7 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
                if (strlcat(buf, tmp, PAGE_SIZE) >= PAGE_SIZE)
                        goto unlock_buf_done;
        }
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
 
        if (!lport)
                goto buffer_done;
@@ -681,7 +682,7 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr,
        goto buffer_done;
 
  unlock_buf_done:
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
 
  buffer_done:
        len = strnlen(buf, PAGE_SIZE);
@@ -1091,14 +1092,14 @@ lpfc_link_state_show(struct device *dev, struct device_attribute *attr,
                        break;
                }
                if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
-                       if (vport->fc_flag & FC_PUBLIC_LOOP)
+                       if (test_bit(FC_PUBLIC_LOOP, &vport->fc_flag))
                                len += scnprintf(buf + len, PAGE_SIZE-len,
                                                "   Public Loop\n");
                        else
                                len += scnprintf(buf + len, PAGE_SIZE-len,
                                                "   Private Loop\n");
                } else {
-                       if (vport->fc_flag & FC_FABRIC) {
+                       if (test_bit(FC_FABRIC, &vport->fc_flag)) {
                                if (phba->sli_rev == LPFC_SLI_REV4 &&
                                    vport->port_type == LPFC_PHYSICAL_PORT &&
                                    phba->sli4_hba.fawwpn_flag &
@@ -1260,7 +1261,8 @@ lpfc_num_discovered_ports_show(struct device *dev,
        struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
 
        return scnprintf(buf, PAGE_SIZE, "%d\n",
-                       vport->fc_map_cnt + vport->fc_unmap_cnt);
+                        atomic_read(&vport->fc_map_cnt) +
+                        atomic_read(&vport->fc_unmap_cnt));
 }
 
 /**
@@ -1289,7 +1291,7 @@ lpfc_issue_lip(struct Scsi_Host *shost)
         * If the link is offline, disabled or BLOCK_MGMT_IO
         * it doesn't make any sense to allow issue_lip
         */
-       if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+       if (test_bit(FC_OFFLINE_MODE, &vport->fc_flag) ||
            (phba->hba_flag & LINK_DISABLED) ||
            (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO))
                return -EPERM;
@@ -1303,8 +1305,8 @@ lpfc_issue_lip(struct Scsi_Host *shost)
        pmboxq->u.mb.mbxCommand = MBX_DOWN_LINK;
        pmboxq->u.mb.mbxOwner = OWN_HOST;
 
-       if ((vport->fc_flag & FC_PT2PT) && (vport->fc_flag & FC_PT2PT_NO_NVME))
-               vport->fc_flag &= ~FC_PT2PT_NO_NVME;
+       if (test_bit(FC_PT2PT, &vport->fc_flag))
+               clear_bit(FC_PT2PT_NO_NVME, &vport->fc_flag);
 
        mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2);
 
@@ -1494,7 +1496,8 @@ lpfc_reset_pci_bus(struct lpfc_hba *phba)
                if (shost) {
                        phba_other =
                                ((struct lpfc_vport *)shost->hostdata)->phba;
-                       if (!(phba_other->pport->fc_flag & FC_OFFLINE_MODE)) {
+                       if (!test_bit(FC_OFFLINE_MODE,
+                                     &phba_other->pport->fc_flag)) {
                                lpfc_printf_log(phba_other, KERN_INFO, LOG_INIT,
                                                "8349 WWPN = 0x%02x%02x%02x%02x"
                                                "%02x%02x%02x%02x is not "
@@ -1549,7 +1552,7 @@ lpfc_selective_reset(struct lpfc_hba *phba)
        if (!phba->cfg_enable_hba_reset)
                return -EACCES;
 
-       if (!(phba->pport->fc_flag & FC_OFFLINE_MODE)) {
+       if (!test_bit(FC_OFFLINE_MODE, &phba->pport->fc_flag)) {
                status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
 
                if (status != 0)
@@ -1688,7 +1691,7 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode)
 {
        struct completion online_compl;
        struct pci_dev *pdev = phba->pcidev;
-       uint32_t before_fc_flag;
+       unsigned long before_fc_flag;
        uint32_t sriov_nr_virtfn;
        uint32_t reg_val;
        int status = 0, rc = 0;
@@ -1759,7 +1762,7 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode)
        }
 
        /* keep the original port state */
-       if (before_fc_flag & FC_OFFLINE_MODE) {
+       if (test_bit(FC_OFFLINE_MODE, &before_fc_flag)) {
                if (phba->fw_dump_cmpl)
                        phba->fw_dump_cmpl = NULL;
                goto out;
@@ -2097,7 +2100,7 @@ board_mode_out:
                        *board_mode_str = '\0';
                lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT,
                                 "3097 Failed \"%s\", status(%d), "
-                                "fc_flag(x%x)\n",
+                                "fc_flag(x%lx)\n",
                                 buf, status, phba->pport->fc_flag);
                return status;
        }
@@ -2156,7 +2159,7 @@ lpfc_get_hba_info(struct lpfc_hba *phba,
        pmb->mbxOwner = OWN_HOST;
        pmboxq->ctx_buf = NULL;
 
-       if (phba->pport->fc_flag & FC_OFFLINE_MODE)
+       if (test_bit(FC_OFFLINE_MODE, &phba->pport->fc_flag))
                rc = MBX_NOT_FINISHED;
        else
                rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
@@ -3764,15 +3767,14 @@ lpfc_nodev_tmo_init(struct lpfc_vport *vport, int val)
 static void
 lpfc_update_rport_devloss_tmo(struct lpfc_vport *vport)
 {
-       struct Scsi_Host  *shost;
        struct lpfc_nodelist  *ndlp;
+       unsigned long iflags;
 #if (IS_ENABLED(CONFIG_NVME_FC))
        struct lpfc_nvme_rport *rport;
        struct nvme_fc_remote_port *remoteport = NULL;
 #endif
 
-       shost = lpfc_shost_from_vport(vport);
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
        list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
                if (ndlp->rport)
                        ndlp->rport->dev_loss_tmo = vport->cfg_devloss_tmo;
@@ -3787,7 +3789,7 @@ lpfc_update_rport_devloss_tmo(struct lpfc_vport *vport)
                                                       vport->cfg_devloss_tmo);
 #endif
        }
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
 }
 
 /**
@@ -3973,8 +3975,8 @@ lpfc_vport_param_init(tgt_queue_depth, LPFC_MAX_TGT_QDEPTH,
 static int
 lpfc_tgt_queue_depth_set(struct lpfc_vport *vport, uint val)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_nodelist *ndlp;
+       unsigned long iflags;
 
        if (!lpfc_rangecheck(val, LPFC_MIN_TGT_QDEPTH, LPFC_MAX_TGT_QDEPTH))
                return -EINVAL;
@@ -3982,14 +3984,13 @@ lpfc_tgt_queue_depth_set(struct lpfc_vport *vport, uint val)
        if (val == vport->cfg_tgt_queue_depth)
                return 0;
 
-       spin_lock_irq(shost->host_lock);
        vport->cfg_tgt_queue_depth = val;
 
        /* Next loop thru nodelist and change cmd_qdepth */
+       spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
        list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp)
                ndlp->cmd_qdepth = vport->cfg_tgt_queue_depth;
-
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
        return 0;
 }
 
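The fc_nodes walks above also move off the Scsi_Host-wide host_lock
onto the new per-vport fc_nodes_list_lock, and from spin_lock_irq() to
the irqsave variant, which restores the caller's interrupt state
instead of unconditionally re-enabling interrupts and is therefore
safe from any context. The recurring shape, sketched:

        struct lpfc_nodelist *ndlp;
        unsigned long iflags;

        spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
        list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
                /* touch only state the list lock protects */
        }
        spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
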
@@ -5235,8 +5236,8 @@ lpfc_vport_param_show(max_scsicmpl_time);
 static int
 lpfc_max_scsicmpl_time_set(struct lpfc_vport *vport, int val)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_nodelist *ndlp, *next_ndlp;
+       unsigned long iflags;
 
        if (val == vport->cfg_max_scsicmpl_time)
                return 0;
@@ -5244,13 +5245,13 @@ lpfc_max_scsicmpl_time_set(struct lpfc_vport *vport, int val)
                return -EINVAL;
        vport->cfg_max_scsicmpl_time = val;
 
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
        list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
                if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
                        continue;
                ndlp->cmd_qdepth = vport->cfg_tgt_queue_depth;
        }
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
        return 0;
 }
 lpfc_vport_param_store(max_scsicmpl_time);
@@ -6200,7 +6201,7 @@ sysfs_ctlreg_write(struct file *filp, struct kobject *kobj,
        if (memcmp(buf, LPFC_REG_WRITE_KEY, LPFC_REG_WRITE_KEY_SIZE))
                return -EINVAL;
 
-       if (!(vport->fc_flag & FC_OFFLINE_MODE))
+       if (!test_bit(FC_OFFLINE_MODE, &vport->fc_flag))
                return -EPERM;
 
        spin_lock_irq(&phba->hbalock);
@@ -6429,26 +6430,22 @@ lpfc_get_host_port_type(struct Scsi_Host *shost)
        struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
        struct lpfc_hba   *phba = vport->phba;
 
-       spin_lock_irq(shost->host_lock);
-
        if (vport->port_type == LPFC_NPIV_PORT) {
                fc_host_port_type(shost) = FC_PORTTYPE_NPIV;
        } else if (lpfc_is_link_up(phba)) {
                if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
-                       if (vport->fc_flag & FC_PUBLIC_LOOP)
+                       if (test_bit(FC_PUBLIC_LOOP, &vport->fc_flag))
                                fc_host_port_type(shost) = FC_PORTTYPE_NLPORT;
                        else
                                fc_host_port_type(shost) = FC_PORTTYPE_LPORT;
                } else {
-                       if (vport->fc_flag & FC_FABRIC)
+                       if (test_bit(FC_FABRIC, &vport->fc_flag))
                                fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
                        else
                                fc_host_port_type(shost) = FC_PORTTYPE_PTP;
                }
        } else
                fc_host_port_type(shost) = FC_PORTTYPE_UNKNOWN;
-
-       spin_unlock_irq(shost->host_lock);
 }
 
 /**
@@ -6461,9 +6458,7 @@ lpfc_get_host_port_state(struct Scsi_Host *shost)
        struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
        struct lpfc_hba   *phba = vport->phba;
 
-       spin_lock_irq(shost->host_lock);
-
-       if (vport->fc_flag & FC_OFFLINE_MODE)
+       if (test_bit(FC_OFFLINE_MODE, &vport->fc_flag))
                fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE;
        else {
                switch (phba->link_state) {
@@ -6490,8 +6485,6 @@ lpfc_get_host_port_state(struct Scsi_Host *shost)
                        break;
                }
        }
-
-       spin_unlock_irq(shost->host_lock);
 }
 
 /**
@@ -6504,8 +6497,6 @@ lpfc_get_host_speed(struct Scsi_Host *shost)
        struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
        struct lpfc_hba   *phba = vport->phba;
 
-       spin_lock_irq(shost->host_lock);
-
        if ((lpfc_is_link_up(phba)) && (!(phba->hba_flag & HBA_FCOE_MODE))) {
                switch(phba->fc_linkspeed) {
                case LPFC_LINK_SPEED_1GHZ:
@@ -6568,8 +6559,6 @@ lpfc_get_host_speed(struct Scsi_Host *shost)
                }
        } else
                fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN;
-
-       spin_unlock_irq(shost->host_lock);
 }
 
 /**
@@ -6583,19 +6572,15 @@ lpfc_get_host_fabric_name (struct Scsi_Host *shost)
        struct lpfc_hba   *phba = vport->phba;
        u64 node_name;
 
-       spin_lock_irq(shost->host_lock);
-
-       if ((vport->port_state > LPFC_FLOGI) &&
-           ((vport->fc_flag & FC_FABRIC) ||
-            ((phba->fc_topology == LPFC_TOPOLOGY_LOOP) &&
-             (vport->fc_flag & FC_PUBLIC_LOOP))))
+       if (vport->port_state > LPFC_FLOGI &&
+           (test_bit(FC_FABRIC, &vport->fc_flag) ||
+            (phba->fc_topology == LPFC_TOPOLOGY_LOOP &&
+             test_bit(FC_PUBLIC_LOOP, &vport->fc_flag))))
                node_name = wwn_to_u64(phba->fc_fabparam.nodeName.u.wwn);
        else
                /* fabric is local port if there is no F/FL_Port */
                node_name = 0;
 
-       spin_unlock_irq(shost->host_lock);
-
        fc_host_fabric_name(shost) = node_name;
 }
 
@@ -6646,7 +6631,7 @@ lpfc_get_stats(struct Scsi_Host *shost)
        pmboxq->ctx_buf = NULL;
        pmboxq->vport = vport;
 
-       if (vport->fc_flag & FC_OFFLINE_MODE) {
+       if (test_bit(FC_OFFLINE_MODE, &vport->fc_flag)) {
                rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
                if (rc != MBX_SUCCESS) {
                        mempool_free(pmboxq, phba->mbox_mem_pool);
@@ -6699,7 +6684,7 @@ lpfc_get_stats(struct Scsi_Host *shost)
        pmboxq->ctx_buf = NULL;
        pmboxq->vport = vport;
 
-       if (vport->fc_flag & FC_OFFLINE_MODE) {
+       if (test_bit(FC_OFFLINE_MODE, &vport->fc_flag)) {
                rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
                if (rc != MBX_SUCCESS) {
                        mempool_free(pmboxq, phba->mbox_mem_pool);
@@ -6786,8 +6771,8 @@ lpfc_reset_stats(struct Scsi_Host *shost)
        pmboxq->ctx_buf = NULL;
        pmboxq->vport = vport;
 
-       if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-               (!(psli->sli_flag & LPFC_SLI_ACTIVE))) {
+       if (test_bit(FC_OFFLINE_MODE, &vport->fc_flag) ||
+           !(psli->sli_flag & LPFC_SLI_ACTIVE)) {
                rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
                if (rc != MBX_SUCCESS) {
                        mempool_free(pmboxq, phba->mbox_mem_pool);
@@ -6808,8 +6793,8 @@ lpfc_reset_stats(struct Scsi_Host *shost)
        pmboxq->ctx_buf = NULL;
        pmboxq->vport = vport;
 
-       if ((vport->fc_flag & FC_OFFLINE_MODE) ||
-           (!(psli->sli_flag & LPFC_SLI_ACTIVE))) {
+       if (test_bit(FC_OFFLINE_MODE, &vport->fc_flag) ||
+           !(psli->sli_flag & LPFC_SLI_ACTIVE)) {
                rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
                if (rc != MBX_SUCCESS) {
                        mempool_free(pmboxq, phba->mbox_mem_pool);
@@ -6868,17 +6853,19 @@ lpfc_get_node_by_target(struct scsi_target *starget)
        struct Scsi_Host  *shost = dev_to_shost(starget->dev.parent);
        struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
        struct lpfc_nodelist *ndlp;
+       unsigned long iflags;
 
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
        /* Search for this, mapped, target ID */
        list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
                if (ndlp->nlp_state == NLP_STE_MAPPED_NODE &&
                    starget->id == ndlp->nlp_sid) {
-                       spin_unlock_irq(shost->host_lock);
+                       spin_unlock_irqrestore(&vport->fc_nodes_list_lock,
+                                              iflags);
                        return ndlp;
                }
        }
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
        return NULL;
 }
 
index 595dca92e8db5ad76dffddac8181fd561408372b..d80e6e81053b0a9d61aec5574d2d5d3a2d241e64 100644 (file)
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2009-2015 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -1977,7 +1977,7 @@ lpfc_sli4_bsg_set_loopback_mode(struct lpfc_hba *phba, int mode,
 static int
 lpfc_sli4_diag_fcport_reg_setup(struct lpfc_hba *phba)
 {
-       if (phba->pport->fc_flag & FC_VFI_REGISTERED) {
+       if (test_bit(FC_VFI_REGISTERED, &phba->pport->fc_flag)) {
                lpfc_printf_log(phba, KERN_WARNING, LOG_LIBDFC,
                                "3136 Port still had vfi registered: "
                                "mydid:x%x, fcfi:%d, vfi:%d, vpi:%d\n",
@@ -3448,7 +3448,7 @@ static int lpfc_bsg_check_cmd_access(struct lpfc_hba *phba,
        case MBX_RUN_DIAGS:
        case MBX_RESTART:
        case MBX_SET_MASK:
-               if (!(vport->fc_flag & FC_OFFLINE_MODE)) {
+               if (!test_bit(FC_OFFLINE_MODE, &vport->fc_flag)) {
                        lpfc_printf_log(phba, KERN_WARNING, LOG_LIBDFC,
                                "2743 Command 0x%x is illegal in on-line "
                                "state\n",
@@ -4886,7 +4886,7 @@ lpfc_bsg_issue_mbox(struct lpfc_hba *phba, struct bsg_job *job,
        dd_data->context_un.mbox.outExtWLen = mbox_req->outExtWLen;
        job->dd_data = dd_data;
 
-       if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+       if (test_bit(FC_OFFLINE_MODE, &vport->fc_flag) ||
            (!(phba->sli.sli_flag & LPFC_SLI_ACTIVE))) {
                rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
                if (rc != MBX_SUCCESS) {
index baae1f8279e0cbe66128d815974803b56783f09e..8cc08e58dc05e82f7d9a03f9f02b81279b8a03f1 100644 (file)
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -265,7 +265,7 @@ ct_free_mp:
        kfree(mp);
 ct_exit:
        lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
-                        "6440 Unsol CT: Rsp err %d Data: x%x\n",
+                        "6440 Unsol CT: Rsp err %d Data: x%lx\n",
                         rc, vport->fc_flag);
 }
 
@@ -298,7 +298,7 @@ lpfc_ct_handle_mibreq(struct lpfc_hba *phba, struct lpfc_iocbq *ctiocbq)
        }
 
        /* Ignore traffic received during vport shutdown */
-       if (vport->fc_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &vport->load_flag))
                return;
 
        ndlp = lpfc_findnode_did(vport, did);
@@ -723,7 +723,7 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
 
                if (ndlp) {
                        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
-                               "Parse GID_FTrsp: did:x%x flg:x%x x%x",
+                               "Parse GID_FTrsp: did:x%x flg:x%lx x%x",
                                Did, ndlp->nlp_flag, vport->fc_flag);
 
                        /* By default, the driver expects to support FCP FC4 */
@@ -735,7 +735,7 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
 
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "0238 Process x%06x NameServer Rsp "
-                                        "Data: x%x x%x x%x x%x x%x\n", Did,
+                                        "Data: x%x x%x x%x x%lx x%x\n", Did,
                                         ndlp->nlp_flag, ndlp->nlp_fc4_type,
                                         ndlp->nlp_state, vport->fc_flag,
                                         vport->fc_rscn_id_cnt);
@@ -751,20 +751,20 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
                        }
                } else {
                        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
-                               "Skip1 GID_FTrsp: did:x%x flg:x%x cnt:%d",
+                               "Skip1 GID_FTrsp: did:x%x flg:x%lx cnt:%d",
                                Did, vport->fc_flag, vport->fc_rscn_id_cnt);
 
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "0239 Skip x%06x NameServer Rsp "
-                                        "Data: x%x x%x x%px\n",
+                                        "Data: x%lx x%x x%px\n",
                                         Did, vport->fc_flag,
                                         vport->fc_rscn_id_cnt, ndlp);
                }
        } else {
-               if (!(vport->fc_flag & FC_RSCN_MODE) ||
+               if (!test_bit(FC_RSCN_MODE, &vport->fc_flag) ||
                    lpfc_rscn_payload_check(vport, Did)) {
                        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
-                               "Query GID_FTrsp: did:x%x flg:x%x cnt:%d",
+                               "Query GID_FTrsp: did:x%x flg:x%lx cnt:%d",
                                Did, vport->fc_flag, vport->fc_rscn_id_cnt);
 
                        /*
@@ -787,12 +787,12 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type)
                                lpfc_setup_disc_node(vport, Did);
                } else {
                        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
-                               "Skip2 GID_FTrsp: did:x%x flg:x%x cnt:%d",
+                               "Skip2 GID_FTrsp: did:x%x flg:x%lx cnt:%d",
                                Did, vport->fc_flag, vport->fc_rscn_id_cnt);
 
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "0245 Skip x%06x NameServer Rsp "
-                                        "Data: x%x x%x\n", Did,
+                                        "Data: x%lx x%x\n", Did,
                                         vport->fc_flag,
                                         vport->fc_rscn_id_cnt);
                }
@@ -914,7 +914,6 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                        struct lpfc_iocbq *rspiocb)
 {
        struct lpfc_vport *vport = cmdiocb->vport;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_dmabuf *outp;
        struct lpfc_dmabuf *inp;
        struct lpfc_sli_ct_request *CTrsp;
@@ -943,9 +942,9 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                goto out;
        }
 
-       /* Don't bother processing response if vport is being torn down. */
-       if (vport->load_flag & FC_UNLOADING) {
-               if (vport->fc_flag & FC_RSCN_MODE)
+       /* Skip processing response on pport if unloading */
+       if (vport == phba->pport && test_bit(FC_UNLOADING, &vport->load_flag)) {
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag))
                        lpfc_els_flush_rscn(vport);
                goto out;
        }
@@ -953,7 +952,7 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
        if (lpfc_els_chk_latt(vport)) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "0216 Link event during NS query\n");
-               if (vport->fc_flag & FC_RSCN_MODE)
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag))
                        lpfc_els_flush_rscn(vport);
                lpfc_vport_set_state(vport, FC_VPORT_FAILED);
                goto out;
@@ -961,22 +960,18 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
        if (lpfc_error_lost_link(vport, ulp_status, ulp_word4)) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "0226 NS query failed due to link event: "
-                                "ulp_status x%x ulp_word4 x%x fc_flag x%x "
+                                "ulp_status x%x ulp_word4 x%x fc_flag x%lx "
                                 "port_state x%x gidft_inp x%x\n",
                                 ulp_status, ulp_word4, vport->fc_flag,
                                 vport->port_state, vport->gidft_inp);
-               if (vport->fc_flag & FC_RSCN_MODE)
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag))
                        lpfc_els_flush_rscn(vport);
                if (vport->gidft_inp)
                        vport->gidft_inp--;
                goto out;
        }
 
-       spin_lock_irq(shost->host_lock);
-       if (vport->fc_flag & FC_RSCN_DEFERRED) {
-               vport->fc_flag &= ~FC_RSCN_DEFERRED;
-               spin_unlock_irq(shost->host_lock);
-
+       if (test_and_clear_bit(FC_RSCN_DEFERRED, &vport->fc_flag)) {
                /* This is a GID_FT completing so the gidft_inp counter was
                 * incremented before the GID_FT was issued to the wire.
                 */
@@ -988,13 +983,12 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                 * Re-issue the NS cmd
                 */
                lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
-                                "0151 Process Deferred RSCN Data: x%x x%x\n",
+                                "0151 Process Deferred RSCN Data: x%lx x%x\n",
                                 vport->fc_flag, vport->fc_rscn_id_cnt);
                lpfc_els_handle_rscn(vport);
 
                goto out;
        }
-       spin_unlock_irq(shost->host_lock);
 
        if (ulp_status) {
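
The deferred-RSCN handling above shows the neatest payoff of the flag
conversion: a lock/test/clear/unlock sequence collapses into one atomic
read-modify-write. In isolation:

        /* before: lock; if (flag & F) { flag &= ~F; unlock; ... }
         * after:  test and clear in a single atomic step
         */
        if (test_and_clear_bit(FC_RSCN_DEFERRED, &vport->fc_flag)) {
                /* the bit was set and is now clear */
        }
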
                /* Check for retry */
@@ -1018,7 +1012,7 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                        vport->gidft_inp--;
                        }
                }
-               if (vport->fc_flag & FC_RSCN_MODE)
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag))
                        lpfc_els_flush_rscn(vport);
                lpfc_vport_set_state(vport, FC_VPORT_FAILED);
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@@ -1031,7 +1025,7 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                if (CTrsp->CommandResponse.bits.CmdRsp ==
                    cpu_to_be16(SLI_CT_RESPONSE_FS_ACC)) {
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
-                                        "0208 NameServer Rsp Data: x%x x%x "
+                                        "0208 NameServer Rsp Data: x%lx x%x "
                                         "x%x x%x sz x%x\n",
                                         vport->fc_flag,
                                         CTreq->un.gid.Fc4Type,
@@ -1051,7 +1045,7 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                lpfc_printf_vlog(vport, KERN_INFO,
                                        LOG_DISCOVERY,
                                        "0269 No NameServer Entries "
-                                       "Data: x%x x%x x%x x%x\n",
+                                       "Data: x%x x%x x%x x%lx\n",
                                        be16_to_cpu(CTrsp->CommandResponse.bits.CmdRsp),
                                        (uint32_t) CTrsp->ReasonCode,
                                        (uint32_t) CTrsp->Explanation,
@@ -1066,7 +1060,7 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                lpfc_printf_vlog(vport, KERN_INFO,
                                        LOG_DISCOVERY,
                                        "0240 NameServer Rsp Error "
-                                       "Data: x%x x%x x%x x%x\n",
+                                       "Data: x%x x%x x%x x%lx\n",
                                        be16_to_cpu(CTrsp->CommandResponse.bits.CmdRsp),
                                        (uint32_t) CTrsp->ReasonCode,
                                        (uint32_t) CTrsp->Explanation,
@@ -1084,7 +1078,7 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                        /* NameServer Rsp Error */
                        lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
                                        "0241 NameServer Rsp Error "
-                                       "Data: x%x x%x x%x x%x\n",
+                                       "Data: x%x x%x x%x x%lx\n",
                                        be16_to_cpu(CTrsp->CommandResponse.bits.CmdRsp),
                                        (uint32_t) CTrsp->ReasonCode,
                                        (uint32_t) CTrsp->Explanation,
@@ -1113,14 +1107,13 @@ lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                 * current driver state.
                 */
                if (vport->port_state >= LPFC_DISC_AUTH) {
-                       if (vport->fc_flag & FC_RSCN_MODE) {
+                       if (test_bit(FC_RSCN_MODE, &vport->fc_flag)) {
                                lpfc_els_flush_rscn(vport);
-                               spin_lock_irq(shost->host_lock);
-                               vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */
-                               spin_unlock_irq(shost->host_lock);
-                       }
-                       else
+                               /* RSCN still */
+                               set_bit(FC_RSCN_MODE, &vport->fc_flag);
+                       } else {
                                lpfc_els_flush_rscn(vport);
+                       }
                }
 
                lpfc_disc_start(vport);
@@ -1136,7 +1129,6 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                        struct lpfc_iocbq *rspiocb)
 {
        struct lpfc_vport *vport = cmdiocb->vport;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_dmabuf *outp;
        struct lpfc_dmabuf *inp;
        struct lpfc_sli_ct_request *CTrsp;
@@ -1166,9 +1158,9 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                goto out;
        }
 
-       /* Don't bother processing response if vport is being torn down. */
-       if (vport->load_flag & FC_UNLOADING) {
-               if (vport->fc_flag & FC_RSCN_MODE)
+       /* Skip processing response on pport if unloading */
+       if (vport == phba->pport && test_bit(FC_UNLOADING, &vport->load_flag)) {
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag))
                        lpfc_els_flush_rscn(vport);
                goto out;
        }
@@ -1176,7 +1168,7 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
        if (lpfc_els_chk_latt(vport)) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "4108 Link event during NS query\n");
-               if (vport->fc_flag & FC_RSCN_MODE)
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag))
                        lpfc_els_flush_rscn(vport);
                lpfc_vport_set_state(vport, FC_VPORT_FAILED);
                goto out;
@@ -1184,22 +1176,18 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
        if (lpfc_error_lost_link(vport, ulp_status, ulp_word4)) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "4166 NS query failed due to link event: "
-                                "ulp_status x%x ulp_word4 x%x fc_flag x%x "
+                                "ulp_status x%x ulp_word4 x%x fc_flag x%lx "
                                 "port_state x%x gidft_inp x%x\n",
                                 ulp_status, ulp_word4, vport->fc_flag,
                                 vport->port_state, vport->gidft_inp);
-               if (vport->fc_flag & FC_RSCN_MODE)
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag))
                        lpfc_els_flush_rscn(vport);
                if (vport->gidft_inp)
                        vport->gidft_inp--;
                goto out;
        }
 
-       spin_lock_irq(shost->host_lock);
-       if (vport->fc_flag & FC_RSCN_DEFERRED) {
-               vport->fc_flag &= ~FC_RSCN_DEFERRED;
-               spin_unlock_irq(shost->host_lock);
-
+       if (test_and_clear_bit(FC_RSCN_DEFERRED, &vport->fc_flag)) {
                /* This is a GID_PT completing so the gidft_inp counter was
                 * incremented before the GID_PT was issued to the wire.
                 */
@@ -1211,13 +1199,12 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                 * Re-issue the NS cmd
                 */
                lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
-                                "4167 Process Deferred RSCN Data: x%x x%x\n",
+                                "4167 Process Deferred RSCN Data: x%lx x%x\n",
                                 vport->fc_flag, vport->fc_rscn_id_cnt);
                lpfc_els_handle_rscn(vport);
 
                goto out;
        }
-       spin_unlock_irq(shost->host_lock);
 
        if (ulp_status) {
                /* Check for retry */
@@ -1237,7 +1224,7 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                        vport->gidft_inp--;
                        }
                }
-               if (vport->fc_flag & FC_RSCN_MODE)
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag))
                        lpfc_els_flush_rscn(vport);
                lpfc_vport_set_state(vport, FC_VPORT_FAILED);
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@@ -1250,7 +1237,7 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                if (be16_to_cpu(CTrsp->CommandResponse.bits.CmdRsp) ==
                    SLI_CT_RESPONSE_FS_ACC) {
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
-                                        "4105 NameServer Rsp Data: x%x x%x "
+                                        "4105 NameServer Rsp Data: x%lx x%x "
                                         "x%x x%x sz x%x\n",
                                         vport->fc_flag,
                                         CTreq->un.gid.Fc4Type,
@@ -1270,7 +1257,7 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                lpfc_printf_vlog(
                                        vport, KERN_INFO, LOG_DISCOVERY,
                                        "4106 No NameServer Entries "
-                                       "Data: x%x x%x x%x x%x\n",
+                                       "Data: x%x x%x x%x x%lx\n",
                                        be16_to_cpu(CTrsp->CommandResponse.bits.CmdRsp),
                                        (uint32_t)CTrsp->ReasonCode,
                                        (uint32_t)CTrsp->Explanation,
@@ -1286,7 +1273,7 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                lpfc_printf_vlog(
                                        vport, KERN_INFO, LOG_DISCOVERY,
                                        "4107 NameServer Rsp Error "
-                                       "Data: x%x x%x x%x x%x\n",
+                                       "Data: x%x x%x x%x x%lx\n",
                                        be16_to_cpu(CTrsp->CommandResponse.bits.CmdRsp),
                                        (uint32_t)CTrsp->ReasonCode,
                                        (uint32_t)CTrsp->Explanation,
@@ -1303,7 +1290,7 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                        /* NameServer Rsp Error */
                        lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
                                         "4109 NameServer Rsp Error "
-                                        "Data: x%x x%x x%x x%x\n",
+                                        "Data: x%x x%x x%x x%lx\n",
                                         be16_to_cpu(CTrsp->CommandResponse.bits.CmdRsp),
                                         (uint32_t)CTrsp->ReasonCode,
                                         (uint32_t)CTrsp->Explanation,
@@ -1333,11 +1320,10 @@ lpfc_cmpl_ct_cmd_gid_pt(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                 * current driver state.
                 */
                if (vport->port_state >= LPFC_DISC_AUTH) {
-                       if (vport->fc_flag & FC_RSCN_MODE) {
+                       if (test_bit(FC_RSCN_MODE, &vport->fc_flag)) {
                                lpfc_els_flush_rscn(vport);
-                               spin_lock_irq(shost->host_lock);
-                               vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */
-                               spin_unlock_irq(shost->host_lock);
+                               /* RSCN still */
+                               set_bit(FC_RSCN_MODE, &vport->fc_flag);
                        } else {
                                lpfc_els_flush_rscn(vport);
                        }
@@ -1355,7 +1341,6 @@ lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                        struct lpfc_iocbq *rspiocb)
 {
        struct lpfc_vport *vport = cmdiocb->vport;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_dmabuf *inp = cmdiocb->cmd_dmabuf;
        struct lpfc_dmabuf *outp = cmdiocb->rsp_dmabuf;
        struct lpfc_sli_ct_request *CTrsp;
@@ -1445,7 +1430,7 @@ lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                }
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
                                 "0267 NameServer GFF Rsp "
-                                "x%x Error (%d %d) Data: x%x x%x\n",
+                                "x%x Error (%d %d) Data: x%lx x%x\n",
                                 did, ulp_status, ulp_word4,
                                 vport->fc_flag, vport->fc_rscn_id_cnt);
        }
@@ -1455,13 +1440,13 @@ lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
        if (ndlp) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "0242 Process x%x GFF "
-                                "NameServer Rsp Data: x%x x%x x%x\n",
+                                "NameServer Rsp Data: x%x x%lx x%x\n",
                                 did, ndlp->nlp_flag, vport->fc_flag,
                                 vport->fc_rscn_id_cnt);
        } else {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "0243 Skip x%x GFF "
-                                "NameServer Rsp Data: x%x x%x\n", did,
+                                "NameServer Rsp Data: x%lx x%x\n", did,
                                 vport->fc_flag, vport->fc_rscn_id_cnt);
        }
 out:
@@ -1480,14 +1465,13 @@ out:
                 * current driver state.
                 */
                if (vport->port_state >= LPFC_DISC_AUTH) {
-                       if (vport->fc_flag & FC_RSCN_MODE) {
+                       if (test_bit(FC_RSCN_MODE, &vport->fc_flag)) {
                                lpfc_els_flush_rscn(vport);
-                               spin_lock_irq(shost->host_lock);
-                               vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */
-                               spin_unlock_irq(shost->host_lock);
-                       }
-                       else
+                               /* RSCN still */
+                               set_bit(FC_RSCN_MODE, &vport->fc_flag);
+                       } else {
                                lpfc_els_flush_rscn(vport);
+                       }
                }
                lpfc_disc_start(vport);
        }
@@ -1853,11 +1837,10 @@ static uint32_t
 lpfc_find_map_node(struct lpfc_vport *vport)
 {
        struct lpfc_nodelist *ndlp, *next_ndlp;
-       struct Scsi_Host  *shost;
+       unsigned long iflags;
        uint32_t cnt = 0;
 
-       shost = lpfc_shost_from_vport(vport);
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
        list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
                if (ndlp->nlp_type & NLP_FABRIC)
                        continue;
@@ -1865,7 +1848,7 @@ lpfc_find_map_node(struct lpfc_vport *vport)
                    (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE))
                        cnt++;
        }
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
        return cnt;
 }
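
Where a real critical section remains (walking the vport node list), the
broad Scsi_Host lock is replaced by a dedicated per-vport lock taken with
the irqsave variant, which is safe regardless of the caller's interrupt
state. A sketch of the pattern under those assumptions (demo_* names are
illustrative):

        #include <linux/spinlock.h>
        #include <linux/list.h>

        struct demo_vport {
                spinlock_t nodes_lock;          /* guards only the list */
                struct list_head nodes;
        };

        static int demo_count_nodes(struct demo_vport *vp)
        {
                struct list_head *pos;
                unsigned long iflags;
                int cnt = 0;

                /* spin_lock_irqsave() restores the previous IRQ state
                 * on unlock, unlike spin_lock_irq()/spin_unlock_irq(). */
                spin_lock_irqsave(&vp->nodes_lock, iflags);
                list_for_each(pos, &vp->nodes)
                        cnt++;
                spin_unlock_irqrestore(&vp->nodes_lock, iflags);
                return cnt;
        }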
 
@@ -1950,7 +1933,7 @@ lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
 
        /* NameServer Req */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
-                        "0236 NameServer Req Data: x%x x%x x%x x%x\n",
+                        "0236 NameServer Req Data: x%x x%lx x%x x%x\n",
                         cmdcode, vport->fc_flag, vport->fc_rscn_id_cnt,
                         context);
 
@@ -2167,7 +2150,8 @@ ns_cmd_free_mp:
        kfree(mp);
 ns_cmd_exit:
        lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
-                        "0266 Issue NameServer Req x%x err %d Data: x%x x%x\n",
+                        "0266 Issue NameServer Req x%x err %d Data: x%lx "
+                        "x%x\n",
                         cmdcode, rc, vport->fc_flag, vport->fc_rscn_id_cnt);
        return 1;
 }
@@ -2453,7 +2437,7 @@ lpfc_fdmi_change_check(struct lpfc_vport *vport)
                return;
 
        /* Must be connected to a Fabric */
-       if (!(vport->fc_flag & FC_FABRIC))
+       if (!test_bit(FC_FABRIC, &vport->fc_flag))
                return;
 
        ndlp = lpfc_findnode_did(vport, FDMI_DID);
@@ -2569,9 +2553,9 @@ lpfc_fdmi_set_attr_string(void *attr, uint16_t attrtype, char *attrstring)
         * 64 bytes or less.
         */
 
-       strncpy(ae->value_string, attrstring, sizeof(ae->value_string));
+       strscpy(ae->value_string, attrstring, sizeof(ae->value_string));
        len = strnlen(ae->value_string, sizeof(ae->value_string));
-       /* round string length to a 32bit boundary. Ensure there's a NULL */
+       /* round string length to a 32bit boundary */
        len += (len & 3) ? (4 - (len & 3)) : 4;
        /* size is Type/Len (4 bytes) plus string length */
        size = FOURBYTES + len;
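
The strncpy() to strscpy() swap is why the "Ensure there's a NULL" half of
the old comment can go: strscpy() always NUL-terminates the destination
(and does not zero-pad the remainder), returning the copied length or
-E2BIG on truncation. A hedged sketch, assuming kernel context
(demo_copy_attr is illustrative, not an lpfc helper):

        #include <linux/string.h>

        static size_t demo_copy_attr(char *dst, size_t dstsz,
                                     const char *src)
        {
                ssize_t len = strscpy(dst, src, dstsz);

                if (len < 0)            /* -E2BIG: src was truncated */
                        len = dstsz - 1;
                return len;
        }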
@@ -3233,7 +3217,7 @@ lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 
        /* FDMI request */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
-                        "0218 FDMI Request x%x mask x%x Data: x%x x%x x%x\n",
+                        "0218 FDMI Request x%x mask x%x Data: x%x x%lx x%x\n",
                         cmdcode, new_mask, vport->fdmi_port_mask,
                         vport->fc_flag, vport->port_state);
 
@@ -3470,15 +3454,8 @@ lpfc_delayed_disc_tmo(struct timer_list *t)
 void
 lpfc_delayed_disc_timeout_handler(struct lpfc_vport *vport)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
-
-       spin_lock_irq(shost->host_lock);
-       if (!(vport->fc_flag & FC_DISC_DELAYED)) {
-               spin_unlock_irq(shost->host_lock);
+       if (!test_and_clear_bit(FC_DISC_DELAYED, &vport->fc_flag))
                return;
-       }
-       vport->fc_flag &= ~FC_DISC_DELAYED;
-       spin_unlock_irq(shost->host_lock);
 
        lpfc_do_scr_ns_plogi(vport->phba, vport);
 }
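
Here the conversion does more than drop a lock: the open-coded
test-then-clear of FC_DISC_DELAYED becomes one test_and_clear_bit(), a
single atomic read-modify-write, so two racing callers can no longer both
observe the flag set and both proceed. A minimal sketch of that primitive,
assuming kernel context:

        #include <linux/bitops.h>

        enum { DEMO_DISC_DELAYED };

        static bool demo_take_delayed_disc(unsigned long *flags)
        {
                /* Atomically returns the old bit value and clears it;
                 * exactly one of N racing callers sees "true". */
                return test_and_clear_bit(DEMO_DISC_DELAYED, flags);
        }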
@@ -3606,7 +3583,8 @@ lpfc_cmpl_ct_cmd_vmid(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                    (ctrsp->Explanation != SLI_CT_APP_ID_NOT_AVAILABLE)) {
                        /* If DALLAPP_ID failed retry later */
                        if (cmd == SLI_CTAS_DALLAPP_ID)
-                               vport->load_flag |= FC_DEREGISTER_ALL_APP_ID;
+                               set_bit(FC_DEREGISTER_ALL_APP_ID,
+                                       &vport->load_flag);
                        goto free_res;
                }
        }
@@ -3662,7 +3640,7 @@ lpfc_cmpl_ct_cmd_vmid(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                if (!hash_empty(vport->hash_table))
                        hash_for_each(vport->hash_table, bucket, cur, hnode)
                                hash_del(&cur->hnode);
-               vport->load_flag |= FC_ALLOW_VMID;
+               set_bit(FC_ALLOW_VMID, &vport->load_flag);
                break;
        default:
                lpfc_printf_vlog(vport, KERN_DEBUG, LOG_DISCOVERY,
@@ -3729,7 +3707,7 @@ lpfc_vmid_cmd(struct lpfc_vport *vport,
        INIT_LIST_HEAD(&bmp->list);
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
-                        "3275 VMID Request Data: x%x x%x x%x\n",
+                        "3275 VMID Request Data: x%lx x%x x%x\n",
                         vport->fc_flag, vport->port_state, cmdcode);
        ctreq = (struct lpfc_sli_ct_request *)mp->virt;
        data = mp->virt;
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index ea9b42225e629dd5840e29c8bce622aef7736444..ab5af10c8a16ca597e1fbde60a3c4562981ea03c 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2007-2015 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -806,10 +806,10 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
 {
        int len = 0;
        int i, iocnt, outio, cnt;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba  *phba = vport->phba;
        struct lpfc_nodelist *ndlp;
        unsigned char *statep;
+       unsigned long iflags;
        struct nvme_fc_local_port *localport;
        struct nvme_fc_remote_port *nrport = NULL;
        struct lpfc_nvme_rport *rport;
@@ -818,7 +818,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
        outio = 0;
 
        len += scnprintf(buf+len, size-len, "\nFCP Nodelist Entries ...\n");
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
        list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
                iocnt = 0;
                if (!cnt) {
@@ -908,7 +908,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
                                         ndlp->nlp_defer_did);
                len +=  scnprintf(buf+len, size-len, "\n");
        }
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
 
        len += scnprintf(buf + len, size - len,
                        "\nOutstanding IO x%x\n",  outio);
@@ -940,8 +940,6 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
        if (!localport)
                goto out_exit;
 
-       spin_lock_irq(shost->host_lock);
-
        /* Port state is only one of two values for now. */
        if (localport->port_id)
                statep = "ONLINE";
@@ -953,6 +951,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
                        localport->port_id, statep);
 
        len += scnprintf(buf + len, size - len, "\tRport List:\n");
+       spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
        list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
                /* local short-hand pointer. */
                spin_lock(&ndlp->lock);
@@ -1006,8 +1005,7 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
                /* Terminate the string. */
                len +=  scnprintf(buf + len, size - len, "\n");
        }
-
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
  out_exit:
        return len;
 }
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 4d723200690a4bd7d7329cefacaefd0e0a6ab7d0..28e56542e0720e30d1221c3a24e38e68a5aedd08 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -93,7 +93,6 @@ static void lpfc_vmid_put_cs_ctl(struct lpfc_vport *vport, u32 ctcl_vmid);
 int
 lpfc_els_chk_latt(struct lpfc_vport *vport)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba  *phba = vport->phba;
        uint32_t ha_copy;
 
@@ -121,9 +120,7 @@ lpfc_els_chk_latt(struct lpfc_vport *vport)
         * will cleanup any left over in-progress discovery
         * events.
         */
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag |= FC_ABORT_DISCOVERY;
-       spin_unlock_irq(shost->host_lock);
+       set_bit(FC_ABORT_DISCOVERY, &vport->fc_flag);
 
        if (phba->link_state != LPFC_CLEAR_LA)
                lpfc_issue_clear_la(phba, vport);
@@ -301,7 +298,7 @@ lpfc_prep_els_iocb(struct lpfc_vport *vport, u8 expect_rsp,
                lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
                                 "0116 Xmit ELS command x%x to remote "
                                 "NPORT x%x I/O tag: x%x, port state:x%x "
-                                "rpi x%x fc_flag:x%x\n",
+                                "rpi x%x fc_flag:x%lx\n",
                                 elscmd, did, elsiocb->iotag,
                                 vport->port_state, ndlp->nlp_rpi,
                                 vport->fc_flag);
@@ -310,7 +307,7 @@ lpfc_prep_els_iocb(struct lpfc_vport *vport, u8 expect_rsp,
                lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
                                 "0117 Xmit ELS response x%x to remote "
                                 "NPORT x%x I/O tag: x%x, size: x%x "
-                                "port_state x%x  rpi x%x fc_flag x%x\n",
+                                "port_state x%x  rpi x%x fc_flag x%lx\n",
                                 elscmd, ndlp->nlp_DID, elsiocb->iotag,
                                 cmd_size, vport->port_state,
                                 ndlp->nlp_rpi, vport->fc_flag);
@@ -452,7 +449,7 @@ lpfc_issue_reg_vfi(struct lpfc_vport *vport)
        /* move forward in case of SLI4 FC port loopback test and pt2pt mode */
        if ((phba->sli_rev == LPFC_SLI_REV4) &&
            !(phba->link_flag & LS_LOOPBACK_MODE) &&
-           !(vport->fc_flag & FC_PT2PT)) {
+           !test_bit(FC_PT2PT, &vport->fc_flag)) {
                ndlp = lpfc_findnode_did(vport, Fabric_DID);
                if (!ndlp) {
                        rc = -ENODEV;
@@ -467,7 +464,8 @@ lpfc_issue_reg_vfi(struct lpfc_vport *vport)
        }
 
        /* Supply CSP's only if we are fabric connect or pt-to-pt connect */
-       if ((vport->fc_flag & FC_FABRIC) || (vport->fc_flag & FC_PT2PT)) {
+       if (test_bit(FC_FABRIC, &vport->fc_flag) ||
+           test_bit(FC_PT2PT, &vport->fc_flag)) {
                rc = lpfc_mbox_rsrc_prep(phba, mboxq);
                if (rc) {
                        rc = -ENOMEM;
@@ -520,7 +518,6 @@ int
 lpfc_issue_unreg_vfi(struct lpfc_vport *vport)
 {
        struct lpfc_hba *phba = vport->phba;
-       struct Scsi_Host *shost;
        LPFC_MBOXQ_t *mboxq;
        int rc;
 
@@ -546,10 +543,7 @@ lpfc_issue_unreg_vfi(struct lpfc_vport *vport)
                return -EIO;
        }
 
-       shost = lpfc_shost_from_vport(vport);
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag &= ~FC_VFI_REGISTERED;
-       spin_unlock_irq(shost->host_lock);
+       clear_bit(FC_VFI_REGISTERED, &vport->fc_flag);
        return 0;
 }
 
@@ -577,7 +571,6 @@ lpfc_check_clean_addr_bit(struct lpfc_vport *vport,
 {
        struct lpfc_hba *phba = vport->phba;
        uint8_t fabric_param_changed = 0;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
        if ((vport->fc_prevDID != vport->fc_myDID) ||
                memcmp(&vport->fabric_portname, &sp->portName,
@@ -599,11 +592,8 @@ lpfc_check_clean_addr_bit(struct lpfc_vport *vport,
         * - lpfc_delay_discovery module parameter is set.
         */
        if (fabric_param_changed && !sp->cmn.clean_address_bit &&
-           (vport->fc_prevDID || phba->cfg_delay_discovery)) {
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag |= FC_DISC_DELAYED;
-               spin_unlock_irq(shost->host_lock);
-       }
+           (vport->fc_prevDID || phba->cfg_delay_discovery))
+               set_bit(FC_DISC_DELAYED, &vport->fc_flag);
 
        return fabric_param_changed;
 }
@@ -633,15 +623,12 @@ static int
 lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                           struct serv_parm *sp, uint32_t ulp_word4)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba  *phba = vport->phba;
        struct lpfc_nodelist *np;
        struct lpfc_nodelist *next_np;
        uint8_t fabric_param_changed;
 
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag |= FC_FABRIC;
-       spin_unlock_irq(shost->host_lock);
+       set_bit(FC_FABRIC, &vport->fc_flag);
 
        phba->fc_edtov = be32_to_cpu(sp->cmn.e_d_tov);
        if (sp->cmn.edtovResolution)    /* E_D_TOV ticks are in nanoseconds */
@@ -650,11 +637,8 @@ lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        phba->fc_edtovResol = sp->cmn.edtovResolution;
        phba->fc_ratov = (be32_to_cpu(sp->cmn.w2.r_a_tov) + 999) / 1000;
 
-       if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag |= FC_PUBLIC_LOOP;
-               spin_unlock_irq(shost->host_lock);
-       }
+       if (phba->fc_topology == LPFC_TOPOLOGY_LOOP)
+               set_bit(FC_PUBLIC_LOOP, &vport->fc_flag);
 
        vport->fc_myDID = ulp_word4 & Mask_DID;
        memcpy(&ndlp->nlp_portname, &sp->portName, sizeof(struct lpfc_name));
@@ -728,12 +712,12 @@ lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                        lpfc_unregister_fcf_prep(phba);
 
                /* This should just update the VFI CSPs*/
-               if (vport->fc_flag & FC_VFI_REGISTERED)
+               if (test_bit(FC_VFI_REGISTERED, &vport->fc_flag))
                        lpfc_issue_reg_vfi(vport);
        }
 
        if (fabric_param_changed &&
-               !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) {
+               !test_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag)) {
 
                /* If our NportID changed, we need to ensure all
                 * remaining NPORTs get unreg_login'ed.
@@ -753,20 +737,16 @@ lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                if (phba->sli_rev == LPFC_SLI_REV4) {
                        lpfc_sli4_unreg_all_rpis(vport);
                        lpfc_mbx_unreg_vpi(vport);
-                       spin_lock_irq(shost->host_lock);
-                       vport->fc_flag |= FC_VPORT_NEEDS_INIT_VPI;
-                       spin_unlock_irq(shost->host_lock);
+                       set_bit(FC_VPORT_NEEDS_INIT_VPI, &vport->fc_flag);
                }
 
                /*
                 * For SLI3 and SLI4, the VPI needs to be reregistered in
                 * response to this fabric parameter change event.
                 */
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
-               spin_unlock_irq(shost->host_lock);
+               set_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
        } else if ((phba->sli_rev == LPFC_SLI_REV4) &&
-               !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) {
+                  !test_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag)) {
                        /*
                         * Driver needs to re-reg VPI in order for f/w
                         * to update the MAC address.
@@ -779,18 +759,18 @@ lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        if (phba->sli_rev < LPFC_SLI_REV4) {
                lpfc_nlp_set_state(vport, ndlp, NLP_STE_REG_LOGIN_ISSUE);
                if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED &&
-                   vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)
+                   test_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag))
                        lpfc_register_new_vport(phba, vport, ndlp);
                else
                        lpfc_issue_fabric_reglogin(vport);
        } else {
                ndlp->nlp_type |= NLP_FABRIC;
                lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
-               if ((!(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) &&
-                       (vport->vpi_state & LPFC_VPI_REGISTERED)) {
+               if ((!test_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag)) &&
+                   (vport->vpi_state & LPFC_VPI_REGISTERED)) {
                        lpfc_start_fdiscs(phba);
                        lpfc_do_scr_ns_plogi(phba, vport);
-               } else if (vport->fc_flag & FC_VFI_REGISTERED)
+               } else if (test_bit(FC_VFI_REGISTERED, &vport->fc_flag))
                        lpfc_issue_init_vpi(vport);
                else {
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
@@ -826,15 +806,13 @@ static int
 lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                          struct serv_parm *sp)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba  *phba = vport->phba;
        LPFC_MBOXQ_t *mbox;
        int rc;
 
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
-       vport->fc_flag |= FC_PT2PT;
-       spin_unlock_irq(shost->host_lock);
+       clear_bit(FC_FABRIC, &vport->fc_flag);
+       clear_bit(FC_PUBLIC_LOOP, &vport->fc_flag);
+       set_bit(FC_PT2PT, &vport->fc_flag);
 
        /* If we are pt2pt with another NPort, force NPIV off! */
        phba->sli3_options &= ~LPFC_SLI3_NPIV_ENABLED;
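
Note one semantic wrinkle of the conversion visible in this hunk: a single
masked store such as "fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP)" splits into
separate clear_bit()/set_bit() calls. Each bit still updates atomically,
but the group is no longer one indivisible update; that is acceptable here
because readers now test the bits individually. A sketch, with illustrative
demo_* names:

        #include <linux/bitops.h>

        enum { DEMO_FABRIC, DEMO_PUBLIC_LOOP, DEMO_PT2PT };

        static void demo_enter_pt2pt(unsigned long *flags)
        {
                clear_bit(DEMO_FABRIC, flags);
                clear_bit(DEMO_PUBLIC_LOOP, flags);
                set_bit(DEMO_PT2PT, flags);
        }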
@@ -842,10 +820,7 @@ lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        /* If physical FC port changed, unreg VFI and ALL VPIs / RPIs */
        if ((phba->sli_rev == LPFC_SLI_REV4) && phba->fc_topology_changed) {
                lpfc_unregister_fcf_prep(phba);
-
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag &= ~FC_VFI_REGISTERED;
-               spin_unlock_irq(shost->host_lock);
+               clear_bit(FC_VFI_REGISTERED, &vport->fc_flag);
                phba->fc_topology_changed = 0;
        }
 
@@ -854,9 +829,7 @@ lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 
        if (rc >= 0) {
                /* This side will initiate the PLOGI */
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag |= FC_PT2PT_PLOGI;
-               spin_unlock_irq(shost->host_lock);
+               set_bit(FC_PT2PT_PLOGI, &vport->fc_flag);
 
                /*
                 * N_Port ID cannot be 0, set our Id to LocalID
@@ -953,7 +926,6 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                    struct lpfc_iocbq *rspiocb)
 {
        struct lpfc_vport *vport = cmdiocb->vport;
-       struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
        struct lpfc_nodelist *ndlp = cmdiocb->ndlp;
        IOCB_t *irsp;
        struct lpfc_dmabuf *pcmd = cmdiocb->cmd_dmabuf, *prsp;
@@ -1069,10 +1041,9 @@ stop_rr_fcf_flogi:
                }
 
                /* FLOGI failed, so there is no fabric */
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP |
-                                   FC_PT2PT_NO_NVME);
-               spin_unlock_irq(shost->host_lock);
+               clear_bit(FC_FABRIC, &vport->fc_flag);
+               clear_bit(FC_PUBLIC_LOOP, &vport->fc_flag);
+               clear_bit(FC_PT2PT_NO_NVME, &vport->fc_flag);
 
                /* If private loop, then allow max outstanding els to be
                 * LPFC_MAX_DISC_THREADS (32). Scanning in the case of no
@@ -1081,15 +1052,14 @@ stop_rr_fcf_flogi:
                if (phba->alpa_map[0] == 0)
                        vport->cfg_discovery_threads = LPFC_MAX_DISC_THREADS;
                if ((phba->sli_rev == LPFC_SLI_REV4) &&
-                   (!(vport->fc_flag & FC_VFI_REGISTERED) ||
+                   (!test_bit(FC_VFI_REGISTERED, &vport->fc_flag) ||
                     (vport->fc_prevDID != vport->fc_myDID) ||
                        phba->fc_topology_changed)) {
-                       if (vport->fc_flag & FC_VFI_REGISTERED) {
+                       if (test_bit(FC_VFI_REGISTERED, &vport->fc_flag)) {
                                if (phba->fc_topology_changed) {
                                        lpfc_unregister_fcf_prep(phba);
-                                       spin_lock_irq(shost->host_lock);
-                                       vport->fc_flag &= ~FC_VFI_REGISTERED;
-                                       spin_unlock_irq(shost->host_lock);
+                                       clear_bit(FC_VFI_REGISTERED,
+                                                 &vport->fc_flag);
                                        phba->fc_topology_changed = 0;
                                } else {
                                        lpfc_sli4_unreg_all_rpis(vport);
@@ -1104,10 +1074,8 @@ stop_rr_fcf_flogi:
                }
                goto flogifail;
        }
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag &= ~FC_VPORT_CVL_RCVD;
-       vport->fc_flag &= ~FC_VPORT_LOGO_RCVD;
-       spin_unlock_irq(shost->host_lock);
+       clear_bit(FC_VPORT_CVL_RCVD, &vport->fc_flag);
+       clear_bit(FC_VPORT_LOGO_RCVD, &vport->fc_flag);
 
        /*
         * The FLOGI succeeded.  Sync the data for the CPU before
@@ -1123,7 +1091,7 @@ stop_rr_fcf_flogi:
        /* FLOGI completes successfully */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
                         "0101 FLOGI completes successfully, I/O tag:x%x "
-                        "xri x%x Data: x%x x%x x%x x%x x%x x%x x%x %d\n",
+                        "xri x%x Data: x%x x%x x%x x%x x%x x%lx x%x %d\n",
                         cmdiocb->iotag, cmdiocb->sli4_xritag,
                         ulp_word4, sp->cmn.e_d_tov,
                         sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution,
@@ -1202,7 +1170,7 @@ stop_rr_fcf_flogi:
                        goto out;
                }
        } else if (vport->port_state > LPFC_FLOGI &&
-                  vport->fc_flag & FC_PT2PT) {
+                  test_bit(FC_PT2PT, &vport->fc_flag)) {
                /*
                 * In a p2p topology, it is possible that discovery has
                 * already progressed, and this completion can be ignored.
@@ -1506,8 +1474,9 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba)
                if (ulp_command == CMD_ELS_REQUEST64_CR) {
                        ndlp = iocb->ndlp;
                        if (ndlp && ndlp->nlp_DID == Fabric_DID) {
-                               if ((phba->pport->fc_flag & FC_PT2PT) &&
-                                   !(phba->pport->fc_flag & FC_PT2PT_PLOGI))
+                               if (test_bit(FC_PT2PT, &phba->pport->fc_flag) &&
+                                   !test_bit(FC_PT2PT_PLOGI,
+                                             &phba->pport->fc_flag))
                                        iocb->fabric_cmd_cmpl =
                                                lpfc_ignore_els_cmpl;
                                lpfc_sli_issue_abort_iotag(phba, pring, iocb,
@@ -1562,7 +1531,7 @@ lpfc_initial_flogi(struct lpfc_vport *vport)
        }
 
        /* Reset the Fabric flag, topology change may have happened */
-       vport->fc_flag &= ~FC_FABRIC;
+       clear_bit(FC_FABRIC, &vport->fc_flag);
        if (lpfc_issue_els_flogi(vport, ndlp, 0)) {
                /* A node reference should be retained while registered with a
                 * transport or dev-loss-evt work is pending.
@@ -1645,11 +1614,12 @@ lpfc_more_plogi(struct lpfc_vport *vport)
        /* Continue discovery with <num_disc_nodes> PLOGIs to go */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                         "0232 Continue discovery with %d PLOGIs to go "
-                        "Data: x%x x%x x%x\n",
-                        vport->num_disc_nodes, vport->fc_plogi_cnt,
+                        "Data: x%x x%lx x%x\n",
+                        vport->num_disc_nodes,
+                        atomic_read(&vport->fc_plogi_cnt),
                         vport->fc_flag, vport->port_state);
        /* Check to see if there are more PLOGIs to be sent */
-       if (vport->fc_flag & FC_NLP_MORE)
+       if (test_bit(FC_NLP_MORE, &vport->fc_flag))
                /* go thru NPR nodes and issue any remaining ELS PLOGIs */
                lpfc_els_disc_plogi(vport);
 
@@ -1696,18 +1666,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
        struct serv_parm *sp;
        uint8_t  name[sizeof(struct lpfc_name)];
        uint32_t keepDID = 0, keep_nlp_flag = 0;
+       int rc;
        uint32_t keep_new_nlp_flag = 0;
        uint16_t keep_nlp_state;
        u32 keep_nlp_fc4_type = 0;
        struct lpfc_nvme_rport *keep_nrport = NULL;
        unsigned long *active_rrqs_xri_bitmap = NULL;
 
-       /* Fabric nodes can have the same WWPN so we don't bother searching
-        * by WWPN.  Just return the ndlp that was given to us.
-        */
-       if (ndlp->nlp_type & NLP_FABRIC)
-               return ndlp;
-
        sp = (struct serv_parm *) ((uint8_t *) prsp + sizeof(uint32_t));
        memset(name, 0, sizeof(struct lpfc_name));
 
@@ -1717,15 +1682,9 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
        new_ndlp = lpfc_findnode_wwpn(vport, &sp->portName);
 
        /* return immediately if the WWPN matches ndlp */
-       if (!new_ndlp || (new_ndlp == ndlp))
+       if (new_ndlp == ndlp)
                return ndlp;
 
-       /*
-        * Unregister from backend if not done yet. Could have been skipped
-        * due to ADISC
-        */
-       lpfc_nlp_unreg_node(vport, new_ndlp);
-
        if (phba->sli_rev == LPFC_SLI_REV4) {
                active_rrqs_xri_bitmap = mempool_alloc(phba->active_rrq_pool,
                                                       GFP_KERNEL);
@@ -1742,18 +1701,44 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
                         (new_ndlp ? new_ndlp->nlp_flag : 0),
                         (new_ndlp ? new_ndlp->nlp_fc4_type : 0));
 
-       keepDID = new_ndlp->nlp_DID;
+       if (!new_ndlp) {
+               rc = memcmp(&ndlp->nlp_portname, name,
+                           sizeof(struct lpfc_name));
+               if (!rc) {
+                       if (active_rrqs_xri_bitmap)
+                               mempool_free(active_rrqs_xri_bitmap,
+                                            phba->active_rrq_pool);
+                       return ndlp;
+               }
+               new_ndlp = lpfc_nlp_init(vport, ndlp->nlp_DID);
+               if (!new_ndlp) {
+                       if (active_rrqs_xri_bitmap)
+                               mempool_free(active_rrqs_xri_bitmap,
+                                            phba->active_rrq_pool);
+                       return ndlp;
+               }
+       } else {
+               if (phba->sli_rev == LPFC_SLI_REV4 &&
+                   active_rrqs_xri_bitmap)
+                       memcpy(active_rrqs_xri_bitmap,
+                              new_ndlp->active_rrqs_xri_bitmap,
+                              phba->cfg_rrq_xri_bitmap_sz);
 
-       if (phba->sli_rev == LPFC_SLI_REV4 && active_rrqs_xri_bitmap)
-               memcpy(active_rrqs_xri_bitmap, new_ndlp->active_rrqs_xri_bitmap,
-                      phba->cfg_rrq_xri_bitmap_sz);
+               /*
+                * Unregister from backend if not done yet. Could have been
+                * skipped due to ADISC
+                */
+               lpfc_nlp_unreg_node(vport, new_ndlp);
+       }
+
+       keepDID = new_ndlp->nlp_DID;
 
        /* At this point in this routine, we know new_ndlp will be
         * returned. however, any previous GID_FTs that were done
         * would have updated nlp_fc4_type in ndlp, so we must ensure
         * new_ndlp has the right value.
         */
-       if (vport->fc_flag & FC_FABRIC) {
+       if (test_bit(FC_FABRIC, &vport->fc_flag)) {
                keep_nlp_fc4_type = new_ndlp->nlp_fc4_type;
                new_ndlp->nlp_fc4_type = ndlp->nlp_fc4_type;
        }
@@ -1914,21 +1899,17 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
 void
 lpfc_end_rscn(struct lpfc_vport *vport)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
-       if (vport->fc_flag & FC_RSCN_MODE) {
+       if (test_bit(FC_RSCN_MODE, &vport->fc_flag)) {
                /*
                 * Check to see if more RSCNs came in while we were
                 * processing this one.
                 */
                if (vport->fc_rscn_id_cnt ||
-                   (vport->fc_flag & FC_RSCN_DISCOVERY) != 0)
+                   test_bit(FC_RSCN_DISCOVERY, &vport->fc_flag))
                        lpfc_els_handle_rscn(vport);
-               else {
-                       spin_lock_irq(shost->host_lock);
-                       vport->fc_flag &= ~FC_RSCN_MODE;
-                       spin_unlock_irq(shost->host_lock);
-               }
+               else
+                       clear_bit(FC_RSCN_MODE, &vport->fc_flag);
        }
 }
 
@@ -2015,7 +1996,6 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                    struct lpfc_iocbq *rspiocb)
 {
        struct lpfc_vport *vport = cmdiocb->vport;
-       struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
        IOCB_t *irsp;
        struct lpfc_nodelist *ndlp, *free_ndlp;
        struct lpfc_dmabuf *prsp;
@@ -2162,9 +2142,7 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                lpfc_more_plogi(vport);
 
                if (vport->num_disc_nodes == 0) {
-                       spin_lock_irq(shost->host_lock);
-                       vport->fc_flag &= ~FC_NDISC_ACTIVE;
-                       spin_unlock_irq(shost->host_lock);
+                       clear_bit(FC_NDISC_ACTIVE, &vport->fc_flag);
 
                        lpfc_can_disctmo(vport);
                        lpfc_end_rscn(vport);
@@ -2226,7 +2204,7 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry)
         */
        if ((ndlp->nlp_flag & (NLP_IGNR_REG_CMPL | NLP_UNREG_INP)) &&
            ((ndlp->nlp_DID & Fabric_DID_MASK) != Fabric_DID_MASK) &&
-           !(vport->fc_flag & FC_OFFLINE_MODE)) {
+           !test_bit(FC_OFFLINE_MODE, &vport->fc_flag)) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "4110 Issue PLOGI x%x deferred "
                                 "on NPort x%x rpi x%x flg x%x Data:"
@@ -2258,7 +2236,8 @@ lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry)
         * If we are an N-port connected to a Fabric, fix up params so logins
         * to device on remote loops work.
         */
-       if ((vport->fc_flag & FC_FABRIC) && !(vport->fc_flag & FC_PUBLIC_LOOP))
+       if (test_bit(FC_FABRIC, &vport->fc_flag) &&
+           !test_bit(FC_PUBLIC_LOOP, &vport->fc_flag))
                sp->cmn.altBbCredit = 1;
 
        if (sp->cmn.fcphLow < FC_PH_4_3)
@@ -2382,8 +2361,8 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                /* If we don't send GFT_ID to Fabric, a PRLI error
                 * could be expected.
                 */
-               if ((vport->fc_flag & FC_FABRIC) ||
-                   (vport->cfg_enable_fc4_type != LPFC_ENABLE_BOTH)) {
+               if (test_bit(FC_FABRIC, &vport->fc_flag) ||
+                   vport->cfg_enable_fc4_type != LPFC_ENABLE_BOTH) {
                        mode = KERN_ERR;
                        loglevel =  LOG_TRACE_EVENT;
                } else {
@@ -2424,7 +2403,7 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                 * For P2P topology, retain the node so that PLOGI can be
                 * attempted on it again.
                 */
-               if (vport->fc_flag & FC_PT2PT)
+               if (test_bit(FC_PT2PT, &vport->fc_flag))
                        goto out;
 
                /* As long as this node is not registered with the SCSI
@@ -2500,7 +2479,7 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
         * the remote NPort being an NVME Target.
         */
        if (phba->sli_rev == LPFC_SLI_REV4 &&
-           vport->fc_flag & FC_RSCN_MODE &&
+           test_bit(FC_RSCN_MODE, &vport->fc_flag) &&
            vport->nvmei_support)
                ndlp->nlp_fc4_type |= NLP_FC4_NVME;
        local_nlp_type = ndlp->nlp_fc4_type;
@@ -2677,7 +2656,7 @@ lpfc_rscn_disc(struct lpfc_vport *vport)
 
        /* RSCN discovery */
        /* go thru NPR nodes and issue ELS PLOGIs */
-       if (vport->fc_npr_cnt)
+       if (atomic_read(&vport->fc_npr_cnt))
                if (lpfc_els_disc_plogi(vport))
                        return;
 
@@ -2697,7 +2676,6 @@ lpfc_rscn_disc(struct lpfc_vport *vport)
 static void
 lpfc_adisc_done(struct lpfc_vport *vport)
 {
-       struct Scsi_Host   *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba   *phba = vport->phba;
 
        /*
@@ -2705,7 +2683,7 @@ lpfc_adisc_done(struct lpfc_vport *vport)
         * and continue discovery.
         */
        if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
-           !(vport->fc_flag & FC_RSCN_MODE) &&
+           !test_bit(FC_RSCN_MODE, &vport->fc_flag) &&
            (phba->sli_rev < LPFC_SLI_REV4)) {
 
                /*
@@ -2734,15 +2712,13 @@ lpfc_adisc_done(struct lpfc_vport *vport)
        if (vport->port_state < LPFC_VPORT_READY) {
                /* If we get here, there is nothing to ADISC */
                lpfc_issue_clear_la(phba, vport);
-               if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) {
+               if (!test_bit(FC_ABORT_DISCOVERY, &vport->fc_flag)) {
                        vport->num_disc_nodes = 0;
                        /* go thru NPR list, issue ELS PLOGIs */
-                       if (vport->fc_npr_cnt)
+                       if (atomic_read(&vport->fc_npr_cnt))
                                lpfc_els_disc_plogi(vport);
                        if (!vport->num_disc_nodes) {
-                               spin_lock_irq(shost->host_lock);
-                               vport->fc_flag &= ~FC_NDISC_ACTIVE;
-                               spin_unlock_irq(shost->host_lock);
+                               clear_bit(FC_NDISC_ACTIVE, &vport->fc_flag);
                                lpfc_can_disctmo(vport);
                                lpfc_end_rscn(vport);
                        }
@@ -2769,11 +2745,12 @@ lpfc_more_adisc(struct lpfc_vport *vport)
        /* Continue discovery with <num_disc_nodes> ADISCs to go */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                         "0210 Continue discovery with %d ADISCs to go "
-                        "Data: x%x x%x x%x\n",
-                        vport->num_disc_nodes, vport->fc_adisc_cnt,
+                        "Data: x%x x%lx x%x\n",
+                        vport->num_disc_nodes,
+                        atomic_read(&vport->fc_adisc_cnt),
                         vport->fc_flag, vport->port_state);
        /* Check to see if there are more ADISCs to be sent */
-       if (vport->fc_flag & FC_NLP_MORE) {
+       if (test_bit(FC_NLP_MORE, &vport->fc_flag)) {
                lpfc_set_disctmo(vport);
                /* go thru NPR nodes and issue any remaining ELS ADISCs */
                lpfc_els_disc_adisc(vport);
@@ -3618,10 +3595,10 @@ lpfc_issue_els_rscn(struct lpfc_vport *vport, uint8_t retry)
 
        /* Not supported for private loop */
        if (phba->fc_topology == LPFC_TOPOLOGY_LOOP &&
-           !(vport->fc_flag & FC_PUBLIC_LOOP))
+           !test_bit(FC_PUBLIC_LOOP, &vport->fc_flag))
                return 1;
 
-       if (vport->fc_flag & FC_PT2PT) {
+       if (test_bit(FC_PT2PT, &vport->fc_flag)) {
                /* find any mapped nport - that would be the other nport */
                ndlp = lpfc_findnode_mapped(vport);
                if (!ndlp)
@@ -4399,7 +4376,6 @@ try_rdf:
 void
 lpfc_cancel_retry_delay_tmo(struct lpfc_vport *vport, struct lpfc_nodelist *nlp)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_work_evt *evtp;
 
        if (!(nlp->nlp_flag & NLP_DELAY_TMO))
@@ -4427,9 +4403,8 @@ lpfc_cancel_retry_delay_tmo(struct lpfc_vport *vport, struct lpfc_nodelist *nlp)
                                /* Check if there are more PLOGIs to be sent */
                                lpfc_more_plogi(vport);
                                if (vport->num_disc_nodes == 0) {
-                                       spin_lock_irq(shost->host_lock);
-                                       vport->fc_flag &= ~FC_NDISC_ACTIVE;
-                                       spin_unlock_irq(shost->host_lock);
+                                       clear_bit(FC_NDISC_ACTIVE,
+                                                 &vport->fc_flag);
                                        lpfc_can_disctmo(vport);
                                        lpfc_end_rscn(vport);
                                }
@@ -4546,7 +4521,7 @@ lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp)
                }
                break;
        case ELS_CMD_FDISC:
-               if (!(vport->fc_flag & FC_VPORT_NEEDS_INIT_VPI))
+               if (!test_bit(FC_VPORT_NEEDS_INIT_VPI, &vport->fc_flag))
                        lpfc_issue_els_fdisc(vport, ndlp, retry);
                break;
        }
@@ -4784,7 +4759,7 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                /* Added for Vendor specific support
                 * Just keep retrying for these Rsn / Exp codes
                 */
-               if ((vport->fc_flag & FC_PT2PT) &&
+               if (test_bit(FC_PT2PT, &vport->fc_flag) &&
                    cmd == ELS_CMD_NVMEPRLI) {
                        switch (stat.un.b.lsRjtRsnCode) {
                        case LSRJT_UNABLE_TPC:
@@ -4797,7 +4772,7 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                                                 "support NVME, disabling NVME\n",
                                                 stat.un.b.lsRjtRsnCode);
                                retry = 0;
-                               vport->fc_flag |= FC_PT2PT_NO_NVME;
+                               set_bit(FC_PT2PT_NO_NVME, &vport->fc_flag);
                                goto out_retry;
                        }
                }
@@ -4989,7 +4964,7 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                retry = 0;
        }
 
-       if ((vport->load_flag & FC_UNLOADING) != 0)
+       if (test_bit(FC_UNLOADING, &vport->load_flag))
                retry = 0;
 
 out_retry:
@@ -5020,7 +4995,7 @@ out_retry:
 
                        /* If discovery / RSCN timer is running, reset it */
                        if (timer_pending(&vport->fc_disctmo) ||
-                           (vport->fc_flag & FC_RSCN_MODE))
+                           test_bit(FC_RSCN_MODE, &vport->fc_flag))
                                lpfc_set_disctmo(vport);
                }
 
@@ -5406,7 +5381,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                if (ulp_status == 0
                    && (ndlp->nlp_flag & NLP_ACC_REGLOGIN)) {
                        if (!lpfc_unreg_rpi(vport, ndlp) &&
-                           (!(vport->fc_flag & FC_PT2PT))) {
+                           !test_bit(FC_PT2PT, &vport->fc_flag)) {
                                if (ndlp->nlp_state ==  NLP_STE_PLOGI_ISSUE ||
                                    ndlp->nlp_state ==
                                     NLP_STE_REG_LOGIN_ISSUE) {
@@ -5778,7 +5753,7 @@ lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag,
        lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
                         "0128 Xmit ELS ACC response Status: x%x, IoTag: x%x, "
                         "XRI: x%x, DID: x%x, nlp_flag: x%x nlp_state: x%x "
-                        "RPI: x%x, fc_flag x%x refcnt %d\n",
+                        "RPI: x%x, fc_flag x%lx refcnt %d\n",
                         rc, elsiocb->iotag, elsiocb->sli4_xritag,
                         ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
                         ndlp->nlp_rpi, vport->fc_flag, kref_read(&ndlp->kref));
@@ -5984,7 +5959,7 @@ lpfc_issue_els_edc_rsp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
        lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
                         "0152 Xmit EDC ACC response Status: x%x, IoTag: x%x, "
                         "XRI: x%x, DID: x%x, nlp_flag: x%x nlp_state: x%x "
-                        "RPI: x%x, fc_flag x%x\n",
+                        "RPI: x%x, fc_flag x%lx\n",
                         rc, elsiocb->iotag, elsiocb->sli4_xritag,
                         ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
                         ndlp->nlp_rpi, vport->fc_flag);
@@ -6551,7 +6526,6 @@ lpfc_els_rsp_echo_acc(struct lpfc_vport *vport, uint8_t *data,
 int
 lpfc_els_disc_adisc(struct lpfc_vport *vport)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_nodelist *ndlp, *next_ndlp;
        int sentadisc = 0;
 
@@ -6586,18 +6560,13 @@ lpfc_els_disc_adisc(struct lpfc_vport *vport)
                vport->num_disc_nodes++;
                if (vport->num_disc_nodes >=
                                vport->cfg_discovery_threads) {
-                       spin_lock_irq(shost->host_lock);
-                       vport->fc_flag |= FC_NLP_MORE;
-                       spin_unlock_irq(shost->host_lock);
+                       set_bit(FC_NLP_MORE, &vport->fc_flag);
                        break;
                }
 
        }
-       if (sentadisc == 0) {
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag &= ~FC_NLP_MORE;
-               spin_unlock_irq(shost->host_lock);
-       }
+       if (sentadisc == 0)
+               clear_bit(FC_NLP_MORE, &vport->fc_flag);
        return sentadisc;
 }
 
@@ -6623,7 +6592,6 @@ lpfc_els_disc_adisc(struct lpfc_vport *vport)
 int
 lpfc_els_disc_plogi(struct lpfc_vport *vport)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_nodelist *ndlp, *next_ndlp;
        int sentplogi = 0;
 
@@ -6640,26 +6608,20 @@ lpfc_els_disc_plogi(struct lpfc_vport *vport)
                        vport->num_disc_nodes++;
                        if (vport->num_disc_nodes >=
                                        vport->cfg_discovery_threads) {
-                               spin_lock_irq(shost->host_lock);
-                               vport->fc_flag |= FC_NLP_MORE;
-                               spin_unlock_irq(shost->host_lock);
+                               set_bit(FC_NLP_MORE, &vport->fc_flag);
                                break;
                        }
                }
        }
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
-                        "6452 Discover PLOGI %d flag x%x\n",
+                        "6452 Discover PLOGI %d flag x%lx\n",
                         sentplogi, vport->fc_flag);
 
-       if (sentplogi) {
+       if (sentplogi)
                lpfc_set_disctmo(vport);
-       }
-       else {
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag &= ~FC_NLP_MORE;
-               spin_unlock_irq(shost->host_lock);
-       }
+       else
+               clear_bit(FC_NLP_MORE, &vport->fc_flag);
        return sentplogi;
 }
 
@@ -7070,7 +7032,7 @@ lpfc_rdp_res_attach_port_names(struct fc_rdp_port_name_desc *desc,
 {
 
        desc->tag = cpu_to_be32(RDP_PORT_NAMES_DESC_TAG);
-       if (vport->fc_flag & FC_FABRIC) {
+       if (test_bit(FC_FABRIC, &vport->fc_flag)) {
                memcpy(desc->port_names.wwnn, &vport->fabric_nodename,
                       sizeof(desc->port_names.wwnn));
 
@@ -7854,9 +7816,10 @@ lpfc_els_flush_rscn(struct lpfc_vport *vport)
                lpfc_in_buf_free(phba, vport->fc_rscn_id_list[i]);
                vport->fc_rscn_id_list[i] = NULL;
        }
+       clear_bit(FC_RSCN_MODE, &vport->fc_flag);
+       clear_bit(FC_RSCN_DISCOVERY, &vport->fc_flag);
        spin_lock_irq(shost->host_lock);
        vport->fc_rscn_id_cnt = 0;
-       vport->fc_flag &= ~(FC_RSCN_MODE | FC_RSCN_DISCOVERY);
        spin_unlock_irq(shost->host_lock);
        lpfc_can_disctmo(vport);
        /* Indicate we are done walking this fc_rscn_id_list */
@@ -7891,7 +7854,7 @@ lpfc_rscn_payload_check(struct lpfc_vport *vport, uint32_t did)
                return 0;
 
        /* If we are doing a FULL RSCN rediscovery, match everything */
-       if (vport->fc_flag & FC_RSCN_DISCOVERY)
+       if (test_bit(FC_RSCN_DISCOVERY, &vport->fc_flag))
                return did;
 
        spin_lock_irq(shost->host_lock);
@@ -8070,7 +8033,7 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
        payload_len -= sizeof(uint32_t);        /* take off word 0 */
        /* RSCN received */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
-                        "0214 RSCN received Data: x%x x%x x%x x%x\n",
+                        "0214 RSCN received Data: x%lx x%x x%x x%x\n",
                         vport->fc_flag, payload_len, *lp,
                         vport->fc_rscn_id_cnt);
 
@@ -8082,10 +8045,10 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
                        FCH_EVT_RSCN, lp[i]);
 
        /* Check if RSCN is coming from a direct-connected remote NPort */
-       if (vport->fc_flag & FC_PT2PT) {
+       if (test_bit(FC_PT2PT, &vport->fc_flag)) {
                /* If so, just ACC it, no other action needed for now */
                lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
-                                "2024 pt2pt RSCN %08x Data: x%x x%x\n",
+                                "2024 pt2pt RSCN %08x Data: x%lx x%x\n",
                                 *lp, vport->fc_flag, payload_len);
                lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL);
 
@@ -8129,7 +8092,7 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
                        /* ALL NPortIDs in RSCN are on HBA */
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "0219 Ignore RSCN "
-                                        "Data: x%x x%x x%x x%x\n",
+                                        "Data: x%lx x%x x%x x%x\n",
                                         vport->fc_flag, payload_len,
                                         *lp, vport->fc_rscn_id_cnt);
                        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
@@ -8140,7 +8103,7 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
                        lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb,
                                ndlp, NULL);
                        /* Restart disctmo if its already running */
-                       if (vport->fc_flag & FC_DISC_TMO) {
+                       if (test_bit(FC_DISC_TMO, &vport->fc_flag)) {
                                tmo = ((phba->fc_ratov * 3) + 3);
                                mod_timer(&vport->fc_disctmo,
                                          jiffies +
@@ -8153,8 +8116,8 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
        spin_lock_irq(shost->host_lock);
        if (vport->fc_rscn_flush) {
                /* Another thread is walking fc_rscn_id_list on this vport */
-               vport->fc_flag |= FC_RSCN_DISCOVERY;
                spin_unlock_irq(shost->host_lock);
+               set_bit(FC_RSCN_DISCOVERY, &vport->fc_flag);
                /* Send back ACC */
                lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL);
                return 0;
@@ -8167,24 +8130,23 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
        /* If we are already processing an RSCN, save the received
         * RSCN payload buffer, cmdiocb->cmd_dmabuf to process later.
         */
-       if (vport->fc_flag & (FC_RSCN_MODE | FC_NDISC_ACTIVE)) {
+       if (test_bit(FC_RSCN_MODE, &vport->fc_flag) ||
+           test_bit(FC_NDISC_ACTIVE, &vport->fc_flag)) {
                lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
                        "RCV RSCN defer:  did:x%x/ste:x%x flg:x%x",
                        ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag);
 
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag |= FC_RSCN_DEFERRED;
+               set_bit(FC_RSCN_DEFERRED, &vport->fc_flag);
 
                /* Restart disctmo if its already running */
-               if (vport->fc_flag & FC_DISC_TMO) {
+               if (test_bit(FC_DISC_TMO, &vport->fc_flag)) {
                        tmo = ((phba->fc_ratov * 3) + 3);
                        mod_timer(&vport->fc_disctmo,
                                  jiffies + msecs_to_jiffies(1000 * tmo));
                }
                if ((rscn_cnt < FC_MAX_HOLD_RSCN) &&
-                   !(vport->fc_flag & FC_RSCN_DISCOVERY)) {
-                       vport->fc_flag |= FC_RSCN_MODE;
-                       spin_unlock_irq(shost->host_lock);
+                   !test_bit(FC_RSCN_DISCOVERY, &vport->fc_flag)) {
+                       set_bit(FC_RSCN_MODE, &vport->fc_flag);
                        if (rscn_cnt) {
                                cmd = vport->fc_rscn_id_list[rscn_cnt-1]->virt;
                                length = be32_to_cpu(*cmd & ~ELS_CMD_MASK);
@@ -8206,16 +8168,15 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
                        /* Deferred RSCN */
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "0235 Deferred RSCN "
-                                        "Data: x%x x%x x%x\n",
+                                        "Data: x%x x%lx x%x\n",
                                         vport->fc_rscn_id_cnt, vport->fc_flag,
                                         vport->port_state);
                } else {
-                       vport->fc_flag |= FC_RSCN_DISCOVERY;
-                       spin_unlock_irq(shost->host_lock);
+                       set_bit(FC_RSCN_DISCOVERY, &vport->fc_flag);
                        /* ReDiscovery RSCN */
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "0234 ReDiscovery RSCN "
-                                        "Data: x%x x%x x%x\n",
+                                        "Data: x%x x%lx x%x\n",
                                         vport->fc_rscn_id_cnt, vport->fc_flag,
                                         vport->port_state);
                }
@@ -8231,9 +8192,7 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
                "RCV RSCN:        did:x%x/ste:x%x flg:x%x",
                ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag);
 
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag |= FC_RSCN_MODE;
-       spin_unlock_irq(shost->host_lock);
+       set_bit(FC_RSCN_MODE, &vport->fc_flag);
        vport->fc_rscn_id_list[vport->fc_rscn_id_cnt++] = pcmd;
        /* Indicate we are done walking fc_rscn_id_list on this vport */
        vport->fc_rscn_flush = 0;
@@ -8273,7 +8232,7 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport)
        struct lpfc_hba  *phba = vport->phba;
 
        /* Ignore RSCN if the port is being torn down. */
-       if (vport->load_flag & FC_UNLOADING) {
+       if (test_bit(FC_UNLOADING, &vport->load_flag)) {
                lpfc_els_flush_rscn(vport);
                return 0;
        }
@@ -8283,7 +8242,7 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport)
 
        /* RSCN processed */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
-                        "0215 RSCN processed Data: x%x x%x x%x x%x x%x x%x\n",
+                        "0215 RSCN processed Data: x%lx x%x x%x x%x x%x x%x\n",
                         vport->fc_flag, 0, vport->fc_rscn_id_cnt,
                         vport->port_state, vport->num_disc_nodes,
                         vport->gidft_inp);
@@ -8372,7 +8331,7 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
        LPFC_MBOXQ_t *mbox;
        uint32_t cmd, did;
        int rc;
-       uint32_t fc_flag = 0;
+       unsigned long fc_flag = 0;
        uint32_t port_state = 0;
 
        /* Clear external loopback plug detected flag */
@@ -8442,9 +8401,7 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
                return 0;
 
        } else if (rc > 0) {    /* greater than */
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag |= FC_PT2PT_PLOGI;
-               spin_unlock_irq(shost->host_lock);
+               set_bit(FC_PT2PT_PLOGI, &vport->fc_flag);
 
                /* If we have the high WWPN we can assign our own
                 * myDID; otherwise, we have to WAIT for a PLOGI
@@ -8463,17 +8420,17 @@ lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
        spin_lock_irq(shost->host_lock);
        fc_flag = vport->fc_flag;
        port_state = vport->port_state;
-       vport->fc_flag |= FC_PT2PT;
-       vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
-
        /* Acking an unsol FLOGI.  Count 1 for link bounce
         * work-around.
         */
        vport->rcv_flogi_cnt++;
        spin_unlock_irq(shost->host_lock);
+       set_bit(FC_PT2PT, &vport->fc_flag);
+       clear_bit(FC_FABRIC, &vport->fc_flag);
+       clear_bit(FC_PUBLIC_LOOP, &vport->fc_flag);
        lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
                         "3311 Rcv Flogi PS x%x new PS x%x "
-                        "fc_flag x%x new fc_flag x%x\n",
+                        "fc_flag x%lx new fc_flag x%lx\n",
                         port_state, vport->port_state,
                         fc_flag, vport->fc_flag);
 
@@ -9492,11 +9449,11 @@ lpfc_els_timeout(struct timer_list *t)
 
        spin_lock_irqsave(&vport->work_port_lock, iflag);
        tmo_posted = vport->work_port_events & WORKER_ELS_TMO;
-       if ((!tmo_posted) && (!(vport->load_flag & FC_UNLOADING)))
+       if (!tmo_posted && !test_bit(FC_UNLOADING, &vport->load_flag))
                vport->work_port_events |= WORKER_ELS_TMO;
        spin_unlock_irqrestore(&vport->work_port_lock, iflag);
 
-       if ((!tmo_posted) && (!(vport->load_flag & FC_UNLOADING)))
+       if (!tmo_posted && !test_bit(FC_UNLOADING, &vport->load_flag))
                lpfc_worker_wake_up(phba);
        return;
 }
@@ -9532,7 +9489,7 @@ lpfc_els_timeout_handler(struct lpfc_vport *vport)
        if (unlikely(!pring))
                return;
 
-       if (phba->pport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag))
                return;
 
        spin_lock_irq(&phba->hbalock);
@@ -9608,7 +9565,7 @@ lpfc_els_timeout_handler(struct lpfc_vport *vport)
        lpfc_issue_hb_tmo(phba);
 
        if (!list_empty(&pring->txcmplq))
-               if (!(phba->pport->load_flag & FC_UNLOADING))
+               if (!test_bit(FC_UNLOADING, &phba->pport->load_flag))
                        mod_timer(&vport->els_tmofunc,
                                  jiffies + msecs_to_jiffies(1000 * timeout));
 }
@@ -10116,6 +10073,9 @@ lpfc_els_rcv_fpin_peer_cgn(struct lpfc_hba *phba, struct fc_tlv_desc *tlv)
        pc_evt_str = lpfc_get_fpin_congn_event_nm(pc_evt);
        cnt = be32_to_cpu(pc->pname_count);
 
+       /* Capture FPIN frequency */
+       phba->cgn_fpin_frequency = be32_to_cpu(pc->event_period);
+
        lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT | LOG_ELS,
                        "4684 FPIN Peer Congestion %s (x%x) "
                        "Duration %d mSecs "
@@ -10404,12 +10364,12 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                goto dropit;
 
        /* Ignore traffic received during vport shutdown. */
-       if (vport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &vport->load_flag))
                goto dropit;
 
        /* If NPort discovery is delayed, drop incoming ELS */
-       if ((vport->fc_flag & FC_DISC_DELAYED) &&
-                       (cmd != ELS_CMD_PLOGI))
+       if (test_bit(FC_DISC_DELAYED, &vport->fc_flag) &&
+           cmd != ELS_CMD_PLOGI)
                goto dropit;
 
        ndlp = lpfc_findnode_did(vport, did);
@@ -10453,14 +10413,14 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
        /* ELS command <elsCmd> received from NPORT <did> */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
                         "0112 ELS command x%x received from NPORT x%x "
-                        "refcnt %d Data: x%x x%x x%x x%x\n",
+                        "refcnt %d Data: x%x x%lx x%x x%x\n",
                         cmd, did, kref_read(&ndlp->kref), vport->port_state,
                         vport->fc_flag, vport->fc_myDID, vport->fc_prevDID);
 
        /* reject till our FLOGI completes or PLOGI assigned DID via PT2PT */
        if ((vport->port_state < LPFC_FABRIC_CFG_LINK) &&
            (cmd != ELS_CMD_FLOGI) &&
-           !((cmd == ELS_CMD_PLOGI) && (vport->fc_flag & FC_PT2PT))) {
+           !((cmd == ELS_CMD_PLOGI) && test_bit(FC_PT2PT, &vport->fc_flag))) {
                rjt_err = LSRJT_LOGICAL_BSY;
                rjt_exp = LSEXP_NOTHING_MORE;
                goto lsrjt;
@@ -10475,7 +10435,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                phba->fc_stat.elsRcvPLOGI++;
                ndlp = lpfc_plogi_confirm_nport(phba, payload, ndlp);
                if (phba->sli_rev == LPFC_SLI_REV4 &&
-                   (phba->pport->fc_flag & FC_PT2PT)) {
+                   test_bit(FC_PT2PT, &phba->pport->fc_flag)) {
                        vport->fc_prevDID = vport->fc_myDID;
                        /* Our DID needs to be updated before registering
                         * the vfi. This is done in lpfc_rcv_plogi but
@@ -10493,15 +10453,15 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                lpfc_send_els_event(vport, ndlp, payload);
 
                /* If Nport discovery is delayed, reject PLOGIs */
-               if (vport->fc_flag & FC_DISC_DELAYED) {
+               if (test_bit(FC_DISC_DELAYED, &vport->fc_flag)) {
                        rjt_err = LSRJT_UNABLE_TPC;
                        rjt_exp = LSEXP_NOTHING_MORE;
                        break;
                }
 
                if (vport->port_state < LPFC_DISC_AUTH) {
-                       if (!(phba->pport->fc_flag & FC_PT2PT) ||
-                               (phba->pport->fc_flag & FC_PT2PT_PLOGI)) {
+                       if (!test_bit(FC_PT2PT, &phba->pport->fc_flag) ||
+                           test_bit(FC_PT2PT_PLOGI, &phba->pport->fc_flag)) {
                                rjt_err = LSRJT_UNABLE_TPC;
                                rjt_exp = LSEXP_NOTHING_MORE;
                                break;
@@ -10527,7 +10487,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
                 * bounce the link.  There is some discrepancy.
                 */
                if (vport->port_state >= LPFC_LOCAL_CFG_LINK &&
-                   vport->fc_flag & FC_PT2PT &&
+                   test_bit(FC_PT2PT, &vport->fc_flag) &&
                    vport->rcv_flogi_cnt >= 1) {
                        rjt_err = LSRJT_LOGICAL_BSY;
                        rjt_exp = LSEXP_NOTHING_MORE;
@@ -10650,7 +10610,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
 
                phba->fc_stat.elsRcvPRLI++;
                if ((vport->port_state < LPFC_DISC_AUTH) &&
-                   (vport->fc_flag & FC_FABRIC)) {
+                   test_bit(FC_FABRIC, &vport->fc_flag)) {
                        rjt_err = LSRJT_UNABLE_TPC;
                        rjt_exp = LSEXP_NOTHING_MORE;
                        break;
@@ -10825,7 +10785,7 @@ lsrjt:
        return;
 
 dropit:
-       if (vport && !(vport->load_flag & FC_UNLOADING))
+       if (vport && !test_bit(FC_UNLOADING, &vport->load_flag))
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
                        "0111 Dropping received ELS cmd "
                        "Data: x%x x%x x%x x%x\n",
@@ -10979,16 +10939,13 @@ void
 lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport)
 {
        struct lpfc_nodelist *ndlp;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
        /*
         * If lpfc_delay_discovery parameter is set and the clean address
         * bit is cleared and fc fabric parameters changed, delay FC NPort
         * discovery.
         */
-       spin_lock_irq(shost->host_lock);
-       if (vport->fc_flag & FC_DISC_DELAYED) {
-               spin_unlock_irq(shost->host_lock);
+       if (test_bit(FC_DISC_DELAYED, &vport->fc_flag)) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
                                 "3334 Delay fc port discovery for %d secs\n",
                                 phba->fc_ratov);
@@ -10996,7 +10953,6 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport)
                        jiffies + msecs_to_jiffies(1000 * phba->fc_ratov));
                return;
        }
-       spin_unlock_irq(shost->host_lock);
 
        ndlp = lpfc_findnode_did(vport, NameServer_DID);
        if (!ndlp) {
@@ -11025,8 +10981,8 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport)
        }
 
        if ((phba->cfg_enable_SmartSAN ||
-            (phba->cfg_fdmi_on == LPFC_FDMI_SUPPORT)) &&
-            (vport->load_flag & FC_ALLOW_FDMI))
+            phba->cfg_fdmi_on == LPFC_FDMI_SUPPORT) &&
+           test_bit(FC_ALLOW_FDMI, &vport->load_flag))
                lpfc_start_fdmi(vport);
 }
 
@@ -11046,14 +11002,12 @@ static void
 lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
        struct lpfc_vport *vport = pmb->vport;
-       struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_nodelist *ndlp = pmb->ctx_ndlp;
        MAILBOX_t *mb = &pmb->u.mb;
        int rc;
 
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
-       spin_unlock_irq(shost->host_lock);
+       clear_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
 
        if (mb->mbxStatus) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@@ -11070,16 +11024,13 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                case 0x9602:    /* Link event since CLEAR_LA */
                        /* giving up on vport registration */
                        lpfc_vport_set_state(vport, FC_VPORT_FAILED);
-                       spin_lock_irq(shost->host_lock);
-                       vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
-                       spin_unlock_irq(shost->host_lock);
+                       clear_bit(FC_FABRIC, &vport->fc_flag);
+                       clear_bit(FC_PUBLIC_LOOP, &vport->fc_flag);
                        lpfc_can_disctmo(vport);
                        break;
                /* If reg_vpi fail with invalid VPI status, re-init VPI */
                case 0x20:
-                       spin_lock_irq(shost->host_lock);
-                       vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
-                       spin_unlock_irq(shost->host_lock);
+                       set_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
                        lpfc_init_vpi(phba, pmb, vport->vpi);
                        pmb->vport = vport;
                        pmb->mbox_cmpl = lpfc_init_vpi_cmpl;
@@ -11100,13 +11051,11 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                        if (phba->sli_rev == LPFC_SLI_REV4)
                                lpfc_sli4_unreg_all_rpis(vport);
                        lpfc_mbx_unreg_vpi(vport);
-                       spin_lock_irq(shost->host_lock);
-                       vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
-                       spin_unlock_irq(shost->host_lock);
+                       set_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
                        if (mb->mbxStatus == MBX_NOT_FINISHED)
                                break;
                        if ((vport->port_type == LPFC_PHYSICAL_PORT) &&
-                           !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) {
+                           !test_bit(FC_LOGO_RCVD_DID_CHNG, &vport->fc_flag)) {
                                if (phba->sli_rev == LPFC_SLI_REV4)
                                        lpfc_issue_init_vfi(vport);
                                else
@@ -11167,7 +11116,6 @@ void
 lpfc_register_new_vport(struct lpfc_hba *phba, struct lpfc_vport *vport,
                        struct lpfc_nodelist *ndlp)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        LPFC_MBOXQ_t *mbox;
 
        mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -11202,9 +11150,7 @@ lpfc_register_new_vport(struct lpfc_hba *phba, struct lpfc_vport *vport,
 
 mbox_err_exit:
        lpfc_vport_set_state(vport, FC_VPORT_FAILED);
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
-       spin_unlock_irq(shost->host_lock);
+       clear_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
        return;
 }
 
@@ -11319,7 +11265,6 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                    struct lpfc_iocbq *rspiocb)
 {
        struct lpfc_vport *vport = cmdiocb->vport;
-       struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
        struct lpfc_nodelist *ndlp = cmdiocb->ndlp;
        struct lpfc_nodelist *np;
        struct lpfc_nodelist *next_np;
@@ -11367,13 +11312,11 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 
        lpfc_check_nlp_post_devloss(vport, ndlp);
 
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag &= ~FC_VPORT_CVL_RCVD;
-       vport->fc_flag &= ~FC_VPORT_LOGO_RCVD;
-       vport->fc_flag |= FC_FABRIC;
+       clear_bit(FC_VPORT_CVL_RCVD, &vport->fc_flag);
+       clear_bit(FC_VPORT_LOGO_RCVD, &vport->fc_flag);
+       set_bit(FC_FABRIC, &vport->fc_flag);
        if (vport->phba->fc_topology == LPFC_TOPOLOGY_LOOP)
-               vport->fc_flag |=  FC_PUBLIC_LOOP;
-       spin_unlock_irq(shost->host_lock);
+               set_bit(FC_PUBLIC_LOOP, &vport->fc_flag);
 
        vport->fc_myDID = ulp_word4 & Mask_DID;
        lpfc_vport_set_state(vport, FC_VPORT_ACTIVE);
@@ -11390,7 +11333,7 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
        memcpy(&vport->fabric_nodename, &sp->nodeName,
                sizeof(struct lpfc_name));
        if (fabric_param_changed &&
-               !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) {
+               !test_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag)) {
                /* If our NportID changed, we need to ensure all
                 * remaining NPORTs get unreg_login'ed so we can
                 * issue unreg_vpi.
@@ -11411,15 +11354,13 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                        lpfc_sli4_unreg_all_rpis(vport);
 
                lpfc_mbx_unreg_vpi(vport);
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
+               set_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
                if (phba->sli_rev == LPFC_SLI_REV4)
-                       vport->fc_flag |= FC_VPORT_NEEDS_INIT_VPI;
+                       set_bit(FC_VPORT_NEEDS_INIT_VPI, &vport->fc_flag);
                else
-                       vport->fc_flag |= FC_LOGO_RCVD_DID_CHNG;
-               spin_unlock_irq(shost->host_lock);
+                       set_bit(FC_LOGO_RCVD_DID_CHNG, &vport->fc_flag);
        } else if ((phba->sli_rev == LPFC_SLI_REV4) &&
-               !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) {
+                  !test_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag)) {
                /*
                 * Driver needs to re-reg VPI in order for f/w
                 * to update the MAC address.
@@ -11429,9 +11370,9 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                goto out;
        }
 
-       if (vport->fc_flag & FC_VPORT_NEEDS_INIT_VPI)
+       if (test_bit(FC_VPORT_NEEDS_INIT_VPI, &vport->fc_flag))
                lpfc_issue_init_vpi(vport);
-       else if (vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)
+       else if (test_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag))
                lpfc_register_new_vport(phba, vport, ndlp);
        else
                lpfc_do_scr_ns_plogi(phba, vport);
@@ -11584,7 +11525,6 @@ lpfc_cmpl_els_npiv_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
        struct lpfc_vport *vport = cmdiocb->vport;
        IOCB_t *irsp;
        struct lpfc_nodelist *ndlp;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        u32 ulp_status, ulp_word4, did, tmo;
 
        ndlp = cmdiocb->ndlp;
@@ -11615,10 +11555,8 @@ lpfc_cmpl_els_npiv_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                         ndlp->fc4_xpt_flags);
 
        if (ulp_status == IOSTAT_SUCCESS) {
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag &= ~FC_NDISC_ACTIVE;
-               vport->fc_flag &= ~FC_FABRIC;
-               spin_unlock_irq(shost->host_lock);
+               clear_bit(FC_NDISC_ACTIVE, &vport->fc_flag);
+               clear_bit(FC_FABRIC, &vport->fc_flag);
                lpfc_can_disctmo(vport);
        }
 
@@ -12076,7 +12014,7 @@ lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *vport)
                         * node and the vport is unloading, the xri aborted wcqe
                         * likely isn't coming back.  Just release the sgl.
                         */
-                       if ((vport->load_flag & FC_UNLOADING) &&
+                       if (test_bit(FC_UNLOADING, &vport->load_flag) &&
                            ndlp->nlp_DID == Fabric_DID) {
                                list_del(&sglq_entry->list);
                                sglq_entry->state = SGL_FREED;
index f80bbc315f4caa57ea7f36ca3358c6cb68c01cda..a7a2309a629fafa16f20814f434e38e0a324b334 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -169,13 +169,13 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
 
        lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
                         "3181 dev_loss_callbk x%06x, rport x%px flg x%x "
-                        "load_flag x%x refcnt %u state %d xpt x%x\n",
+                        "load_flag x%lx refcnt %u state %d xpt x%x\n",
                         ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag,
                         vport->load_flag, kref_read(&ndlp->kref),
                         ndlp->nlp_state, ndlp->fc4_xpt_flags);
 
        /* Don't schedule a worker thread event if the vport is going down. */
-       if (vport->load_flag & FC_UNLOADING) {
+       if (test_bit(FC_UNLOADING, &vport->load_flag)) {
                spin_lock_irqsave(&ndlp->lock, iflags);
                ndlp->rport = NULL;
 
@@ -263,7 +263,7 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
        } else {
                lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
                                 "3188 worker thread is stopped %s x%06x, "
-                                " rport x%px flg x%x load_flag x%x refcnt "
+                                " rport x%px flg x%x load_flag x%lx refcnt "
                                 "%d\n", __func__, ndlp->nlp_DID,
                                 ndlp->rport, ndlp->nlp_flag,
                                 vport->load_flag, kref_read(&ndlp->kref));
@@ -911,7 +911,7 @@ lpfc_work_list_done(struct lpfc_hba *phba)
                        free_evt = 0;
                        break;
                case LPFC_EVT_RESET_HBA:
-                       if (!(phba->pport->load_flag & FC_UNLOADING))
+                       if (!test_bit(FC_UNLOADING, &phba->pport->load_flag))
                                lpfc_reset_hba(phba);
                        break;
                }
@@ -1149,7 +1149,6 @@ lpfc_workq_post_event(struct lpfc_hba *phba, void *arg1, void *arg2,
 void
 lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba  *phba = vport->phba;
        struct lpfc_nodelist *ndlp, *next_ndlp;
 
@@ -1180,9 +1179,7 @@ lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove)
                if (phba->sli_rev == LPFC_SLI_REV4)
                        lpfc_sli4_unreg_all_rpis(vport);
                lpfc_mbx_unreg_vpi(vport);
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
-               spin_unlock_irq(shost->host_lock);
+               set_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
        }
 }
 
@@ -1210,7 +1207,7 @@ void
 lpfc_linkdown_port(struct lpfc_vport *vport)
 {
        struct lpfc_hba *phba = vport->phba;
-       struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
        if (vport->cfg_enable_fc4_type != LPFC_ENABLE_NVME)
                fc_host_post_event(shost, fc_get_event_number(),
@@ -1223,9 +1220,7 @@ lpfc_linkdown_port(struct lpfc_vport *vport)
        lpfc_port_link_failure(vport);
 
        /* Stop delayed Nport discovery */
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag &= ~FC_DISC_DELAYED;
-       spin_unlock_irq(shost->host_lock);
+       clear_bit(FC_DISC_DELAYED, &vport->fc_flag);
        del_timer_sync(&vport->delayed_disc_tmo);
 
        if (phba->sli_rev == LPFC_SLI_REV4 &&
@@ -1240,7 +1235,7 @@ int
 lpfc_linkdown(struct lpfc_hba *phba)
 {
        struct lpfc_vport *vport = phba->pport;
-       struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_vport **vports;
        LPFC_MBOXQ_t          *mb;
        int i;
@@ -1273,9 +1268,7 @@ lpfc_linkdown(struct lpfc_hba *phba)
                        phba->sli4_hba.link_state.logical_speed =
                                                LPFC_LINK_SPEED_UNKNOWN;
                }
-               spin_lock_irq(shost->host_lock);
-               phba->pport->fc_flag &= ~FC_LBIT;
-               spin_unlock_irq(shost->host_lock);
+               clear_bit(FC_LBIT, &phba->pport->fc_flag);
        }
        vports = lpfc_create_vport_work_array(phba);
        if (vports != NULL) {
@@ -1313,7 +1306,7 @@ lpfc_linkdown(struct lpfc_hba *phba)
 
  skip_unreg_did:
        /* Setup myDID for link up if we are in pt2pt mode */
-       if (phba->pport->fc_flag & FC_PT2PT) {
+       if (test_bit(FC_PT2PT, &phba->pport->fc_flag)) {
                mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
                if (mb) {
                        lpfc_config_link(phba, mb);
@@ -1324,8 +1317,9 @@ lpfc_linkdown(struct lpfc_hba *phba)
                                mempool_free(mb, phba->mbox_mem_pool);
                        }
                }
+               clear_bit(FC_PT2PT, &phba->pport->fc_flag);
+               clear_bit(FC_PT2PT_PLOGI, &phba->pport->fc_flag);
                spin_lock_irq(shost->host_lock);
-               phba->pport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI);
                phba->pport->rcv_flogi_cnt = 0;
                spin_unlock_irq(shost->host_lock);
        }
@@ -1364,7 +1358,7 @@ lpfc_linkup_port(struct lpfc_vport *vport)
        struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba  *phba = vport->phba;
 
-       if ((vport->load_flag & FC_UNLOADING) != 0)
+       if (test_bit(FC_UNLOADING, &vport->load_flag))
                return;
 
        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
@@ -1376,19 +1370,22 @@ lpfc_linkup_port(struct lpfc_vport *vport)
                (vport != phba->pport))
                return;
 
-       if (vport->cfg_enable_fc4_type != LPFC_ENABLE_NVME)
-               fc_host_post_event(shost, fc_get_event_number(),
-                                  FCH_EVT_LINKUP, 0);
+       if (phba->defer_flogi_acc_flag) {
+               clear_bit(FC_ABORT_DISCOVERY, &vport->fc_flag);
+               clear_bit(FC_RSCN_MODE, &vport->fc_flag);
+               clear_bit(FC_NLP_MORE, &vport->fc_flag);
+               clear_bit(FC_RSCN_DISCOVERY, &vport->fc_flag);
+       } else {
+               clear_bit(FC_PT2PT, &vport->fc_flag);
+               clear_bit(FC_PT2PT_PLOGI, &vport->fc_flag);
+               clear_bit(FC_ABORT_DISCOVERY, &vport->fc_flag);
+               clear_bit(FC_RSCN_MODE, &vport->fc_flag);
+               clear_bit(FC_NLP_MORE, &vport->fc_flag);
+               clear_bit(FC_RSCN_DISCOVERY, &vport->fc_flag);
+       }
+       set_bit(FC_NDISC_ACTIVE, &vport->fc_flag);
 
        spin_lock_irq(shost->host_lock);
-       if (phba->defer_flogi_acc_flag)
-               vport->fc_flag &= ~(FC_ABORT_DISCOVERY | FC_RSCN_MODE |
-                                   FC_NLP_MORE | FC_RSCN_DISCOVERY);
-       else
-               vport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI |
-                                   FC_ABORT_DISCOVERY | FC_RSCN_MODE |
-                                   FC_NLP_MORE | FC_RSCN_DISCOVERY);
-       vport->fc_flag |= FC_NDISC_ACTIVE;
        vport->fc_ns_retry = 0;
        spin_unlock_irq(shost->host_lock);
        lpfc_setup_fdmi_mask(vport);
@@ -1439,7 +1436,6 @@ static void
 lpfc_mbx_cmpl_clear_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
        struct lpfc_vport *vport = pmb->vport;
-       struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
        struct lpfc_sli   *psli = &phba->sli;
        MAILBOX_t *mb = &pmb->u.mb;
        uint32_t control;
@@ -1478,9 +1474,7 @@ out:
                         "0225 Device Discovery completes\n");
        mempool_free(pmb, phba->mbox_mem_pool);
 
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag &= ~FC_ABORT_DISCOVERY;
-       spin_unlock_irq(shost->host_lock);
+       clear_bit(FC_ABORT_DISCOVERY, &vport->fc_flag);
 
        lpfc_can_disctmo(vport);
 
@@ -1517,8 +1511,8 @@ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                return;
 
        if (phba->fc_topology == LPFC_TOPOLOGY_LOOP &&
-           vport->fc_flag & FC_PUBLIC_LOOP &&
-           !(vport->fc_flag & FC_LBIT)) {
+           test_bit(FC_PUBLIC_LOOP, &vport->fc_flag) &&
+           !test_bit(FC_LBIT, &vport->fc_flag)) {
                        /* Need to wait for FAN - use discovery timer
                         * for timeout.  port_state is identically
                         * LPFC_LOCAL_CFG_LINK while waiting for FAN
@@ -1560,7 +1554,7 @@ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                        lpfc_initial_flogi(vport);
                }
        } else {
-               if (vport->fc_flag & FC_PT2PT)
+               if (test_bit(FC_PT2PT, &vport->fc_flag))
                        lpfc_disc_start(vport);
        }
        return;
@@ -1884,7 +1878,7 @@ lpfc_register_fcf(struct lpfc_hba *phba)
                phba->fcf.fcf_flag |= (FCF_SCAN_DONE | FCF_IN_USE);
                phba->hba_flag &= ~FCF_TS_INPROG;
                if (phba->pport->port_state != LPFC_FLOGI &&
-                   phba->pport->fc_flag & FC_FABRIC) {
+                   test_bit(FC_FABRIC, &phba->pport->fc_flag)) {
                        phba->hba_flag |= FCF_RR_INPROG;
                        spin_unlock_irq(&phba->hbalock);
                        lpfc_initial_flogi(phba->pport);
@@ -2742,7 +2736,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
                                lpfc_printf_log(phba, KERN_INFO, LOG_FIP,
                                                "2836 New FCF matches in-use "
                                                "FCF (x%x), port_state:x%x, "
-                                               "fc_flag:x%x\n",
+                                               "fc_flag:x%lx\n",
                                                phba->fcf.current_rec.fcf_indx,
                                                phba->pport->port_state,
                                                phba->pport->fc_flag);
@@ -3218,7 +3212,6 @@ lpfc_init_vpi_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
 {
        struct lpfc_vport *vport = mboxq->vport;
        struct lpfc_nodelist *ndlp;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
        if (mboxq->u.mb.mbxStatus) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@@ -3228,9 +3221,7 @@ lpfc_init_vpi_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
                lpfc_vport_set_state(vport, FC_VPORT_FAILED);
                return;
        }
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag &= ~FC_VPORT_NEEDS_INIT_VPI;
-       spin_unlock_irq(shost->host_lock);
+       clear_bit(FC_VPORT_NEEDS_INIT_VPI, &vport->fc_flag);
 
        /* If this port is physical port or FDISC is done, do reg_vpi */
        if ((phba->pport == vport) || (vport->port_state == LPFC_FDISC)) {
@@ -3328,7 +3319,8 @@ lpfc_start_fdiscs(struct lpfc_hba *phba)
                                                     FC_VPORT_LINKDOWN);
                                continue;
                        }
-                       if (vports[i]->fc_flag & FC_VPORT_NEEDS_INIT_VPI) {
+                       if (test_bit(FC_VPORT_NEEDS_INIT_VPI,
+                                    &vports[i]->fc_flag)) {
                                lpfc_issue_init_vpi(vports[i]);
                                continue;
                        }
@@ -3380,17 +3372,17 @@ lpfc_mbx_cmpl_reg_vfi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
         * Unless this was a VFI update and we are in PT2PT mode, in
         * which case we should drop through to set the port state to ready.
         */
-       if (vport->fc_flag & FC_VFI_REGISTERED)
+       if (test_bit(FC_VFI_REGISTERED, &vport->fc_flag))
                if (!(phba->sli_rev == LPFC_SLI_REV4 &&
-                     vport->fc_flag & FC_PT2PT))
+                     test_bit(FC_PT2PT, &vport->fc_flag)))
                        goto out_free_mem;
 
        /* The VPI is implicitly registered when the VFI is registered */
+       set_bit(FC_VFI_REGISTERED, &vport->fc_flag);
+       clear_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
+       clear_bit(FC_VPORT_NEEDS_INIT_VPI, &vport->fc_flag);
        spin_lock_irq(shost->host_lock);
        vport->vpi_state |= LPFC_VPI_REGISTERED;
-       vport->fc_flag |= FC_VFI_REGISTERED;
-       vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
-       vport->fc_flag &= ~FC_VPORT_NEEDS_INIT_VPI;
        spin_unlock_irq(shost->host_lock);
 
        /* In case SLI4 FC loopback test, we are ready */
@@ -3401,8 +3393,8 @@ lpfc_mbx_cmpl_reg_vfi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
        }
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
-                        "3313 cmpl reg vfi  port_state:%x fc_flag:%x myDid:%x "
-                        "alpacnt:%d LinkState:%x topology:%x\n",
+                        "3313 cmpl reg vfi  port_state:%x fc_flag:%lx "
+                        "myDid:%x alpacnt:%d LinkState:%x topology:%x\n",
                         vport->port_state, vport->fc_flag, vport->fc_myDID,
                         vport->phba->alpa_map[0],
                         phba->link_state, phba->fc_topology);
@@ -3412,14 +3404,14 @@ lpfc_mbx_cmpl_reg_vfi(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
                 * For private loop or for NPort pt2pt,
                 * just start discovery and we are done.
                 */
-               if ((vport->fc_flag & FC_PT2PT) ||
-                   ((phba->fc_topology == LPFC_TOPOLOGY_LOOP) &&
-                   !(vport->fc_flag & FC_PUBLIC_LOOP))) {
+               if (test_bit(FC_PT2PT, &vport->fc_flag) ||
+                   (phba->fc_topology == LPFC_TOPOLOGY_LOOP &&
+                   !test_bit(FC_PUBLIC_LOOP, &vport->fc_flag))) {
 
                        /* Use loop map to make discovery list */
                        lpfc_disc_list_loopmap(vport);
                        /* Start discovery */
-                       if (vport->fc_flag & FC_PT2PT)
+                       if (test_bit(FC_PT2PT, &vport->fc_flag))
                                vport->port_state = LPFC_VPORT_READY;
                        else
                                lpfc_disc_start(vport);
@@ -3496,11 +3488,9 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
 {
        struct lpfc_vport *vport = phba->pport;
        LPFC_MBOXQ_t *sparam_mbox, *cfglink_mbox = NULL;
-       struct Scsi_Host *shost;
        int i;
        int rc;
        struct fcf_record *fcf_record;
-       uint32_t fc_flags = 0;
        unsigned long iflags;
 
        spin_lock_irqsave(&phba->hbalock, iflags);
@@ -3537,7 +3527,6 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
        phba->fc_topology = bf_get(lpfc_mbx_read_top_topology, la);
        phba->link_flag &= ~(LS_NPIV_FAB_SUPPORTED | LS_CT_VEN_RPA);
 
-       shost = lpfc_shost_from_vport(vport);
        if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
                phba->sli3_options &= ~LPFC_SLI3_NPIV_ENABLED;
 
@@ -3550,7 +3539,7 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
                                "topology\n");
                                /* Get Loop Map information */
                if (bf_get(lpfc_mbx_read_top_il, la))
-                       fc_flags |= FC_LBIT;
+                       set_bit(FC_LBIT, &vport->fc_flag);
 
                vport->fc_myDID = bf_get(lpfc_mbx_read_top_alpa_granted, la);
                i = la->lilpBde64.tus.f.bdeSize;
@@ -3599,16 +3588,10 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
                                phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED;
                }
                vport->fc_myDID = phba->fc_pref_DID;
-               fc_flags |= FC_LBIT;
+               set_bit(FC_LBIT, &vport->fc_flag);
        }
        spin_unlock_irqrestore(&phba->hbalock, iflags);
 
-       if (fc_flags) {
-               spin_lock_irqsave(shost->host_lock, iflags);
-               vport->fc_flag |= fc_flags;
-               spin_unlock_irqrestore(shost->host_lock, iflags);
-       }
-
        lpfc_linkup(phba);
        sparam_mbox = NULL;
 
@@ -3751,13 +3734,11 @@ void
 lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
        struct lpfc_vport *vport = pmb->vport;
-       struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
        struct lpfc_mbx_read_top *la;
        struct lpfc_sli_ring *pring;
        MAILBOX_t *mb = &pmb->u.mb;
        struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *)(pmb->ctx_buf);
        uint8_t attn_type;
-       unsigned long iflags;
 
        /* Unblock ELS traffic */
        pring = lpfc_phba_elsring(phba);
@@ -3779,12 +3760,10 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 
        memcpy(&phba->alpa_map[0], mp->virt, 128);
 
-       spin_lock_irqsave(shost->host_lock, iflags);
        if (bf_get(lpfc_mbx_read_top_pb, la))
-               vport->fc_flag |= FC_BYPASSED_MODE;
+               set_bit(FC_BYPASSED_MODE, &vport->fc_flag);
        else
-               vport->fc_flag &= ~FC_BYPASSED_MODE;
-       spin_unlock_irqrestore(shost->host_lock, iflags);
+               clear_bit(FC_BYPASSED_MODE, &vport->fc_flag);
 
        if (phba->fc_eventTag <= la->eventTag) {
                phba->fc_stat.LinkMultiEvent++;
@@ -3832,20 +3811,20 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                        lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
                                "1308 Link Down Event in loop back mode "
                                "x%x received "
-                               "Data: x%x x%x x%x\n",
+                               "Data: x%x x%x x%lx\n",
                                la->eventTag, phba->fc_eventTag,
                                phba->pport->port_state, vport->fc_flag);
                else if (attn_type == LPFC_ATT_UNEXP_WWPN)
                        lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
                                "1313 Link Down Unexpected FA WWPN Event x%x "
-                               "received Data: x%x x%x x%x x%x\n",
+                               "received Data: x%x x%x x%lx x%x\n",
                                la->eventTag, phba->fc_eventTag,
                                phba->pport->port_state, vport->fc_flag,
                                bf_get(lpfc_mbx_read_top_fa, la));
                else
                        lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
                                "1305 Link Down Event x%x received "
-                               "Data: x%x x%x x%x x%x\n",
+                               "Data: x%x x%x x%lx x%x\n",
                                la->eventTag, phba->fc_eventTag,
                                phba->pport->port_state, vport->fc_flag,
                                bf_get(lpfc_mbx_read_top_fa, la));
@@ -3945,13 +3924,14 @@ lpfc_mbx_cmpl_unreg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
                        "2798 Unreg_vpi failed vpi 0x%x, mb status = 0x%x\n",
                        vport->vpi, mb->mbxStatus);
-               if (!(phba->pport->load_flag & FC_UNLOADING))
+               if (!test_bit(FC_UNLOADING, &phba->pport->load_flag))
                        lpfc_workq_post_event(phba, NULL, NULL,
                                LPFC_EVT_RESET_HBA);
        }
+
+       set_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
        spin_lock_irq(shost->host_lock);
        vport->vpi_state &= ~LPFC_VPI_REGISTERED;
-       vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
        spin_unlock_irq(shost->host_lock);
        mempool_free(pmb, phba->mbox_mem_pool);
        lpfc_cleanup_vports_rrqs(vport, NULL);
@@ -3959,7 +3939,7 @@ lpfc_mbx_cmpl_unreg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
         * This shost reference might have been taken at the beginning of
         * lpfc_vport_delete()
         */
-       if ((vport->load_flag & FC_UNLOADING) && (vport != phba->pport))
+       if (test_bit(FC_UNLOADING, &vport->load_flag) && vport != phba->pport)
                scsi_host_put(shost);
 }
 
@@ -4002,9 +3982,8 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                                 "0912 cmpl_reg_vpi, mb status = 0x%x\n",
                                 mb->mbxStatus);
                lpfc_vport_set_state(vport, FC_VPORT_FAILED);
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
-               spin_unlock_irq(shost->host_lock);
+               clear_bit(FC_FABRIC, &vport->fc_flag);
+               clear_bit(FC_PUBLIC_LOOP, &vport->fc_flag);
                vport->fc_myDID = 0;
 
                if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) ||
@@ -4017,19 +3996,17 @@ lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                goto out;
        }
 
+       clear_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
        spin_lock_irq(shost->host_lock);
        vport->vpi_state |= LPFC_VPI_REGISTERED;
-       vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
        spin_unlock_irq(shost->host_lock);
        vport->num_disc_nodes = 0;
        /* go thru NPR list and issue ELS PLOGIs */
-       if (vport->fc_npr_cnt)
+       if (atomic_read(&vport->fc_npr_cnt))
                lpfc_els_disc_plogi(vport);
 
        if (!vport->num_disc_nodes) {
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag &= ~FC_NDISC_ACTIVE;
-               spin_unlock_irq(shost->host_lock);
+               clear_bit(FC_NDISC_ACTIVE, &vport->fc_flag);
                lpfc_can_disctmo(vport);
        }
        vport->port_state = LPFC_VPORT_READY;
@@ -4193,7 +4170,6 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
        struct lpfc_vport *vport = pmb->vport;
        MAILBOX_t *mb = &pmb->u.mb;
        struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *)pmb->ctx_ndlp;
-       struct Scsi_Host *shost;
 
        pmb->ctx_ndlp = NULL;
 
@@ -4232,14 +4208,8 @@ lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
        if (vport->port_state == LPFC_FABRIC_CFG_LINK) {
                /* When the physical port receives a LOGO, do not start
                 * vport discovery */
-               if (!(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG))
+               if (!test_and_clear_bit(FC_LOGO_RCVD_DID_CHNG, &vport->fc_flag))
                        lpfc_start_fdiscs(phba);
-               else {
-                       shost = lpfc_shost_from_vport(vport);
-                       spin_lock_irq(shost->host_lock);
-                       vport->fc_flag &= ~FC_LOGO_RCVD_DID_CHNG ;
-                       spin_unlock_irq(shost->host_lock);
-               }
                lpfc_do_scr_ns_plogi(phba, vport);
        }
 
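
The FC_LOGO_RCVD_DID_CHNG hunk above also folds a test-then-clear sequence
into one step: test_and_clear_bit() atomically returns the old bit value and
clears it, which is what lets the locked else-branch disappear. A sketch of
the equivalence, with hypothetical names throughout:

    #include <linux/bitops.h>

    enum { MY_LOGO_RCVD };                  /* hypothetical bit number */

    static unsigned long my_flags;

    static void my_start_fdiscs(void) { }   /* stand-in helpers */
    static void my_do_ns_plogi(void) { }

    static void my_fabric_cfg_done(void)
    {
            /* If the bit was set, it is simply cleared; if it was not,
             * FDISCs are started.  Either way, one atomic op replaces
             * the old lock/clear/unlock branch.
             */
            if (!test_and_clear_bit(MY_LOGO_RCVD, &my_flags))
                    my_start_fdiscs();
            my_do_ns_plogi();
    }
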
@@ -4520,7 +4490,7 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                              ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type);
 
        /* Don't add the remote port if unloading. */
-       if (vport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &vport->load_flag))
                return;
 
        ndlp->rport = rport = fc_remote_port_add(shost, 0, &rport_ids);
@@ -4600,40 +4570,35 @@ lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp)
 static void
 lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
-       unsigned long iflags;
-
-       spin_lock_irqsave(shost->host_lock, iflags);
        switch (state) {
        case NLP_STE_UNUSED_NODE:
-               vport->fc_unused_cnt += count;
+               atomic_add(count, &vport->fc_unused_cnt);
                break;
        case NLP_STE_PLOGI_ISSUE:
-               vport->fc_plogi_cnt += count;
+               atomic_add(count, &vport->fc_plogi_cnt);
                break;
        case NLP_STE_ADISC_ISSUE:
-               vport->fc_adisc_cnt += count;
+               atomic_add(count, &vport->fc_adisc_cnt);
                break;
        case NLP_STE_REG_LOGIN_ISSUE:
-               vport->fc_reglogin_cnt += count;
+               atomic_add(count, &vport->fc_reglogin_cnt);
                break;
        case NLP_STE_PRLI_ISSUE:
-               vport->fc_prli_cnt += count;
+               atomic_add(count, &vport->fc_prli_cnt);
                break;
        case NLP_STE_UNMAPPED_NODE:
-               vport->fc_unmap_cnt += count;
+               atomic_add(count, &vport->fc_unmap_cnt);
                break;
        case NLP_STE_MAPPED_NODE:
-               vport->fc_map_cnt += count;
+               atomic_add(count, &vport->fc_map_cnt);
                break;
        case NLP_STE_NPR_NODE:
-               if (vport->fc_npr_cnt == 0 && count == -1)
-                       vport->fc_npr_cnt = 0;
+               if (!atomic_read(&vport->fc_npr_cnt) && count == -1)
+                       atomic_set(&vport->fc_npr_cnt, 0);
                else
-                       vport->fc_npr_cnt += count;
+                       atomic_add(count, &vport->fc_npr_cnt);
                break;
        }
-       spin_unlock_irqrestore(shost->host_lock, iflags);
 }
 
 /* Register a node with backend if not already done */
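
lpfc_nlp_counters() sheds the host_lock by turning each per-state counter
into an atomic_t: atomic_add() (with a negative count for decrements) keeps
each counter individually consistent, and readers switch to atomic_read().
What is given up is atomicity across several counters at once, which this
function never needed since it touches exactly one counter per call. A sketch
of the pattern, with a hypothetical counter name:

    #include <linux/atomic.h>

    static atomic_t my_plogi_cnt = ATOMIC_INIT(0);

    static void my_count_plogi(int count)
    {
            /* count is +1 on state entry, -1 on state exit */
            atomic_add(count, &my_plogi_cnt);
    }

    static bool my_plogis_outstanding(void)
    {
            /* lockless read, as in the converted log statements */
            return atomic_read(&my_plogi_cnt) != 0;
    }
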
@@ -4865,10 +4830,10 @@ void
 lpfc_nlp_set_state(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                   int state)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        int  old_state = ndlp->nlp_state;
        int node_dropped = ndlp->nlp_flag & NLP_DROPPED;
        char name1[16], name2[16];
+       unsigned long iflags;
 
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
                         "0904 NPort state transition x%06x, %s -> %s\n",
@@ -4895,9 +4860,9 @@ lpfc_nlp_set_state(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        }
 
        if (list_empty(&ndlp->nlp_listp)) {
-               spin_lock_irq(shost->host_lock);
+               spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
                list_add_tail(&ndlp->nlp_listp, &vport->fc_nodes);
-               spin_unlock_irq(shost->host_lock);
+               spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
        } else if (old_state)
                lpfc_nlp_counters(vport, old_state, -1);
 
@@ -4909,26 +4874,26 @@ lpfc_nlp_set_state(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 void
 lpfc_enqueue_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+       unsigned long iflags;
 
        if (list_empty(&ndlp->nlp_listp)) {
-               spin_lock_irq(shost->host_lock);
+               spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
                list_add_tail(&ndlp->nlp_listp, &vport->fc_nodes);
-               spin_unlock_irq(shost->host_lock);
+               spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
        }
 }
 
 void
 lpfc_dequeue_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+       unsigned long iflags;
 
        lpfc_cancel_retry_delay_tmo(vport, ndlp);
        if (ndlp->nlp_state && !list_empty(&ndlp->nlp_listp))
                lpfc_nlp_counters(vport, ndlp->nlp_state, -1);
-       spin_lock_irq(shost->host_lock);
+       spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
        list_del_init(&ndlp->nlp_listp);
-       spin_unlock_irq(shost->host_lock);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
        lpfc_nlp_state_cleanup(vport, ndlp, ndlp->nlp_state,
                                NLP_STE_UNUSED_NODE);
 }
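
lpfc_nlp_set_state(), lpfc_enqueue_node() and lpfc_dequeue_node() stop
borrowing the Scsi_Host lock for fc_nodes membership and take a vport-private
fc_nodes_list_lock instead, in its irqsave form, suggesting callers may arrive
with interrupts already disabled or from non-process context. The shape, with
hypothetical structures:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct my_node {
            struct list_head listp;
    };

    struct my_vport {
            spinlock_t nodes_list_lock;     /* guards only the list below */
            struct list_head nodes;
    };

    static void my_enqueue_node(struct my_vport *vp, struct my_node *np)
    {
            unsigned long iflags;

            spin_lock_irqsave(&vp->nodes_list_lock, iflags);
            if (list_empty(&np->listp))
                    list_add_tail(&np->listp, &vp->nodes);
            spin_unlock_irqrestore(&vp->nodes_list_lock, iflags);
    }
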
@@ -5003,7 +4968,6 @@ lpfc_drop_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 void
 lpfc_set_disctmo(struct lpfc_vport *vport)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba  *phba = vport->phba;
        uint32_t tmo;
 
@@ -5025,17 +4989,16 @@ lpfc_set_disctmo(struct lpfc_vport *vport)
        }
 
        mod_timer(&vport->fc_disctmo, jiffies + msecs_to_jiffies(1000 * tmo));
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag |= FC_DISC_TMO;
-       spin_unlock_irq(shost->host_lock);
+       set_bit(FC_DISC_TMO, &vport->fc_flag);
 
        /* Start Discovery Timer state <hba_state> */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                         "0247 Start Discovery Timer state x%x "
                         "Data: x%x x%lx x%x x%x\n",
                         vport->port_state, tmo,
-                        (unsigned long)&vport->fc_disctmo, vport->fc_plogi_cnt,
-                        vport->fc_adisc_cnt);
+                        (unsigned long)&vport->fc_disctmo,
+                        atomic_read(&vport->fc_plogi_cnt),
+                        atomic_read(&vport->fc_adisc_cnt));
 
        return;
 }
@@ -5046,7 +5009,6 @@ lpfc_set_disctmo(struct lpfc_vport *vport)
 int
 lpfc_can_disctmo(struct lpfc_vport *vport)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        unsigned long iflags;
 
        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
@@ -5054,11 +5016,9 @@ lpfc_can_disctmo(struct lpfc_vport *vport)
                vport->port_state, vport->fc_ns_retry, vport->fc_flag);
 
        /* Turn off discovery timer if its running */
-       if (vport->fc_flag & FC_DISC_TMO ||
+       if (test_bit(FC_DISC_TMO, &vport->fc_flag) ||
            timer_pending(&vport->fc_disctmo)) {
-               spin_lock_irqsave(shost->host_lock, iflags);
-               vport->fc_flag &= ~FC_DISC_TMO;
-               spin_unlock_irqrestore(shost->host_lock, iflags);
+               clear_bit(FC_DISC_TMO, &vport->fc_flag);
                del_timer_sync(&vport->fc_disctmo);
                spin_lock_irqsave(&vport->work_port_lock, iflags);
                vport->work_port_events &= ~WORKER_DISC_TMO;
@@ -5068,9 +5028,10 @@ lpfc_can_disctmo(struct lpfc_vport *vport)
        /* Cancel Discovery Timer state <hba_state> */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                         "0248 Cancel Discovery Timer state x%x "
-                        "Data: x%x x%x x%x\n",
+                        "Data: x%lx x%x x%x\n",
                         vport->port_state, vport->fc_flag,
-                        vport->fc_plogi_cnt, vport->fc_adisc_cnt);
+                        atomic_read(&vport->fc_plogi_cnt),
+                        atomic_read(&vport->fc_adisc_cnt));
        return 0;
 }
 
@@ -5274,13 +5235,13 @@ lpfc_set_unreg_login_mbx_cmpl(struct lpfc_hba *phba, struct lpfc_vport *vport,
                mbox->mbox_cmpl = lpfc_nlp_logo_unreg;
 
        } else if (phba->sli_rev == LPFC_SLI_REV4 &&
-                  (!(vport->load_flag & FC_UNLOADING)) &&
+                  !test_bit(FC_UNLOADING, &vport->load_flag) &&
                    (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >=
                                      LPFC_SLI_INTF_IF_TYPE_2) &&
                    (kref_read(&ndlp->kref) > 0)) {
                mbox->mbox_cmpl = lpfc_sli4_unreg_rpi_cmpl_clr;
        } else {
-               if (vport->load_flag & FC_UNLOADING) {
+               if (test_bit(FC_UNLOADING, &vport->load_flag)) {
                        if (phba->sli_rev == LPFC_SLI_REV4) {
                                spin_lock_irqsave(&ndlp->lock, iflags);
                                ndlp->nlp_flag |= NLP_RELEASE_RPI;
@@ -5356,7 +5317,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                                acc_plogi = 0;
                        if (((ndlp->nlp_DID & Fabric_DID_MASK) !=
                            Fabric_DID_MASK) &&
-                           (!(vport->fc_flag & FC_OFFLINE_MODE)))
+                           (!test_bit(FC_OFFLINE_MODE, &vport->fc_flag)))
                                ndlp->nlp_flag |= NLP_UNREG_INP;
 
                        lpfc_printf_vlog(vport, KERN_INFO,
@@ -5388,7 +5349,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                         * will issue a LOGO here and keep the rpi alive if
                         * not unloading.
                         */
-                       if (!(vport->load_flag & FC_UNLOADING)) {
+                       if (!test_bit(FC_UNLOADING, &vport->load_flag)) {
                                ndlp->nlp_flag &= ~NLP_UNREG_INP;
                                lpfc_issue_els_logo(vport, ndlp, 0);
                                ndlp->nlp_prev_state = ndlp->nlp_state;
@@ -5424,8 +5385,8 @@ lpfc_unreg_hba_rpis(struct lpfc_hba *phba)
 {
        struct lpfc_vport **vports;
        struct lpfc_nodelist *ndlp;
-       struct Scsi_Host *shost;
        int i;
+       unsigned long iflags;
 
        vports = lpfc_create_vport_work_array(phba);
        if (!vports) {
@@ -5434,17 +5395,18 @@ lpfc_unreg_hba_rpis(struct lpfc_hba *phba)
                return;
        }
        for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
-               shost = lpfc_shost_from_vport(vports[i]);
-               spin_lock_irq(shost->host_lock);
+               spin_lock_irqsave(&vports[i]->fc_nodes_list_lock, iflags);
                list_for_each_entry(ndlp, &vports[i]->fc_nodes, nlp_listp) {
                        if (ndlp->nlp_flag & NLP_RPI_REGISTERED) {
                                /* The mempool_alloc might sleep */
-                               spin_unlock_irq(shost->host_lock);
+                               spin_unlock_irqrestore(&vports[i]->fc_nodes_list_lock,
+                                                      iflags);
                                lpfc_unreg_rpi(vports[i], ndlp);
-                               spin_lock_irq(shost->host_lock);
+                               spin_lock_irqsave(&vports[i]->fc_nodes_list_lock,
+                                                 iflags);
                        }
                }
-               spin_unlock_irq(shost->host_lock);
+               spin_unlock_irqrestore(&vports[i]->fc_nodes_list_lock, iflags);
        }
        lpfc_destroy_vport_work_array(phba, vports);
 }
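
lpfc_unreg_hba_rpis() keeps the old walk-a-locked-list shape under the new
lock: lpfc_unreg_rpi() can sleep in mempool_alloc(), so the list lock is
dropped around the call and retaken. Dropping a lock inside
list_for_each_entry() is only safe when the current entry cannot be freed or
unlinked in the window; here that presumably rests on the driver's node
reference counting. A sketch of the idiom, reusing the hypothetical
structures above plus an assumed rpi_registered field and my_unreg_rpi()
helper:

    static void my_unreg_all_rpis(struct my_vport *vp)
    {
            struct my_node *np;
            unsigned long iflags;

            spin_lock_irqsave(&vp->nodes_list_lock, iflags);
            list_for_each_entry(np, &vp->nodes, listp) {
                    if (!np->rpi_registered)
                            continue;
                    /* callee may sleep; drop the lock around it */
                    spin_unlock_irqrestore(&vp->nodes_list_lock, iflags);
                    my_unreg_rpi(vp, np);
                    spin_lock_irqsave(&vp->nodes_list_lock, iflags);
            }
            spin_unlock_irqrestore(&vp->nodes_list_lock, iflags);
    }
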
@@ -5686,12 +5648,11 @@ lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did)
 struct lpfc_nodelist *
 lpfc_findnode_mapped(struct lpfc_vport *vport)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_nodelist *ndlp;
        uint32_t data1;
        unsigned long iflags;
 
-       spin_lock_irqsave(shost->host_lock, iflags);
+       spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
 
        list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
                if (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE ||
@@ -5700,7 +5661,8 @@ lpfc_findnode_mapped(struct lpfc_vport *vport)
                                 ((uint32_t)ndlp->nlp_xri << 16) |
                                 ((uint32_t)ndlp->nlp_type << 8) |
                                 ((uint32_t)ndlp->nlp_rpi & 0xff));
-                       spin_unlock_irqrestore(shost->host_lock, iflags);
+                       spin_unlock_irqrestore(&vport->fc_nodes_list_lock,
+                                              iflags);
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE_VERBOSE,
                                         "2025 FIND node DID MAPPED "
                                         "Data: x%px x%x x%x x%x x%px\n",
@@ -5710,7 +5672,7 @@ lpfc_findnode_mapped(struct lpfc_vport *vport)
                        return ndlp;
                }
        }
-       spin_unlock_irqrestore(shost->host_lock, iflags);
+       spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
 
        /* FIND node did <did> NOT FOUND */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE,
@@ -5727,7 +5689,7 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
        if (!ndlp) {
                if (vport->phba->nvmet_support)
                        return NULL;
-               if ((vport->fc_flag & FC_RSCN_MODE) != 0 &&
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag) &&
                    lpfc_rscn_payload_check(vport, did) == 0)
                        return NULL;
                ndlp = lpfc_nlp_init(vport, did);
@@ -5737,7 +5699,7 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
 
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "6453 Setup New Node 2B_DISC x%x "
-                                "Data:x%x x%x x%x\n",
+                                "Data:x%x x%x x%lx\n",
                                 ndlp->nlp_DID, ndlp->nlp_flag,
                                 ndlp->nlp_state, vport->fc_flag);
 
@@ -5751,8 +5713,8 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
         * The goal is to allow the target to reset its state and clear
         * pending IO in preparation for the initiator to recover.
         */
-       if ((vport->fc_flag & FC_RSCN_MODE) &&
-           !(vport->fc_flag & FC_NDISC_ACTIVE)) {
+       if (test_bit(FC_RSCN_MODE, &vport->fc_flag) &&
+           !test_bit(FC_NDISC_ACTIVE, &vport->fc_flag)) {
                if (lpfc_rscn_payload_check(vport, did)) {
 
                        /* Since this node is marked for discovery,
@@ -5762,7 +5724,7 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
 
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "6455 Setup RSCN Node 2B_DISC x%x "
-                                        "Data:x%x x%x x%x\n",
+                                        "Data:x%x x%x x%lx\n",
                                         ndlp->nlp_DID, ndlp->nlp_flag,
                                         ndlp->nlp_state, vport->fc_flag);
 
@@ -5774,14 +5736,6 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
                        if (vport->phba->nvmet_support)
                                return ndlp;
 
-                       /* If we've already received a PLOGI from this NPort
-                        * we don't need to try to discover it again.
-                        */
-                       if (ndlp->nlp_flag & NLP_RCV_PLOGI &&
-                           !(ndlp->nlp_type &
-                            (NLP_FCP_TARGET | NLP_NVME_TARGET)))
-                               return NULL;
-
                        if (ndlp->nlp_state > NLP_STE_UNUSED_NODE &&
                            ndlp->nlp_state < NLP_STE_PRLI_ISSUE) {
                                lpfc_disc_state_machine(vport, ndlp, NULL,
@@ -5794,7 +5748,7 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
                } else {
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                         "6456 Skip Setup RSCN Node x%x "
-                                        "Data:x%x x%x x%x\n",
+                                        "Data:x%x x%x x%lx\n",
                                         ndlp->nlp_DID, ndlp->nlp_flag,
                                         ndlp->nlp_state, vport->fc_flag);
                        ndlp = NULL;
@@ -5802,7 +5756,7 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
        } else {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                                 "6457 Setup Active Node 2B_DISC x%x "
-                                "Data:x%x x%x x%x\n",
+                                "Data:x%x x%x x%lx\n",
                                 ndlp->nlp_DID, ndlp->nlp_flag,
                                 ndlp->nlp_state, vport->fc_flag);
 
@@ -5930,7 +5884,6 @@ lpfc_issue_reg_vpi(struct lpfc_hba *phba, struct lpfc_vport *vport)
 void
 lpfc_disc_start(struct lpfc_vport *vport)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba  *phba = vport->phba;
        uint32_t num_sent;
        uint32_t clear_la_pending;
@@ -5958,9 +5911,11 @@ lpfc_disc_start(struct lpfc_vport *vport)
        /* Start Discovery state <hba_state> */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
                         "0202 Start Discovery port state x%x "
-                        "flg x%x Data: x%x x%x x%x\n",
-                        vport->port_state, vport->fc_flag, vport->fc_plogi_cnt,
-                        vport->fc_adisc_cnt, vport->fc_npr_cnt);
+                        "flg x%lx Data: x%x x%x x%x\n",
+                        vport->port_state, vport->fc_flag,
+                        atomic_read(&vport->fc_plogi_cnt),
+                        atomic_read(&vport->fc_adisc_cnt),
+                        atomic_read(&vport->fc_npr_cnt));
 
        /* First do ADISCs - if any */
        num_sent = lpfc_els_disc_adisc(vport);
@@ -5970,8 +5925,8 @@ lpfc_disc_start(struct lpfc_vport *vport)
 
        /* Register the VPI for SLI3, NPIV only. */
        if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
-           !(vport->fc_flag & FC_PT2PT) &&
-           !(vport->fc_flag & FC_RSCN_MODE) &&
+           !test_bit(FC_PT2PT, &vport->fc_flag) &&
+           !test_bit(FC_RSCN_MODE, &vport->fc_flag) &&
            (phba->sli_rev < LPFC_SLI_REV4)) {
                lpfc_issue_clear_la(phba, vport);
                lpfc_issue_reg_vpi(phba, vport);
@@ -5986,16 +5941,14 @@ lpfc_disc_start(struct lpfc_vport *vport)
                /* If we get here, there is nothing to ADISC */
                lpfc_issue_clear_la(phba, vport);
 
-               if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) {
+               if (!test_bit(FC_ABORT_DISCOVERY, &vport->fc_flag)) {
                        vport->num_disc_nodes = 0;
                        /* go thru NPR nodes and issue ELS PLOGIs */
-                       if (vport->fc_npr_cnt)
+                       if (atomic_read(&vport->fc_npr_cnt))
                                lpfc_els_disc_plogi(vport);
 
                        if (!vport->num_disc_nodes) {
-                               spin_lock_irq(shost->host_lock);
-                               vport->fc_flag &= ~FC_NDISC_ACTIVE;
-                               spin_unlock_irq(shost->host_lock);
+                               clear_bit(FC_NDISC_ACTIVE, &vport->fc_flag);
                                lpfc_can_disctmo(vport);
                        }
                }
@@ -6007,18 +5960,17 @@ lpfc_disc_start(struct lpfc_vport *vport)
                if (num_sent)
                        return;
 
-               if (vport->fc_flag & FC_RSCN_MODE) {
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag)) {
                        /* Check to see if more RSCNs came in while we
                         * were processing this one.
                         */
-                       if ((vport->fc_rscn_id_cnt == 0) &&
-                           (!(vport->fc_flag & FC_RSCN_DISCOVERY))) {
-                               spin_lock_irq(shost->host_lock);
-                               vport->fc_flag &= ~FC_RSCN_MODE;
-                               spin_unlock_irq(shost->host_lock);
+                       if (vport->fc_rscn_id_cnt == 0 &&
+                           !test_bit(FC_RSCN_DISCOVERY, &vport->fc_flag)) {
+                               clear_bit(FC_RSCN_MODE, &vport->fc_flag);
                                lpfc_can_disctmo(vport);
-                       } else
+                       } else {
                                lpfc_els_handle_rscn(vport);
+                       }
                }
        }
        return;
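
The dominant pattern in the hunks above: vport->fc_flag moves from a u32 mask
guarded by shost->host_lock to an unsigned long treated as a bit array, so each
flag update becomes a single atomic bitop and the lock round-trip disappears.
A minimal sketch of the conversion, assuming the FC_* macros are now bit
numbers rather than masks (the helper function name is illustrative only):

        #include <linux/bitops.h>

        /* Before:
         *      spin_lock_irq(shost->host_lock);
         *      vport->fc_flag &= ~FC_NDISC_ACTIVE;
         *      spin_unlock_irq(shost->host_lock);
         */
        static void ndisc_done(struct lpfc_vport *vport)
        {
                clear_bit(FC_NDISC_ACTIVE, &vport->fc_flag);    /* atomic RMW */

                /* Readers drop the lock too; test_bit() is an atomic-safe
                 * load of a single bit.
                 */
                if (test_bit(FC_RSCN_MODE, &vport->fc_flag))
                        lpfc_els_handle_rscn(vport);
        }
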
@@ -6085,7 +6037,8 @@ lpfc_disc_flush_list(struct lpfc_vport *vport)
        struct lpfc_nodelist *ndlp, *next_ndlp;
        struct lpfc_hba *phba = vport->phba;
 
-       if (vport->fc_plogi_cnt || vport->fc_adisc_cnt) {
+       if (atomic_read(&vport->fc_plogi_cnt) ||
+           atomic_read(&vport->fc_adisc_cnt)) {
                list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
                                         nlp_listp) {
                        if (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE ||
@@ -6166,20 +6119,15 @@ lpfc_disc_timeout(struct timer_list *t)
 static void
 lpfc_disc_timeout_handler(struct lpfc_vport *vport)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba  *phba = vport->phba;
        struct lpfc_sli  *psli = &phba->sli;
        struct lpfc_nodelist *ndlp, *next_ndlp;
        LPFC_MBOXQ_t *initlinkmbox;
        int rc, clrlaerr = 0;
 
-       if (!(vport->fc_flag & FC_DISC_TMO))
+       if (!test_and_clear_bit(FC_DISC_TMO, &vport->fc_flag))
                return;
 
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag &= ~FC_DISC_TMO;
-       spin_unlock_irq(shost->host_lock);
-
        lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
                "disc timeout:    state:x%x rtry:x%x flg:x%x",
                vport->port_state, vport->fc_ns_retry, vport->fc_flag);
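
Note the stronger primitive in lpfc_disc_timeout_handler: the old code tested
FC_DISC_TMO and then cleared it under host_lock in a separate step, leaving a
window where two contexts could both observe the flag set. test_and_clear_bit()
collapses the test and the clear into one atomic operation, so the timeout body
runs at most once per set_bit():

        if (!test_and_clear_bit(FC_DISC_TMO, &vport->fc_flag))
                return;         /* already consumed by another path */
        /* ... handle the discovery timeout exactly once ... */
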
@@ -6333,7 +6281,7 @@ restart_disc:
                break;
 
        case LPFC_VPORT_READY:
-               if (vport->fc_flag & FC_RSCN_MODE) {
+               if (test_bit(FC_RSCN_MODE, &vport->fc_flag)) {
                        lpfc_printf_vlog(vport, KERN_ERR,
                                         LOG_TRACE_EVENT,
                                         "0231 RSCN timeout Data: x%x "
@@ -6750,7 +6698,7 @@ lpfc_fcf_inuse(struct lpfc_hba *phba)
        struct lpfc_vport **vports;
        int i, ret = 0;
        struct lpfc_nodelist *ndlp;
-       struct Scsi_Host  *shost;
+       unsigned long iflags;
 
        vports = lpfc_create_vport_work_array(phba);
 
@@ -6759,24 +6707,23 @@ lpfc_fcf_inuse(struct lpfc_hba *phba)
                return 1;
 
        for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
-               shost = lpfc_shost_from_vport(vports[i]);
-               spin_lock_irq(shost->host_lock);
                /*
                 * IF the CVL_RCVD bit is not set then we have sent the
                 * flogi.
                 * If dev_loss fires while we are waiting we do not want to
                 * unreg the fcf.
                 */
-               if (!(vports[i]->fc_flag & FC_VPORT_CVL_RCVD)) {
-                       spin_unlock_irq(shost->host_lock);
+               if (!test_bit(FC_VPORT_CVL_RCVD, &vports[i]->fc_flag)) {
                        ret =  1;
                        goto out;
                }
+               spin_lock_irqsave(&vports[i]->fc_nodes_list_lock, iflags);
                list_for_each_entry(ndlp, &vports[i]->fc_nodes, nlp_listp) {
                        if (ndlp->rport &&
                          (ndlp->rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
                                ret = 1;
-                               spin_unlock_irq(shost->host_lock);
+                               spin_unlock_irqrestore(&vports[i]->fc_nodes_list_lock,
+                                                      iflags);
                                goto out;
                        } else if (ndlp->nlp_flag & NLP_RPI_REGISTERED) {
                                ret = 1;
@@ -6788,7 +6735,7 @@ lpfc_fcf_inuse(struct lpfc_hba *phba)
                                                ndlp->nlp_flag);
                        }
                }
-               spin_unlock_irq(shost->host_lock);
+               spin_unlock_irqrestore(&vports[i]->fc_nodes_list_lock, iflags);
        }
 out:
        lpfc_destroy_vport_work_array(phba, vports);
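
lpfc_fcf_inuse also shows the new list locking: the fc_nodes walk no longer
borrows shost->host_lock but takes a dedicated per-vport fc_nodes_list_lock
with spin_lock_irqsave(), so the traversal is self-contained and safe
regardless of the caller's IRQ state. The shape, as a sketch (the function
name is hypothetical):

        #include <linux/list.h>
        #include <linux/spinlock.h>

        static bool vport_has_target(struct lpfc_vport *vport)
        {
                struct lpfc_nodelist *ndlp;
                unsigned long iflags;
                bool found = false;

                spin_lock_irqsave(&vport->fc_nodes_list_lock, iflags);
                list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
                        if (ndlp->nlp_type & NLP_FCP_TARGET) {
                                found = true;
                                break;
                        }
                }
                spin_unlock_irqrestore(&vport->fc_nodes_list_lock, iflags);
                return found;
        }
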
@@ -6806,7 +6753,6 @@ void
 lpfc_unregister_vfi_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
 {
        struct lpfc_vport *vport = mboxq->vport;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
        if (mboxq->u.mb.mbxStatus) {
                lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
@@ -6814,9 +6760,7 @@ lpfc_unregister_vfi_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
                                "HBA state x%x\n",
                                mboxq->u.mb.mbxStatus, vport->port_state);
        }
-       spin_lock_irq(shost->host_lock);
-       phba->pport->fc_flag &= ~FC_VFI_REGISTERED;
-       spin_unlock_irq(shost->host_lock);
+       clear_bit(FC_VFI_REGISTERED, &phba->pport->fc_flag);
        mempool_free(mboxq, phba->mbox_mem_pool);
        return;
 }
@@ -6880,9 +6824,9 @@ lpfc_unregister_fcf_prep(struct lpfc_hba *phba)
                        lpfc_mbx_unreg_vpi(vports[i]);
                        shost = lpfc_shost_from_vport(vports[i]);
                        spin_lock_irq(shost->host_lock);
-                       vports[i]->fc_flag |= FC_VPORT_NEEDS_INIT_VPI;
                        vports[i]->vpi_state &= ~LPFC_VPI_REGISTERED;
                        spin_unlock_irq(shost->host_lock);
+                       set_bit(FC_VPORT_NEEDS_INIT_VPI, &vports[i]->fc_flag);
                }
        lpfc_destroy_vport_work_array(phba, vports);
        if (i == 0 && (!(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED))) {
@@ -6895,9 +6839,9 @@ lpfc_unregister_fcf_prep(struct lpfc_hba *phba)
                lpfc_mbx_unreg_vpi(phba->pport);
                shost = lpfc_shost_from_vport(phba->pport);
                spin_lock_irq(shost->host_lock);
-               phba->pport->fc_flag |= FC_VPORT_NEEDS_INIT_VPI;
                phba->pport->vpi_state &= ~LPFC_VPI_REGISTERED;
                spin_unlock_irq(shost->host_lock);
+               set_bit(FC_VPORT_NEEDS_INIT_VPI, &phba->pport->fc_flag);
        }
 
        /* Cleanup any outstanding ELS commands */
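
Where a plain field still needs host_lock (vpi_state here), the hunks shrink
the critical section instead of widening it: the non-atomic update stays under
the lock and the fc_flag bitop is hoisted out past the unlock, since set_bit()
needs no external serialization:

        spin_lock_irq(shost->host_lock);
        vports[i]->vpi_state &= ~LPFC_VPI_REGISTERED;   /* plain field: lock */
        spin_unlock_irq(shost->host_lock);
        set_bit(FC_VPORT_NEEDS_INIT_VPI, &vports[i]->fc_flag);  /* atomic */
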
@@ -6981,8 +6925,8 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
         * If driver is not unloading, check if there is any other
         * FCF record that can be used for discovery.
         */
-       if ((phba->pport->load_flag & FC_UNLOADING) ||
-           (phba->link_state < LPFC_LINK_UP))
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag) ||
+           phba->link_state < LPFC_LINK_UP)
                return;
 
        /* This is considered as the initial FCF discovery scan */
index 5d4f9f27084d6d1fb356d2034bcf22577eb689f5..367e6b066d42f95d6b4d0cbd9e7ba0875083f894 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2009-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -4069,7 +4069,6 @@ struct lpfc_mcqe {
 #define LPFC_TRAILER_CODE_GRP5 0x5
 #define LPFC_TRAILER_CODE_FC   0x10
 #define LPFC_TRAILER_CODE_SLI  0x11
-#define LPFC_TRAILER_CODE_CMSTAT        0x13
 };
 
 struct lpfc_acqe_link {
@@ -4339,6 +4338,7 @@ struct lpfc_acqe_sli {
 #define LPFC_SLI_EVENT_TYPE_EEPROM_FAILURE     0x10
 #define LPFC_SLI_EVENT_TYPE_CGN_SIGNAL         0x11
 #define LPFC_SLI_EVENT_TYPE_RD_SIGNAL           0x12
+#define LPFC_SLI_EVENT_TYPE_RESET_CM_STATS      0x13
 };
 
 /*
index 70bcee64bc8c6b65ebd6fd7625f6a45a1645a0d5..88b2e57d90c2e3a671ab2907318bf7a9f6f7144c 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -94,6 +94,7 @@ static void lpfc_sli4_oas_verify(struct lpfc_hba *phba);
 static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int);
 static void lpfc_setup_bg(struct lpfc_hba *, struct Scsi_Host *);
 static int lpfc_sli4_cgn_parm_chg_evt(struct lpfc_hba *);
+static void lpfc_sli4_async_cmstat_evt(struct lpfc_hba *phba);
 static void lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba);
 
 static struct scsi_transport_template *lpfc_transport_template = NULL;
@@ -891,7 +892,7 @@ lpfc_hba_down_prep(struct lpfc_hba *phba)
                readl(phba->HCregaddr); /* flush */
        }
 
-       if (phba->pport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag))
                lpfc_cleanup_discovery_resources(phba->pport);
        else {
                vports = lpfc_create_vport_work_array(phba);
@@ -1231,13 +1232,13 @@ lpfc_rrq_timeout(struct timer_list *t)
 
        phba = from_timer(phba, t, rrq_tmr);
        spin_lock_irqsave(&phba->pport->work_port_lock, iflag);
-       if (!(phba->pport->load_flag & FC_UNLOADING))
+       if (!test_bit(FC_UNLOADING, &phba->pport->load_flag))
                phba->hba_flag |= HBA_RRQ_ACTIVE;
        else
                phba->hba_flag &= ~HBA_RRQ_ACTIVE;
        spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag);
 
-       if (!(phba->pport->load_flag & FC_UNLOADING))
+       if (!test_bit(FC_UNLOADING, &phba->pport->load_flag))
                lpfc_worker_wake_up(phba);
 }
 
@@ -1268,9 +1269,9 @@ lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
 
        /* Check and reset heart-beat timer if necessary */
        mempool_free(pmboxq, phba->mbox_mem_pool);
-       if (!(phba->pport->fc_flag & FC_OFFLINE_MODE) &&
-               !(phba->link_state == LPFC_HBA_ERROR) &&
-               !(phba->pport->load_flag & FC_UNLOADING))
+       if (!test_bit(FC_OFFLINE_MODE, &phba->pport->fc_flag) &&
+           !(phba->link_state == LPFC_HBA_ERROR) &&
+           !test_bit(FC_UNLOADING, &phba->pport->load_flag))
                mod_timer(&phba->hb_tmofunc,
                          jiffies +
                          msecs_to_jiffies(1000 * LPFC_HB_MBOX_INTERVAL));
@@ -1297,11 +1298,11 @@ lpfc_idle_stat_delay_work(struct work_struct *work)
        u32 i, idle_percent;
        u64 wall, wall_idle, diff_wall, diff_idle, busy_time;
 
-       if (phba->pport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag))
                return;
 
        if (phba->link_state == LPFC_HBA_ERROR ||
-           phba->pport->fc_flag & FC_OFFLINE_MODE ||
+           test_bit(FC_OFFLINE_MODE, &phba->pport->fc_flag) ||
            phba->cmf_active_mode != LPFC_CFG_OFF)
                goto requeue;
 
@@ -1358,11 +1359,12 @@ lpfc_hb_eq_delay_work(struct work_struct *work)
        uint32_t usdelay;
        int i;
 
-       if (!phba->cfg_auto_imax || phba->pport->load_flag & FC_UNLOADING)
+       if (!phba->cfg_auto_imax ||
+           test_bit(FC_UNLOADING, &phba->pport->load_flag))
                return;
 
        if (phba->link_state == LPFC_HBA_ERROR ||
-           phba->pport->fc_flag & FC_OFFLINE_MODE)
+           test_bit(FC_OFFLINE_MODE, &phba->pport->fc_flag))
                goto requeue;
 
        ena_delay = kcalloc(phba->sli4_hba.num_possible_cpu, sizeof(*ena_delay),
@@ -1533,9 +1535,9 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
                }
        lpfc_destroy_vport_work_array(phba, vports);
 
-       if ((phba->link_state == LPFC_HBA_ERROR) ||
-               (phba->pport->load_flag & FC_UNLOADING) ||
-               (phba->pport->fc_flag & FC_OFFLINE_MODE))
+       if (phba->link_state == LPFC_HBA_ERROR ||
+           test_bit(FC_UNLOADING, &phba->pport->load_flag) ||
+           test_bit(FC_OFFLINE_MODE, &phba->pport->fc_flag))
                return;
 
        if (phba->elsbuf_cnt &&
@@ -1736,7 +1738,7 @@ lpfc_handle_deferred_eratt(struct lpfc_hba *phba)
                        break;
                }
                /* If driver is unloading let the worker thread continue */
-               if (phba->pport->load_flag & FC_UNLOADING) {
+               if (test_bit(FC_UNLOADING, &phba->pport->load_flag)) {
                        phba->work_hs = 0;
                        break;
                }
@@ -1747,7 +1749,7 @@ lpfc_handle_deferred_eratt(struct lpfc_hba *phba)
         * first write to the host attention register clear the
         * host status register.
         */
-       if ((!phba->work_hs) && (!(phba->pport->load_flag & FC_UNLOADING)))
+       if (!phba->work_hs && !test_bit(FC_UNLOADING, &phba->pport->load_flag))
                phba->work_hs = old_host_status & ~HS_FFER1;
 
        spin_lock_irq(&phba->hbalock);
@@ -3085,7 +3087,7 @@ lpfc_cleanup(struct lpfc_vport *vport)
         * The flush here is only when the pci slot
         * is offline.
         */
-       if (vport->load_flag & FC_UNLOADING &&
+       if (test_bit(FC_UNLOADING, &vport->load_flag) &&
            pci_channel_offline(phba->pcidev))
                lpfc_sli_flush_io_rings(vport->phba);
 
@@ -3411,7 +3413,7 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba)
                return;
 
        for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
-               if (vports[i]->load_flag & FC_UNLOADING)
+               if (test_bit(FC_UNLOADING, &vports[i]->load_flag))
                        continue;
 
                list_for_each_entry_safe(ndlp, next_ndlp,
@@ -3611,7 +3613,7 @@ static void lpfc_destroy_multixri_pools(struct lpfc_hba *phba)
        if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)
                lpfc_destroy_expedite_pool(phba);
 
-       if (!(phba->pport->load_flag & FC_UNLOADING))
+       if (!test_bit(FC_UNLOADING, &phba->pport->load_flag))
                lpfc_sli_flush_io_rings(phba);
 
        hwq_count = phba->cfg_hdw_queue;
@@ -3697,7 +3699,7 @@ lpfc_online(struct lpfc_hba *phba)
                return 0;
        vport = phba->pport;
 
-       if (!(vport->fc_flag & FC_OFFLINE_MODE))
+       if (!test_bit(FC_OFFLINE_MODE, &vport->fc_flag))
                return 0;
 
        lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
@@ -3737,20 +3739,18 @@ lpfc_online(struct lpfc_hba *phba)
        vports = lpfc_create_vport_work_array(phba);
        if (vports != NULL) {
                for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
-                       struct Scsi_Host *shost;
-                       shost = lpfc_shost_from_vport(vports[i]);
-                       spin_lock_irq(shost->host_lock);
-                       vports[i]->fc_flag &= ~FC_OFFLINE_MODE;
+                       clear_bit(FC_OFFLINE_MODE, &vports[i]->fc_flag);
                        if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
-                               vports[i]->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
+                               set_bit(FC_VPORT_NEEDS_REG_VPI,
+                                       &vports[i]->fc_flag);
                        if (phba->sli_rev == LPFC_SLI_REV4) {
-                               vports[i]->fc_flag |= FC_VPORT_NEEDS_INIT_VPI;
+                               set_bit(FC_VPORT_NEEDS_INIT_VPI,
+                                       &vports[i]->fc_flag);
                                if ((vpis_cleared) &&
                                    (vports[i]->port_type !=
                                        LPFC_PHYSICAL_PORT))
                                        vports[i]->vpi = 0;
                        }
-                       spin_unlock_irq(shost->host_lock);
                }
        }
        lpfc_destroy_vport_work_array(phba, vports);
@@ -3805,7 +3805,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
        int offline;
        bool hba_pci_err;
 
-       if (vport->fc_flag & FC_OFFLINE_MODE)
+       if (test_bit(FC_OFFLINE_MODE, &vport->fc_flag))
                return;
 
        lpfc_block_mgmt_io(phba, mbx_action);
@@ -3819,16 +3819,15 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
        vports = lpfc_create_vport_work_array(phba);
        if (vports != NULL) {
                for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
-                       if (vports[i]->load_flag & FC_UNLOADING)
+                       if (test_bit(FC_UNLOADING, &vports[i]->load_flag))
                                continue;
                        shost = lpfc_shost_from_vport(vports[i]);
                        spin_lock_irq(shost->host_lock);
                        vports[i]->vpi_state &= ~LPFC_VPI_REGISTERED;
-                       vports[i]->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
-                       vports[i]->fc_flag &= ~FC_VFI_REGISTERED;
                        spin_unlock_irq(shost->host_lock);
+                       set_bit(FC_VPORT_NEEDS_REG_VPI, &vports[i]->fc_flag);
+                       clear_bit(FC_VFI_REGISTERED, &vports[i]->fc_flag);
 
-                       shost = lpfc_shost_from_vport(vports[i]);
                        list_for_each_entry_safe(ndlp, next_ndlp,
                                                 &vports[i]->fc_nodes,
                                                 nlp_listp) {
@@ -3910,7 +3909,7 @@ lpfc_offline(struct lpfc_hba *phba)
        struct lpfc_vport **vports;
        int i;
 
-       if (phba->pport->fc_flag & FC_OFFLINE_MODE)
+       if (test_bit(FC_OFFLINE_MODE, &phba->pport->fc_flag))
                return;
 
        /* stop port and all timers associated with this hba */
@@ -3941,14 +3940,14 @@ lpfc_offline(struct lpfc_hba *phba)
                        shost = lpfc_shost_from_vport(vports[i]);
                        spin_lock_irq(shost->host_lock);
                        vports[i]->work_port_events = 0;
-                       vports[i]->fc_flag |= FC_OFFLINE_MODE;
                        spin_unlock_irq(shost->host_lock);
+                       set_bit(FC_OFFLINE_MODE, &vports[i]->fc_flag);
                }
        lpfc_destroy_vport_work_array(phba, vports);
        /* If OFFLINE flag is clear (i.e. unloading), cpuhp removal is handled
         * in hba_unset
         */
-       if (phba->pport->fc_flag & FC_OFFLINE_MODE)
+       if (test_bit(FC_OFFLINE_MODE, &phba->pport->fc_flag))
                __lpfc_cpuhp_remove(phba);
 
        if (phba->cfg_xri_rebalancing)
@@ -4766,9 +4765,17 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
 
        vport = (struct lpfc_vport *) shost->hostdata;
        vport->phba = phba;
-       vport->load_flag |= FC_LOADING;
-       vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
+       set_bit(FC_LOADING, &vport->load_flag);
+       set_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
        vport->fc_rscn_flush = 0;
+       atomic_set(&vport->fc_plogi_cnt, 0);
+       atomic_set(&vport->fc_adisc_cnt, 0);
+       atomic_set(&vport->fc_reglogin_cnt, 0);
+       atomic_set(&vport->fc_prli_cnt, 0);
+       atomic_set(&vport->fc_unmap_cnt, 0);
+       atomic_set(&vport->fc_map_cnt, 0);
+       atomic_set(&vport->fc_npr_cnt, 0);
+       atomic_set(&vport->fc_unused_cnt, 0);
        lpfc_get_vport_cfgparam(vport);
 
        /* Adjust value in vport */
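
The eight per-state node counters (fc_plogi_cnt, fc_adisc_cnt, ...,
fc_unused_cnt) become atomic_t: zeroed once at vport creation as above and
read lock-free with atomic_read() in the discovery and log paths earlier in
this diff. The increment/decrement sites are outside this excerpt, but the
usage pattern is:

        #include <linux/atomic.h>

        atomic_set(&vport->fc_npr_cnt, 0);      /* vport create */
        atomic_inc(&vport->fc_npr_cnt);         /* node enters NPR (not shown) */
        if (atomic_read(&vport->fc_npr_cnt))    /* lock-free read */
                lpfc_els_disc_plogi(vport);
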
@@ -4824,6 +4831,7 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
 
        /* Initialize all internally managed lists. */
        INIT_LIST_HEAD(&vport->fc_nodes);
+       spin_lock_init(&vport->fc_nodes_list_lock);
        INIT_LIST_HEAD(&vport->rcv_buffer_list);
        spin_lock_init(&vport->work_port_lock);
 
@@ -4921,7 +4929,7 @@ int lpfc_scan_finished(struct Scsi_Host *shost, unsigned long time)
 
        spin_lock_irq(shost->host_lock);
 
-       if (vport->load_flag & FC_UNLOADING) {
+       if (test_bit(FC_UNLOADING, &vport->load_flag)) {
                stat = 1;
                goto finished;
        }
@@ -4945,7 +4953,8 @@ int lpfc_scan_finished(struct Scsi_Host *shost, unsigned long time)
                goto finished;
        if (vport->num_disc_nodes || vport->fc_prli_sent)
                goto finished;
-       if (vport->fc_map_cnt == 0 && time < msecs_to_jiffies(2 * 1000))
+       if (!atomic_read(&vport->fc_map_cnt) &&
+           time < msecs_to_jiffies(2 * 1000))
                goto finished;
        if ((phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) != 0)
                goto finished;
@@ -5034,9 +5043,7 @@ void lpfc_host_attrib_init(struct Scsi_Host *shost)
        fc_host_active_fc4s(shost)[7] = 1;
 
        fc_host_max_npiv_vports(shost) = phba->max_vpi;
-       spin_lock_irq(shost->host_lock);
-       vport->load_flag &= ~FC_LOADING;
-       spin_unlock_irq(shost->host_lock);
+       clear_bit(FC_LOADING, &vport->load_flag);
 }
 
 /**
@@ -5172,7 +5179,7 @@ lpfc_vmid_poll(struct timer_list *t)
 
        /* Is the vmid inactivity timer enabled */
        if (phba->pport->vmid_inactivity_timeout ||
-           phba->pport->load_flag & FC_DEREGISTER_ALL_APP_ID) {
+           test_bit(FC_DEREGISTER_ALL_APP_ID, &phba->pport->load_flag)) {
                wake_up = 1;
                phba->pport->work_port_events |= WORKER_CHECK_INACTIVE_VMID;
        }
@@ -6636,6 +6643,11 @@ lpfc_sli4_async_sli_evt(struct lpfc_hba *phba, struct lpfc_acqe_sli *acqe_sli)
                                acqe_sli->event_data1, acqe_sli->event_data2,
                                acqe_sli->event_data3);
                break;
+       case LPFC_SLI_EVENT_TYPE_RESET_CM_STATS:
+               lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+                               "2905 Reset CM statistics\n");
+               lpfc_sli4_async_cmstat_evt(phba);
+               break;
        default:
                lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
                                "3193 Unrecognized SLI event, type: 0x%x",
@@ -6689,9 +6701,7 @@ lpfc_sli4_perform_vport_cvl(struct lpfc_vport *vport)
                return NULL;
        lpfc_linkdown_port(vport);
        lpfc_cleanup_pending_mbox(vport);
-       spin_lock_irq(shost->host_lock);
-       vport->fc_flag |= FC_VPORT_CVL_RCVD;
-       spin_unlock_irq(shost->host_lock);
+       set_bit(FC_VPORT_CVL_RCVD, &vport->fc_flag);
 
        return ndlp;
 }
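
The congestion-management statistics reset also changes delivery path in this
series: LPFC_TRAILER_CODE_CMSTAT (0x13) is deleted from the ACQE trailer codes
in lpfc_hw4.h, and the firmware notification instead arrives as an SLI async
event subtype, LPFC_SLI_EVENT_TYPE_RESET_CM_STATS (0x13), handled in the
lpfc_sli4_async_sli_evt() hunk above; lpfc_sli4_async_cmstat_evt() becomes
static to lpfc_init.c. The new dispatch, reduced to its shape:

        switch (evt_type) {
        case LPFC_SLI_EVENT_TYPE_RESET_CM_STATS:  /* was a trailer code */
                lpfc_sli4_async_cmstat_evt(phba);
                break;
        default:
                break;
        }
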
@@ -6888,9 +6898,9 @@ lpfc_sli4_async_fip_evt(struct lpfc_hba *phba,
                if (vports) {
                        for (i = 0; i <= phba->max_vports && vports[i] != NULL;
                                        i++) {
-                               if ((!(vports[i]->fc_flag &
-                                       FC_VPORT_CVL_RCVD)) &&
-                                       (vports[i]->port_state > LPFC_FDISC)) {
+                               if (!test_bit(FC_VPORT_CVL_RCVD,
+                                             &vports[i]->fc_flag) &&
+                                   vports[i]->port_state > LPFC_FDISC) {
                                        active_vlink_present = 1;
                                        break;
                                }
@@ -6903,8 +6913,8 @@ lpfc_sli4_async_fip_evt(struct lpfc_hba *phba,
                 * If we are here first then vport_delete is going to wait
                 * for discovery to complete.
                 */
-               if (!(vport->load_flag & FC_UNLOADING) &&
-                                       active_vlink_present) {
+               if (!test_bit(FC_UNLOADING, &vport->load_flag) &&
+                   active_vlink_present) {
                        /*
                         * If there are other active VLinks present,
                         * re-instantiate the Vlink using FDISC.
@@ -7346,9 +7356,6 @@ void lpfc_sli4_async_event_proc(struct lpfc_hba *phba)
                case LPFC_TRAILER_CODE_SLI:
                        lpfc_sli4_async_sli_evt(phba, &cq_event->cqe.acqe_sli);
                        break;
-               case LPFC_TRAILER_CODE_CMSTAT:
-                       lpfc_sli4_async_cmstat_evt(phba);
-                       break;
                default:
                        lpfc_printf_log(phba, KERN_ERR,
                                        LOG_TRACE_EVENT,
@@ -9085,7 +9092,7 @@ lpfc_setup_fdmi_mask(struct lpfc_vport *vport)
 {
        struct lpfc_hba *phba = vport->phba;
 
-       vport->load_flag |= FC_ALLOW_FDMI;
+       set_bit(FC_ALLOW_FDMI, &vport->load_flag);
        if (phba->cfg_enable_SmartSAN ||
            phba->cfg_fdmi_on == LPFC_FDMI_SUPPORT) {
                /* Setup appropriate attribute masks */
@@ -12771,7 +12778,8 @@ static void __lpfc_cpuhp_remove(struct lpfc_hba *phba)
 
 static void lpfc_cpuhp_remove(struct lpfc_hba *phba)
 {
-       if (phba->pport && (phba->pport->fc_flag & FC_OFFLINE_MODE))
+       if (phba->pport &&
+           test_bit(FC_OFFLINE_MODE, &phba->pport->fc_flag))
                return;
 
        __lpfc_cpuhp_remove(phba);
@@ -12796,7 +12804,7 @@ static void lpfc_cpuhp_add(struct lpfc_hba *phba)
 
 static int __lpfc_cpuhp_checks(struct lpfc_hba *phba, int *retval)
 {
-       if (phba->pport->load_flag & FC_UNLOADING) {
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag)) {
                *retval = -EAGAIN;
                return true;
        }
@@ -13316,12 +13324,7 @@ lpfc_sli4_disable_intr(struct lpfc_hba *phba)
 static void
 lpfc_unset_hba(struct lpfc_hba *phba)
 {
-       struct lpfc_vport *vport = phba->pport;
-       struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
-
-       spin_lock_irq(shost->host_lock);
-       vport->load_flag |= FC_UNLOADING;
-       spin_unlock_irq(shost->host_lock);
+       set_bit(FC_UNLOADING, &phba->pport->load_flag);
 
        kfree(phba->vpi_bmask);
        kfree(phba->vpi_ids);
@@ -14113,9 +14116,7 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev)
        struct lpfc_hba   *phba = vport->phba;
        int i;
 
-       spin_lock_irq(&phba->hbalock);
-       vport->load_flag |= FC_UNLOADING;
-       spin_unlock_irq(&phba->hbalock);
+       set_bit(FC_UNLOADING, &vport->load_flag);
 
        lpfc_free_sysfs_attr(vport);
 
@@ -14958,9 +14959,7 @@ lpfc_pci_remove_one_s4(struct pci_dev *pdev)
        int i;
 
        /* Mark the device unloading flag */
-       spin_lock_irq(&phba->hbalock);
-       vport->load_flag |= FC_UNLOADING;
-       spin_unlock_irq(&phba->hbalock);
+       set_bit(FC_UNLOADING, &vport->load_flag);
        if (phba->cgn_i)
                lpfc_unreg_congestion_buf(phba);
 
index cadcd16494e193f8d8032b3f983f626f21d6f9f5..f7c41958036bb7c40c01a22eca8571f4443fe206 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -949,7 +949,7 @@ lpfc_reg_vpi(struct lpfc_vport *vport, LPFC_MBOXQ_t *pmb)
         * Set the re-reg VPI bit for f/w to update the MAC address.
         */
        if ((phba->sli_rev == LPFC_SLI_REV4) &&
-               !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI))
+               !test_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag))
                mb->un.varRegVpi.upd = 1;
 
        mb->un.varRegVpi.vpi = phba->vpi_ids[vport->vpi];
@@ -2244,7 +2244,7 @@ lpfc_reg_vfi(struct lpfcMboxq *mbox, struct lpfc_vport *vport, dma_addr_t phys)
 
        /* Only FC supports upd bit */
        if ((phba->sli4_hba.lnk_info.lnk_tp == LPFC_LNK_TYPE_FC) &&
-           (vport->fc_flag & FC_VFI_REGISTERED) &&
+           test_bit(FC_VFI_REGISTERED, &vport->fc_flag) &&
            (!phba->fc_topology_changed))
                bf_set(lpfc_reg_vfi_upd, reg_vfi, 1);
 
@@ -2271,8 +2271,8 @@ lpfc_reg_vfi(struct lpfcMboxq *mbox, struct lpfc_vport *vport, dma_addr_t phys)
        }
        lpfc_printf_vlog(vport, KERN_INFO, LOG_MBOX,
                        "3134 Register VFI, mydid:x%x, fcfi:%d, "
-                       " vfi:%d, vpi:%d, fc_pname:%x%x fc_flag:x%x"
-                       " port_state:x%x topology chg:%d bbscn_fabric :%d\n",
+                       "vfi:%d, vpi:%d, fc_pname:%x%x fc_flag:x%lx "
+                       "port_state:x%x topology chg:%d bbscn_fabric :%d\n",
                        vport->fc_myDID,
                        phba->fcf.fcfi,
                        phba->sli4_hba.vfi_ids[vport->vfi],
index d9074929fbab85d15e35a22ad1c0868601c715de..8e425be7c7c99c05b8f899043e15cac7a937665e 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -382,7 +382,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        /* PLOGI chkparm OK */
        lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
                         "0114 PLOGI chkparm OK Data: x%x x%x x%x "
-                        "x%x x%x x%x\n",
+                        "x%x x%x x%lx\n",
                         ndlp->nlp_DID, ndlp->nlp_state, ndlp->nlp_flag,
                         ndlp->nlp_rpi, vport->port_state,
                         vport->fc_flag);
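
Every log statement that prints vport->fc_flag switches its conversion from
x%x to x%lx in the same sweep: fc_flag is now an unsigned long, and passing
one to a %x specifier is a type mismatch on 64-bit targets (and caught by
-Wformat). In miniature:

        unsigned long fc_flag = 0x20;
        pr_info("fc_flag x%lx\n", fc_flag);  /* x%x would mismatch on 64-bit */
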
@@ -434,7 +434,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                }
                if (nlp_portwwn != 0 &&
                    nlp_portwwn != wwn_to_u64(sp->portName.u.wwn))
-                       lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
+                       lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
                                         "0143 PLOGI recv'd from DID: x%x "
                                         "WWPN changed: old %llx new %llx\n",
                                         ndlp->nlp_DID,
@@ -464,8 +464,8 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        save_iocb = NULL;
 
        /* Check for Nport to NPort pt2pt protocol */
-       if ((vport->fc_flag & FC_PT2PT) &&
-           !(vport->fc_flag & FC_PT2PT_PLOGI)) {
+       if (test_bit(FC_PT2PT, &vport->fc_flag) &&
+           !test_bit(FC_PT2PT_PLOGI, &vport->fc_flag)) {
                /* rcv'ed PLOGI decides what our NPortId will be */
                if (phba->sli_rev == LPFC_SLI_REV4) {
                        vport->fc_myDID = bf_get(els_rsp64_sid,
@@ -580,7 +580,7 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
         * This only applies to a fabric environment.
         */
        if ((ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) &&
-           (vport->fc_flag & FC_FABRIC)) {
+           test_bit(FC_FABRIC, &vport->fc_flag)) {
                /* software abort outstanding PLOGI */
                lpfc_els_abort(phba, ndlp);
        }
@@ -748,8 +748,10 @@ lpfc_rcv_padisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                                /* Save the ELS cmd */
                                elsiocb->drvrTimeout = cmd;
 
-                               lpfc_sli4_resume_rpi(ndlp,
-                                       lpfc_mbx_cmpl_resume_rpi, elsiocb);
+                               if (lpfc_sli4_resume_rpi(ndlp,
+                                               lpfc_mbx_cmpl_resume_rpi,
+                                               elsiocb))
+                                       kfree(elsiocb);
                                goto out;
                        }
                }
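
The lpfc_rcv_padisc hunk is a leak fix rather than a flag conversion:
lpfc_sli4_resume_rpi() can fail before it takes ownership of the saved
elsiocb, in which case the completion handler that would have freed it never
runs. Checking the return value and freeing on failure closes that path:

        if (lpfc_sli4_resume_rpi(ndlp, lpfc_mbx_cmpl_resume_rpi, elsiocb))
                kfree(elsiocb);         /* callee never took ownership */
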
@@ -802,7 +804,6 @@ static int
 lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
              struct lpfc_iocbq *cmdiocb, uint32_t els_cmd)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba    *phba = vport->phba;
        struct lpfc_vport **vports;
        int i, active_vlink_present = 0 ;
@@ -835,19 +836,17 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 
        if (ndlp->nlp_DID == Fabric_DID) {
                if (vport->port_state <= LPFC_FDISC ||
-                   vport->fc_flag & FC_PT2PT)
+                   test_bit(FC_PT2PT, &vport->fc_flag))
                        goto out;
                lpfc_linkdown_port(vport);
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag |= FC_VPORT_LOGO_RCVD;
-               spin_unlock_irq(shost->host_lock);
+               set_bit(FC_VPORT_LOGO_RCVD, &vport->fc_flag);
                vports = lpfc_create_vport_work_array(phba);
                if (vports) {
                        for (i = 0; i <= phba->max_vports && vports[i] != NULL;
                                        i++) {
-                               if ((!(vports[i]->fc_flag &
-                                       FC_VPORT_LOGO_RCVD)) &&
-                                       (vports[i]->port_state > LPFC_FDISC)) {
+                               if (!test_bit(FC_VPORT_LOGO_RCVD,
+                                             &vports[i]->fc_flag) &&
+                                   vports[i]->port_state > LPFC_FDISC) {
                                        active_vlink_present = 1;
                                        break;
                                }
@@ -860,8 +859,8 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                 * If we are here first then vport_delete is going to wait
                 * for discovery to complete.
                 */
-               if (!(vport->load_flag & FC_UNLOADING) &&
-                                       active_vlink_present) {
+               if (!test_bit(FC_UNLOADING, &vport->load_flag) &&
+                   active_vlink_present) {
                        /*
                         * If there are other active VLinks present,
                         * re-instantiate the Vlink using FDISC.
@@ -874,23 +873,21 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                        ndlp->nlp_last_elscmd = ELS_CMD_FDISC;
                        vport->port_state = LPFC_FDISC;
                } else {
-                       spin_lock_irq(shost->host_lock);
-                       phba->pport->fc_flag &= ~FC_LOGO_RCVD_DID_CHNG;
-                       spin_unlock_irq(shost->host_lock);
+                       clear_bit(FC_LOGO_RCVD_DID_CHNG, &phba->pport->fc_flag);
                        lpfc_retry_pport_discovery(phba);
                }
        } else {
                lpfc_printf_vlog(vport, KERN_INFO,
                                 LOG_NODE | LOG_ELS | LOG_DISCOVERY,
                                 "3203 LOGO recover nport x%06x state x%x "
-                                "ntype x%x fc_flag x%x\n",
+                                "ntype x%x fc_flag x%lx\n",
                                 ndlp->nlp_DID, ndlp->nlp_state,
                                 ndlp->nlp_type, vport->fc_flag);
 
                /* Special cases for rports that recover post LOGO. */
                if ((!(ndlp->nlp_type == NLP_FABRIC) &&
                     (ndlp->nlp_type & (NLP_FCP_TARGET | NLP_NVME_TARGET) ||
-                     vport->fc_flag & FC_PT2PT)) ||
+                     test_bit(FC_PT2PT, &vport->fc_flag))) ||
                    (ndlp->nlp_state >= NLP_STE_ADISC_ISSUE ||
                     ndlp->nlp_state <= NLP_STE_PRLI_ISSUE)) {
                        mod_timer(&ndlp->nlp_delayfunc,
@@ -1055,9 +1052,10 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                return 0;
        }
 
-       if (!(vport->fc_flag & FC_PT2PT)) {
+       if (!test_bit(FC_PT2PT, &vport->fc_flag)) {
                /* Check config parameter use-adisc or FCP-2 */
-               if (vport->cfg_use_adisc && ((vport->fc_flag & FC_RSCN_MODE) ||
+               if (vport->cfg_use_adisc &&
+                   (test_bit(FC_RSCN_MODE, &vport->fc_flag) ||
                    ((ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) &&
                     (ndlp->nlp_type & NLP_FCP_TARGET)))) {
                        spin_lock_irq(&ndlp->lock);
@@ -1121,7 +1119,7 @@ lpfc_release_rpi(struct lpfc_hba *phba, struct lpfc_vport *vport,
                }
 
                if (((ndlp->nlp_DID & Fabric_DID_MASK) != Fabric_DID_MASK) &&
-                   (!(vport->fc_flag & FC_OFFLINE_MODE)))
+                   (!test_bit(FC_OFFLINE_MODE, &vport->fc_flag)))
                        ndlp->nlp_flag |= NLP_UNREG_INP;
 
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
@@ -1147,9 +1145,8 @@ lpfc_disc_illegal(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 
        phba = vport->phba;
        /* Release the RPI if reglogin completing */
-       if (!(phba->pport->load_flag & FC_UNLOADING) &&
-               (evt == NLP_EVT_CMPL_REG_LOGIN) &&
-               (!pmb->u.mb.mbxStatus)) {
+       if (!test_bit(FC_UNLOADING, &phba->pport->load_flag) &&
+           evt == NLP_EVT_CMPL_REG_LOGIN && !pmb->u.mb.mbxStatus) {
                rpi = pmb->u.mb.un.varWords[0];
                lpfc_release_rpi(phba, vport, ndlp, rpi);
        }
@@ -1244,7 +1241,6 @@ static uint32_t
 lpfc_rcv_plogi_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                           void *arg, uint32_t evt)
 {
-       struct Scsi_Host   *shost = lpfc_shost_from_vport(vport);
        struct lpfc_hba   *phba = vport->phba;
        struct lpfc_iocbq *cmdiocb = arg;
        struct lpfc_dmabuf *pcmd = cmdiocb->cmd_dmabuf;
@@ -1279,9 +1275,7 @@ lpfc_rcv_plogi_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                        /* Check if there are more PLOGIs to be sent */
                        lpfc_more_plogi(vport);
                        if (vport->num_disc_nodes == 0) {
-                               spin_lock_irq(shost->host_lock);
-                               vport->fc_flag &= ~FC_NDISC_ACTIVE;
-                               spin_unlock_irq(shost->host_lock);
+                               clear_bit(FC_NDISC_ACTIVE, &vport->fc_flag);
                                lpfc_can_disctmo(vport);
                                lpfc_end_rscn(vport);
                        }
@@ -1421,8 +1415,8 @@ lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport,
        ndlp->nlp_maxframe =
                ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | sp->cmn.bbRcvSizeLsb;
 
-       if ((vport->fc_flag & FC_PT2PT) &&
-           (vport->fc_flag & FC_PT2PT_PLOGI)) {
+       if (test_bit(FC_PT2PT, &vport->fc_flag) &&
+           test_bit(FC_PT2PT_PLOGI, &vport->fc_flag)) {
                ed_tov = be32_to_cpu(sp->cmn.e_d_tov);
                if (sp->cmn.edtovResolution) {
                        /* E_D_TOV ticks are in nanoseconds */
@@ -1576,8 +1570,8 @@ lpfc_cmpl_reglogin_plogi_issue(struct lpfc_vport *vport,
 
        phba = vport->phba;
        /* Release the RPI */
-       if (!(phba->pport->load_flag & FC_UNLOADING) &&
-               !mb->mbxStatus) {
+       if (!test_bit(FC_UNLOADING, &phba->pport->load_flag) &&
+           !mb->mbxStatus) {
                rpi = pmb->u.mb.un.varWords[0];
                lpfc_release_rpi(phba, vport, ndlp, rpi);
        }
@@ -1613,7 +1607,7 @@ lpfc_device_recov_plogi_issue(struct lpfc_vport *vport,
        /* Don't do anything that will mess up processing of the
         * previous RSCN.
         */
-       if (vport->fc_flag & FC_RSCN_DEFERRED)
+       if (test_bit(FC_RSCN_DEFERRED, &vport->fc_flag))
                return ndlp->nlp_state;
 
        /* software abort outstanding PLOGI */
@@ -1799,7 +1793,7 @@ lpfc_device_recov_adisc_issue(struct lpfc_vport *vport,
        /* Don't do anything that will mess up processing of the
         * previous RSCN.
         */
-       if (vport->fc_flag & FC_RSCN_DEFERRED)
+       if (test_bit(FC_RSCN_DEFERRED, &vport->fc_flag))
                return ndlp->nlp_state;
 
        /* software abort outstanding ADISC */
@@ -1989,13 +1983,13 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
                 * know what PRLI to send yet.  Figure that out now and
                 * call PRLI depending on the outcome.
                 */
-               if (vport->fc_flag & FC_PT2PT) {
+               if (test_bit(FC_PT2PT, &vport->fc_flag)) {
                        /* If we are pt2pt, there is no Fabric to determine
                         * the FC4 type of the remote nport. So if NVME
                         * is configured try it.
                         */
                        ndlp->nlp_fc4_type |= NLP_FC4_FCP;
-                       if ((!(vport->fc_flag & FC_PT2PT_NO_NVME)) &&
+                       if ((!test_bit(FC_PT2PT_NO_NVME, &vport->fc_flag)) &&
                            (vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH ||
                            vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) {
                                ndlp->nlp_fc4_type |= NLP_FC4_NVME;
@@ -2027,7 +2021,7 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
                        lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
                }
        } else {
-               if ((vport->fc_flag & FC_PT2PT) && phba->nvmet_support)
+               if (test_bit(FC_PT2PT, &vport->fc_flag) && phba->nvmet_support)
                        phba->targetport->port_id = vport->fc_myDID;
 
                /* Only Fabric ports should transition. NVME target
@@ -2068,7 +2062,7 @@ lpfc_device_recov_reglogin_issue(struct lpfc_vport *vport,
        /* Don't do anything that will mess up processing of the
         * previous RSCN.
         */
-       if (vport->fc_flag & FC_RSCN_DEFERRED)
+       if (test_bit(FC_RSCN_DEFERRED, &vport->fc_flag))
                return ndlp->nlp_state;
 
        ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
@@ -2384,7 +2378,7 @@ lpfc_device_recov_prli_issue(struct lpfc_vport *vport,
        /* Don't do anything that will mess up processing of the
         * previous RSCN.
         */
-       if (vport->fc_flag & FC_RSCN_DEFERRED)
+       if (test_bit(FC_RSCN_DEFERRED, &vport->fc_flag))
                return ndlp->nlp_state;
 
        /* software abort outstanding PRLI */
@@ -2828,13 +2822,10 @@ static uint32_t
 lpfc_cmpl_logo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
                        void *arg, uint32_t evt)
 {
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
-
        /* For the fabric port just clear the fc flags. */
        if (ndlp->nlp_DID == Fabric_DID) {
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
-               spin_unlock_irq(shost->host_lock);
+               clear_bit(FC_FABRIC, &vport->fc_flag);
+               clear_bit(FC_PUBLIC_LOOP, &vport->fc_flag);
        }
        lpfc_unreg_rpi(vport, ndlp);
        return ndlp->nlp_state;
@@ -2906,7 +2897,7 @@ lpfc_device_recov_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        /* Don't do anything that will mess up processing of the
         * previous RSCN.
         */
-       if (vport->fc_flag & FC_RSCN_DEFERRED)
+       if (test_bit(FC_RSCN_DEFERRED, &vport->fc_flag))
                return ndlp->nlp_state;
 
        lpfc_cancel_retry_delay_tmo(vport, ndlp);
index 128fc1bab58653dc4b7f428c0a9ce58f687455d4..09c53b85bcb8d6a11128f0cb2c1545afa5cab800 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -94,7 +94,7 @@ lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport,
        lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
        vport = lport->vport;
 
-       if (!vport || vport->load_flag & FC_UNLOADING ||
+       if (!vport || test_bit(FC_UNLOADING, &vport->load_flag) ||
            vport->phba->hba_flag & HBA_IOQ_FLUSH)
                return -ENODEV;
 
@@ -674,7 +674,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
                return -EINVAL;
 
        vport = lport->vport;
-       if (vport->load_flag & FC_UNLOADING ||
+       if (test_bit(FC_UNLOADING, &vport->load_flag) ||
            vport->phba->hba_flag & HBA_IOQ_FLUSH)
                return -ENODEV;
 
@@ -765,7 +765,7 @@ lpfc_nvme_xmt_ls_rsp(struct nvme_fc_local_port *localport,
        struct lpfc_nvme_lport *lport;
        int rc;
 
-       if (axchg->phba->pport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &axchg->phba->pport->load_flag))
                return -ENODEV;
 
        lport = (struct lpfc_nvme_lport *)localport->private;
@@ -810,7 +810,7 @@ lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
                return;
        vport = lport->vport;
 
-       if (vport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &vport->load_flag))
                return;
 
        ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id);
@@ -1567,7 +1567,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
 
        phba = vport->phba;
 
-       if ((unlikely(vport->load_flag & FC_UNLOADING)) ||
+       if ((unlikely(test_bit(FC_UNLOADING, &vport->load_flag))) ||
            phba->hba_flag & HBA_IOQ_FLUSH) {
                lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
                                 "6124 Fail IO, Driver unload\n");
@@ -1886,7 +1886,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
 
        if (unlikely(!freqpriv))
                return;
-       if (vport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &vport->load_flag))
                return;
 
        /* Announce entry to new IO submit field. */
@@ -2263,7 +2263,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
                        if (!vport->localport ||
                            test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) ||
                            phba->link_state == LPFC_HBA_ERROR ||
-                           vport->load_flag & FC_UNLOADING)
+                           test_bit(FC_UNLOADING, &vport->load_flag))
                                return;
 
                        lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@@ -2625,7 +2625,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                 * return values is ignored.  The upcall is a courtesy to the
                 * transport.
                 */
-               if (vport->load_flag & FC_UNLOADING ||
+               if (test_bit(FC_UNLOADING, &vport->load_flag) ||
                    unlikely(vport->phba->link_state == LPFC_HBA_ERROR))
                        (void)nvme_fc_set_remoteport_devloss(remoteport, 0);
 
@@ -2644,7 +2644,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                                         "port_state x%x\n",
                                         ret, remoteport->port_state);
 
-                       if (vport->load_flag & FC_UNLOADING) {
+                       if (test_bit(FC_UNLOADING, &vport->load_flag)) {
                                /* Only 1 thread can drop the initial node
                                 * reference. Check if another thread has set
                                 * NLP_DROPPED.
index 425328d9c2d80b567ac3aa452625286fa368f417..8258b771bd009e2fe4dbc610db2b56971e3a6f4e 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -872,7 +872,7 @@ __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg,
        struct ulp_bde64 bpl;
        int rc;
 
-       if (phba->pport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag))
                return -ENODEV;
 
        lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
@@ -984,7 +984,7 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
        struct lpfc_nvmet_tgtport *nvmep = tgtport->private;
        int rc;
 
-       if (axchg->phba->pport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &axchg->phba->pport->load_flag))
                return -ENODEV;
 
        rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, lpfc_nvmet_xmt_ls_rsp_cmp);
@@ -1022,7 +1022,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
        int id;
 #endif
 
-       if (phba->pport->load_flag & FC_UNLOADING) {
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag)) {
                rc = -ENODEV;
                goto aerr;
        }
@@ -1145,7 +1145,7 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
        struct lpfc_queue *wq;
        unsigned long flags;
 
-       if (phba->pport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag))
                return;
 
        if (!ctxp->hdwq)
@@ -1317,7 +1317,7 @@ lpfc_nvmet_ls_req(struct nvmet_fc_target_port *targetport,
                return -EINVAL;
 
        phba = lpfc_nvmet->phba;
-       if (phba->pport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag))
                return -EINVAL;
 
        hstate = atomic_read(&lpfc_nvmet->state);
@@ -1353,7 +1353,7 @@ lpfc_nvmet_ls_abort(struct nvmet_fc_target_port *targetport,
        int ret;
 
        phba = lpfc_nvmet->phba;
-       if (phba->pport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag))
                return;
 
        ndlp = (struct lpfc_nodelist *)hosthandle;
index bf879d81846b69379f34b91759a45ef8d5af89fb..c0038eaae7b0ae9e40d945c9d99708897c97382e 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -2728,14 +2728,14 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
                sgde = scsi_sglist(cmd);
                blksize = scsi_prot_interval(cmd);
                data_src = (uint8_t *)sg_virt(sgde);
-               data_len = sgde->length;
+               data_len = sg_dma_len(sgde);
                if ((data_len & (blksize - 1)) == 0)
                        chk_guard = 1;
 
                src = (struct scsi_dif_tuple *)sg_virt(sgpe);
                start_ref_tag = scsi_prot_ref_tag(cmd);
                start_app_tag = src->app_tag;
-               len = sgpe->length;
+               len = sg_dma_len(sgpe);
                while (src && protsegcnt) {
                        while (len) {
 
@@ -2800,7 +2800,7 @@ skipit:
                                                goto out;
 
                                        data_src = (uint8_t *)sg_virt(sgde);
-                                       data_len = sgde->length;
+                                       data_len = sg_dma_len(sgde);
                                        if ((data_len & (blksize - 1)) == 0)
                                                chk_guard = 1;
                                }
@@ -2810,7 +2810,7 @@ skipit:
                        sgpe = sg_next(sgpe);
                        if (sgpe) {
                                src = (struct scsi_dif_tuple *)sg_virt(sgpe);
-                               len = sgpe->length;
+                               len = sg_dma_len(sgpe);
                        } else {
                                src = NULL;
                        }
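
The lpfc_calc_bg_err hunks switch the DIF guard-tag walk from sg->length to
sg_dma_len(): after DMA mapping, the element length the HBA actually
transferred can differ from the length the upper layer supplied, so the
software CRC check iterates the mapped lengths to line up with the hardware's
view. A sketch of the loop shape (variable names illustrative):

        #include <linux/scatterlist.h>

        struct scatterlist *sg;

        for (sg = scsi_sglist(cmd); sg; sg = sg_next(sg)) {
                u8  *buf = sg_virt(sg);
                u32  len = sg_dma_len(sg);  /* mapped length, was sg->length */
                /* ... accumulate the guard CRC over buf[0..len) ... */
        }
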
index 706985358c6a02f37d91698c0163f506695e22cc..1f8a9b5945cbae71a32172b4eb0d61e7924e4f0a 100644
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -1036,7 +1036,7 @@ lpfc_handle_rrq_active(struct lpfc_hba *phba)
        }
        spin_unlock_irqrestore(&phba->hbalock, iflags);
        if ((!list_empty(&phba->active_rrq_list)) &&
-           (!(phba->pport->load_flag & FC_UNLOADING)))
+           (!test_bit(FC_UNLOADING, &phba->pport->load_flag)))
                mod_timer(&phba->rrq_tmr, next_time);
        list_for_each_entry_safe(rrq, nextrrq, &send_rrq, list) {
                list_del(&rrq->list);
@@ -1180,12 +1180,12 @@ lpfc_set_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
                return -EINVAL;
 
        spin_lock_irqsave(&phba->hbalock, iflags);
-       if (phba->pport->load_flag & FC_UNLOADING) {
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag)) {
                phba->hba_flag &= ~HBA_RRQ_ACTIVE;
                goto out;
        }
 
-       if (ndlp->vport && (ndlp->vport->load_flag & FC_UNLOADING))
+       if (ndlp->vport && test_bit(FC_UNLOADING, &ndlp->vport->load_flag))
                goto out;
 
        if (!ndlp->active_rrqs_xri_bitmap)
@@ -1732,7 +1732,7 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
           (ulp_command != CMD_ABORT_XRI_CN) &&
           (ulp_command != CMD_CLOSE_XRI_CN)) {
                BUG_ON(!piocb->vport);
-               if (!(piocb->vport->load_flag & FC_UNLOADING))
+               if (!test_bit(FC_UNLOADING, &piocb->vport->load_flag))
                        mod_timer(&piocb->vport->els_tmofunc,
                                  jiffies +
                                  msecs_to_jiffies(1000 * (phba->fc_ratov << 1)));
@@ -2882,7 +2882,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
         * If a REG_LOGIN succeeded  after node is destroyed or node
         * is in re-discovery driver need to cleanup the RPI.
         */
-       if (!(phba->pport->load_flag & FC_UNLOADING) &&
+       if (!test_bit(FC_UNLOADING, &phba->pport->load_flag) &&
            pmb->u.mb.mbxCommand == MBX_REG_LOGIN64 &&
            !pmb->u.mb.mbxStatus) {
                mp = (struct lpfc_dmabuf *)pmb->ctx_buf;
@@ -2904,13 +2904,13 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
        }
 
        if ((pmb->u.mb.mbxCommand == MBX_REG_VPI) &&
-               !(phba->pport->load_flag & FC_UNLOADING) &&
+               !test_bit(FC_UNLOADING, &phba->pport->load_flag) &&
                !pmb->u.mb.mbxStatus) {
                shost = lpfc_shost_from_vport(vport);
                spin_lock_irq(shost->host_lock);
                vport->vpi_state |= LPFC_VPI_REGISTERED;
-               vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
                spin_unlock_irq(shost->host_lock);
+               clear_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
        }
 
        if (pmb->u.mb.mbxCommand == MBX_REG_LOGIN64) {
@@ -2927,7 +2927,7 @@ lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
                                vport,
                                KERN_INFO, LOG_MBOX | LOG_DISCOVERY,
                                "1438 UNREG cmpl deferred mbox x%x "
-                               "on NPort x%x Data: x%x x%x x%px x%x x%x\n",
+                               "on NPort x%x Data: x%x x%x x%px x%lx x%x\n",
                                ndlp->nlp_rpi, ndlp->nlp_DID,
                                ndlp->nlp_flag, ndlp->nlp_defer_did,
                                ndlp, vport->load_flag, kref_read(&ndlp->kref));
@@ -3235,7 +3235,7 @@ lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
        lpfc_nvmeio_data(phba, "NVME LS    RCV: xri x%x sz %d from %06x\n",
                         oxid, size, sid);
 
-       if (phba->pport->load_flag & FC_UNLOADING) {
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag)) {
                failwhy = "Driver Unloading";
        } else if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) {
                failwhy = "NVME FC4 Disabled";
@@ -3940,7 +3940,7 @@ void lpfc_poll_eratt(struct timer_list *t)
        if (!(phba->hba_flag & HBA_SETUP))
                return;
 
-       if (phba->pport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &phba->pport->load_flag))
                return;
 
        /* Here we will also keep track of interrupts per sec of the hba */
@@ -7582,7 +7582,7 @@ lpfc_sli4_repost_sgl_list(struct lpfc_hba *phba,
        struct lpfc_sglq *sglq_entry = NULL;
        struct lpfc_sglq *sglq_entry_next = NULL;
        struct lpfc_sglq *sglq_entry_first = NULL;
-       int status, total_cnt;
+       int status = 0, total_cnt;
        int post_cnt = 0, num_posted = 0, block_cnt = 0;
        int last_xritag = NO_XRI;
        LIST_HEAD(prep_sgl_list);
@@ -10888,7 +10888,7 @@ __lpfc_sli_prep_els_req_rsp_s4(struct lpfc_iocbq *cmdiocbq,
         * all ELS pt2pt protocol traffic as well.
         */
        if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) ||
-           (vport->fc_flag & FC_PT2PT)) {
+           test_bit(FC_PT2PT, &vport->fc_flag)) {
                if (expect_rsp) {
                        bf_set(els_req64_sid, &wqe->els_req, vport->fc_myDID);
 
@@ -12428,7 +12428,7 @@ lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
         * If we're unloading, don't abort iocb on the ELS ring, but change
         * the callback so that nothing happens when it finishes.
         */
-       if ((vport->load_flag & FC_UNLOADING) &&
+       if (test_bit(FC_UNLOADING, &vport->load_flag) &&
            pring->ringno == LPFC_ELS_RING) {
                if (cmdiocb->cmd_flag & LPFC_IO_FABRIC)
                        cmdiocb->fabric_cmd_cmpl = lpfc_ignore_els_cmpl;
@@ -14658,7 +14658,7 @@ lpfc_sli4_sp_handle_rcqe(struct lpfc_hba *phba, struct lpfc_rcqe *rcqe)
                    fc_hdr->fh_r_ctl == FC_RCTL_DD_UNSOL_DATA) {
                        spin_unlock_irqrestore(&phba->hbalock, iflags);
                        /* Handle MDS Loopback frames */
-                       if  (!(phba->pport->load_flag & FC_UNLOADING))
+                       if  (!test_bit(FC_UNLOADING, &phba->pport->load_flag))
                                lpfc_sli4_handle_mds_loopback(phba->pport,
                                                              dma_buf);
                        else
@@ -18552,8 +18552,8 @@ lpfc_fc_frame_to_vport(struct lpfc_hba *phba, struct fc_frame_header *fc_hdr,
 
        if (did == Fabric_DID)
                return phba->pport;
-       if ((phba->pport->fc_flag & FC_PT2PT) &&
-               !(phba->link_state == LPFC_HBA_READY))
+       if (test_bit(FC_PT2PT, &phba->pport->fc_flag) &&
+           phba->link_state != LPFC_HBA_READY)
                return phba->pport;
 
        vports = lpfc_create_vport_work_array(phba);
@@ -18933,7 +18933,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
                                         "oxid:x%x SID:x%x\n", oxid, sid);
                        return;
                }
-               /* Put ndlp onto pport node list */
+               /* Put ndlp onto vport node list */
                lpfc_enqueue_node(vport, ndlp);
        }
 
@@ -18953,7 +18953,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
                return;
        }
 
-       ctiocb->vport = phba->pport;
+       ctiocb->vport = vport;
        ctiocb->cmd_cmpl = lpfc_sli4_seq_abort_rsp_cmpl;
        ctiocb->sli4_lxritag = NO_XRI;
        ctiocb->sli4_xritag = NO_XRI;
@@ -19040,6 +19040,16 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport,
                ctiocb->ndlp = NULL;
                lpfc_sli_release_iocbq(phba, ctiocb);
        }
+
+       /* if only usage of this nodelist is BLS response, release initial ref
+        * to free ndlp when transmit completes
+        */
+       if (ndlp->nlp_state == NLP_STE_UNUSED_NODE &&
+           !(ndlp->nlp_flag & NLP_DROPPED) &&
+           !(ndlp->fc4_xpt_flags & (NVME_XPT_REGD | SCSI_XPT_REGD))) {
+               ndlp->nlp_flag |= NLP_DROPPED;
+               lpfc_nlp_put(ndlp);
+       }
 }
 
 /**
@@ -19447,7 +19457,7 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba,
            fc_hdr->fh_r_ctl == FC_RCTL_DD_UNSOL_DATA) {
                vport = phba->pport;
                /* Handle MDS Loopback frames */
-               if  (!(phba->pport->load_flag & FC_UNLOADING))
+               if  (!test_bit(FC_UNLOADING, &phba->pport->load_flag))
                        lpfc_sli4_handle_mds_loopback(vport, dmabuf);
                else
                        lpfc_in_buf_free(phba, &dmabuf->dbuf);
@@ -19497,8 +19507,8 @@ lpfc_sli4_handle_received_buffer(struct lpfc_hba *phba,
                 * The pt2pt protocol allows for discovery frames
                 * to be received without a registered VPI.
                 */
-               if (!(vport->fc_flag & FC_PT2PT) ||
-                       (phba->link_state == LPFC_HBA_READY)) {
+               if (!test_bit(FC_PT2PT, &vport->fc_flag) ||
+                   phba->link_state == LPFC_HBA_READY) {
                        lpfc_in_buf_free(phba, &dmabuf->dbuf);
                        return;
                }
@@ -22656,7 +22666,7 @@ lpfc_sli_prep_wqe(struct lpfc_hba *phba, struct lpfc_iocbq *job)
                if_type = bf_get(lpfc_sli_intf_if_type,
                                 &phba->sli4_hba.sli_intf);
                if (if_type >= LPFC_SLI_INTF_IF_TYPE_2) {
-                       if (job->vport->fc_flag & FC_PT2PT) {
+                       if (test_bit(FC_PT2PT, &job->vport->fc_flag)) {
                                bf_set(els_rsp64_sp, &wqe->xmit_els_rsp, 1);
                                bf_set(els_rsp64_sid, &wqe->xmit_els_rsp,
                                       job->vport->fc_myDID);
index aba1c1cee8c4db90083c2ff046108a36642f910a..56f5889dbaf9347cae12a7ab4a6f3f70ee2c58a7 100644 (file)
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "14.2.0.17"
+#define LPFC_DRIVER_VERSION "14.4.0.0"
 #define LPFC_DRIVER_NAME               "lpfc"
 
 /* Used for SLI 2/3 */
@@ -32,6 +32,6 @@
 
 #define LPFC_MODULE_DESC "Emulex LightPulse Fibre Channel SCSI driver " \
                LPFC_DRIVER_VERSION
-#define LPFC_COPYRIGHT "Copyright (C) 2017-2023 Broadcom. All Rights " \
+#define LPFC_COPYRIGHT "Copyright (C) 2017-2024 Broadcom. All Rights " \
                "Reserved. The term \"Broadcom\" refers to Broadcom Inc. " \
                "and/or its subsidiaries."
index 6c7559cf1a4b65e7f3ba2ac23d78a13c0c0ac7eb..0f79840b94986146478e61fa4e5ab1cd6eeaa99e 100644 (file)
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2017-2023 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term *
  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.     *
  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
@@ -238,13 +238,9 @@ lpfc_unique_wwpn(struct lpfc_hba *phba, struct lpfc_vport *new_vport)
 static void lpfc_discovery_wait(struct lpfc_vport *vport)
 {
        struct lpfc_hba *phba = vport->phba;
-       uint32_t wait_flags = 0;
        unsigned long wait_time_max;
        unsigned long start_time;
 
-       wait_flags = FC_RSCN_MODE | FC_RSCN_DISCOVERY | FC_NLP_MORE |
-                    FC_RSCN_DEFERRED | FC_NDISC_ACTIVE | FC_DISC_TMO;
-
        /*
         * The time constraint on this loop is a balance between the
         * fabric RA_TOV value and dev_loss tmo.  The driver's
@@ -255,14 +251,19 @@ static void lpfc_discovery_wait(struct lpfc_vport *vport)
        start_time = jiffies;
        while (time_before(jiffies, wait_time_max)) {
                if ((vport->num_disc_nodes > 0)    ||
-                   (vport->fc_flag & wait_flags)  ||
+                   test_bit(FC_RSCN_MODE, &vport->fc_flag) ||
+                   test_bit(FC_RSCN_DISCOVERY, &vport->fc_flag) ||
+                   test_bit(FC_NLP_MORE, &vport->fc_flag) ||
+                   test_bit(FC_RSCN_DEFERRED, &vport->fc_flag) ||
+                   test_bit(FC_NDISC_ACTIVE, &vport->fc_flag) ||
+                   test_bit(FC_DISC_TMO, &vport->fc_flag) ||
                    ((vport->port_state > LPFC_VPORT_FAILED) &&
                     (vport->port_state < LPFC_VPORT_READY))) {
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_VPORT,
-                                       "1833 Vport discovery quiesce Wait:"
-                                       " state x%x fc_flags x%x"
-                                       " num_nodes x%x, waiting 1000 msecs"
-                                       " total wait msecs x%x\n",
+                                       "1833 Vport discovery quiesce Wait: "
+                                       "state x%x fc_flags x%lx "
+                                       "num_nodes x%x, waiting 1000 msecs "
+                                       "total wait msecs x%x\n",
                                        vport->port_state, vport->fc_flag,
                                        vport->num_disc_nodes,
                                        jiffies_to_msecs(jiffies - start_time));
@@ -270,9 +271,9 @@ static void lpfc_discovery_wait(struct lpfc_vport *vport)
                } else {
                        /* Base case.  Wait variants satisfied.  Break out */
                        lpfc_printf_vlog(vport, KERN_INFO, LOG_VPORT,
-                                        "1834 Vport discovery quiesced:"
-                                        " state x%x fc_flags x%x"
-                                        " wait msecs x%x\n",
+                                        "1834 Vport discovery quiesced: "
+                                        "state x%x fc_flags x%lx "
+                                        "wait msecs x%x\n",
                                         vport->port_state, vport->fc_flag,
                                         jiffies_to_msecs(jiffies
                                                - start_time));
@@ -283,7 +284,7 @@ static void lpfc_discovery_wait(struct lpfc_vport *vport)
        if (time_after(jiffies, wait_time_max))
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
                                 "1835 Vport discovery quiesce failed:"
-                                " state x%x fc_flags x%x wait msecs x%x\n",
+                                " state x%x fc_flags x%lx wait msecs x%x\n",
                                 vport->port_state, vport->fc_flag,
                                 jiffies_to_msecs(jiffies - start_time));
 }
@@ -407,7 +408,7 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable)
        vport->fc_vport = fc_vport;
 
        /* At this point we are fully registered with SCSI Layer.  */
-       vport->load_flag |= FC_ALLOW_FDMI;
+       set_bit(FC_ALLOW_FDMI, &vport->load_flag);
        if (phba->cfg_enable_SmartSAN ||
            (phba->cfg_fdmi_on == LPFC_FDMI_SUPPORT)) {
                /* Setup appropriate attribute masks */
@@ -420,7 +421,7 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable)
         * by the port.
         */
        if ((phba->sli_rev == LPFC_SLI_REV4) &&
-           (pport->fc_flag & FC_VFI_REGISTERED)) {
+           test_bit(FC_VFI_REGISTERED, &pport->fc_flag)) {
                rc = lpfc_sli4_init_vpi(vport);
                if (rc) {
                        lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
@@ -435,7 +436,7 @@ lpfc_vport_create(struct fc_vport *fc_vport, bool disable)
                 * Driver cannot INIT_VPI now. Set the flags to
                 * init_vpi when reg_vfi complete.
                 */
-               vport->fc_flag |= FC_VPORT_NEEDS_INIT_VPI;
+               set_bit(FC_VPORT_NEEDS_INIT_VPI, &vport->fc_flag);
                lpfc_vport_set_state(vport, FC_VPORT_LINKDOWN);
                rc = VPORT_OK;
                goto out;
@@ -535,10 +536,9 @@ disable_vport(struct fc_vport *fc_vport)
        struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data;
        struct lpfc_hba   *phba = vport->phba;
        struct lpfc_nodelist *ndlp = NULL;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
        /* Can't disable during an outstanding delete. */
-       if (vport->load_flag & FC_UNLOADING)
+       if (test_bit(FC_UNLOADING, &vport->load_flag))
                return 0;
 
        ndlp = lpfc_findnode_did(vport, Fabric_DID);
@@ -556,11 +556,8 @@ disable_vport(struct fc_vport *fc_vport)
         * scsi_host_put() to release the vport.
         */
        lpfc_mbx_unreg_vpi(vport);
-       if (phba->sli_rev == LPFC_SLI_REV4) {
-               spin_lock_irq(shost->host_lock);
-               vport->fc_flag |= FC_VPORT_NEEDS_INIT_VPI;
-               spin_unlock_irq(shost->host_lock);
-       }
+       if (phba->sli_rev == LPFC_SLI_REV4)
+               set_bit(FC_VPORT_NEEDS_INIT_VPI, &vport->fc_flag);
 
        lpfc_vport_set_state(vport, FC_VPORT_DISABLED);
        lpfc_printf_vlog(vport, KERN_ERR, LOG_VPORT,
@@ -574,7 +571,6 @@ enable_vport(struct fc_vport *fc_vport)
        struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data;
        struct lpfc_hba   *phba = vport->phba;
        struct lpfc_nodelist *ndlp = NULL;
-       struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
 
        if ((phba->link_state < LPFC_LINK_UP) ||
            (phba->fc_topology == LPFC_TOPOLOGY_LOOP)) {
@@ -582,16 +578,13 @@ enable_vport(struct fc_vport *fc_vport)
                return VPORT_OK;
        }
 
-       spin_lock_irq(shost->host_lock);
-       vport->load_flag |= FC_LOADING;
-       if (vport->fc_flag & FC_VPORT_NEEDS_INIT_VPI) {
-               spin_unlock_irq(shost->host_lock);
+       set_bit(FC_LOADING, &vport->load_flag);
+       if (test_bit(FC_VPORT_NEEDS_INIT_VPI, &vport->fc_flag)) {
                lpfc_issue_init_vpi(vport);
                goto out;
        }
 
-       vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
-       spin_unlock_irq(shost->host_lock);
+       set_bit(FC_VPORT_NEEDS_REG_VPI, &vport->fc_flag);
 
        /* Use the Physical nodes Fabric NDLP to determine if the link is
         * up and ready to FDISC.
@@ -643,22 +636,20 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
 
        /* If the vport is a static vport fail the deletion. */
        if ((vport->vport_flag & STATIC_VPORT) &&
-               !(phba->pport->load_flag & FC_UNLOADING)) {
+               !test_bit(FC_UNLOADING, &phba->pport->load_flag)) {
                lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
                                 "1837 vport_delete failed: Cannot delete "
                                 "static vport.\n");
                return VPORT_ERROR;
        }
 
-       spin_lock_irq(&phba->hbalock);
-       vport->load_flag |= FC_UNLOADING;
-       spin_unlock_irq(&phba->hbalock);
+       set_bit(FC_UNLOADING, &vport->load_flag);
 
        /*
         * If we are not unloading the driver then prevent the vport_delete
         * from happening until after this vport's discovery is finished.
         */
-       if (!(phba->pport->load_flag & FC_UNLOADING)) {
+       if (!test_bit(FC_UNLOADING, &phba->pport->load_flag)) {
                int check_count = 0;
                while (check_count < ((phba->fc_ratov * 3) + 3) &&
                       vport->port_state > LPFC_VPORT_FAILED &&
@@ -725,7 +716,7 @@ lpfc_vport_delete(struct fc_vport *fc_vport)
                        goto skip_logo;
        }
 
-       if (!(phba->pport->load_flag & FC_UNLOADING))
+       if (!test_bit(FC_UNLOADING, &phba->pport->load_flag))
                lpfc_discovery_wait(vport);
 
 skip_logo:
@@ -736,7 +727,7 @@ skip_logo:
        lpfc_sli_host_down(vport);
        lpfc_stop_vport_timers(vport);
 
-       if (!(phba->pport->load_flag & FC_UNLOADING)) {
+       if (!test_bit(FC_UNLOADING, &phba->pport->load_flag)) {
                lpfc_unreg_all_rpis(vport);
                lpfc_unreg_default_rpis(vport);
                /*
@@ -773,7 +764,7 @@ lpfc_create_vport_work_array(struct lpfc_hba *phba)
                return NULL;
        spin_lock_irq(&phba->port_list_lock);
        list_for_each_entry(port_iterator, &phba->port_list, listentry) {
-               if (port_iterator->load_flag & FC_UNLOADING)
+               if (test_bit(FC_UNLOADING, &port_iterator->load_flag))
                        continue;
                if (!scsi_host_get(lpfc_shost_from_vport(port_iterator))) {
                        lpfc_printf_vlog(port_iterator, KERN_ERR,
index 6a019132109c1ee678ad09d5b4b413d7fe691bbc..377dcab32cd8fe5ed7a23765a6d6e9448642b76e 100644 (file)
@@ -508,7 +508,7 @@ static int mac53c94_probe(struct macio_dev *mdev, const struct of_device_id *mat
        return rc;
 }
 
-static int mac53c94_remove(struct macio_dev *mdev)
+static void mac53c94_remove(struct macio_dev *mdev)
 {
        struct fsc_state *fp = (struct fsc_state *)macio_get_drvdata(mdev);
        struct Scsi_Host *host = fp->host;
@@ -526,11 +526,8 @@ static int mac53c94_remove(struct macio_dev *mdev)
        scsi_host_put(host);
 
        macio_release_resources(mdev);
-
-       return 0;
 }
 
-
 static struct of_device_id mac53c94_match[] = 
 {
        {
index 66a30a3e6cd53508f478aec8795077154998e3d2..38976f94453e0eea403bed17288dbf9183be50f9 100644 (file)
@@ -219,7 +219,7 @@ mega_query_adapter(adapter_t *adapter)
        raw_mbox[3] = ENQ3_GET_SOLICITED_FULL;  /* i.e. 0x02 */
 
        /* Issue a blocking command to the card */
-       if ((retval = issue_scb_block(adapter, raw_mbox))) {
+       if (issue_scb_block(adapter, raw_mbox)) {
                /* the adapter does not support 40ld */
 
                mraid_ext_inquiry       *ext_inq;
index e276583c590c38ee237e33caaa45a95ba87defe8..1c15cac41d805a783e634936cb261e16e70f2113 100644 (file)
@@ -54,7 +54,7 @@
 #define KERN_DEBUG KERN_WARNING
 #endif
 
-MODULE_AUTHOR("Paul Mackerras (paulus@samba.org)");
+MODULE_AUTHOR("Paul Mackerras <paulus@samba.org>");
 MODULE_DESCRIPTION("PowerMac MESH SCSI driver");
 MODULE_LICENSE("GPL");
 
@@ -1986,7 +1986,7 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match)
        return -ENODEV;
 }
 
-static int mesh_remove(struct macio_dev *mdev)
+static void mesh_remove(struct macio_dev *mdev)
 {
        struct mesh_state *ms = (struct mesh_state *)macio_get_drvdata(mdev);
        struct Scsi_Host *mesh_host = ms->host;
@@ -2013,11 +2013,8 @@ static int mesh_remove(struct macio_dev *mdev)
        macio_release_resources(mdev);
 
        scsi_host_put(mesh_host);
-
-       return 0;
 }
 
-
 static struct of_device_id mesh_match[] = 
 {
        {
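
mac53c94 and mesh both convert their macio ->remove() callbacks from int to void, mirroring the wider driver-core cleanup: the bus code never acted on a remove() return value and unbound the device regardless, so the int return only invited dead "return 0;" lines. The shape of the change, with hypothetical names:

#include <asm/macio.h>	/* struct macio_dev */

/* Before: the int return implied remove could veto the unbind, but the
 * bus core ignored it.
 */
static int foo_remove_old(struct macio_dev *mdev)
{
	/* ... teardown ... */
	return 0;	/* dead value */
}

/* After: void states the real contract; any failure must be handled
 * (or at most logged) inside the callback itself.
 */
static void foo_remove(struct macio_dev *mdev)
{
	/* ... teardown ... */
}
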
index 1bffd629c1244f22a43e1ba6a8eab0b676e5370d..73c831a97d276a6f446ad22986cbc0e463fa4e9d 100644 (file)
@@ -8,11 +8,12 @@
  */
 
 #include "mpi3mr.h"
+#include <linux/idr.h>
 
 /* global driver scope variables */
 LIST_HEAD(mrioc_list);
 DEFINE_SPINLOCK(mrioc_list_lock);
-static int mrioc_ids;
+static DEFINE_IDA(mrioc_ida);
 static int warn_non_secure_ctlr;
 atomic64_t event_counter;
 
@@ -5072,7 +5073,10 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        }
 
        mrioc = shost_priv(shost);
-       mrioc->id = mrioc_ids++;
+       retval = ida_alloc_range(&mrioc_ida, 1, U8_MAX, GFP_KERNEL);
+       if (retval < 0)
+               goto id_alloc_failed;
+       mrioc->id = (u8)retval;
        sprintf(mrioc->driver_name, "%s", MPI3MR_DRIVER_NAME);
        sprintf(mrioc->name, "%s%d", mrioc->driver_name, mrioc->id);
        INIT_LIST_HEAD(&mrioc->list);
@@ -5222,9 +5226,11 @@ init_ioc_failed:
 resource_alloc_failed:
        destroy_workqueue(mrioc->fwevt_worker_thread);
 fwevtthread_failed:
+       ida_free(&mrioc_ida, mrioc->id);
        spin_lock(&mrioc_list_lock);
        list_del(&mrioc->list);
        spin_unlock(&mrioc_list_lock);
+id_alloc_failed:
        scsi_host_put(shost);
 shost_failed:
        return retval;
@@ -5310,6 +5316,7 @@ static void mpi3mr_remove(struct pci_dev *pdev)
                mrioc->sas_hba.num_phys = 0;
        }
 
+       ida_free(&mrioc_ida, mrioc->id);
        spin_lock(&mrioc_list_lock);
        list_del(&mrioc->list);
        spin_unlock(&mrioc_list_lock);
@@ -5525,6 +5532,7 @@ static void __exit mpi3mr_exit(void)
                           &driver_attr_event_counter);
        pci_unregister_driver(&mpi3mr_pci_driver);
        sas_release_transport(mpi3mr_transport_template);
+       ida_destroy(&mrioc_ida);
 }
 
 module_init(mpi3mr_init);
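
The mpi3mr change replaces a bare mrioc_ids++ counter, which never returned IDs when an adapter was removed and could eventually overflow the u8 it feeds, with an IDA: IDs are recycled on remove and allocation fails cleanly when the 1..U8_MAX range is exhausted. The core of the pattern in isolation (sketch, hypothetical names):

#include <linux/idr.h>

static DEFINE_IDA(foo_ida);

static int foo_alloc_id(void)
{
	/* Smallest free id in [1, 255]; a negative errno (e.g. -ENOSPC)
	 * comes back when the range is full, instead of a silent wrap.
	 */
	return ida_alloc_range(&foo_ida, 1, 255, GFP_KERNEL);
}

static void foo_free_id(int id)
{
	ida_free(&foo_ida, id);		/* id is immediately reusable */
}

static void foo_unload(void)
{
	ida_destroy(&foo_ida);		/* drop the IDA's internal storage */
}
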
index b8120ca93c79740d7827ebff1652b4b22b296421..1b492e9a3e55ea2650fa4185ea3fa55187816b5d 100644 (file)
@@ -5481,7 +5481,7 @@ mpt3sas_atto_validate_nvram(struct MPT3SAS_ADAPTER *ioc,
  * mpt3sas_atto_get_sas_addr - get the ATTO SAS address from mfg page 1
  *
  * @ioc : per adapter object
- * @*sas_addr : return sas address
+ * @sas_addr : return sas address
  * Return: 0 for success, non-zero for failure.
  */
 static int
@@ -7916,26 +7916,22 @@ mpt3sas_base_validate_event_type(struct MPT3SAS_ADAPTER *ioc, u32 *event_type)
 }
 
 /**
- * _base_diag_reset - the "big hammer" start of day reset
- * @ioc: per adapter object
- *
- * Return: 0 for success, non-zero for failure.
- */
-static int
-_base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
-{
-       u32 host_diagnostic;
-       u32 ioc_state;
-       u32 count;
-       u32 hcb_size;
-
-       ioc_info(ioc, "sending diag reset !!\n");
-
-       pci_cfg_access_lock(ioc->pdev);
+ * mpt3sas_base_unlock_and_get_host_diagnostic - enable Host Diagnostic Register writes
+ * @ioc: per adapter object
+ * @host_diagnostic: host diagnostic register content
+ *
+ * Return: 0 for success, non-zero for failure.
+ */
 
-       drsprintk(ioc, ioc_info(ioc, "clear interrupts\n"));
+int
+mpt3sas_base_unlock_and_get_host_diagnostic(struct MPT3SAS_ADAPTER *ioc,
+       u32 *host_diagnostic)
+{
 
+       u32 count;
+       *host_diagnostic = 0;
        count = 0;
+
        do {
                /* Write magic sequence to WriteSequence register
                 * Loop until in diagnostic mode
@@ -7954,30 +7950,67 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
 
                if (count++ > 20) {
                        ioc_info(ioc,
-                           "Stop writing magic sequence after 20 retries\n");
+                                   "Stop writing magic sequence after 20 retries\n");
                        _base_dump_reg_set(ioc);
-                       goto out;
+                       return -EFAULT;
                }
 
-               host_diagnostic = ioc->base_readl_ext_retry(&ioc->chip->HostDiagnostic);
+               *host_diagnostic = ioc->base_readl_ext_retry(&ioc->chip->HostDiagnostic);
                drsprintk(ioc,
-                         ioc_info(ioc, "wrote magic sequence: count(%d), host_diagnostic(0x%08x)\n",
-                                  count, host_diagnostic));
+                            ioc_info(ioc, "wrote magic sequence: count(%d), host_diagnostic(0x%08x)\n",
+                                    count, *host_diagnostic));
 
-       } while ((host_diagnostic & MPI2_DIAG_DIAG_WRITE_ENABLE) == 0);
+       } while ((*host_diagnostic & MPI2_DIAG_DIAG_WRITE_ENABLE) == 0);
+       return 0;
+}
 
-       hcb_size = ioc->base_readl(&ioc->chip->HCBSize);
+/**
+ * mpt3sas_base_lock_host_diagnostic - disable Host Diagnostic Register writes
+ * @ioc: per adapter object
+ */
 
+void
+mpt3sas_base_lock_host_diagnostic(struct MPT3SAS_ADAPTER *ioc)
+{
+       drsprintk(ioc, ioc_info(ioc, "disable writes to the diagnostic register\n"));
+       writel(MPI2_WRSEQ_FLUSH_KEY_VALUE, &ioc->chip->WriteSequence);
+}
+
+/**
+ * _base_diag_reset - the "big hammer" start of day reset
+ * @ioc: per adapter object
+ *
+ * Return: 0 for success, non-zero for failure.
+ */
+static int
+_base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
+{
+       u32 host_diagnostic;
+       u32 ioc_state;
+       u32 count;
+       u32 hcb_size;
+
+       ioc_info(ioc, "sending diag reset !!\n");
+
+       pci_cfg_access_lock(ioc->pdev);
+
+       drsprintk(ioc, ioc_info(ioc, "clear interrupts\n"));
+
+       mutex_lock(&ioc->hostdiag_unlock_mutex);
+       if (mpt3sas_base_unlock_and_get_host_diagnostic(ioc, &host_diagnostic))
+               goto out;
+
+       hcb_size = ioc->base_readl(&ioc->chip->HCBSize);
        drsprintk(ioc, ioc_info(ioc, "diag reset: issued\n"));
        writel(host_diagnostic | MPI2_DIAG_RESET_ADAPTER,
             &ioc->chip->HostDiagnostic);
 
-       /*This delay allows the chip PCIe hardware time to finish reset tasks*/
+       /* This delay allows the chip PCIe hardware time to finish reset tasks */
        msleep(MPI2_HARD_RESET_PCIE_FIRST_READ_DELAY_MICRO_SEC/1000);
 
        /* Approximately 300 second max wait */
        for (count = 0; count < (300000000 /
-               MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC); count++) {
+           MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC); count++) {
 
                host_diagnostic = ioc->base_readl_ext_retry(&ioc->chip->HostDiagnostic);
 
@@ -7990,13 +8023,15 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
                if (!(host_diagnostic & MPI2_DIAG_RESET_ADAPTER))
                        break;
 
-               msleep(MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC / 1000);
+               /* Wait to pass the second read delay window */
+               msleep(MPI2_HARD_RESET_PCIE_SECOND_READ_DELAY_MICRO_SEC/1000);
        }
 
        if (host_diagnostic & MPI2_DIAG_HCB_MODE) {
 
                drsprintk(ioc,
-                         ioc_info(ioc, "restart the adapter assuming the HCB Address points to good F/W\n"));
+                       ioc_info(ioc,
+                                "restart the adapter assuming the HCB Address points to good F/W\n"));
                host_diagnostic &= ~MPI2_DIAG_BOOT_DEVICE_SELECT_MASK;
                host_diagnostic |= MPI2_DIAG_BOOT_DEVICE_SELECT_HCDW;
                writel(host_diagnostic, &ioc->chip->HostDiagnostic);
@@ -8010,9 +8045,8 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
        writel(host_diagnostic & ~MPI2_DIAG_HOLD_IOC_RESET,
            &ioc->chip->HostDiagnostic);
 
-       drsprintk(ioc,
-                 ioc_info(ioc, "disable writes to the diagnostic register\n"));
-       writel(MPI2_WRSEQ_FLUSH_KEY_VALUE, &ioc->chip->WriteSequence);
+       mpt3sas_base_lock_host_diagnostic(ioc);
+       mutex_unlock(&ioc->hostdiag_unlock_mutex);
 
        drsprintk(ioc, ioc_info(ioc, "Wait for FW to go to the READY state\n"));
        ioc_state = _base_wait_on_iocstate(ioc, MPI2_IOC_STATE_READY, 20);
@@ -8030,6 +8064,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
  out:
        pci_cfg_access_unlock(ioc->pdev);
        ioc_err(ioc, "diag reset: FAILED\n");
+       mutex_unlock(&ioc->hostdiag_unlock_mutex);
        return -EFAULT;
 }
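
The net effect of these mpt3sas_base.c hunks is to split the WriteSequence magic-key handshake out of _base_diag_reset() into an unlock/lock helper pair serialized by the new hostdiag_unlock_mutex, so another path (the SBR-reload ioctl added later in this series) can modify HostDiagnostic bits without racing a concurrent diag reset. The resulting call protocol, condensed into a sketch that assumes the driver's own headers and a hypothetical helper name:

/* Sketch of the helper protocol (error handling trimmed). */
static int foo_poke_host_diagnostic(struct MPT3SAS_ADAPTER *ioc, u32 set_bits)
{
	u32 host_diagnostic;

	mutex_lock(&ioc->hostdiag_unlock_mutex);   /* serialize vs. diag reset */
	if (mpt3sas_base_unlock_and_get_host_diagnostic(ioc, &host_diagnostic)) {
		mutex_unlock(&ioc->hostdiag_unlock_mutex);
		return -EFAULT;
	}
	writel(host_diagnostic | set_bits, &ioc->chip->HostDiagnostic);
	mpt3sas_base_lock_host_diagnostic(ioc);    /* re-latch the register */
	mutex_unlock(&ioc->hostdiag_unlock_mutex);
	return 0;
}
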
 
index 6d0bc8c6670028d621271190ac6e38a7a79a4fe6..bf100a4ebfc362c3ec77a566a583ae046e6990d6 100644 (file)
@@ -77,8 +77,8 @@
 #define MPT3SAS_DRIVER_NAME            "mpt3sas"
 #define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
 #define MPT3SAS_DESCRIPTION    "LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION         "43.100.00.00"
-#define MPT3SAS_MAJOR_VERSION          43
+#define MPT3SAS_DRIVER_VERSION         "48.100.00.00"
+#define MPT3SAS_MAJOR_VERSION          48
 #define MPT3SAS_MINOR_VERSION          100
 #define MPT3SAS_BUILD_VERSION          0
 #define MPT3SAS_RELEASE_VERSION        00
@@ -1366,6 +1366,7 @@ struct MPT3SAS_ADAPTER {
        u8              got_task_abort_from_ioctl;
 
        struct mutex    reset_in_progress_mutex;
+       struct mutex    hostdiag_unlock_mutex;
        spinlock_t      ioc_reset_in_progress_lock;
        u8              ioc_link_reset_in_progress;
 
@@ -1790,6 +1791,9 @@ void mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc);
 int mpt3sas_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num);
 void mpt3sas_base_pause_mq_polling(struct MPT3SAS_ADAPTER *ioc);
 void mpt3sas_base_resume_mq_polling(struct MPT3SAS_ADAPTER *ioc);
+int mpt3sas_base_unlock_and_get_host_diagnostic(struct MPT3SAS_ADAPTER *ioc,
+       u32 *host_diagnostic);
+void mpt3sas_base_lock_host_diagnostic(struct MPT3SAS_ADAPTER *ioc);
 
 /* scsih shared API */
 struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc,
index 147cb7088d55f4aaa4eb823b894a8a67dd22bf84..1c9fd26195b81230cda66851bc44f26c86d52415 100644 (file)
@@ -2543,6 +2543,56 @@ out:
        return 0;
 }
 
+/**
+ * _ctl_enable_diag_sbr_reload - enable sbr reload bit
+ * @ioc: per adapter object
+ * @arg: user space buffer containing ioctl content
+ *
+ * Enable the SBR reload bit
+ */
+static int
+_ctl_enable_diag_sbr_reload(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
+{
+       u32 ioc_state, host_diagnostic;
+
+       if (ioc->shost_recovery ||
+           ioc->pci_error_recovery || ioc->is_driver_loading ||
+           ioc->remove_host)
+               return -EAGAIN;
+
+       ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
+
+       if (ioc_state != MPI2_IOC_STATE_OPERATIONAL)
+               return -EFAULT;
+
+       host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic);
+
+       if (host_diagnostic & MPI2_DIAG_SBR_RELOAD)
+               return 0;
+
+       if (mutex_trylock(&ioc->hostdiag_unlock_mutex)) {
+               if (mpt3sas_base_unlock_and_get_host_diagnostic(ioc, &host_diagnostic)) {
+                       mutex_unlock(&ioc->hostdiag_unlock_mutex);
+                       return -EFAULT;
+               }
+       } else
+               return -EAGAIN;
+
+       host_diagnostic |= MPI2_DIAG_SBR_RELOAD;
+       writel(host_diagnostic, &ioc->chip->HostDiagnostic);
+       host_diagnostic = ioc->base_readl(&ioc->chip->HostDiagnostic);
+       mpt3sas_base_lock_host_diagnostic(ioc);
+       mutex_unlock(&ioc->hostdiag_unlock_mutex);
+
+       if (!(host_diagnostic & MPI2_DIAG_SBR_RELOAD)) {
+               ioc_err(ioc, "%s: Failed to set Diag SBR Reload Bit\n", __func__);
+               return -EFAULT;
+       }
+
+       ioc_info(ioc, "%s: Successfully set the Diag SBR Reload Bit\n", __func__);
+       return 0;
+}
+
 #ifdef CONFIG_COMPAT
 /**
  * _ctl_compat_mpt_command - convert 32bit pointers to 64bit.
@@ -2719,6 +2769,10 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
                if (_IOC_SIZE(cmd) == sizeof(struct mpt3_addnl_diag_query))
                        ret = _ctl_addnl_diag_query(ioc, arg);
                break;
+       case MPT3ENABLEDIAGSBRRELOAD:
+               if (_IOC_SIZE(cmd) == sizeof(struct mpt3_enable_diag_sbr_reload))
+                       ret = _ctl_enable_diag_sbr_reload(ioc, arg);
+               break;
        default:
                dctlprintk(ioc,
                           ioc_info(ioc, "unsupported ioctl opcode(0x%08x)\n",
index 8f6ffb40261c9a6f5f0eafc5a880177212338f9f..171709e910066a0ce7cb4e7332aad8096a05607c 100644 (file)
@@ -98,6 +98,8 @@
        struct mpt3_diag_read_buffer)
 #define MPT3ADDNLDIAGQUERY _IOWR(MPT3_MAGIC_NUMBER, 32, \
        struct mpt3_addnl_diag_query)
+#define MPT3ENABLEDIAGSBRRELOAD _IOWR(MPT3_MAGIC_NUMBER, 33, \
+       struct mpt3_enable_diag_sbr_reload)
 
 /* Trace Buffer default UniqueId */
 #define MPT2DIAGBUFFUNIQUEID (0x07075900)
@@ -448,4 +450,12 @@ struct mpt3_addnl_diag_query {
        uint32_t reserved2[2];
 };
 
+/**
+ * struct mpt3_enable_diag_sbr_reload - enable sbr reload
+ * @hdr - generic header
+ */
+struct mpt3_enable_diag_sbr_reload {
+       struct mpt3_ioctl_header hdr;
+};
+
 #endif /* MPT3SAS_CTL_H_INCLUDED */
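
MPT3ENABLEDIAGSBRRELOAD is an _IOWR command whose payload is just the generic mpt3_ioctl_header, so a caller only needs to identify the adapter. A hedged sketch of how userspace might issue it; the control node path and the header field name follow existing mpt3sas ioctl conventions and are assumptions, not taken from this patch:

/* Userspace sketch: enable the SBR reload bit on IOC 0. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
/* #include "mpt3sas_ctl.h" for MPT3ENABLEDIAGSBRRELOAD and the structs */

int main(void)
{
	struct mpt3_enable_diag_sbr_reload karg;
	int fd = open("/dev/mpt3ctl", O_RDWR);	/* assumed control node */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&karg, 0, sizeof(karg));
	karg.hdr.ioc_number = 0;	/* assumed header field name */
	if (ioctl(fd, MPT3ENABLEDIAGSBRRELOAD, &karg) < 0)
		perror("MPT3ENABLEDIAGSBRRELOAD");
	close(fd);
	return 0;
}
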
index 51b5788da040ac79125eaf027048b85dca8e7e2a..ef8ee93005eae6db5123d369b9e583fed391b20d 100644 (file)
@@ -12240,6 +12240,7 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        /* misc semaphores and spin locks */
        mutex_init(&ioc->reset_in_progress_mutex);
+       mutex_init(&ioc->hostdiag_unlock_mutex);
        /* initializing pci_access_mutex lock */
        mutex_init(&ioc->pci_access_mutex);
        spin_lock_init(&ioc->ioc_reset_in_progress_lock);
index 5c26a13ffbd2613fcf0117a42d81865dc84fbd00..7b27618fd7b2e82dcb3e4ef5299c8a2fd94a4a6f 100644 (file)
@@ -880,9 +880,9 @@ static ssize_t pm8001_show_update_fw(struct device *cdev,
        if (pm8001_ha->fw_status != FLASH_IN_PROGRESS)
                pm8001_ha->fw_status = FLASH_OK;
 
-       return snprintf(buf, PAGE_SIZE, "status=%x %s\n",
-                       flash_error_table[i].err_code,
-                       flash_error_table[i].reason);
+       return sysfs_emit(buf, "status=%x %s\n",
+                         flash_error_table[i].err_code,
+                         flash_error_table[i].reason);
 }
 static DEVICE_ATTR(update_fw, S_IRUGO|S_IWUSR|S_IWGRP,
        pm8001_show_update_fw, pm8001_store_update_fw);
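
sysfs_emit() is the preferred replacement for snprintf() in sysfs show() callbacks: it hard-codes the PAGE_SIZE bound, WARNs if the buffer is not the page-aligned one the sysfs core passed in, and returns the number of bytes emitted, so the call site carries no size arithmetic. A generic show() sketch with a hypothetical attribute:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t foo_status_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	/* sysfs_emit() owns the bounds check and rejects non-sysfs buffers. */
	return sysfs_emit(buf, "status=%x %s\n", 0u, "FLASH_OK");
}
static DEVICE_ATTR_RO(foo_status);
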
index 27bce80262c20c26f4cf7f29f40e2489fd47ae0b..8958547ac111ac251c57a10bd66e725422e8ed43 100644 (file)
@@ -2478,7 +2478,6 @@ qla1280_mailbox_command(struct scsi_qla_host *ha, uint8_t mr, uint16_t *mb)
        /* Load return mailbox registers. */
        optr = mb;
        iptr = (uint16_t *) &ha->mailbox_out[0];
-       mr = MAILBOX_REGISTER_COUNT;
        memcpy(optr, iptr, MAILBOX_REGISTER_COUNT * sizeof(uint16_t));
 
        if (ha->flags.reset_marker)
index 5d560d9b8944052c1f7da7a3ac39ad9ccd26f4ae..6177f4798f3ac98123ec6e4d5b0fedf3c13faea5 100644 (file)
@@ -1468,7 +1468,7 @@ static struct platform_driver qpti_sbus_driver = {
 module_platform_driver(qpti_sbus_driver);
 
 MODULE_DESCRIPTION("QlogicISP SBUS driver");
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("2.1");
 MODULE_FIRMWARE("qlogic/isp1000.bin");
index d03d66f11493012815d7c5bb5fbdfd35c9f6dd67..914d9c12e7412de0123bcb8b04e73ffee33c430a 100644 (file)
@@ -899,7 +899,7 @@ static int poll_queues; /* iouring iopoll interface.*/
 static char sdebug_proc_name[] = MY_NAME;
 static const char *my_name = MY_NAME;
 
-static struct bus_type pseudo_lld_bus;
+static const struct bus_type pseudo_lld_bus;
 
 static struct device_driver sdebug_driverfs_driver = {
        .name           = sdebug_proc_name,
@@ -8405,7 +8405,7 @@ static void sdebug_driver_remove(struct device *dev)
        scsi_host_put(sdbg_host->shost);
 }
 
-static struct bus_type pseudo_lld_bus = {
+static const struct bus_type pseudo_lld_bus = {
        .name = "pseudo",
        .probe = sdebug_driver_probe,
        .remove = sdebug_driver_remove,
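
pseudo_lld_bus can become const because the driver core now takes const struct bus_type * through bus_register()/bus_unregister() and the registration paths, letting a bus description live in rodata. A minimal sketch of the shape (hypothetical bus; const acceptance assumes a recent kernel):

#include <linux/device.h>
#include <linux/init.h>

static const struct bus_type foo_bus = {	/* rodata, cannot be scribbled on */
	.name = "foo",
};

static int __init foo_bus_init(void)
{
	return bus_register(&foo_bus);	/* accepts a const pointer */
}
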
index 3fcaf10a9dfe76daa651db71c9a2f0f6b1bbd1b0..ba7237e838633c95528c742db4efc94d209ecc0f 100644 (file)
@@ -551,9 +551,9 @@ static int scsi_dev_info_list_add_str(char *dev_list)
                if (model)
                        strflags = strsep(&next, next_check);
                if (!model || !strflags) {
-                       printk(KERN_ERR "%s: bad dev info string '%s' '%s'"
-                              " '%s'\n", __func__, vendor, model,
-                              strflags);
+                       pr_err("%s: bad dev info string '%s' '%s' '%s'\n",
+                              __func__, vendor, model ? model : "",
+                              strflags ? strflags : "");
                        res = -EINVAL;
                } else
                        res = scsi_dev_info_list_add(0 /* compatible */, vendor,
index df5ac03d5d6c2eb5233ad7fcfdad37a1e487b4e6..2e28e2360c85740d0b3ebb391785ee111c78d47b 100644 (file)
@@ -184,6 +184,92 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
        __scsi_queue_insert(cmd, reason, true);
 }
 
+void scsi_failures_reset_retries(struct scsi_failures *failures)
+{
+       struct scsi_failure *failure;
+
+       failures->total_retries = 0;
+
+       for (failure = failures->failure_definitions; failure->result;
+            failure++)
+               failure->retries = 0;
+}
+EXPORT_SYMBOL_GPL(scsi_failures_reset_retries);
+
+/**
+ * scsi_check_passthrough - Determine if passthrough scsi_cmnd needs a retry.
+ * @scmd: scsi_cmnd to check.
+ * @failures: scsi_failures struct that lists failures to check for.
+ *
+ * Returns -EAGAIN if the caller should retry else 0.
+ */
+static int scsi_check_passthrough(struct scsi_cmnd *scmd,
+                                 struct scsi_failures *failures)
+{
+       struct scsi_failure *failure;
+       struct scsi_sense_hdr sshdr;
+       enum sam_status status;
+
+       if (!failures)
+               return 0;
+
+       for (failure = failures->failure_definitions; failure->result;
+            failure++) {
+               if (failure->result == SCMD_FAILURE_RESULT_ANY)
+                       goto maybe_retry;
+
+               if (host_byte(scmd->result) &&
+                   host_byte(scmd->result) == host_byte(failure->result))
+                       goto maybe_retry;
+
+               status = status_byte(scmd->result);
+               if (!status)
+                       continue;
+
+               if (failure->result == SCMD_FAILURE_STAT_ANY &&
+                   !scsi_status_is_good(scmd->result))
+                       goto maybe_retry;
+
+               if (status != status_byte(failure->result))
+                       continue;
+
+               if (status_byte(failure->result) != SAM_STAT_CHECK_CONDITION ||
+                   failure->sense == SCMD_FAILURE_SENSE_ANY)
+                       goto maybe_retry;
+
+               if (!scsi_command_normalize_sense(scmd, &sshdr))
+                       return 0;
+
+               if (failure->sense != sshdr.sense_key)
+                       continue;
+
+               if (failure->asc == SCMD_FAILURE_ASC_ANY)
+                       goto maybe_retry;
+
+               if (failure->asc != sshdr.asc)
+                       continue;
+
+               if (failure->ascq == SCMD_FAILURE_ASCQ_ANY ||
+                   failure->ascq == sshdr.ascq)
+                       goto maybe_retry;
+       }
+
+       return 0;
+
+maybe_retry:
+       if (failure->allowed) {
+               if (failure->allowed == SCMD_FAILURE_NO_LIMIT ||
+                   ++failure->retries <= failure->allowed)
+                       return -EAGAIN;
+       } else {
+               if (failures->total_allowed == SCMD_FAILURE_NO_LIMIT ||
+                   ++failures->total_retries <= failures->total_allowed)
+                       return -EAGAIN;
+       }
+
+       return 0;
+}
+
 /**
  * scsi_execute_cmd - insert request and wait for the result
  * @sdev:      scsi_device
@@ -192,7 +278,7 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
  * @buffer:    data buffer
  * @bufflen:   len of buffer
  * @timeout:   request timeout in HZ
- * @retries:   number of times to retry request
+ * @ml_retries:        number of times the SCSI midlayer will retry the request
  * @args:      Optional args. See struct definition for field descriptions
  *
  * Returns the scsi_cmnd result field if a command was executed, or a negative
@@ -200,7 +286,7 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
  */
 int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
                     blk_opf_t opf, void *buffer, unsigned int bufflen,
-                    int timeout, int retries,
+                    int timeout, int ml_retries,
                     const struct scsi_exec_args *args)
 {
        static const struct scsi_exec_args default_args;
@@ -214,6 +300,7 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
                              args->sense_len != SCSI_SENSE_BUFFERSIZE))
                return -EINVAL;
 
+retry:
        req = scsi_alloc_request(sdev->request_queue, opf, args->req_flags);
        if (IS_ERR(req))
                return PTR_ERR(req);
@@ -227,7 +314,7 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
        scmd = blk_mq_rq_to_pdu(req);
        scmd->cmd_len = COMMAND_SIZE(cmd[0]);
        memcpy(scmd->cmnd, cmd, scmd->cmd_len);
-       scmd->allowed = retries;
+       scmd->allowed = ml_retries;
        scmd->flags |= args->scmd_flags;
        req->timeout = timeout;
        req->rq_flags |= RQF_QUIET;
@@ -237,6 +324,11 @@ int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
         */
        blk_execute_rq(req, true);
 
+       if (scsi_check_passthrough(scmd, args->failures) == -EAGAIN) {
+               blk_mq_free_request(req);
+               goto retry;
+       }
+
        /*
         * Some devices (USB mass-storage in particular) may transfer
         * garbage data together with a residue indicating that the data
@@ -2172,11 +2264,25 @@ scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int subpage,
        unsigned char cmd[12];
        int use_10_for_ms;
        int header_length;
-       int result, retry_count = retries;
+       int result;
        struct scsi_sense_hdr my_sshdr;
+       struct scsi_failure failure_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = retries,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
                /* caller might not be interested in sense, but we need it */
                .sshdr = sshdr ? : &my_sshdr,
+               .failures = &failures,
        };
 
        memset(data, 0, sizeof(*data));
@@ -2238,12 +2344,6 @@ scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int subpage,
                                        goto retry;
                                }
                        }
-                       if (scsi_status_is_check_condition(result) &&
-                           sshdr->sense_key == UNIT_ATTENTION &&
-                           retry_count) {
-                               retry_count--;
-                               goto retry;
-                       }
                }
                return -EIO;
        }
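
The scsi_mode_sense() conversion above shows the intended caller model for the new API: rather than open-coding a retry loop around scsi_execute_cmd(), the caller declares what is retryable in a scsi_failure table and lets scsi_check_passthrough() drive resubmission. A condensed, hypothetical caller modeled on that hunk (not taken from the patch):

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>

/* Issue a TEST UNIT READY, retrying up to 3 times on any UNIT ATTENTION. */
static int foo_test_unit_ready(struct scsi_device *sdev)
{
	u8 cmd[6] = { TEST_UNIT_READY };
	struct scsi_failure failure_defs[] = {
		{
			.sense = UNIT_ATTENTION,
			.asc = SCMD_FAILURE_ASC_ANY,
			.ascq = SCMD_FAILURE_ASCQ_ANY,
			.allowed = 3,
			.result = SAM_STAT_CHECK_CONDITION,
		},
		{}	/* .result == 0 terminates the table */
	};
	struct scsi_failures failures = {
		.failure_definitions = failure_defs,
	};
	const struct scsi_exec_args exec_args = {
		.failures = &failures,
	};

	/* 10s timeout, 3 midlayer retries, plus the failure-table retries. */
	return scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, NULL, 0,
				10 * HZ, 3, &exec_args);
}
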
@@ -3336,3 +3436,7 @@ void scsi_build_sense(struct scsi_cmnd *scmd, int desc, u8 key, u8 asc, u8 ascq)
        scmd->result = SAM_STAT_CHECK_CONDITION;
 }
 EXPORT_SYMBOL_GPL(scsi_build_sense);
+
+#ifdef CONFIG_SCSI_LIB_KUNIT_TEST
+#include "scsi_lib_test.c"
+#endif
diff --git a/drivers/scsi/scsi_lib_test.c b/drivers/scsi/scsi_lib_test.c
new file mode 100644 (file)
index 0000000..9983442
--- /dev/null
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit tests for scsi_lib.c.
+ *
+ * Copyright (C) 2023, Oracle Corporation
+ */
+#include <kunit/test.h>
+
+#include <scsi/scsi_proto.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+
+#define SCSI_LIB_TEST_MAX_ALLOWED 3
+#define SCSI_LIB_TEST_TOTAL_MAX_ALLOWED 5
+
+static void scsi_lib_test_multiple_sense(struct kunit *test)
+{
+       struct scsi_failure multiple_sense_failure_defs[] = {
+               {
+                       .sense = DATA_PROTECT,
+                       .asc = 0x1,
+                       .ascq = 0x1,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = 0x11,
+                       .ascq = 0x0,
+                       .allowed = SCSI_LIB_TEST_MAX_ALLOWED,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = NOT_READY,
+                       .asc = 0x11,
+                       .ascq = 0x22,
+                       .allowed = SCSI_LIB_TEST_MAX_ALLOWED,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = ABORTED_COMMAND,
+                       .asc = 0x11,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = SCSI_LIB_TEST_MAX_ALLOWED,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = HARDWARE_ERROR,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .allowed = SCSI_LIB_TEST_MAX_ALLOWED,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = ILLEGAL_REQUEST,
+                       .asc = 0x91,
+                       .ascq = 0x36,
+                       .allowed = SCSI_LIB_TEST_MAX_ALLOWED,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = multiple_sense_failure_defs,
+       };
+       u8 sense[SCSI_SENSE_BUFFERSIZE] = {};
+       struct scsi_cmnd sc = {
+               .sense_buffer = sense,
+       };
+       int i;
+
+       /* Match end of array */
+       scsi_build_sense(&sc, 0, ILLEGAL_REQUEST, 0x91, 0x36);
+       KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures));
+       /* Basic match in array */
+       scsi_build_sense(&sc, 0, UNIT_ATTENTION, 0x11, 0x0);
+       KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures));
+       /* No matching sense entry */
+       scsi_build_sense(&sc, 0, MISCOMPARE, 0x11, 0x11);
+       KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures));
+       /* Match using SCMD_FAILURE_ASCQ_ANY */
+       scsi_build_sense(&sc, 0, ABORTED_COMMAND, 0x11, 0x22);
+       KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures));
+       /* Fail to match */
+       scsi_build_sense(&sc, 0, ABORTED_COMMAND, 0x22, 0x22);
+       KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures));
+       /* Match using SCMD_FAILURE_ASC_ANY */
+       scsi_build_sense(&sc, 0, HARDWARE_ERROR, 0x11, 0x22);
+       KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures));
+       /* No matching status entry */
+       sc.result = SAM_STAT_RESERVATION_CONFLICT;
+       KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures));
+
+       /* Test hitting allowed limit */
+       scsi_build_sense(&sc, 0, NOT_READY, 0x11, 0x22);
+       for (i = 0; i < SCSI_LIB_TEST_MAX_ALLOWED; i++)
+               KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc,
+                               &failures));
+       KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures));
+
+       /* Reset retries so we can retest */
+       failures.failure_definitions = multiple_sense_failure_defs;
+       scsi_failures_reset_retries(&failures);
+
+       /* Test no retries allowed */
+       scsi_build_sense(&sc, 0, DATA_PROTECT, 0x1, 0x1);
+       KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures));
+}
+
+static void scsi_lib_test_any_sense(struct kunit *test)
+{
+       struct scsi_failure any_sense_failure_defs[] = {
+               {
+                       .result = SCMD_FAILURE_SENSE_ANY,
+                       .allowed = SCSI_LIB_TEST_MAX_ALLOWED,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = any_sense_failure_defs,
+       };
+       u8 sense[SCSI_SENSE_BUFFERSIZE] = {};
+       struct scsi_cmnd sc = {
+               .sense_buffer = sense,
+       };
+
+       /* Match using SCMD_FAILURE_SENSE_ANY */
+       failures.failure_definitions = any_sense_failure_defs;
+       scsi_build_sense(&sc, 0, MEDIUM_ERROR, 0x11, 0x22);
+       KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures));
+}
+
+static void scsi_lib_test_host(struct kunit *test)
+{
+       struct scsi_failure retryable_host_failure_defs[] = {
+               {
+                       .result = DID_TRANSPORT_DISRUPTED << 16,
+                       .allowed = SCSI_LIB_TEST_MAX_ALLOWED,
+               },
+               {
+                       .result = DID_TIME_OUT << 16,
+                       .allowed = SCSI_LIB_TEST_MAX_ALLOWED,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = retryable_host_failure_defs,
+       };
+       u8 sense[SCSI_SENSE_BUFFERSIZE] = {};
+       struct scsi_cmnd sc = {
+               .sense_buffer = sense,
+       };
+
+       /* No matching host byte entry */
+       failures.failure_definitions = retryable_host_failure_defs;
+       sc.result = DID_NO_CONNECT << 16;
+       KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures));
+       /* Matching host byte entry */
+       sc.result = DID_TIME_OUT << 16;
+       KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures));
+}
+
+static void scsi_lib_test_any_failure(struct kunit *test)
+{
+       struct scsi_failure any_failure_defs[] = {
+               {
+                       .result = SCMD_FAILURE_RESULT_ANY,
+                       .allowed = SCSI_LIB_TEST_MAX_ALLOWED,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = any_failure_defs,
+       };
+       u8 sense[SCSI_SENSE_BUFFERSIZE] = {};
+       struct scsi_cmnd sc = {
+               .sense_buffer = sense,
+       };
+
+       /* Match SCMD_FAILURE_RESULT_ANY */
+       failures.failure_definitions = any_failure_defs;
+       sc.result = DID_TRANSPORT_FAILFAST << 16;
+       KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures));
+}
+
+static void scsi_lib_test_any_status(struct kunit *test)
+{
+       struct scsi_failure any_status_failure_defs[] = {
+               {
+                       .result = SCMD_FAILURE_STAT_ANY,
+                       .allowed = SCSI_LIB_TEST_MAX_ALLOWED,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = any_status_failure_defs,
+       };
+       u8 sense[SCSI_SENSE_BUFFERSIZE] = {};
+       struct scsi_cmnd sc = {
+               .sense_buffer = sense,
+       };
+
+       /* Test any status handling */
+       failures.failure_definitions = any_status_failure_defs;
+       sc.result = SAM_STAT_RESERVATION_CONFLICT;
+       KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures));
+}
+
+static void scsi_lib_test_total_allowed(struct kunit *test)
+{
+       struct scsi_failure total_allowed_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               /* Fail all CCs except the UA above */
+               {
+                       .sense = SCMD_FAILURE_SENSE_ANY,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               /* Retry any other errors not listed above */
+               {
+                       .result = SCMD_FAILURE_RESULT_ANY,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = total_allowed_defs,
+       };
+       u8 sense[SCSI_SENSE_BUFFERSIZE] = {};
+       struct scsi_cmnd sc = {
+               .sense_buffer = sense,
+       };
+       int i;
+
+       /* Test total_allowed */
+       failures.failure_definitions = total_allowed_defs;
+       scsi_failures_reset_retries(&failures);
+       failures.total_allowed = SCSI_LIB_TEST_TOTAL_MAX_ALLOWED;
+
+       scsi_build_sense(&sc, 0, UNIT_ATTENTION, 0x28, 0x0);
+       for (i = 0; i < SCSI_LIB_TEST_TOTAL_MAX_ALLOWED; i++)
+               /* Retry since we are under the total_allowed limit */
+               KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc,
+                               &failures));
+       sc.result = DID_TIME_OUT << 16;
+       /* We have now hit the total_allowed limit so no more retries */
+       KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures));
+}
+
+static void scsi_lib_test_mixed_total(struct kunit *test)
+{
+       struct scsi_failure mixed_total_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = 0x28,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = 0x29,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .allowed = 1,
+                       .result = DID_TIME_OUT << 16,
+               },
+               {}
+       };
+       u8 sense[SCSI_SENSE_BUFFERSIZE] = {};
+       struct scsi_failures failures = {
+               .failure_definitions = mixed_total_defs,
+       };
+       struct scsi_cmnd sc = {
+               .sense_buffer = sense,
+       };
+       int i;
+
+       /*
+        * Test total_allowed when there is a mix of per-failure allowed
+        * and table-wide total_allowed limits.
+        */
+       failures.failure_definitions = mixed_total_defs;
+       scsi_failures_reset_retries(&failures);
+       failures.total_allowed = SCSI_LIB_TEST_TOTAL_MAX_ALLOWED;
+
+       scsi_build_sense(&sc, 0, UNIT_ATTENTION, 0x28, 0x0);
+       for (i = 0; i < SCSI_LIB_TEST_TOTAL_MAX_ALLOWED; i++)
+               /* Retry since we are under the total_allowed limit */
+               KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc,
+                               &failures));
+       /* Do not retry since we are now over the total_allowed limit */
+       KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures));
+
+       scsi_failures_reset_retries(&failures);
+       scsi_build_sense(&sc, 0, UNIT_ATTENTION, 0x28, 0x0);
+       for (i = 0; i < SCSI_LIB_TEST_TOTAL_MAX_ALLOWED; i++)
+               /* Retry since we are under the total_allowed limit */
+               KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc,
+                               &failures));
+       sc.result = DID_TIME_OUT << 16;
+       /* Retry because this failure has a per-failure limit */
+       KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures));
+       scsi_build_sense(&sc, 0, UNIT_ATTENTION, 0x29, 0x0);
+       /* total_allowed is now hit so no more retries */
+       KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures));
+}
+
+static void scsi_lib_test_check_passthrough(struct kunit *test)
+{
+       scsi_lib_test_multiple_sense(test);
+       scsi_lib_test_any_sense(test);
+       scsi_lib_test_host(test);
+       scsi_lib_test_any_failure(test);
+       scsi_lib_test_any_status(test);
+       scsi_lib_test_total_allowed(test);
+       scsi_lib_test_mixed_total(test);
+}
+
+static struct kunit_case scsi_lib_test_cases[] = {
+       KUNIT_CASE(scsi_lib_test_check_passthrough),
+       {}
+};
+
+static struct kunit_suite scsi_lib_test_suite = {
+       .name = "scsi_lib",
+       .test_cases = scsi_lib_test_cases,
+};
+
+kunit_test_suite(scsi_lib_test_suite);
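
The tests above pin down the contract that the conversions which follow rely
on: scsi_check_passthrough() returns -EAGAIN when the command's result matches
a failure entry whose retry budget is not yet exhausted, and 0 otherwise, and
an entry's own .allowed budget is consulted independently of the table-wide
total_allowed cap. A minimal caller-side sketch, built only from the
structures exercised here (the entry values are illustrative):

	u8 sense[SCSI_SENSE_BUFFERSIZE] = {};
	struct scsi_cmnd sc = {
		.sense_buffer = sense,
	};
	struct scsi_failure sketch_defs[] = {
		/* Retry any CHECK CONDITION up to three times */
		{
			.sense = SCMD_FAILURE_SENSE_ANY,
			.result = SAM_STAT_CHECK_CONDITION,
			.allowed = 3,
		},
		{}	/* the table is terminated by an empty entry */
	};
	struct scsi_failures sketch_failures = {
		.failure_definitions = sketch_defs,
	};

	scsi_failures_reset_retries(&sketch_failures);	/* reset budgets first */
	if (scsi_check_passthrough(&sc, &sketch_failures) == -EAGAIN)
		;	/* retry: resubmit the command */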
index 1fbfe1b52c9f1a906ea6b0da7a6b273e2972a903..9fc397a9ce7a4f91cb568a040929b610a24d4e97 100644 (file)
@@ -156,7 +156,7 @@ extern void scsi_sysfs_device_initialize(struct scsi_device *);
 extern struct scsi_transport_template blank_transport_template;
 extern void __scsi_remove_device(struct scsi_device *);
 
-extern struct bus_type scsi_bus_type;
+extern const struct bus_type scsi_bus_type;
 extern const struct attribute_group *scsi_shost_groups[];
 
 /* scsi_netlink.c */
index 9969f4e2f1c3d9c656076e3e540bd17c8352c2af..8d06475de17a33a26921a1ff70c57f759986973f 100644 (file)
@@ -412,7 +412,7 @@ static void scsi_target_dev_release(struct device *dev)
        put_device(parent);
 }
 
-static struct device_type scsi_target_type = {
+static const struct device_type scsi_target_type = {
        .name =         "scsi_target",
        .release =      scsi_target_dev_release,
 };
@@ -626,6 +626,7 @@ void scsi_sanitize_inquiry_string(unsigned char *s, int len)
 }
 EXPORT_SYMBOL(scsi_sanitize_inquiry_string);
 
+
 /**
  * scsi_probe_lun - probe a single LUN using a SCSI INQUIRY
  * @sdev:      scsi_device to probe
@@ -647,10 +648,36 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
        int first_inquiry_len, try_inquiry_len, next_inquiry_len;
        int response_len = 0;
        int pass, count, result, resid;
-       struct scsi_sense_hdr sshdr;
+       struct scsi_failure failure_defs[] = {
+               /*
+                * Retry the not-ready to ready transition [asc/ascq=0x28/0x0]
+                * and the power-on/reset [asc/ascq=0x29/0x0] unit attentions.
+                * INQUIRY should not yield UNIT_ATTENTION but many buggy
+                * devices do so anyway.
+                */
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = 0x28,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = 0x29,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .allowed = 1,
+                       .result = DID_TIME_OUT << 16,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .total_allowed = 3,
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
-               .sshdr = &sshdr,
                .resid = &resid,
+               .failures = &failures,
        };
 
        *bflags = 0;
@@ -668,6 +695,8 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
                                pass, try_inquiry_len));
 
        /* Each pass gets up to three chances to ignore Unit Attention */
+       scsi_failures_reset_retries(&failures);
+
        for (count = 0; count < 3; ++count) {
                memset(scsi_cmd, 0, 6);
                scsi_cmd[0] = INQUIRY;
@@ -684,22 +713,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
                                "scsi scan: INQUIRY %s with code 0x%x\n",
                                result ? "failed" : "successful", result));
 
-               if (result > 0) {
-                       /*
-                        * not-ready to ready transition [asc/ascq=0x28/0x0]
-                        * or power-on, reset [asc/ascq=0x29/0x0], continue.
-                        * INQUIRY should not yield UNIT_ATTENTION
-                        * but many buggy devices do so anyway. 
-                        */
-                       if (scsi_status_is_check_condition(result) &&
-                           scsi_sense_valid(&sshdr)) {
-                               if ((sshdr.sense_key == UNIT_ATTENTION) &&
-                                   ((sshdr.asc == 0x28) ||
-                                    (sshdr.asc == 0x29)) &&
-                                   (sshdr.ascq == 0))
-                                       continue;
-                       }
-               } else if (result == 0) {
+               if (result == 0) {
                        /*
                         * if nothing was transferred, we try
                         * again. It's a workaround for some USB
@@ -1402,14 +1416,34 @@ static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflag
        unsigned int length;
        u64 lun;
        unsigned int num_luns;
-       unsigned int retries;
        int result;
        struct scsi_lun *lunp, *lun_data;
-       struct scsi_sense_hdr sshdr;
        struct scsi_device *sdev;
        struct Scsi_Host *shost = dev_to_shost(&starget->dev);
+       struct scsi_failure failure_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               /* Fail all CCs except the UA above */
+               {
+                       .sense = SCMD_FAILURE_SENSE_ANY,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               /* Retry any other errors not listed above */
+               {
+                       .result = SCMD_FAILURE_RESULT_ANY,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .total_allowed = 3,
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
-               .sshdr = &sshdr,
+               .failures = &failures,
        };
        int ret = 0;
 
@@ -1480,29 +1514,18 @@ retry:
         * should come through as a check condition, and will not generate
         * a retry.
         */
-       for (retries = 0; retries < 3; retries++) {
-               SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev,
-                               "scsi scan: Sending REPORT LUNS to (try %d)\n",
-                               retries));
-
-               result = scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN,
-                                         lun_data, length,
-                                         SCSI_REPORT_LUNS_TIMEOUT, 3,
-                                         &exec_args);
+       scsi_failures_reset_retries(&failures);
 
-               SCSI_LOG_SCAN_BUS(3, sdev_printk (KERN_INFO, sdev,
-                               "scsi scan: REPORT LUNS"
-                               " %s (try %d) result 0x%x\n",
-                               result ?  "failed" : "successful",
-                               retries, result));
-               if (result == 0)
-                       break;
-               else if (scsi_sense_valid(&sshdr)) {
-                       if (sshdr.sense_key != UNIT_ATTENTION)
-                               break;
-               }
-       }
+       SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev,
+                         "scsi scan: Sending REPORT LUNS\n"));
 
+       result = scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN, lun_data,
+                                 length, SCSI_REPORT_LUNS_TIMEOUT, 3,
+                                 &exec_args);
+
+       SCSI_LOG_SCAN_BUS(3, sdev_printk(KERN_INFO, sdev,
+                         "scsi scan: REPORT LUNS %s result 0x%x\n",
+                         result ? "failed" : "successful", result));
        if (result) {
                /*
                 * The device probably does not support a REPORT LUN command
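
Both scan-path conversions above share one shape: declare a scsi_failure
table on the stack, wrap it in a scsi_failures descriptor, reset the retry
counters, and hand the whole thing to scsi_execute_cmd() through
scsi_exec_args so that the midlayer performs the retries instead of an
open-coded loop. Condensed to its skeleton (command, buffer and timeout are
placeholders):

	struct scsi_failure defs[] = {
		{
			.sense = UNIT_ATTENTION,
			.asc = SCMD_FAILURE_ASC_ANY,
			.ascq = SCMD_FAILURE_ASCQ_ANY,
			.allowed = 3,
			.result = SAM_STAT_CHECK_CONDITION,
		},
		{}
	};
	struct scsi_failures failures = {
		.failure_definitions = defs,
	};
	const struct scsi_exec_args args = {
		.failures = &failures,
	};

	scsi_failures_reset_retries(&failures);
	result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, buf, len,
				  timeout, 1, &args);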
index 24f6eefb68030d39869934e6fbefc9fce11d640b..49dd34426d5e06a20d454aa5ca39c3a05b7b6fa3 100644 (file)
@@ -27,7 +27,7 @@
 #include "scsi_priv.h"
 #include "scsi_logging.h"
 
-static struct device_type scsi_dev_type;
+static const struct device_type scsi_dev_type;
 
 static const struct {
        enum scsi_device_state  value;
@@ -549,7 +549,7 @@ static int scsi_bus_uevent(const struct device *dev, struct kobj_uevent_env *env
        return 0;
 }
 
-struct bus_type scsi_bus_type = {
+const struct bus_type scsi_bus_type = {
         .name          = "scsi",
         .match         = scsi_bus_match,
        .uevent         = scsi_bus_uevent,
@@ -1626,7 +1626,7 @@ int scsi_sysfs_add_host(struct Scsi_Host *shost)
        return 0;
 }
 
-static struct device_type scsi_dev_type = {
+static const struct device_type scsi_dev_type = {
        .name =         "scsi_device",
        .release =      scsi_device_dev_release,
        .groups =       scsi_sdev_attr_groups,
index 3075b2ddf7a6975e203f9453aa56e1feead0ffc2..af3ac6346796ba624820086c7a5293fdcc2cba73 100644 (file)
@@ -1201,7 +1201,7 @@ static const struct device_type iscsi_flashnode_conn_dev_type = {
        .release = iscsi_flashnode_conn_release,
 };
 
-static struct bus_type iscsi_flashnode_bus;
+static const struct bus_type iscsi_flashnode_bus;
 
 int iscsi_flashnode_bus_match(struct device *dev,
                                     struct device_driver *drv)
@@ -1212,7 +1212,7 @@ int iscsi_flashnode_bus_match(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(iscsi_flashnode_bus_match);
 
-static struct bus_type iscsi_flashnode_bus = {
+static const struct bus_type iscsi_flashnode_bus = {
        .name = "iscsi_flashnode",
        .match = &iscsi_flashnode_bus_match,
 };
index f668c1c0a98f20bc6b8923687ca451f10351b2aa..64852e6df3e327d97aa74d38f932e11551a69b41 100644 (file)
@@ -108,29 +108,30 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd,
                       enum req_op op, void *buffer, unsigned int bufflen,
                       struct scsi_sense_hdr *sshdr)
 {
-       int i, result;
-       struct scsi_sense_hdr sshdr_tmp;
        blk_opf_t opf = op | REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
                        REQ_FAILFAST_DRIVER;
+       struct scsi_failure failure_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = DV_RETRIES,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
+               /* bypass the SDEV_QUIESCE state with BLK_MQ_REQ_PM */
                .req_flags = BLK_MQ_REQ_PM,
-               .sshdr = sshdr ? : &sshdr_tmp,
+               .sshdr = sshdr,
+               .failures = &failures,
        };
 
-       sshdr = exec_args.sshdr;
-
-       for(i = 0; i < DV_RETRIES; i++) {
-               /*
-                * The purpose of the RQF_PM flag below is to bypass the
-                * SDEV_QUIESCE state.
-                */
-               result = scsi_execute_cmd(sdev, cmd, opf, buffer, bufflen,
-                                         DV_TIMEOUT, 1, &exec_args);
-               if (result < 0 || !scsi_sense_valid(sshdr) ||
-                   sshdr->sense_key != UNIT_ATTENTION)
-                       break;
-       }
-       return result;
+       return scsi_execute_cmd(sdev, cmd, opf, buffer, bufflen, DV_TIMEOUT, 1,
+                               &exec_args);
 }
 
 static struct {
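
With the retry policy expressed in the failure table, spi_execute() no longer
needs the sshdr_tmp fallback: the midlayer, rather than the caller, inspects
the sense data, so a caller that does not care about it can simply pass NULL,
e.g.:

	/* Sense decoding is handled via the failures table; sshdr may be NULL */
	result = spi_execute(sdev, cmd, REQ_OP_DRV_IN, buffer, bufflen, NULL);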
index bdd0acf7fa3cb130e64fac2aacf684aa5a91da8b..2cc73c650ca60a557fb2952dbed8a078ba58745b 100644 (file)
@@ -1645,36 +1645,35 @@ out:
 
 static int sd_sync_cache(struct scsi_disk *sdkp)
 {
-       int retries, res;
+       int res;
        struct scsi_device *sdp = sdkp->device;
        const int timeout = sdp->request_queue->rq_timeout
                * SD_FLUSH_TIMEOUT_MULTIPLIER;
+       /* Leave the rest of the command zero to indicate flush everything. */
+       const unsigned char cmd[16] = { sdp->use_16_for_sync ?
+                               SYNCHRONIZE_CACHE_16 : SYNCHRONIZE_CACHE };
        struct scsi_sense_hdr sshdr;
+       struct scsi_failure failure_defs[] = {
+               {
+                       .allowed = 3,
+                       .result = SCMD_FAILURE_RESULT_ANY,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
                .req_flags = BLK_MQ_REQ_PM,
                .sshdr = &sshdr,
+               .failures = &failures,
        };
 
        if (!scsi_device_online(sdp))
                return -ENODEV;
 
-       for (retries = 3; retries > 0; --retries) {
-               unsigned char cmd[16] = { 0 };
-
-               if (sdp->use_16_for_sync)
-                       cmd[0] = SYNCHRONIZE_CACHE_16;
-               else
-                       cmd[0] = SYNCHRONIZE_CACHE;
-               /*
-                * Leave the rest of the command zero to indicate
-                * flush everything.
-                */
-               res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0,
-                                      timeout, sdkp->max_retries, &exec_args);
-               if (res == 0)
-                       break;
-       }
-
+       res = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, NULL, 0, timeout,
+                              sdkp->max_retries, &exec_args);
        if (res) {
                sd_print_result(sdkp, "Synchronize Cache(10) failed", res);
 
@@ -1801,8 +1800,22 @@ static int sd_pr_in_command(struct block_device *bdev, u8 sa,
        struct scsi_device *sdev = sdkp->device;
        struct scsi_sense_hdr sshdr;
        u8 cmd[10] = { PERSISTENT_RESERVE_IN, sa };
+       struct scsi_failure failure_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = 5,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
                .sshdr = &sshdr,
+               .failures = &failures,
        };
        int result;
 
@@ -1889,8 +1902,22 @@ static int sd_pr_out_command(struct block_device *bdev, u8 sa, u64 key,
        struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
        struct scsi_device *sdev = sdkp->device;
        struct scsi_sense_hdr sshdr;
+       struct scsi_failure failure_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = 5,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
                .sshdr = &sshdr,
+               .failures = &failures,
        };
        int result;
        u8 cmd[16] = { 0, };
@@ -2235,55 +2262,68 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 static void
 sd_spinup_disk(struct scsi_disk *sdkp)
 {
-       unsigned char cmd[10];
+       static const u8 cmd[10] = { TEST_UNIT_READY };
        unsigned long spintime_expire = 0;
-       int retries, spintime;
+       int spintime, sense_valid = 0;
        unsigned int the_result;
        struct scsi_sense_hdr sshdr;
+       struct scsi_failure failure_defs[] = {
+               /* Do not retry Medium Not Present */
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = 0x3A,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = NOT_READY,
+                       .asc = 0x3A,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               /* Retry up to 3 times when scsi_status_is_good() would return false */
+               {
+                       .result = SCMD_FAILURE_STAT_ANY,
+                       .allowed = 3,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
                .sshdr = &sshdr,
+               .failures = &failures,
        };
-       int sense_valid = 0;
 
        spintime = 0;
 
        /* Spin up drives, as required.  Only do this at boot time */
        /* Spinup needs to be done for module loads too. */
        do {
-               retries = 0;
-
-               do {
-                       bool media_was_present = sdkp->media_present;
+               bool media_was_present = sdkp->media_present;
 
-                       cmd[0] = TEST_UNIT_READY;
-                       memset((void *) &cmd[1], 0, 9);
+               scsi_failures_reset_retries(&failures);
 
-                       the_result = scsi_execute_cmd(sdkp->device, cmd,
-                                                     REQ_OP_DRV_IN, NULL, 0,
-                                                     SD_TIMEOUT,
-                                                     sdkp->max_retries,
-                                                     &exec_args);
+               the_result = scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN,
+                                             NULL, 0, SD_TIMEOUT,
+                                             sdkp->max_retries, &exec_args);
 
-                       if (the_result > 0) {
-                               /*
-                                * If the drive has indicated to us that it
-                                * doesn't have any media in it, don't bother
-                                * with any more polling.
-                                */
-                               if (media_not_present(sdkp, &sshdr)) {
-                                       if (media_was_present)
-                                               sd_printk(KERN_NOTICE, sdkp,
-                                                         "Media removed, stopped polling\n");
-                                       return;
-                               }
 
-                               sense_valid = scsi_sense_valid(&sshdr);
+               if (the_result > 0) {
+                       /*
+                        * If the drive has indicated to us that it doesn't
+                        * have any media in it, don't bother with any more
+                        * polling.
+                        */
+                       if (media_not_present(sdkp, &sshdr)) {
+                               if (media_was_present)
+                                       sd_printk(KERN_NOTICE, sdkp,
+                                                 "Media removed, stopped polling\n");
+                               return;
                        }
-                       retries++;
-               } while (retries < 3 &&
-                        (!scsi_status_is_good(the_result) ||
-                         (scsi_status_is_check_condition(the_result) &&
-                         sense_valid && sshdr.sense_key == UNIT_ATTENTION)));
+                       sense_valid = scsi_sense_valid(&sshdr);
+               }
 
                if (!scsi_status_is_check_condition(the_result)) {
                        /* no sense, TUR either succeeded or failed
@@ -2318,14 +2358,16 @@ sd_spinup_disk(struct scsi_disk *sdkp)
                         * Issue command to spin up drive when not ready
                         */
                        if (!spintime) {
+                               /* Return immediately and start spin cycle */
+                               const u8 start_cmd[10] = {
+                                       [0] = START_STOP,
+                                       [1] = 1,
+                                       [4] = sdkp->device->start_stop_pwr_cond ?
+                                               0x11 : 1,
+                               };
+
                                sd_printk(KERN_NOTICE, sdkp, "Spinning up disk...");
-                               cmd[0] = START_STOP;
-                               cmd[1] = 1;     /* Return immediately */
-                               memset((void *) &cmd[2], 0, 8);
-                               cmd[4] = 1;     /* Start spin cycle */
-                               if (sdkp->device->start_stop_pwr_cond)
-                                       cmd[4] |= 1 << 4;
-                               scsi_execute_cmd(sdkp->device, cmd,
+                               scsi_execute_cmd(sdkp->device, start_cmd,
                                                 REQ_OP_DRV_IN, NULL, 0,
                                                 SD_TIMEOUT, sdkp->max_retries,
                                                 &exec_args);
@@ -2546,42 +2588,58 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
 static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp,
                                                unsigned char *buffer)
 {
-       unsigned char cmd[16];
+       static const u8 cmd[10] = { READ_CAPACITY };
        struct scsi_sense_hdr sshdr;
+       struct scsi_failure failure_defs[] = {
+               /* Do not retry Medium Not Present */
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = 0x3A,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = NOT_READY,
+                       .asc = 0x3A,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               /* Device reset might occur several times, so retry a lot */
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = 0x29,
+                       .allowed = READ_CAPACITY_RETRIES_ON_RESET,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               /* Retry any other error not listed above 3 times */
+               {
+                       .result = SCMD_FAILURE_RESULT_ANY,
+                       .allowed = 3,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
                .sshdr = &sshdr,
+               .failures = &failures,
        };
        int sense_valid = 0;
        int the_result;
-       int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET;
        sector_t lba;
        unsigned sector_size;
 
-       do {
-               cmd[0] = READ_CAPACITY;
-               memset(&cmd[1], 0, 9);
-               memset(buffer, 0, 8);
+       memset(buffer, 0, 8);
+
+       the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer,
+                                     8, SD_TIMEOUT, sdkp->max_retries,
+                                     &exec_args);
 
-               the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer,
-                                             8, SD_TIMEOUT, sdkp->max_retries,
-                                             &exec_args);
+       if (the_result > 0) {
+               sense_valid = scsi_sense_valid(&sshdr);
 
                if (media_not_present(sdkp, &sshdr))
                        return -ENODEV;
-
-               if (the_result > 0) {
-                       sense_valid = scsi_sense_valid(&sshdr);
-                       if (sense_valid &&
-                           sshdr.sense_key == UNIT_ATTENTION &&
-                           sshdr.asc == 0x29 && sshdr.ascq == 0x00)
-                               /* Device reset might occur several times,
-                                * give it one more chance */
-                               if (--reset_retries > 0)
-                                       continue;
-               }
-               retries--;
-
-       } while (the_result && retries);
+       }
 
        if (the_result) {
                sd_print_result(sdkp, "Read Capacity(10) failed", the_result);
@@ -3752,7 +3810,7 @@ static int sd_probe(struct device *dev)
        blk_pm_runtime_init(sdp->request_queue, dev);
        if (sdp->rpm_autosuspend) {
                pm_runtime_set_autosuspend_delay(dev,
-                       sdp->host->hostt->rpm_autosuspend_delay);
+                       sdp->host->rpm_autosuspend_delay);
        }
 
        error = device_add_disk(dev, gd, NULL);
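
The final sd.c hunk reads the autosuspend delay from the Scsi_Host instance
rather than from the host template. Presumably the per-host field is seeded
from the template default when the host is allocated, so that an individual
host can override it later; a sketch of that assumed seeding (the exact spot
in scsi_host_alloc() is an assumption, it is not shown in this diff):

	/* hypothetical: copy the template default into the per-host field */
	shost->rpm_autosuspend_delay = sht->rpm_autosuspend_delay;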
index d7d0c35c58b80ed6b50535f210c98d64883b5a32..0f2c87cc95e6297da3c57645e3d1893ece273bc1 100644 (file)
@@ -87,19 +87,32 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code,
                0
        };
        unsigned char recv_page_code;
-       unsigned int retries = SES_RETRIES;
-       struct scsi_sense_hdr sshdr;
+       struct scsi_failure failure_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = 0x29,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = SES_RETRIES,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = NOT_READY,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = SES_RETRIES,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
-               .sshdr = &sshdr,
+               .failures = &failures,
        };
 
-       do {
-               ret = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, buf, bufflen,
-                                      SES_TIMEOUT, 1, &exec_args);
-       } while (ret > 0 && --retries && scsi_sense_valid(&sshdr) &&
-                (sshdr.sense_key == NOT_READY ||
-                 (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29)));
-
+       ret = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, buf, bufflen,
+                              SES_TIMEOUT, 1, &exec_args);
        if (unlikely(ret))
                return ret;
 
@@ -131,19 +144,32 @@ static int ses_send_diag(struct scsi_device *sdev, int page_code,
                bufflen & 0xff,
                0
        };
-       struct scsi_sense_hdr sshdr;
-       unsigned int retries = SES_RETRIES;
+       struct scsi_failure failure_defs[] = {
+               {
+                       .sense = UNIT_ATTENTION,
+                       .asc = 0x29,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = SES_RETRIES,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {
+                       .sense = NOT_READY,
+                       .asc = SCMD_FAILURE_ASC_ANY,
+                       .ascq = SCMD_FAILURE_ASCQ_ANY,
+                       .allowed = SES_RETRIES,
+                       .result = SAM_STAT_CHECK_CONDITION,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args exec_args = {
-               .sshdr = &sshdr,
+               .failures = &failures,
        };
 
-       do {
-               result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_OUT, buf,
-                                         bufflen, SES_TIMEOUT, 1, &exec_args);
-       } while (result > 0 && --retries && scsi_sense_valid(&sshdr) &&
-                (sshdr.sense_key == NOT_READY ||
-                 (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29)));
-
+       result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_OUT, buf, bufflen,
+                                 SES_TIMEOUT, 1, &exec_args);
        if (result)
                sdev_printk(KERN_ERR, sdev, "SEND DIAGNOSTIC result: %8x\n",
                            result);
index d093dd187b2f9f6dc798925505736f973ebbd2c9..268b3a40891edb65c59243fca24263b27a6aa114 100644 (file)
@@ -717,27 +717,29 @@ fail:
 
 static void get_sectorsize(struct scsi_cd *cd)
 {
-       unsigned char cmd[10];
-       unsigned char buffer[8];
-       int the_result, retries = 3;
+       static const u8 cmd[10] = { READ_CAPACITY };
+       unsigned char buffer[8] = { };
+       int the_result;
        int sector_size;
        struct request_queue *queue;
+       struct scsi_failure failure_defs[] = {
+               {
+                       .result = SCMD_FAILURE_RESULT_ANY,
+                       .allowed = 3,
+               },
+               {}
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
+       const struct scsi_exec_args exec_args = {
+               .failures = &failures,
+       };
 
-       do {
-               cmd[0] = READ_CAPACITY;
-               memset((void *) &cmd[1], 0, 9);
-               memset(buffer, 0, sizeof(buffer));
-
-               /* Do the command and wait.. */
-               the_result = scsi_execute_cmd(cd->device, cmd, REQ_OP_DRV_IN,
-                                             buffer, sizeof(buffer),
-                                             SR_TIMEOUT, MAX_RETRIES, NULL);
-
-               retries--;
-
-       } while (the_result && retries);
-
-
+       /* Do the command and wait. */
+       the_result = scsi_execute_cmd(cd->device, cmd, REQ_OP_DRV_IN, buffer,
+                                     sizeof(buffer), SR_TIMEOUT, MAX_RETRIES,
+                                     &exec_args);
        if (the_result) {
                cd->capacity = 0x1fffff;
                sector_size = 2048;     /* A guess, just in case */
index 09219c362acccfbf25fa3ed37df0e4445d0253b5..e20f314cf3e7d7b37023917d506aaef906bef04d 100644 (file)
@@ -273,7 +273,7 @@ static struct platform_driver esp_sun3x_driver = {
 module_platform_driver(esp_sun3x_driver);
 
 MODULE_DESCRIPTION("Sun3x ESP SCSI driver");
-MODULE_AUTHOR("Thomas Bogendoerfer (tsbogend@alpha.franken.de)");
+MODULE_AUTHOR("Thomas Bogendoerfer <tsbogend@alpha.franken.de>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 MODULE_ALIAS("platform:sun3x_esp");
index 64a7c2c6c5ff424f6742e3f2e2cbd3fa8fbfa06f..5ce6c9d19d1e619b2717a135eef450eb8c7d710f 100644 (file)
@@ -608,6 +608,6 @@ static struct platform_driver esp_sbus_driver = {
 module_platform_driver(esp_sbus_driver);
 
 MODULE_DESCRIPTION("Sun ESP SCSI driver");
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
index cb24a08be084db158a906c381b8626f06df26f6b..b0f26f6f731e7a24fd854cfe1aece1ef5cba1f48 100644 (file)
@@ -144,17 +144,6 @@ static int bm_set_memory(u64 ba, u32 size)
 static dma_addr_t fbpr_a;
 static size_t fbpr_sz;
 
-static int bman_fbpr(struct reserved_mem *rmem)
-{
-       fbpr_a = rmem->base;
-       fbpr_sz = rmem->size;
-
-       WARN_ON(!(fbpr_a && fbpr_sz));
-
-       return 0;
-}
-RESERVEDMEM_OF_DECLARE(bman_fbpr, "fsl,bman-fbpr", bman_fbpr);
-
 static irqreturn_t bman_isr(int irq, void *ptr)
 {
        u32 isr_val, ier_val, ecsr_val, isr_mask, i;
@@ -242,17 +231,11 @@ static int fsl_bman_probe(struct platform_device *pdev)
                return -ENODEV;
        }
 
-       /*
-        * If FBPR memory wasn't defined using the qbman compatible string
-        * try using the of_reserved_mem_device method
-        */
-       if (!fbpr_a) {
-               ret = qbman_init_private_mem(dev, 0, &fbpr_a, &fbpr_sz);
-               if (ret) {
-                       dev_err(dev, "qbman_init_private_mem() failed 0x%x\n",
-                               ret);
-                       return -ENODEV;
-               }
+       ret = qbman_init_private_mem(dev, 0, "fsl,bman-fbpr", &fbpr_a, &fbpr_sz);
+       if (ret) {
+               dev_err(dev, "qbman_init_private_mem() failed 0x%x\n",
+                       ret);
+               return -ENODEV;
        }
 
        dev_dbg(dev, "Allocated FBPR 0x%llx 0x%zx\n", fbpr_a, fbpr_sz);
index 33751450047eda733f41a58520e9c401db15452b..e1d7b79cc45039e84b5dbc24f80bba9f20c411d5 100644 (file)
@@ -34,8 +34,8 @@
 /*
  * Initialize a devices private memory region
  */
-int qbman_init_private_mem(struct device *dev, int idx, dma_addr_t *addr,
-                               size_t *size)
+int qbman_init_private_mem(struct device *dev, int idx, const char *compat,
+                          dma_addr_t *addr, size_t *size)
 {
        struct device_node *mem_node;
        struct reserved_mem *rmem;
@@ -44,8 +44,12 @@ int qbman_init_private_mem(struct device *dev, int idx, dma_addr_t *addr,
 
        mem_node = of_parse_phandle(dev->of_node, "memory-region", idx);
        if (!mem_node) {
-               dev_err(dev, "No memory-region found for index %d\n", idx);
-               return -ENODEV;
+               mem_node = of_find_compatible_node(NULL, NULL, compat);
+               if (!mem_node) {
+                       dev_err(dev, "No memory-region found for index %d or compatible '%s'\n",
+                               idx, compat);
+                       return -ENODEV;
+               }
        }
 
        rmem = of_reserved_mem_lookup(mem_node);
index ae8afa552b1ef47e629de1eae4f324dc8b0af181..16485bde963667503d83f36df3bedd2a3edfe0c3 100644 (file)
@@ -101,8 +101,8 @@ static inline u8 dpaa_cyc_diff(u8 ringsize, u8 first, u8 last)
 #define DPAA_GENALLOC_OFF      0x80000000
 
 /* Initialize the devices private memory region */
-int qbman_init_private_mem(struct device *dev, int idx, dma_addr_t *addr,
-                               size_t *size);
+int qbman_init_private_mem(struct device *dev, int idx, const char *compat,
+                          dma_addr_t *addr, size_t *size);
 
 /* memremap() attributes for different platforms */
 #ifdef CONFIG_PPC
index 157659fd033a3943b69d3236ea0cace89b0bd7e3..392e54f14dbe927babae41f7a11b6f57ea987029 100644 (file)
@@ -468,28 +468,6 @@ static int zero_priv_mem(phys_addr_t addr, size_t sz)
 
        return 0;
 }
-
-static int qman_fqd(struct reserved_mem *rmem)
-{
-       fqd_a = rmem->base;
-       fqd_sz = rmem->size;
-
-       WARN_ON(!(fqd_a && fqd_sz));
-       return 0;
-}
-RESERVEDMEM_OF_DECLARE(qman_fqd, "fsl,qman-fqd", qman_fqd);
-
-static int qman_pfdr(struct reserved_mem *rmem)
-{
-       pfdr_a = rmem->base;
-       pfdr_sz = rmem->size;
-
-       WARN_ON(!(pfdr_a && pfdr_sz));
-
-       return 0;
-}
-RESERVEDMEM_OF_DECLARE(qman_pfdr, "fsl,qman-pfdr", qman_pfdr);
-
 #endif
 
 unsigned int qm_get_fqid_maxcnt(void)
@@ -796,39 +774,34 @@ static int fsl_qman_probe(struct platform_device *pdev)
                qm_channel_caam = QMAN_CHANNEL_CAAM_REV3;
        }
 
-       if (fqd_a) {
+       /*
+        * The order of the memory regions is assumed to be FQD followed by
+        * PFDR. To ensure allocations come from the correct regions, the
+        * driver initializes and then allocates each piece in order.
+        */
+       ret = qbman_init_private_mem(dev, 0, "fsl,qman-fqd", &fqd_a, &fqd_sz);
+       if (ret) {
+               dev_err(dev, "qbman_init_private_mem() for FQD failed 0x%x\n",
+                       ret);
+               return -ENODEV;
+       }
 #ifdef CONFIG_PPC
-               /*
-                * For PPC backward DT compatibility
-                * FQD memory MUST be zero'd by software
-                */
-               zero_priv_mem(fqd_a, fqd_sz);
+       /*
+        * For PPC backward DT compatibility
+        * FQD memory MUST be zero'd by software
+        */
+       zero_priv_mem(fqd_a, fqd_sz);
 #else
-               WARN(1, "Unexpected architecture using non shared-dma-mem reservations");
+       WARN(1, "Unexpected architecture using non shared-dma-mem reservations");
 #endif
-       } else {
-               /*
-                * Order of memory regions is assumed as FQD followed by PFDR
-                * in order to ensure allocations from the correct regions the
-                * driver initializes then allocates each piece in order
-                */
-               ret = qbman_init_private_mem(dev, 0, &fqd_a, &fqd_sz);
-               if (ret) {
-                       dev_err(dev, "qbman_init_private_mem() for FQD failed 0x%x\n",
-                               ret);
-                       return -ENODEV;
-               }
-       }
        dev_dbg(dev, "Allocated FQD 0x%llx 0x%zx\n", fqd_a, fqd_sz);
 
-       if (!pfdr_a) {
-               /* Setup PFDR memory */
-               ret = qbman_init_private_mem(dev, 1, &pfdr_a, &pfdr_sz);
-               if (ret) {
-                       dev_err(dev, "qbman_init_private_mem() for PFDR failed 0x%x\n",
-                               ret);
-                       return -ENODEV;
-               }
+       /* Setup PFDR memory */
+       ret = qbman_init_private_mem(dev, 1, "fsl,qman-pfdr", &pfdr_a, &pfdr_sz);
+       if (ret) {
+               dev_err(dev, "qbman_init_private_mem() for PFDR failed 0x%x\n",
+                       ret);
+               return -ENODEV;
        }
        dev_dbg(dev, "Allocated PFDR 0x%llx 0x%zx\n", pfdr_a, pfdr_sz);
 
index 9fa93bb923d70f03ff371b5e05cb4a03f0047d81..fd65b2360fc1e16e8cf2bacc34085bd0d424499d 100644 (file)
@@ -72,7 +72,7 @@ int sdw_slave_uevent(const struct device *dev, struct kobj_uevent_env *env)
        return 0;
 }
 
-struct bus_type sdw_bus_type = {
+const struct bus_type sdw_bus_type = {
        .name = "soundwire",
        .match = sdw_bus_match,
 };
index 93698532deac4098706099dfceaf85055f563b09..95125cc2fc599e040be30b3e376fbd93d6cfd88c 100644 (file)
@@ -621,8 +621,6 @@ static int __maybe_unused intel_resume(struct device *dev)
                return 0;
        }
 
-       link_flags = md_flags >> (bus->link_id * 8);
-
        if (pm_runtime_suspended(dev)) {
                dev_dbg(dev, "pm_runtime status was suspended, forcing active\n");
 
index 51abedbbaa6630e53ab301b85d6dcefe2f8ce9ac..b2c64512739d674a4b00c132ee2d61a973daad8b 100644 (file)
@@ -112,7 +112,7 @@ static const struct dev_pm_ops master_dev_pm = {
                           pm_generic_runtime_resume, NULL)
 };
 
-struct device_type sdw_master_type = {
+const struct device_type sdw_master_type = {
        .name =         "soundwire_master",
        .release =      sdw_master_device_release,
        .pm = &master_dev_pm,
index 060c2982e26b009d561eafcbf81b888ada781fd4..9963b92eb50536d5e44b36409023fe95a6c5475f 100644 (file)
@@ -16,7 +16,7 @@ static void sdw_slave_release(struct device *dev)
        kfree(slave);
 }
 
-struct device_type sdw_slave_type = {
+const struct device_type sdw_slave_type = {
        .name =         "sdw_slave",
        .release =      sdw_slave_release,
        .uevent =       sdw_slave_uevent,
index f9c0adc0738db27a7578509edbfbcd82c60b5206..4e9e7d2a942d8aa8a6d630a9e238916aaa699033 100644 (file)
@@ -1718,7 +1718,7 @@ EXPORT_SYMBOL(sdw_deprepare_stream);
 static int set_stream(struct snd_pcm_substream *substream,
                      struct sdw_stream_runtime *sdw_stream)
 {
-       struct snd_soc_pcm_runtime *rtd = substream->private_data;
+       struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
        struct snd_soc_dai *dai;
        int ret = 0;
        int i;
@@ -1771,7 +1771,7 @@ EXPORT_SYMBOL(sdw_alloc_stream);
 int sdw_startup_stream(void *sdw_substream)
 {
        struct snd_pcm_substream *substream = sdw_substream;
-       struct snd_soc_pcm_runtime *rtd = substream->private_data;
+       struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
        struct sdw_stream_runtime *sdw_stream;
        char *name;
        int ret;
@@ -1815,7 +1815,7 @@ EXPORT_SYMBOL(sdw_startup_stream);
 void sdw_shutdown_stream(void *sdw_substream)
 {
        struct snd_pcm_substream *substream = sdw_substream;
-       struct snd_soc_pcm_runtime *rtd = substream->private_data;
+       struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
        struct sdw_stream_runtime *sdw_stream;
        struct snd_soc_dai *dai;
 
index d99cc898cd99128e5c5bdade55422192a7148f81..bfef99997a1d2da7adb5464cdea5a8413a8e0509 100644 (file)
@@ -29,16 +29,6 @@ TODO
 
 1. Items which MUST be fixed before the driver can be moved out of staging:
 
-* The atomisp ov2680 and ov5693 sensor drivers bind to the same hw-ids as
-  the standard ov2680 and ov5693 drivers under drivers/media/i2c, which
-  conflicts. Drop the atomisp private ov2680 and ov5693 drivers:
-  * Port various ov2680 improvements from atomisp_ov2680.c to regular ov2680.c
-    and switch to regular ov2680 driver
-  * Make atomisp work with the regular ov5693 driver and drop atomisp_ov5693
-
-* Fix atomisp causing the whole machine to hang in its probe() error-exit
-  path taken in the firmware missing case
-
 * Remove/disable private IOCTLs
 
 * Remove/disable custom v4l2-ctrls
index 55ea422291ba82d0d325e2ac4cf41ef1defefd02..ade28950db7329c4d2ee5bcf039987a9f210a640 100644 (file)
@@ -74,9 +74,6 @@
 #define GC2235_COARSE_INTG_TIME_MIN 1
 #define GC2235_COARSE_INTG_TIME_MAX_MARGIN 6
 
-/*
- * GC2235 System control registers
- */
 /*
  * GC2235 System control registers
  */
@@ -167,7 +164,7 @@ enum gc2235_tok_type {
        GC2235_TOK_MASK = 0xfff0
 };
 
-/**
+/*
  * struct gc2235_reg - MI sensor  register format
  * @type: type of the register
  * @reg: 8-bit offset to register
index d0db2efe004525e9d88531e0115c8edd2fa9c4df..8593ba90605f625e6f44e3a791b2012b42ebbe67 100644 (file)
@@ -3721,22 +3721,34 @@ apply_min_padding:
        *padding_h = max_t(u32, *padding_h, min_pad_h);
 }
 
-static int atomisp_set_crop(struct atomisp_device *isp,
-                           const struct v4l2_mbus_framefmt *format,
-                           struct v4l2_subdev_state *sd_state,
-                           int which)
+static int atomisp_set_crop_and_fmt(struct atomisp_device *isp,
+                                   struct v4l2_mbus_framefmt *ffmt,
+                                   int which)
 {
        struct atomisp_input_subdev *input = &isp->inputs[isp->asd.input_curr];
        struct v4l2_subdev_selection sel = {
                .which = which,
                .target = V4L2_SEL_TGT_CROP,
-               .r.width = format->width,
-               .r.height = format->height,
+               .r.width = ffmt->width,
+               .r.height = ffmt->height,
        };
-       int ret;
+       struct v4l2_subdev_format format = {
+               .which = which,
+               .format = *ffmt,
+       };
+       struct v4l2_subdev_state *sd_state;
+       int ret = 0;
+
+       if (!input->camera)
+               return -EINVAL;
+
+       sd_state = (which == V4L2_SUBDEV_FORMAT_TRY) ? input->try_sd_state :
+                                                      input->camera->active_state;
+       if (sd_state)
+               v4l2_subdev_lock_state(sd_state);
 
        if (!input->crop_support)
-               return 0;
+               goto set_fmt;
 
        /* Cropping is done before binning, when binning double the crop rect */
        if (input->binning_support && sel.r.width <= (input->native_rect.width / 2) &&
@@ -3757,6 +3769,14 @@ static int atomisp_set_crop(struct atomisp_device *isp,
                dev_err(isp->dev, "Error setting crop to %ux%u @%ux%u: %d\n",
                        sel.r.width, sel.r.height, sel.r.left, sel.r.top, ret);
 
+set_fmt:
+       if (ret == 0)
+               ret = v4l2_subdev_call(input->camera, pad, set_fmt, sd_state, &format);
+
+       if (sd_state)
+               v4l2_subdev_unlock_state(sd_state);
+
+       *ffmt = format.format;
        return ret;
 }
 
@@ -3767,16 +3787,10 @@ int atomisp_try_fmt(struct atomisp_device *isp, struct v4l2_pix_format *f,
 {
        const struct atomisp_format_bridge *fmt, *snr_fmt;
        struct atomisp_sub_device *asd = &isp->asd;
-       struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr];
-       struct v4l2_subdev_format format = {
-               .which = V4L2_SUBDEV_FORMAT_TRY,
-       };
+       struct v4l2_mbus_framefmt ffmt = { };
        u32 padding_w, padding_h;
        int ret;
 
-       if (!input->camera)
-               return -EINVAL;
-
        fmt = atomisp_get_format_bridge(f->pixelformat);
        /* Currently, raw formats are broken!!! */
        if (!fmt || fmt->sh_fmt == IA_CSS_FRAME_FORMAT_RAW) {
@@ -3797,38 +3811,27 @@ int atomisp_try_fmt(struct atomisp_device *isp, struct v4l2_pix_format *f,
         * the set_fmt call, like atomisp_set_fmt_to_snr() does.
         */
        atomisp_get_padding(isp, f->width, f->height, &padding_w, &padding_h);
-       v4l2_fill_mbus_format(&format.format, f, fmt->mbus_code);
-       format.format.width += padding_w;
-       format.format.height += padding_h;
-
-       dev_dbg(isp->dev, "try_mbus_fmt: asking for %ux%u\n",
-               format.format.width, format.format.height);
-
-       v4l2_subdev_lock_state(input->try_sd_state);
+       v4l2_fill_mbus_format(&ffmt, f, fmt->mbus_code);
+       ffmt.width += padding_w;
+       ffmt.height += padding_h;
 
-       ret = atomisp_set_crop(isp, &format.format, input->try_sd_state,
-                              V4L2_SUBDEV_FORMAT_TRY);
-       if (ret == 0)
-               ret = v4l2_subdev_call(input->camera, pad, set_fmt,
-                                      input->try_sd_state, &format);
-
-       v4l2_subdev_unlock_state(input->try_sd_state);
+       dev_dbg(isp->dev, "try_mbus_fmt: try %ux%u\n", ffmt.width, ffmt.height);
 
+       ret = atomisp_set_crop_and_fmt(isp, &ffmt, V4L2_SUBDEV_FORMAT_TRY);
        if (ret)
                return ret;
 
-       dev_dbg(isp->dev, "try_mbus_fmt: got %ux%u\n",
-               format.format.width, format.format.height);
+       dev_dbg(isp->dev, "try_mbus_fmt: got %ux%u\n", ffmt.width, ffmt.height);
 
-       snr_fmt = atomisp_get_format_bridge_from_mbus(format.format.code);
+       snr_fmt = atomisp_get_format_bridge_from_mbus(ffmt.code);
        if (!snr_fmt) {
                dev_err(isp->dev, "unknown sensor format 0x%8.8x\n",
-                       format.format.code);
+                       ffmt.code);
                return -EINVAL;
        }
 
-       f->width = format.format.width - padding_w;
-       f->height = format.format.height - padding_h;
+       f->width = ffmt.width - padding_w;
+       f->height = ffmt.height - padding_h;
 
        /*
         * If the format is jpeg or custom RAW, then the width and height will
@@ -4236,28 +4239,22 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p
        struct atomisp_video_pipe *pipe = atomisp_to_video_pipe(vdev);
        struct atomisp_sub_device *asd = pipe->asd;
        struct atomisp_device *isp = asd->isp;
-       struct atomisp_input_subdev *input = &isp->inputs[asd->input_curr];
        const struct atomisp_format_bridge *format;
-       struct v4l2_subdev_state *act_sd_state;
-       struct v4l2_subdev_format vformat = {
-               .which = V4L2_SUBDEV_FORMAT_TRY,
-       };
-       struct v4l2_mbus_framefmt *ffmt = &vformat.format;
-       struct v4l2_mbus_framefmt *req_ffmt;
+       struct v4l2_mbus_framefmt req_ffmt, ffmt = { };
        struct atomisp_input_stream_info *stream_info =
-           (struct atomisp_input_stream_info *)ffmt->reserved;
+           (struct atomisp_input_stream_info *)&ffmt.reserved;
        int ret;
 
        format = atomisp_get_format_bridge(f->pixelformat);
        if (!format)
                return -EINVAL;
 
-       v4l2_fill_mbus_format(ffmt, f, format->mbus_code);
-       ffmt->height += asd->sink_pad_padding_h + dvs_env_h;
-       ffmt->width += asd->sink_pad_padding_w + dvs_env_w;
+       v4l2_fill_mbus_format(&ffmt, f, format->mbus_code);
+       ffmt.height += asd->sink_pad_padding_h + dvs_env_h;
+       ffmt.width += asd->sink_pad_padding_w + dvs_env_w;
 
        dev_dbg(isp->dev, "s_mbus_fmt: ask %ux%u (padding %ux%u, dvs %ux%u)\n",
-               ffmt->width, ffmt->height, asd->sink_pad_padding_w, asd->sink_pad_padding_h,
+               ffmt.width, ffmt.height, asd->sink_pad_padding_w, asd->sink_pad_padding_h,
                dvs_env_w, dvs_env_h);
 
        __atomisp_init_stream_info(ATOMISP_INPUT_STREAM_GENERAL, stream_info);
@@ -4266,28 +4263,17 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p
 
        /* Disable dvs if resolution can't be supported by sensor */
        if (asd->params.video_dis_en && asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO) {
-               v4l2_subdev_lock_state(input->try_sd_state);
-
-               ret = atomisp_set_crop(isp, &vformat.format, input->try_sd_state,
-                                      V4L2_SUBDEV_FORMAT_TRY);
-               if (ret == 0) {
-                       vformat.which = V4L2_SUBDEV_FORMAT_TRY;
-                       ret = v4l2_subdev_call(input->camera, pad, set_fmt,
-                                              input->try_sd_state, &vformat);
-               }
-
-               v4l2_subdev_unlock_state(input->try_sd_state);
-
+               ret = atomisp_set_crop_and_fmt(isp, &ffmt, V4L2_SUBDEV_FORMAT_TRY);
                if (ret)
                        return ret;
 
                dev_dbg(isp->dev, "video dis: sensor width: %d, height: %d\n",
-                       ffmt->width, ffmt->height);
+                       ffmt.width, ffmt.height);
 
-               if (ffmt->width < req_ffmt->width ||
-                   ffmt->height < req_ffmt->height) {
-                       req_ffmt->height -= dvs_env_h;
-                       req_ffmt->width -= dvs_env_w;
+               if (ffmt.width < req_ffmt.width ||
+                   ffmt.height < req_ffmt.height) {
+                       req_ffmt.height -= dvs_env_h;
+                       req_ffmt.width -= dvs_env_w;
                        ffmt = req_ffmt;
                        dev_warn(isp->dev,
                                 "can not enable video dis due to sensor limitation.");
@@ -4295,32 +4281,21 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p
                }
        }
 
-       act_sd_state = v4l2_subdev_lock_and_get_active_state(input->camera);
-
-       ret = atomisp_set_crop(isp, &vformat.format, act_sd_state,
-                              V4L2_SUBDEV_FORMAT_ACTIVE);
-       if (ret == 0) {
-               vformat.which = V4L2_SUBDEV_FORMAT_ACTIVE;
-               ret = v4l2_subdev_call(input->camera, pad, set_fmt, act_sd_state, &vformat);
-       }
-
-       if (act_sd_state)
-               v4l2_subdev_unlock_state(act_sd_state);
-
+       ret = atomisp_set_crop_and_fmt(isp, &ffmt, V4L2_SUBDEV_FORMAT_ACTIVE);
        if (ret)
                return ret;
 
        __atomisp_update_stream_env(asd, ATOMISP_INPUT_STREAM_GENERAL, stream_info);
 
        dev_dbg(isp->dev, "sensor width: %d, height: %d\n",
-               ffmt->width, ffmt->height);
+               ffmt.width, ffmt.height);
 
-       if (ffmt->width < ATOM_ISP_STEP_WIDTH ||
-           ffmt->height < ATOM_ISP_STEP_HEIGHT)
+       if (ffmt.width < ATOM_ISP_STEP_WIDTH ||
+           ffmt.height < ATOM_ISP_STEP_HEIGHT)
                return -EINVAL;
 
        if (asd->params.video_dis_en && asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO &&
-           (ffmt->width < req_ffmt->width || ffmt->height < req_ffmt->height)) {
+           (ffmt.width < req_ffmt.width || ffmt.height < req_ffmt.height)) {
                dev_warn(isp->dev,
                         "can not enable video dis due to sensor limitation.");
                asd->params.video_dis_en = false;
@@ -4328,9 +4303,9 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, const struct v4l2_p
 
        atomisp_subdev_set_ffmt(&asd->subdev, NULL,
                                V4L2_SUBDEV_FORMAT_ACTIVE,
-                               ATOMISP_SUBDEV_PAD_SINK, ffmt);
+                               ATOMISP_SUBDEV_PAD_SINK, &ffmt);
 
-       return css_input_resolution_changed(asd, ffmt);
+       return css_input_resolution_changed(asd, &ffmt);
 }
 
 int atomisp_set_fmt(struct video_device *vdev, struct v4l2_format *f)
index 02f06294bbfe0744988a68b238ec1c8ad41b34ed..6fe8b0b7467a78c7558085c9231d58a0063af814 100644 (file)
@@ -757,7 +757,7 @@ int atomisp_css_init(struct atomisp_device *isp)
                return ret;
 
        /* Init ISP */
-       err = ia_css_init(isp->dev, &isp->css_env.isp_css_env, NULL,
+       err = ia_css_init(isp->dev, &isp->css_env.isp_css_env,
                          (uint32_t)mmu_base_addr, IA_CSS_IRQ_TYPE_PULSE);
        if (err) {
                dev_err(isp->dev, "css init failed --- bad firmware?\n");
index 1df534bf54d3269139a39bf4584a70af97e2f571..ba7dd569a55a191357005a6110a701dc1ba1bbda 100644 (file)
 #include "hmm/hmm.h"
 #include "ia_css_debug.h"
 
+#define OPTION_BIN_LIST                        BIT(0)
+#define OPTION_BIN_RUN                 BIT(1)
+#define OPTION_VALID                   (OPTION_BIN_LIST | OPTION_BIN_RUN)
+
 /*
- * _iunit_debug:
- * dbglvl: iunit css driver trace level
  * dbgopt: iunit debug option:
  *        bit 0: binary list
  *        bit 1: running binary
  *        bit 2: memory statistic
-*/
-struct _iunit_debug {
-       struct device_driver    *drv;
-       struct atomisp_device   *isp;
-       unsigned int            dbglvl;
-       unsigned int            dbgfun;
-       unsigned int            dbgopt;
-};
-
-#define OPTION_BIN_LIST                        BIT(0)
-#define OPTION_BIN_RUN                 BIT(1)
-#define OPTION_VALID                   (OPTION_BIN_LIST \
-                                       | OPTION_BIN_RUN)
-
-static struct _iunit_debug iunit_debug = {
-       .dbglvl = 0,
-       .dbgopt = OPTION_BIN_LIST,
-};
+ */
+static unsigned int dbgopt = OPTION_BIN_LIST;
 
 static inline int iunit_dump_dbgopt(struct atomisp_device *isp,
                                    unsigned int opt)
@@ -88,34 +74,44 @@ opt_err:
        return ret;
 }
 
-static ssize_t iunit_dbglvl_show(struct device_driver *drv, char *buf)
+static ssize_t dbglvl_show(struct device *dev, struct device_attribute *attr,
+                          char *buf)
 {
-       iunit_debug.dbglvl = dbg_level;
-       return sysfs_emit(buf, "dtrace level:%u\n", iunit_debug.dbglvl);
+       unsigned int dbglvl = ia_css_debug_get_dtrace_level();
+
+       return sysfs_emit(buf, "dtrace level:%u\n", dbglvl);
 }
 
-static ssize_t iunit_dbglvl_store(struct device_driver *drv, const char *buf,
-                                 size_t size)
+static ssize_t dbglvl_store(struct device *dev, struct device_attribute *attr,
+                           const char *buf, size_t size)
 {
-       if (kstrtouint(buf, 10, &iunit_debug.dbglvl)
-           || iunit_debug.dbglvl < 1
-           || iunit_debug.dbglvl > 9) {
+       unsigned int dbglvl;
+       int ret;
+
+       ret = kstrtouint(buf, 10, &dbglvl);
+       if (ret)
+               return ret;
+
+       if (dbglvl < 1 || dbglvl > 9)
                return -ERANGE;
-       }
-       ia_css_debug_set_dtrace_level(iunit_debug.dbglvl);
 
+       ia_css_debug_set_dtrace_level(dbglvl);
        return size;
 }
+static DEVICE_ATTR_RW(dbglvl);
 
-static ssize_t iunit_dbgfun_show(struct device_driver *drv, char *buf)
+static ssize_t dbgfun_show(struct device *dev, struct device_attribute *attr,
+                          char *buf)
 {
-       iunit_debug.dbgfun = atomisp_get_css_dbgfunc();
-       return sysfs_emit(buf, "dbgfun opt:%u\n", iunit_debug.dbgfun);
+       unsigned int dbgfun = atomisp_get_css_dbgfunc();
+
+       return sysfs_emit(buf, "dbgfun opt:%u\n", dbgfun);
 }
 
-static ssize_t iunit_dbgfun_store(struct device_driver *drv, const char *buf,
-                                 size_t size)
+static ssize_t dbgfun_store(struct device *dev, struct device_attribute *attr,
+                           const char *buf, size_t size)
 {
+       struct atomisp_device *isp = dev_get_drvdata(dev);
        unsigned int opt;
        int ret;
 
@@ -123,23 +119,20 @@ static ssize_t iunit_dbgfun_store(struct device_driver *drv, const char *buf,
        if (ret)
                return ret;
 
-       ret = atomisp_set_css_dbgfunc(iunit_debug.isp, opt);
-       if (ret)
-               return ret;
-
-       iunit_debug.dbgfun = opt;
-
-       return size;
+       ret = atomisp_set_css_dbgfunc(isp, opt);
+       if (ret)
+               return ret;
+
+       return size;
 }
+static DEVICE_ATTR_RW(dbgfun);
 
-static ssize_t iunit_dbgopt_show(struct device_driver *drv, char *buf)
+static ssize_t dbgopt_show(struct device *dev, struct device_attribute *attr,
+                          char *buf)
 {
-       return sysfs_emit(buf, "option:0x%x\n", iunit_debug.dbgopt);
+       return sysfs_emit(buf, "option:0x%x\n", dbgopt);
 }
 
-static ssize_t iunit_dbgopt_store(struct device_driver *drv, const char *buf,
-                                 size_t size)
+static ssize_t dbgopt_store(struct device *dev, struct device_attribute *attr,
+                           const char *buf, size_t size)
 {
+       struct atomisp_device *isp = dev_get_drvdata(dev);
        unsigned int opt;
        int ret;
 
@@ -147,56 +140,27 @@ static ssize_t iunit_dbgopt_store(struct device_driver *drv, const char *buf,
        if (ret)
                return ret;
 
-       iunit_debug.dbgopt = opt;
-       ret = iunit_dump_dbgopt(iunit_debug.isp, iunit_debug.dbgopt);
+       dbgopt = opt;
+       ret = iunit_dump_dbgopt(isp, dbgopt);
        if (ret)
                return ret;
 
        return size;
 }
+static DEVICE_ATTR_RW(dbgopt);
 
-static const struct driver_attribute iunit_drvfs_attrs[] = {
-       __ATTR(dbglvl, 0644, iunit_dbglvl_show, iunit_dbglvl_store),
-       __ATTR(dbgfun, 0644, iunit_dbgfun_show, iunit_dbgfun_store),
-       __ATTR(dbgopt, 0644, iunit_dbgopt_show, iunit_dbgopt_store),
+static struct attribute *dbg_attrs[] = {
+       &dev_attr_dbglvl.attr,
+       &dev_attr_dbgfun.attr,
+       &dev_attr_dbgopt.attr,
+       NULL
 };
 
-static int iunit_drvfs_create_files(struct device_driver *drv)
-{
-       int i, ret = 0;
-
-       for (i = 0; i < ARRAY_SIZE(iunit_drvfs_attrs); i++)
-               ret |= driver_create_file(drv, &iunit_drvfs_attrs[i]);
-
-       return ret;
-}
-
-static void iunit_drvfs_remove_files(struct device_driver *drv)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(iunit_drvfs_attrs); i++)
-               driver_remove_file(drv, &iunit_drvfs_attrs[i]);
-}
-
-int atomisp_drvfs_init(struct atomisp_device *isp)
-{
-       struct device_driver *drv = isp->dev->driver;
-       int ret;
-
-       iunit_debug.isp = isp;
-       iunit_debug.drv = drv;
-
-       ret = iunit_drvfs_create_files(iunit_debug.drv);
-       if (ret) {
-               dev_err(isp->dev, "drvfs_create_files error: %d\n", ret);
-               iunit_drvfs_remove_files(iunit_debug.drv);
-       }
-
-       return ret;
-}
+static const struct attribute_group dbg_attr_group = {
+       .attrs = dbg_attrs,
+};
 
-void atomisp_drvfs_exit(void)
-{
-       iunit_drvfs_remove_files(iunit_debug.drv);
-}
+const struct attribute_group *dbg_attr_groups[] = {
+       &dbg_attr_group,
+       NULL
+};
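
For readers unfamiliar with the sysfs helpers used above: DEVICE_ATTR_RW()
generates the dev_attr_* object referenced from the attribute array, pairing
the <name>_show/<name>_store callbacks with 0644 permissions. Simplified
expansion (the real macro lives in include/linux/device.h):

        /* static DEVICE_ATTR_RW(dbglvl); expands to approximately: */
        static struct device_attribute dev_attr_dbglvl =
                __ATTR(dbglvl, 0644, dbglvl_show, dbglvl_store);
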
index 8f4cc722b881e8162697f9af07d985d8f6ac2842..8495cc133c0608bad1729298e033639f57ae3b1a 100644 (file)
@@ -19,7 +19,8 @@
 #ifndef        __ATOMISP_DRVFS_H__
 #define        __ATOMISP_DRVFS_H__
 
-int atomisp_drvfs_init(struct atomisp_device *isp);
-void atomisp_drvfs_exit(void);
+#include <linux/sysfs.h>
+
+extern const struct attribute_group *dbg_attr_groups[];
 
 #endif /* __ATOMISP_DRVFS_H__ */
index d5b077e602caec6ac2863780f660f7aac751ff02..bba9bc64d44745a6313da0ebcae8333b3d5a57b8 100644 (file)
@@ -192,6 +192,7 @@ struct atomisp_device {
        struct dev_pm_domain pm_domain;
        struct pm_qos_request pm_qos;
        s32 max_isr_latency;
+       bool pm_only;
 
        struct atomisp_mipi_csi2_device csi2_port[ATOMISP_CAMERA_NR_PORTS];
        struct atomisp_tpg_device tpg;
index 5b2d88c02d36a083376ee21660923635e9ff70c4..bb8e5e883b5080a8e7fc276d49efe6ecdaecb395 100644 (file)
@@ -666,14 +666,6 @@ static int atomisp_s_input(struct file *file, void *fh, unsigned int input)
                return ret;
        }
 
-       /* select operating sensor */
-       ret = v4l2_subdev_call(isp->inputs[input].camera, video, s_routing,
-                              0, 0, 0);
-       if (ret && (ret != -ENOIOCTLCMD)) {
-               dev_err(isp->dev, "Failed to select sensor\n");
-               return ret;
-       }
-
        if (!IS_ISP2401) {
                motor = isp->inputs[input].motor;
        } else {
index 547e1444ad9733569816c1e43c74e542089f82fa..f736e54c7df3578133646e2c6793892ac713bb08 100644 (file)
 /* G-Min addition: pull this in from intel_mid_pm.h */
 #define CSTATE_EXIT_LATENCY_C1  1
 
-static uint skip_fwload;
-module_param(skip_fwload, uint, 0644);
-MODULE_PARM_DESC(skip_fwload, "Skip atomisp firmware load");
-
 /* cross component debug message flag */
 int dbg_level;
 module_param(dbg_level, int, 0644);
@@ -552,7 +548,7 @@ static int atomisp_mrfld_power(struct atomisp_device *isp, bool enable)
        dev_dbg(isp->dev, "IUNIT power-%s.\n", enable ? "on" : "off");
 
        /* WA for P-Unit, if DVFS enabled, ISP timeout observed */
-       if (IS_CHT && enable) {
+       if (IS_CHT && enable && !isp->pm_only) {
                punit_ddr_dvfs_enable(false);
                msleep(20);
        }
@@ -562,7 +558,7 @@ static int atomisp_mrfld_power(struct atomisp_device *isp, bool enable)
                        val, MRFLD_ISPSSPM0_ISPSSC_MASK);
 
        /* WA:Enable DVFS */
-       if (IS_CHT && !enable)
+       if (IS_CHT && !enable && !isp->pm_only)
                punit_ddr_dvfs_enable(true);
 
        /*
@@ -591,9 +587,6 @@ static int atomisp_mrfld_power(struct atomisp_device *isp, bool enable)
                usleep_range(100, 150);
        } while (1);
 
-       if (enable)
-               msleep(10);
-
        dev_err(isp->dev, "IUNIT power-%s timeout.\n", enable ? "on" : "off");
        return -EBUSY;
 }
@@ -605,11 +598,15 @@ int atomisp_power_off(struct device *dev)
        int ret;
        u32 reg;
 
-       atomisp_css_uninit(isp);
+       if (isp->pm_only) {
+               pci_write_config_dword(pdev, PCI_INTERRUPT_CTRL, 0);
+       } else {
+               atomisp_css_uninit(isp);
 
-       ret = atomisp_mrfld_pre_power_down(isp);
-       if (ret)
-               return ret;
+               ret = atomisp_mrfld_pre_power_down(isp);
+               if (ret)
+                       return ret;
+       }
 
        /*
         * MRFLD IUNIT DPHY is located in an always-power-on island
@@ -638,6 +635,9 @@ int atomisp_power_on(struct device *dev)
        pci_restore_state(to_pci_dev(dev));
        cpu_latency_qos_update_request(&isp->pm_qos, isp->max_isr_latency);
 
+       if (isp->pm_only)
+               return 0;
+
        /*restore register values for iUnit and iUnitPHY registers*/
        if (isp->saved_regs.pcicmdsts)
                atomisp_restore_iunit_reg(isp);
@@ -1161,9 +1161,6 @@ atomisp_load_firmware(struct atomisp_device *isp)
        int rc;
        char *fw_path = NULL;
 
-       if (skip_fwload)
-               return NULL;
-
        if (firmware_name[0] != '\0') {
                fw_path = firmware_name;
        } else {
@@ -1199,46 +1196,39 @@ atomisp_load_firmware(struct atomisp_device *isp)
        return fw;
 }
 
-/*
- * Check for flags the driver was compiled with against the PCI
- * device. Always returns true on other than ISP 2400.
- */
-static bool is_valid_device(struct pci_dev *pdev, const struct pci_device_id *id)
+static void atomisp_pm_init(struct atomisp_device *isp)
 {
-       const char *name;
-       const char *product;
-
-       product = dmi_get_system_info(DMI_PRODUCT_NAME);
-
-       switch (id->device & ATOMISP_PCI_DEVICE_SOC_MASK) {
-       case ATOMISP_PCI_DEVICE_SOC_MRFLD:
-               name = "Merrifield";
-               break;
-       case ATOMISP_PCI_DEVICE_SOC_BYT:
-               name = "Baytrail";
-               break;
-       case ATOMISP_PCI_DEVICE_SOC_ANN:
-               name = "Anniedale";
-               break;
-       case ATOMISP_PCI_DEVICE_SOC_CHT:
-               name = "Cherrytrail";
-               break;
-       default:
-               dev_err(&pdev->dev, "%s: unknown device ID %x04:%x04\n",
-                       product, id->vendor, id->device);
-               return false;
-       }
+       /*
+        * The atomisp does not use standard PCI power-management through the
+        * PCI config space. Instead this driver directly tells the P-Unit to
+        * disable the ISP over the IOSF. The standard PCI subsystem pm_ops will
+        * try to access the config space before (resume) / after (suspend) this
+        * driver has turned the ISP on / off, resulting in the following errors:
+        *
+        * "Unable to change power state from D0 to D3hot, device inaccessible"
+        * "Unable to change power state from D3cold to D0, device inaccessible"
+        *
+        * To avoid these errors override the pm_domain so that all the PCI
+        * subsys suspend / resume handling is skipped.
+        */
+       isp->pm_domain.ops.runtime_suspend = atomisp_power_off;
+       isp->pm_domain.ops.runtime_resume = atomisp_power_on;
+       isp->pm_domain.ops.suspend = atomisp_suspend;
+       isp->pm_domain.ops.resume = atomisp_resume;
 
-       if (pdev->revision <= ATOMISP_PCI_REV_BYT_A0_MAX) {
-               dev_err(&pdev->dev, "%s revision %d is not unsupported\n",
-                       name, pdev->revision);
-               return false;
-       }
+       cpu_latency_qos_add_request(&isp->pm_qos, PM_QOS_DEFAULT_VALUE);
+       dev_pm_domain_set(isp->dev, &isp->pm_domain);
 
-       dev_info(&pdev->dev, "Detected %s version %d (ISP240%c) on %s\n",
-                name, pdev->revision, IS_ISP2401 ? '1' : '0', product);
+       pm_runtime_allow(isp->dev);
+       pm_runtime_put_sync_suspend(isp->dev);
+}
 
-       return true;
+static void atomisp_pm_uninit(struct atomisp_device *isp)
+{
+       pm_runtime_get_sync(isp->dev);
+       pm_runtime_forbid(isp->dev);
+       dev_pm_domain_set(isp->dev, NULL);
+       cpu_latency_qos_remove_request(&isp->pm_qos);
 }
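
With the pm_domain override installed by atomisp_pm_init(), the PCI bus pm_ops
are bypassed and the ISP-specific callbacks run instead. Sketching the
resulting runtime-PM flow:

        pm_runtime_put_sync_suspend(isp->dev);  /* -> pm_domain runtime_suspend -> atomisp_power_off() */
        pm_runtime_get_sync(isp->dev);          /* -> pm_domain runtime_resume  -> atomisp_power_on()  */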
 
 #define ATOM_ISP_PCI_BAR       0
@@ -1249,10 +1239,6 @@ static int atomisp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
        struct atomisp_device *isp;
        unsigned int start;
        int err, val;
-       u32 irq;
-
-       if (!is_valid_device(pdev, id))
-               return -ENODEV;
 
        /* Pointer to struct device. */
        atomisp_dev = &pdev->dev;
@@ -1261,32 +1247,16 @@ static int atomisp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
        if (!pdata)
                dev_warn(&pdev->dev, "no platform data available\n");
 
-       err = pcim_enable_device(pdev);
-       if (err) {
-               dev_err(&pdev->dev, "Failed to enable CI ISP device (%d)\n", err);
-               return err;
-       }
-
        start = pci_resource_start(pdev, ATOM_ISP_PCI_BAR);
        dev_dbg(&pdev->dev, "start: 0x%x\n", start);
 
-       err = pcim_iomap_regions(pdev, BIT(ATOM_ISP_PCI_BAR), pci_name(pdev));
-       if (err) {
-               dev_err(&pdev->dev, "Failed to I/O memory remapping (%d)\n", err);
-               goto ioremap_fail;
-       }
-
        isp = devm_kzalloc(&pdev->dev, sizeof(*isp), GFP_KERNEL);
-       if (!isp) {
-               err = -ENOMEM;
-               goto atomisp_dev_alloc_fail;
-       }
+       if (!isp)
+               return -ENOMEM;
 
        isp->dev = &pdev->dev;
-       isp->base = pcim_iomap_table(pdev)[ATOM_ISP_PCI_BAR];
        isp->saved_regs.ispmmadr = start;
-
-       dev_dbg(&pdev->dev, "atomisp mmio base: %p\n", isp->base);
+       isp->asd.isp = isp;
 
        mutex_init(&isp->mutex);
        spin_lock_init(&isp->lock);
@@ -1389,8 +1359,12 @@ static int atomisp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
                break;
        default:
                dev_err(&pdev->dev, "un-supported IUNIT device\n");
-               err = -ENODEV;
-               goto atomisp_dev_alloc_fail;
+               return -ENODEV;
+       }
+
+       if (pdev->revision <= ATOMISP_PCI_REV_BYT_A0_MAX) {
+               dev_err(&pdev->dev, "revision %d is not unsupported\n", pdev->revision);
+               return -ENODEV;
        }
 
        dev_info(&pdev->dev, "ISP HPLL frequency base = %d MHz\n", isp->hpll_freq);
@@ -1400,29 +1374,43 @@ static int atomisp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
        /* Load isp firmware from user space */
        isp->firmware = atomisp_load_firmware(isp);
        if (!isp->firmware) {
-               err = -ENOENT;
-               dev_dbg(&pdev->dev, "Firmware load failed\n");
-               goto load_fw_fail;
+               /* No firmware: continue in pm-only mode for S0i3 support */
+               dev_info(&pdev->dev, "Continuing in power-management-only mode\n");
+               isp->pm_only = true;
+               atomisp_pm_init(isp);
+               return 0;
        }
 
        err = sh_css_check_firmware_version(isp->dev, isp->firmware->data);
        if (err) {
                dev_dbg(&pdev->dev, "Firmware version check failed\n");
-               goto fw_validation_fail;
+               goto error_release_firmware;
+       }
+
+       err = pcim_enable_device(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to enable ISP PCI device (%d)\n", err);
+               goto error_release_firmware;
+       }
+
+       err = pcim_iomap_regions(pdev, BIT(ATOM_ISP_PCI_BAR), pci_name(pdev));
+       if (err) {
+               dev_err(&pdev->dev, "Failed to I/O memory remapping (%d)\n", err);
+               goto error_release_firmware;
        }
 
+       isp->base = pcim_iomap_table(pdev)[ATOM_ISP_PCI_BAR];
+
        pci_set_master(pdev);
 
        err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
        if (err < 0) {
                dev_err(&pdev->dev, "Failed to enable msi (%d)\n", err);
-               goto enable_msi_fail;
+               goto error_release_firmware;
        }
 
        atomisp_msi_irq_init(isp);
 
-       cpu_latency_qos_add_request(&isp->pm_qos, PM_QOS_DEFAULT_VALUE);
-
        /*
         * for MRFLD, Software/firmware needs to write a 1 to bit 0 of
         * the register at CSI_RECEIVER_SELECTION_REG to enable SH CSI
@@ -1459,13 +1447,13 @@ static int atomisp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
        err = atomisp_initialize_modules(isp);
        if (err < 0) {
                dev_err(&pdev->dev, "atomisp_initialize_modules (%d)\n", err);
-               goto initialize_modules_fail;
+               goto error_irq_uninit;
        }
 
        err = atomisp_register_entities(isp);
        if (err < 0) {
                dev_err(&pdev->dev, "atomisp_register_entities failed (%d)\n", err);
-               goto register_entities_fail;
+               goto error_uninitialize_modules;
        }
 
        INIT_WORK(&isp->assert_recovery_work, atomisp_assert_recovery_work);
@@ -1473,29 +1461,6 @@ static int atomisp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
        /* save the iunit context only once after all the values are init'ed. */
        atomisp_save_iunit_reg(isp);
 
-       /*
-        * The atomisp does not use standard PCI power-management through the
-        * PCI config space. Instead this driver directly tells the P-Unit to
-        * disable the ISP over the IOSF. The standard PCI subsystem pm_ops will
-        * try to access the config space before (resume) / after (suspend) this
-        * driver has turned the ISP on / off, resulting in the following errors:
-        *
-        * "Unable to change power state from D0 to D3hot, device inaccessible"
-        * "Unable to change power state from D3cold to D0, device inaccessible"
-        *
-        * To avoid these errors override the pm_domain so that all the PCI
-        * subsys suspend / resume handling is skipped.
-        */
-       isp->pm_domain.ops.runtime_suspend = atomisp_power_off;
-       isp->pm_domain.ops.runtime_resume = atomisp_power_on;
-       isp->pm_domain.ops.suspend = atomisp_suspend;
-       isp->pm_domain.ops.resume = atomisp_resume;
-
-       dev_pm_domain_set(&pdev->dev, &isp->pm_domain);
-
-       pm_runtime_put_noidle(&pdev->dev);
-       pm_runtime_allow(&pdev->dev);
-
        /* Init ISP memory management */
        hmm_init();
 
@@ -1504,72 +1469,45 @@ static int atomisp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i
                                        IRQF_SHARED, "isp_irq", isp);
        if (err) {
                dev_err(&pdev->dev, "Failed to request irq (%d)\n", err);
-               goto request_irq_fail;
+               goto error_unregister_entities;
        }
 
        /* Load firmware into ISP memory */
        err = atomisp_css_load_firmware(isp);
        if (err) {
                dev_err(&pdev->dev, "Failed to init css.\n");
-               goto css_init_fail;
+               goto error_free_irq;
        }
        /* Clear FW image from memory */
        release_firmware(isp->firmware);
        isp->firmware = NULL;
        isp->css_env.isp_css_fw.data = NULL;
 
+       atomisp_pm_init(isp);
+
        err = v4l2_async_nf_register(&isp->notifier);
        if (err) {
                dev_err(isp->dev, "failed to register async notifier : %d\n", err);
-               goto css_init_fail;
+               goto error_unload_firmware;
        }
 
-       atomisp_drvfs_init(isp);
-
        return 0;
 
-css_init_fail:
+error_unload_firmware:
+       atomisp_pm_uninit(isp);
+       ia_css_unload_firmware();
+error_free_irq:
        devm_free_irq(&pdev->dev, pdev->irq, isp);
-request_irq_fail:
+error_unregister_entities:
        hmm_cleanup();
-       pm_runtime_get_noresume(&pdev->dev);
-       dev_pm_domain_set(&pdev->dev, NULL);
        atomisp_unregister_entities(isp);
-register_entities_fail:
+error_uninitialize_modules:
        atomisp_uninitialize_modules(isp);
-initialize_modules_fail:
-       cpu_latency_qos_remove_request(&isp->pm_qos);
+error_irq_uninit:
        atomisp_msi_irq_uninit(isp);
        pci_free_irq_vectors(pdev);
-enable_msi_fail:
-fw_validation_fail:
+error_release_firmware:
        release_firmware(isp->firmware);
-load_fw_fail:
-       /*
-        * Switch off ISP, as keeping it powered on would prevent
-        * reaching S0ix states.
-        *
-        * The following lines have been copied from atomisp suspend path
-        */
-
-       pci_read_config_dword(pdev, PCI_INTERRUPT_CTRL, &irq);
-       irq &= BIT(INTR_IIR);
-       pci_write_config_dword(pdev, PCI_INTERRUPT_CTRL, irq);
-
-       pci_read_config_dword(pdev, PCI_INTERRUPT_CTRL, &irq);
-       irq &= ~BIT(INTR_IER);
-       pci_write_config_dword(pdev, PCI_INTERRUPT_CTRL, irq);
-
-       atomisp_msi_irq_uninit(isp);
-
-       /* Address later when we worry about the ...field chips */
-       if (IS_ENABLED(CONFIG_PM) && atomisp_mrfld_power(isp, false))
-               dev_err(&pdev->dev, "Failed to switch off ISP\n");
-
-atomisp_dev_alloc_fail:
-       pcim_iounmap_regions(pdev, BIT(ATOM_ISP_PCI_BAR));
-
-ioremap_fail:
        return err;
 }
 
@@ -1577,22 +1515,21 @@ static void atomisp_pci_remove(struct pci_dev *pdev)
 {
        struct atomisp_device *isp = pci_get_drvdata(pdev);
 
-       dev_info(&pdev->dev, "Removing atomisp driver\n");
+       atomisp_pm_uninit(isp);
 
-       atomisp_drvfs_exit();
+       if (isp->pm_only)
+               return;
 
+       /* Undo ia_css_init() from atomisp_power_on() */
+       atomisp_css_uninit(isp);
        ia_css_unload_firmware();
+       devm_free_irq(&pdev->dev, pdev->irq, isp);
        hmm_cleanup();
 
-       pm_runtime_forbid(&pdev->dev);
-       pm_runtime_get_noresume(&pdev->dev);
-       dev_pm_domain_set(&pdev->dev, NULL);
-       cpu_latency_qos_remove_request(&isp->pm_qos);
-
-       atomisp_msi_irq_uninit(isp);
        atomisp_unregister_entities(isp);
-
-       release_firmware(isp->firmware);
+       atomisp_uninitialize_modules(isp);
+       atomisp_msi_irq_uninit(isp);
+       pci_free_irq_vectors(pdev);
 }
 
 static const struct pci_device_id atomisp_pci_tbl[] = {
@@ -1608,11 +1545,12 @@ static const struct pci_device_id atomisp_pci_tbl[] = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, ATOMISP_PCI_DEVICE_SOC_CHT)},
        {0,}
 };
-
 MODULE_DEVICE_TABLE(pci, atomisp_pci_tbl);
 
-
 static struct pci_driver atomisp_pci_driver = {
+       .driver = {
+               .dev_groups = dbg_attr_groups,
+       },
        .name = "atomisp-isp2",
        .id_table = atomisp_pci_tbl,
        .probe = atomisp_pci_probe,
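
Setting .driver.dev_groups hands the sysfs attribute lifecycle to the driver
core, replacing the removed atomisp_drvfs_init()/atomisp_drvfs_exit() calls.
Roughly what the core does on bind/unbind (a sketch; the actual logic lives in
drivers/base/dd.c):

        /* on successful bind, before probe() completes: */
        device_add_groups(dev, drv->dev_groups);
        /* on unbind, after remove(): */
        device_remove_groups(dev, drv->dev_groups);
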
index 0579deac55350fdd64b266b073b9e63a9b59631f..e9846951f4ed9b901c2e72384bd919b387182913 100644 (file)
@@ -73,7 +73,7 @@ uint32_t ia_css_circbuf_pop(
 
 /**
  * @brief Extract a value out of the circular buffer.
- * Get a value at an arbitrary poistion in the circular
+ * Get a value at an arbitrary position in the circular
  * buffer. The user should call "ia_css_circbuf_is_empty()"
  * to avoid accessing to an empty buffer.
  *
index d9f7c143794dd04ac646b00dd2821338d1ec6266..198c9f6e61911668b2c9d45877ebe381a63a3f24 100644 (file)
@@ -24,7 +24,7 @@
  **********************************************************************/
 /*
  * @brief Read the oldest element from the circular buffer.
- * Read the oldest element WITHOUT checking whehter the
+ * Read the oldest element WITHOUT checking whether the
  * circular buffer is empty or not. The oldest element is
  * also removed out from the circular buffer.
  *
@@ -129,7 +129,7 @@ uint32_t ia_css_circbuf_extract(ia_css_circbuf_t *cb, int offset)
        u32 src_pos;
        u32 dest_pos;
 
-       /* get the maximum offest */
+       /* get the maximum offset */
        max_offset = ia_css_circbuf_get_offset(cb, cb->desc->start, cb->desc->end);
        max_offset--;
 
@@ -207,7 +207,7 @@ bool ia_css_circbuf_increase_size(
 {
        u8 curr_size;
        u8 curr_end;
-       unsigned int i = 0;
+       unsigned int i;
 
        if (!cb || sz_delta == 0)
                return false;
index d6e52b4971d689dcf92b729c97bf224b933a3363..f6838a8fc9d5d324d78169d6a94ed40cd9b34097 100644 (file)
@@ -84,7 +84,7 @@ struct ia_css_blob_info {
                memory_offsets;  /** offset wrt hdr in bytes */
        u32 prog_name_offset;  /** offset wrt hdr in bytes */
        u32 size;                       /** Size of blob */
-       u32 padding_size;       /** total cummulative of bytes added due to section alignment */
+       u32 padding_size;       /** total accumulation of bytes added due to section alignment */
        u32 icache_source;      /** Position of icache in blob */
        u32 icache_size;        /** Size of icache section */
        u32 icache_padding;/** bytes added due to icache section alignment */
@@ -408,7 +408,7 @@ struct ia_css_acc_sp {
 };
 
 /* Acceleration firmware descriptor.
-  * This descriptor descibes either SP code (stand-alone), or
+  * This descriptor describes either SP code (stand-alone), or
   * ISP code (a separate pipeline stage).
   */
 struct ia_css_acc_fw_hdr {
index 88f031a63ba269432fd48b14abc813b2c32bc0ce..6a473459b346a9b3fc104dcce1886c607284bc57 100644 (file)
  *                             environment in which the CSS code runs. This is
  *                             used for host side memory access and message
  *                             printing. May not be NULL.
- * @param[in]  fw              Firmware package containing the firmware for all
- *                             predefined ISP binaries.
- *                             if fw is NULL the firmware must be loaded before
- *                             through a call of ia_css_load_firmware
  * @param[in]  l1_base         Base index (isp2400)
  *                              of the L1 page table. This is a physical
  *                              address or index.
  * @param[in]  irq_type        The type of interrupt to be used (edge or level)
- * @return                             Returns -EINVAL in case of any
+ * @return                     Returns -EINVAL in case of any
  *                             errors and 0 otherwise.
  *
  * This function initializes the API which includes allocating and initializing
- * internal data structures. This also interprets the firmware package. All
- * contents of this firmware package are copied into local data structures, so
- * the fw pointer could be freed after this function completes.
+ * internal data structures.
+ * ia_css_load_firmware() must be called to load the firmware before calling
+ * this function.
  */
 int ia_css_init(struct device           *dev,
-                           const struct ia_css_env *env,
-                           const struct ia_css_fw  *fw,
-                           u32                     l1_base,
-                           enum ia_css_irq_type    irq_type);
+               const struct ia_css_env *env,
+               u32                     l1_base,
+               enum ia_css_irq_type    irq_type);
 
 /* @brief Un-initialize the CSS API.
  * @return     None
  *
- * This function deallocates all memory that has been allocated by the CSS API
- * Exception: if you explicitly loaded firmware through ia_css_load_firmware
- * you need to call ia_css_unload_firmware to deallocate the memory reserved
- * for the firmware.
- * After this function is called, no other CSS functions should be called
- * with the exception of ia_css_init which will re-initialize the CSS code,
- * ia_css_unload_firmware to unload the firmware or ia_css_load_firmware
- * to load new firmware
+ * This function deallocates all memory that has been allocated by the CSS API.
+ * After this function is called, no other CSS functions should be called.
  */
 void
 ia_css_uninit(void);
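
With the fw argument gone from ia_css_init(), firmware loading is strictly
decoupled from CSS init. The expected call order now looks roughly like this
(variable names are illustrative only):

        /* once, at probe time: */
        err = ia_css_load_firmware(dev, &css_env, &css_fw);
        if (err)
                return err;

        /* each time the ISP is powered up: */
        err = ia_css_init(dev, &css_env, mmu_base_addr, IA_CSS_IRQ_TYPE_PULSE);
        if (err)
                return err;

        /* teardown, in reverse order: */
        ia_css_uninit();
        ia_css_unload_firmware();       /* only valid once the API is uninitialized */
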
index 01d2faf557cf8771b7e792dd8049733b7068a7f2..d3a66128b4de6532f6edc868919ecb8987cb6bed 100644 (file)
@@ -46,10 +46,6 @@ struct device;
  * This function interprets the firmware package. All
  * contents of this firmware package are copied into local data structures, so
  * the fw pointer could be freed after this function completes.
- *
- * Rationale for this function is that it can be called before ia_css_init, and thus
- * speeds up ia_css_init (ia_css_init is called each time a stream is created but the
- * firmware only needs to be loaded once).
  */
 int
 ia_css_load_firmware(struct device *dev, const struct ia_css_env *env,
@@ -61,6 +57,8 @@ ia_css_load_firmware(struct device *dev, const struct ia_css_env *env,
  * This function unloads the firmware loaded by ia_css_load_firmware.
  * It is pointless to call this function if no firmware is loaded,
  * but it won't harm. Use this to deallocate all memory associated with the firmware.
+ * This function may only be called when the CSS API is in the uninitialized state
+ * (e.g. after calling ia_css_uninit()).
  */
 void
 ia_css_unload_firmware(void);
index 26b1b3c8ba625d2dcd091a29ee7cf5b69dd0d860..00e2fd1f9647ad10e89e9a83a5e222194540974e 100644 (file)
@@ -84,11 +84,11 @@ enum ia_css_irq_info {
        IA_CSS_IRQ_INFO_ISP_BINARY_STATISTICS_READY   = BIT(17),
        /** ISP binary statistics are ready */
        IA_CSS_IRQ_INFO_INPUT_SYSTEM_ERROR            = BIT(18),
-       /** the input system in in error */
+       /** the input system is in error */
        IA_CSS_IRQ_INFO_IF_ERROR                      = BIT(19),
-       /** the input formatter in in error */
+       /** the input formatter is in error */
        IA_CSS_IRQ_INFO_DMA_ERROR                     = BIT(20),
-       /** the dma in in error */
+       /** the dma is in error */
        IA_CSS_IRQ_INFO_ISYS_EVENTS_READY             = BIT(21),
        /** end-of-frame events are ready in the isys_event queue */
 };
index 175c301ee96ac05110e0609f65443cedd6c1bdb6..ecc98686f5cf5369ba15fd820fadb767a57e30fa 100644 (file)
@@ -57,9 +57,9 @@ struct ia_css_hdr_exclusion_params {
 };
 
 /**
- * \brief HDR public paramterers.
+ * \brief HDR public parameters.
  * \details Struct with all parameters for HDR that can be set from
- * the CSS API. Currenly, only test parameters are defined.
+ * the CSS API. Currently, only test parameters are defined.
  */
 struct ia_css_hdr_config {
        struct ia_css_hdr_irradiance_params irradiance; /** HDR irradiance parameters */
index 946b074e82886b8b398baf164df9f8822e1dcb76..d25bf59273baad97600fa6d6e95390920b1ad2dc 100644 (file)
@@ -19,7 +19,7 @@
 
 /* Multi-Axes Color Correction table for ISP1.
  *     64values = 2x2matrix for 16area, [s2.13]
- *     ineffective: 16 of "identity 2x2 matix" {8192,0,0,8192}
+ *     ineffective: 16 of "identity 2x2 matrix" {8192,0,0,8192}
  */
 const struct ia_css_macc_table default_macc_table = {
        {
@@ -36,7 +36,7 @@ const struct ia_css_macc_table default_macc_table = {
 
 /* Multi-Axes Color Correction table for ISP2.
  *     64values = 2x2matrix for 16area, [s1.12]
- *     ineffective: 16 of "identity 2x2 matix" {4096,0,0,4096}
+ *     ineffective: 16 of "identity 2x2 matrix" {4096,0,0,4096}
  */
 const struct ia_css_macc_table default_macc2_table = {
        {
index 61f23814e2fdb9eb297567b93f3cc6fd1c0cfe31..3ff61faf0621a96d50906e6e84785b35a537f283 100644 (file)
@@ -19,7 +19,7 @@
 #define                N_CSI_PORTS (3)
 //AM: Use previous define for this.
 
-//MIPI allows upto 4 channels.
+//MIPI allows up to 4 channels.
 #define                N_CHANNELS  (4)
 // 12KB = 256bit x 384 words
 #define                IB_CAPACITY_IN_WORDS (384)
index 447c7c5c55a1f74162c493ff75fa69e9aac44498..523c948923f315914db1d71d6da5def69e0679fa 100644 (file)
@@ -163,7 +163,7 @@ STORAGE_CLASS_INPUT_SYSTEM_H void receiver_port_reg_store(
     const hrt_address                  reg,
     const hrt_data                             value);
 
-/*! Read from a control register PORT[port_ID] of of RECEIVER[ID]
+/*! Read from a control register PORT[port_ID] of RECEIVER[ID]
 
  \param        ID[in]                          RECEIVER identifier
  \param        port_ID[in]                     mipi PORT identifier
index 0f3729e55e14a6fb3b03b439fcff22552530350b..130662f8e76845b1b57cdf134c11ccb4e1987f7b 100644 (file)
@@ -534,7 +534,7 @@ ia_css_binary_uninit(void) {
 static int
 binary_grid_deci_factor_log2(int width, int height)
 {
-       /* 3A/Shading decimation factor spcification (at August 2008)
+       /* 3A/Shading decimation factor specification (at August 2008)
         * ------------------------------------------------------------------
         * [Image Width (BQ)] [Decimation Factor (BQ)] [Resulting grid cells]
         * 1280 and up          32                       40 and up
index 3d8741e7d5ca792d484200b1905c17b76dd3bc07..9d2b5f9cbb146fd7693d618c9a0a167e8973f61a 100644 (file)
@@ -693,7 +693,7 @@ static void pipeline_init_defaults(
 static void ia_css_pipeline_set_zoom_stage(struct ia_css_pipeline *pipeline)
 {
        struct ia_css_pipeline_stage *stage = NULL;
-       int err = 0;
+       int err;
 
        assert(pipeline);
        if (pipeline->pipe_id == IA_CSS_PIPE_ID_PREVIEW) {
index 2f1c2df59f71910be7dec3e58447a326e7f5701d..0e430388b331b50276e5c748674c153b6b1e93da 100644 (file)
@@ -81,7 +81,7 @@ int ia_css_queue_uninit(ia_css_queue_t *qhandle)
 
 int ia_css_queue_enqueue(ia_css_queue_t *qhandle, uint32_t item)
 {
-       int error = 0;
+       int error;
 
        if (!qhandle)
                return -EINVAL;
@@ -123,7 +123,7 @@ int ia_css_queue_enqueue(ia_css_queue_t *qhandle, uint32_t item)
 
                /* c. Store the queue object */
                /* Set only fields requiring update with
-                * valid value. Avoids uncessary calls
+                * valid value. Avoids unnecessary calls
                 * to load/store functions
                 */
                ignore_desc_flags = QUEUE_IGNORE_SIZE_START_STEP_FLAGS;
@@ -138,7 +138,7 @@ int ia_css_queue_enqueue(ia_css_queue_t *qhandle, uint32_t item)
 
 int ia_css_queue_dequeue(ia_css_queue_t *qhandle, uint32_t *item)
 {
-       int error = 0;
+       int error;
 
        if (!qhandle || NULL == item)
                return -EINVAL;
@@ -180,7 +180,7 @@ int ia_css_queue_dequeue(ia_css_queue_t *qhandle, uint32_t *item)
 
                /* c. Store the queue object */
                /* Set only fields requiring update with
-                * valid value. Avoids uncessary calls
+                * valid value. Avoids unnecessary calls
                 * to load/store functions
                 */
                ignore_desc_flags = QUEUE_IGNORE_SIZE_END_STEP_FLAGS;
@@ -193,7 +193,7 @@ int ia_css_queue_dequeue(ia_css_queue_t *qhandle, uint32_t *item)
 
 int ia_css_queue_is_full(ia_css_queue_t *qhandle, bool *is_full)
 {
-       int error = 0;
+       int error;
 
        if ((!qhandle) || (!is_full))
                return -EINVAL;
@@ -225,7 +225,7 @@ int ia_css_queue_is_full(ia_css_queue_t *qhandle, bool *is_full)
 
 int ia_css_queue_get_free_space(ia_css_queue_t *qhandle, uint32_t *size)
 {
-       int error = 0;
+       int error;
 
        if ((!qhandle) || (!size))
                return -EINVAL;
@@ -257,7 +257,7 @@ int ia_css_queue_get_free_space(ia_css_queue_t *qhandle, uint32_t *size)
 
 int ia_css_queue_get_used_space(ia_css_queue_t *qhandle, uint32_t *size)
 {
-       int error = 0;
+       int error;
 
        if ((!qhandle) || (!size))
                return -EINVAL;
@@ -289,8 +289,8 @@ int ia_css_queue_get_used_space(ia_css_queue_t *qhandle, uint32_t *size)
 
 int ia_css_queue_peek(ia_css_queue_t *qhandle, u32 offset, uint32_t *element)
 {
-       u32 num_elems = 0;
-       int error = 0;
+       u32 num_elems;
+       int error;
 
        if ((!qhandle) || (!element))
                return -EINVAL;
@@ -338,7 +338,7 @@ int ia_css_queue_peek(ia_css_queue_t *qhandle, u32 offset, uint32_t *element)
 
 int ia_css_queue_is_empty(ia_css_queue_t *qhandle, bool *is_empty)
 {
-       int error = 0;
+       int error;
 
        if ((!qhandle) || (!is_empty))
                return -EINVAL;
@@ -370,7 +370,7 @@ int ia_css_queue_is_empty(ia_css_queue_t *qhandle, bool *is_empty)
 
 int ia_css_queue_get_size(ia_css_queue_t *qhandle, uint32_t *size)
 {
-       int error = 0;
+       int error;
 
        if ((!qhandle) || (!size))
                return -EINVAL;
index 2e07dab8bf519571e9c1da438ae35fba6fe8d0c2..1f24db77fe387819c6c5db1eb389f21c67036dc6 100644 (file)
@@ -198,7 +198,7 @@ void rmgr_push_handle(struct ia_css_rmgr_vbuf_pool *pool,
                      struct ia_css_rmgr_vbuf_handle **handle)
 {
        u32 i;
-       bool succes = false;
+       bool success = false;
 
        assert(pool);
        assert(pool->recycle);
@@ -208,11 +208,11 @@ void rmgr_push_handle(struct ia_css_rmgr_vbuf_pool *pool,
                if (!pool->handles[i]) {
                        ia_css_rmgr_refcount_retain_vbuf(handle);
                        pool->handles[i] = *handle;
-                       succes = true;
+                       success = true;
                        break;
                }
        }
-       assert(succes);
+       assert(success);
 }
 
 /*
index f35c90809414cb6d5e235e16ebdca5dd3ecb047d..938a4ea89c59087f180aed9a8dae7316d61f53b2 100644 (file)
@@ -174,8 +174,6 @@ static struct sh_css_hmm_buffer_record hmm_buffer_record[MAX_HMM_BUFFER_NUM];
 
 #define GPIO_FLASH_PIN_MASK BIT(HIVE_GPIO_STROBE_TRIGGER_PIN)
 
-static bool fw_explicitly_loaded;
-
 /*
  * Local prototypes
  */
@@ -1360,7 +1358,6 @@ ia_css_unload_firmware(void)
                ia_css_binary_uninit();
                sh_css_unload_firmware();
        }
-       fw_explicitly_loaded = false;
 }
 
 static void
@@ -1405,13 +1402,9 @@ ia_css_load_firmware(struct device *dev, const struct ia_css_env *env,
                my_css.flush = env->cpu_mem_env.flush;
        }
 
-       ia_css_unload_firmware(); /* in case we are called twice */
        err = sh_css_load_firmware(dev, fw->data, fw->bytes);
-       if (!err) {
+       if (!err)
                err = ia_css_binary_init_infos();
-               if (!err)
-                       fw_explicitly_loaded = true;
-       }
 
        ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_load_firmware() leave\n");
        return err;
@@ -1419,9 +1412,7 @@ ia_css_load_firmware(struct device *dev, const struct ia_css_env *env,
 
 int
 ia_css_init(struct device *dev, const struct ia_css_env *env,
-           const struct ia_css_fw  *fw,
-           u32                 mmu_l1_base,
-           enum ia_css_irq_type     irq_type)
+           u32 mmu_l1_base, enum ia_css_irq_type irq_type)
 {
        int err;
        ia_css_spctrl_cfg spctrl_cfg;
@@ -1466,8 +1457,6 @@ ia_css_init(struct device *dev, const struct ia_css_env *env,
        /* Check struct ia_css_init_dmem_cfg */
        COMPILATION_ERROR_IF(sizeof(struct ia_css_sp_init_dmem_cfg)             != SIZE_OF_IA_CSS_SP_INIT_DMEM_CFG_STRUCT);
 
-       if (!fw && !fw_explicitly_loaded)
-               return -EINVAL;
        if (!env)
                return -EINVAL;
 
@@ -1543,22 +1532,7 @@ ia_css_init(struct device *dev, const struct ia_css_env *env,
                IA_CSS_LEAVE_ERR(err);
                return err;
        }
-       if (fw) {
-               ia_css_unload_firmware(); /* in case we already had firmware loaded */
-               err = sh_css_load_firmware(dev, fw->data, fw->bytes);
-               if (err) {
-                       IA_CSS_LEAVE_ERR(err);
-                       return err;
-               }
-               err = ia_css_binary_init_infos();
-               if (err) {
-                       IA_CSS_LEAVE_ERR(err);
-                       return err;
-               }
-               fw_explicitly_loaded = false;
 
-               my_css_save.loaded_fw = (struct ia_css_fw *)fw;
-       }
        if (!sh_css_setup_spctrl_config(&sh_css_sp_fw, SP_PROG_NAME, &spctrl_cfg))
                return -EINVAL;
 
@@ -2163,9 +2137,6 @@ ia_css_uninit(void)
                ifmtr_set_if_blocking_mode_reset = true;
        }
 
-       if (!fw_explicitly_loaded)
-               ia_css_unload_firmware();
-
        ia_css_spctrl_unload_fw(SP0_ID);
        sh_css_sp_set_sp_running(false);
        /* check and free any remaining mipi frames */
@@ -3635,7 +3606,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 
        assert(pipeline || pipe_id == IA_CSS_PIPE_ID_COPY);
 
-       assert(sizeof(NULL) <= sizeof(ddr_buffer.kernel_ptr));
+       assert(sizeof(void *) <= sizeof(ddr_buffer.kernel_ptr));
        ddr_buffer.kernel_ptr = HOST_ADDRESS(NULL);
        ddr_buffer.cookie_ptr = buffer->driver_cookie;
        ddr_buffer.timing_data = buffer->timing_data;
index 7eb10b226f0a9ca91687387ae1c4ba592f49cff4..2afde974e75d24215b49c89e99c46693ec516acc 100644 (file)
@@ -131,7 +131,7 @@ RGB[0,8191],coef[-8192,8191] -> RGB[0,8191]
  * invalid rows/columns that result from filter initialization are skipped. */
 #define SH_CSS_MIN_DVS_ENVELOPE           12U
 
-/* The FPGA system (vec_nelems == 16) only supports upto 5MP */
+/* The FPGA system (vec_nelems == 16) only supports up to 5MP */
 #define SH_CSS_MAX_SENSOR_WIDTH           4608
 #define SH_CSS_MAX_SENSOR_HEIGHT          3450
 
index b7c1e164ee2449952a88c39e145d5910ee699efd..6e11fd7719384c47abd1108eadcf15ad05a4538f 100644 (file)
@@ -174,7 +174,7 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
        mem_words = ((embedded_data_size_words + 7) >> 3) +
        mem_words_for_first_line +
        (((height + 1) >> 1) - 1) * mem_words_per_odd_line +
-       /* ceil (height/2) - 1 (first line is calculated separatelly) */
+       /* ceil (height/2) - 1 (first line is calculated separately) */
        (height      >> 1) * mem_words_per_even_line + /* floor(height/2) */
        mem_words_for_EOF;
 
@@ -537,7 +537,7 @@ send_mipi_frames(struct ia_css_pipe *pipe)
 
        /* Hand-over the SP-internal mipi buffers */
        for (i = 0; i < my_css.num_mipi_frames[port]; i++) {
-               /* Need to include the ofset for port. */
+               /* Need to include the offset for port. */
                sh_css_update_host2sp_mipi_frame(port * NUM_MIPI_FRAMES_PER_STREAM + i,
                                                 my_css.mipi_frames[port][i]);
                sh_css_update_host2sp_mipi_metadata(port * NUM_MIPI_FRAMES_PER_STREAM + i,
index 1fd39a2fca98ae70ca74ea3768aeb5d7c95a51da..95cca281e8a378c6244bd25d5b81c392712361aa 100644 (file)
@@ -803,6 +803,7 @@ static int ipu_csc_scaler_release(struct file *file)
 
        dev_dbg(priv->dev, "Releasing instance %p\n", ctx);
 
+       v4l2_ctrl_handler_free(&ctx->ctrl_hdlr);
        v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
        v4l2_fh_del(&ctx->fh);
        v4l2_fh_exit(&ctx->fh);
index e28a33d9dec75572fc46fe3129b749f0e74cfce4..ccbc0371fba2e4a42cd76ac805e3468bf726c0fa 100644 (file)
@@ -401,7 +401,7 @@ int imx_media_fim_add_controls(struct imx_media_fim *fim)
 {
        /* add the FIM controls to the calling subdev ctrl handler */
        return v4l2_ctrl_add_handler(fim->sd->ctrl_handler,
-                                    &fim->ctrl_handler, NULL, false);
+                                    &fim->ctrl_handler, NULL, true);
 }
 
 /* Called by the subdev in its subdev registered callback */
index caa358e0bae40ca0f9f70323f4c271c9d5334ca8..4aa2797f5e3cf8778fa29555ce3f44f8c101bfd9 100644 (file)
@@ -2485,11 +2485,9 @@ struct ipu3_uapi_anr_config {
  *             &ipu3_uapi_yuvp1_y_ee_nr_config
  * @yds:       y down scaler config. See &ipu3_uapi_yuvp1_yds_config
  * @chnr:      chroma noise reduction config. See &ipu3_uapi_yuvp1_chnr_config
- * @reserved1: reserved
  * @yds2:      y channel down scaler config. See &ipu3_uapi_yuvp1_yds_config
  * @tcc:       total color correction config as defined in struct
  *             &ipu3_uapi_yuvp2_tcc_static_config
- * @reserved2: reserved
  * @anr:       advanced noise reduction config.See &ipu3_uapi_anr_config
  * @awb_fr:    AWB filter response config. See ipu3_uapi_awb_fr_config
  * @ae:        auto exposure config  As specified by &ipu3_uapi_ae_config
@@ -2724,7 +2722,6 @@ struct ipu3_uapi_obgrid_param {
  * @acc_ae: 0 = no update, 1 = update.
  * @acc_af: 0 = no update, 1 = update.
  * @acc_awb: 0 = no update, 1 = update.
- * @__acc_osys: 0 = no update, 1 = update.
  * @reserved3: Not used.
  * @lin_vmem_params: 0 = no update, 1 = update.
  * @tnr3_vmem_params: 0 = no update, 1 = update.
index a66f034380c057e5e6a97c23497ab225b31f3761..3df58eb3e8822b5eea8c3e70937841be6f2ca31c 100644 (file)
@@ -1069,6 +1069,11 @@ static int imgu_v4l2_subdev_register(struct imgu_device *imgu,
        struct imgu_media_pipe *imgu_pipe = &imgu->imgu_pipe[pipe];
 
        /* Initialize subdev media entity */
+       imgu_sd->subdev.entity.ops = &imgu_media_ops;
+       for (i = 0; i < IMGU_NODE_NUM; i++) {
+               imgu_sd->subdev_pads[i].flags = imgu_pipe->nodes[i].output ?
+                       MEDIA_PAD_FL_SINK : MEDIA_PAD_FL_SOURCE;
+       }
        r = media_entity_pads_init(&imgu_sd->subdev.entity, IMGU_NODE_NUM,
                                   imgu_sd->subdev_pads);
        if (r) {
@@ -1076,11 +1081,6 @@ static int imgu_v4l2_subdev_register(struct imgu_device *imgu,
                        "failed initialize subdev media entity (%d)\n", r);
                return r;
        }
-       imgu_sd->subdev.entity.ops = &imgu_media_ops;
-       for (i = 0; i < IMGU_NODE_NUM; i++) {
-               imgu_sd->subdev_pads[i].flags = imgu_pipe->nodes[i].output ?
-                       MEDIA_PAD_FL_SINK : MEDIA_PAD_FL_SOURCE;
-       }
 
        /* Initialize subdev */
        v4l2_subdev_init(&imgu_sd->subdev, &imgu_subdev_ops);
@@ -1177,15 +1177,15 @@ static int imgu_v4l2_node_setup(struct imgu_device *imgu, unsigned int pipe,
        }
 
        /* Initialize media entities */
+       node->vdev_pad.flags = node->output ?
+               MEDIA_PAD_FL_SOURCE : MEDIA_PAD_FL_SINK;
+       vdev->entity.ops = NULL;
        r = media_entity_pads_init(&vdev->entity, 1, &node->vdev_pad);
        if (r) {
                dev_err(dev, "failed initialize media entity (%d)\n", r);
                mutex_destroy(&node->lock);
                return r;
        }
-       node->vdev_pad.flags = node->output ?
-               MEDIA_PAD_FL_SOURCE : MEDIA_PAD_FL_SINK;
-       vdev->entity.ops = NULL;
 
        /* Initialize vbq */
        vbq->type = node->vdev_fmt.type;
index 0906b8fb5cc60eba0d135a52449c7c69cd55670e..2586851777000ef4afaf91824ea3f32168a94dd1 100644 (file)
@@ -101,7 +101,6 @@ struct amvdec_core {
  * @conf_esparser: mandatory call to let the vdec configure the ESPARSER
  * @vififo_level: mandatory call to get the current amount of data
  *               in the VIFIFO
- * @use_offsets: mandatory call. Returns 1 if the VDEC supports vififo offsets
  */
 struct amvdec_ops {
        int (*start)(struct amvdec_session *sess);
index 70c24b050a1b5e5ba086485849800f2a68a64e07..ec5169e7b3918bf2ff1ade1c294027cf816cd0e5 100644 (file)
@@ -20,28 +20,28 @@ static const struct stfcamss_format_info stf_wr_fmts[] = {
                .pixelformat = V4L2_PIX_FMT_SRGGB10,
                .planes = 1,
                .vsub = { 1 },
-               .bpp = 10,
+               .bpp = 16,
        },
        {
                .code = MEDIA_BUS_FMT_SGRBG10_1X10,
                .pixelformat = V4L2_PIX_FMT_SGRBG10,
                .planes = 1,
                .vsub = { 1 },
-               .bpp = 10,
+               .bpp = 16,
        },
        {
                .code = MEDIA_BUS_FMT_SGBRG10_1X10,
                .pixelformat = V4L2_PIX_FMT_SGBRG10,
                .planes = 1,
                .vsub = { 1 },
-               .bpp = 10,
+               .bpp = 16,
        },
        {
                .code = MEDIA_BUS_FMT_SBGGR10_1X10,
                .pixelformat = V4L2_PIX_FMT_SBGGR10,
                .planes = 1,
                .vsub = { 1 },
-               .bpp = 10,
+               .bpp = 16,
        },
 };
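
The bpp bump from 10 to 16 reflects the memory layout of these V4L2 formats:
V4L2_PIX_FMT_SRGGB10 and friends store each 10-bit sample in its own 16-bit
word, so stride and buffer-size math must use 16 bits per pixel. A minimal
sketch of the affected calculation (helper name hypothetical):

        /* bytes per line for an unpacked 10-bit Bayer format */
        static u32 stf_bytesperline(u32 width, u32 bpp)
        {
                return width * bpp / 8; /* bpp = 16 -> 2 bytes per sample */
        }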
 
index 52e94c8f2f01a8c3542dce541cc57b989488ed1c..780da4a8b5af18f1128c6856b975341aec32a275 100644 (file)
@@ -427,11 +427,11 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
        unsigned int ctb_addr_x, ctb_addr_y;
        struct cedrus_buffer *cedrus_buf;
        dma_addr_t src_buf_addr;
-       dma_addr_t src_buf_end_addr;
        u32 chroma_log2_weight_denom;
        u32 num_entry_point_offsets;
        u32 output_pic_list_index;
        u32 pic_order_cnt[2];
+       size_t slice_bytes;
        u8 padding;
        int count;
        u32 reg;
@@ -443,6 +443,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
        pred_weight_table = &slice_params->pred_weight_table;
        num_entry_point_offsets = slice_params->num_entry_point_offsets;
        cedrus_buf = vb2_to_cedrus_buffer(&run->dst->vb2_buf);
+       slice_bytes = vb2_get_plane_payload(&run->src->vb2_buf, 0);
 
        /*
         * If entry points offsets are present, we should get them
@@ -490,7 +491,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 
        cedrus_write(dev, VE_DEC_H265_BITS_OFFSET, 0);
 
-       reg = slice_params->bit_size;
+       reg = slice_bytes * 8;
        cedrus_write(dev, VE_DEC_H265_BITS_LEN, reg);
 
        /* Source beginning and end addresses. */
@@ -504,10 +505,7 @@ static int cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 
        cedrus_write(dev, VE_DEC_H265_BITS_ADDR, reg);
 
-       src_buf_end_addr = src_buf_addr +
-                          DIV_ROUND_UP(slice_params->bit_size, 8);
-
-       reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_end_addr);
+       reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_addr + slice_bytes);
        cedrus_write(dev, VE_DEC_H265_BITS_END_ADDR, reg);
 
        /* Coding tree block address */
index 8e4035ff367485a17de0e2e609cc9ad4ff2457e3..761c511aea07c9dfc523383a7aa6b5b86fc15e4a 100644 (file)
@@ -83,7 +83,7 @@ static int tcm_loop_show_info(struct seq_file *m, struct Scsi_Host *host)
 static int tcm_loop_driver_probe(struct device *);
 static void tcm_loop_driver_remove(struct device *);
 
-static struct bus_type tcm_loop_lld_bus = {
+static const struct bus_type tcm_loop_lld_bus = {
        .name                   = "tcm_loop_bus",
        .probe                  = tcm_loop_driver_probe,
        .remove                 = tcm_loop_driver_remove,
index d45f2c1ff34145b0d7ed6b748ec0d25daaeaa9fa..1c9d983a5a1fa2b0be1faa1a7dc41d1861e95e88 100644 (file)
@@ -95,7 +95,7 @@ static int tc_bus_match(struct device *dev, struct device_driver *drv)
        return 0;
 }
 
-struct bus_type tc_bus_type = {
+const struct bus_type tc_bus_type = {
        .name   = "tc",
        .match  = tc_bus_match,
 };
index aac80b69a069c38b12f85c8bcb4475e36729d83e..afbf7738c7c47c3377560a3288fa95083dd1c6b0 100644 (file)
@@ -309,7 +309,7 @@ static void mips_ejtag_fdc_console_write(struct console *c, const char *s,
        unsigned int i, buf_len, cpu;
        bool done_cr = false;
        char buf[4];
-       const char *buf_ptr = buf;
+       const u8 *buf_ptr = buf;
        /* Number of bytes of input data encoded up to each byte in buf */
        u8 inc[4];
 
index c8bf08c19c647a761e8c4b99fb2453c726e96c18..732d821db4f805d539e732367b9ef9df1afb7cc9 100644 (file)
@@ -1507,12 +1507,12 @@ static int pmz_attach(struct macio_dev *mdev, const struct of_device_id *match)
  * That one should not be called, macio isn't really a hotswap device,
  * we don't expect one of those serial ports to go away...
  */
-static int pmz_detach(struct macio_dev *mdev)
+static void pmz_detach(struct macio_dev *mdev)
 {
        struct uart_pmac_port   *uap = dev_get_drvdata(&mdev->ofdev.dev);
        
        if (!uap)
-               return -ENODEV;
+               return;
 
        uart_remove_one_port(&pmz_uart_reg, &uap->port);
 
@@ -1523,11 +1523,8 @@ static int pmz_detach(struct macio_dev *mdev)
        dev_set_drvdata(&mdev->ofdev.dev, NULL);
        uap->dev = NULL;
        uap->port.dev = NULL;
-       
-       return 0;
 }
 
-
 static int pmz_suspend(struct macio_dev *mdev, pm_message_t pm_state)
 {
        struct uart_pmac_port *uap = dev_get_drvdata(&mdev->ofdev.dev);
index 0787456c2b892f773bba5cf66c09ac7918787852..8db81f1a12d5fc8f39ac7a1010d6a0d1b55a16c9 100644 (file)
@@ -258,9 +258,7 @@ EXPORT_SYMBOL_GPL(ufshcd_mcq_write_cqis);
  * Current MCQ specification doesn't provide a Task Tag or its equivalent in
  * the Completion Queue Entry. Find the Task Tag using an indirect method.
  */
-static int ufshcd_mcq_get_tag(struct ufs_hba *hba,
-                                    struct ufs_hw_queue *hwq,
-                                    struct cq_entry *cqe)
+static int ufshcd_mcq_get_tag(struct ufs_hba *hba, struct cq_entry *cqe)
 {
        u64 addr;
 
@@ -278,7 +276,7 @@ static void ufshcd_mcq_process_cqe(struct ufs_hba *hba,
                                   struct ufs_hw_queue *hwq)
 {
        struct cq_entry *cqe = ufshcd_mcq_cur_cqe(hwq);
-       int tag = ufshcd_mcq_get_tag(hba, hwq, cqe);
+       int tag = ufshcd_mcq_get_tag(hba, cqe);
 
        if (cqe->command_desc_base_addr) {
                ufshcd_compl_one_cqe(hba, tag, cqe);
@@ -399,6 +397,12 @@ void ufshcd_mcq_enable_esi(struct ufs_hba *hba)
 }
 EXPORT_SYMBOL_GPL(ufshcd_mcq_enable_esi);
 
+void ufshcd_mcq_enable(struct ufs_hba *hba)
+{
+       ufshcd_rmwl(hba, MCQ_MODE_SELECT, MCQ_MODE_SELECT, REG_UFS_MEM_CFG);
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_enable);
+
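
ufshcd_mcq_enable() gives host-controller glue a single helper for flipping the
MCQ mode-select bit instead of open-coding the read-modify-write of
REG_UFS_MEM_CFG (the open-coded version in ufshcd_config_mcq() is dropped at
the end of this series). Typical caller, sketched:

        /* e.g. core or vendor code switching the controller into MCQ mode */
        ufshcd_mcq_enable(hba); /* sets MCQ_MODE_SELECT in REG_UFS_MEM_CFG */
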
 void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg)
 {
        ufshcd_writel(hba, msg->address_lo, REG_UFS_ESILBA);
index e6d12289e0170ef8c12a3e3628fe65a2468b5e7c..3d049967f6bc4a089bed6391a3c62e2c0a579fc8 100644 (file)
@@ -405,6 +405,53 @@ static ssize_t wb_flush_threshold_store(struct device *dev,
        return count;
 }
 
+/**
+ * pm_qos_enable_show - sysfs handler to show pm qos enable value
+ * @dev: device associated with the UFS controller
+ * @attr: sysfs attribute handle
+ * @buf: buffer for sysfs file
+ *
+ * Print 1 if PM QoS feature is enabled, 0 if disabled.
+ *
+ * Returns number of characters written to @buf.
+ */
+static ssize_t pm_qos_enable_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct ufs_hba *hba = dev_get_drvdata(dev);
+
+       return sysfs_emit(buf, "%d\n", hba->pm_qos_enabled);
+}
+
+/**
+ * pm_qos_enable_store - sysfs handler to store value
+ * @dev: device associated with the UFS controller
+ * @attr: sysfs attribute handle
+ * @buf: buffer for sysfs file
+ * @count: number of characters in @buf
+ *
+ * Write 0 to disable PM QoS or 1 to enable it. Enabled by default.
+ *
+ * Return: @count on success, a negative error code upon failure.
+ */
+static ssize_t pm_qos_enable_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t count)
+{
+       struct ufs_hba *hba = dev_get_drvdata(dev);
+       bool value;
+
+       if (kstrtobool(buf, &value))
+               return -EINVAL;
+
+       if (value)
+               ufshcd_pm_qos_init(hba);
+       else
+               ufshcd_pm_qos_exit(hba);
+
+       return count;
+}
+
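
A note on accepted input for the new attribute, based on kstrtobool()
semantics:

        /*
         * kstrtobool() accepts the usual spellings:
         *   "1", "y", "Y", "on"  -> true  -> ufshcd_pm_qos_init(hba)
         *   "0", "n", "N", "off" -> false -> ufshcd_pm_qos_exit(hba)
         * Anything else makes the store return -EINVAL.
         */
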
 static DEVICE_ATTR_RW(rpm_lvl);
 static DEVICE_ATTR_RO(rpm_target_dev_state);
 static DEVICE_ATTR_RO(rpm_target_link_state);
@@ -416,6 +463,7 @@ static DEVICE_ATTR_RW(wb_on);
 static DEVICE_ATTR_RW(enable_wb_buf_flush);
 static DEVICE_ATTR_RW(wb_flush_threshold);
 static DEVICE_ATTR_RW(rtc_update_ms);
+static DEVICE_ATTR_RW(pm_qos_enable);
 
 static struct attribute *ufs_sysfs_ufshcd_attrs[] = {
        &dev_attr_rpm_lvl.attr,
@@ -429,6 +477,7 @@ static struct attribute *ufs_sysfs_ufshcd_attrs[] = {
        &dev_attr_enable_wb_buf_flush.attr,
        &dev_attr_wb_flush_threshold.attr,
        &dev_attr_rtc_update_ms.attr,
+       &dev_attr_pm_qos_enable.attr,
        NULL
 };
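
As a usage note for the new attribute: it can be toggled from user space with a plain write. A minimal sketch (the exact sysfs path depends on the platform device name and is an assumption here):

	#include <fcntl.h>
	#include <unistd.h>

	/* Write "0" or "1" to <controller sysfs dir>/pm_qos_enable */
	static int set_ufs_pm_qos(const char *attr_path, int enable)
	{
		int fd = open(attr_path, O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -1;
		n = write(fd, enable ? "1" : "0", 1);
		close(fd);
		return n == 1 ? 0 : -1;
	}

kstrtobool() on the kernel side also accepts the "y"/"n" and "on"/"off" spellings.
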
 
index eac7fff6992d0a863ec674c4c3c33f50b0d1f51e..e30fd125988d7a8ca521d6fb30e97c671f269732 100644 (file)
@@ -1014,6 +1014,48 @@ static bool ufshcd_is_unipro_pa_params_tuning_req(struct ufs_hba *hba)
        return ufshcd_get_local_unipro_ver(hba) < UFS_UNIPRO_VER_1_6;
 }
 
+/**
+ * ufshcd_pm_qos_init - initialize PM QoS request
+ * @hba: per adapter instance
+ */
+void ufshcd_pm_qos_init(struct ufs_hba *hba)
+{
+       if (hba->pm_qos_enabled)
+               return;
+
+       cpu_latency_qos_add_request(&hba->pm_qos_req, PM_QOS_DEFAULT_VALUE);
+
+       if (cpu_latency_qos_request_active(&hba->pm_qos_req))
+               hba->pm_qos_enabled = true;
+}
+
+/**
+ * ufshcd_pm_qos_exit - remove request from PM QoS
+ * @hba: per adapter instance
+ */
+void ufshcd_pm_qos_exit(struct ufs_hba *hba)
+{
+       if (!hba->pm_qos_enabled)
+               return;
+
+       cpu_latency_qos_remove_request(&hba->pm_qos_req);
+       hba->pm_qos_enabled = false;
+}
+
+/**
+ * ufshcd_pm_qos_update - update PM QoS request
+ * @hba: per adapter instance
+ * @on: If True, vote for perf PM QoS mode otherwise power save mode
+ */
+static void ufshcd_pm_qos_update(struct ufs_hba *hba, bool on)
+{
+       if (!hba->pm_qos_enabled)
+               return;
+
+       cpu_latency_qos_update_request(&hba->pm_qos_req, on ? 0 : PM_QOS_DEFAULT_VALUE);
+}
+
 /**
  * ufshcd_set_clk_freq - set UFS controller clock frequencies
  * @hba: per adapter instance
@@ -1160,8 +1202,11 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, unsigned long freq,
                                            hba->devfreq->previous_freq);
                else
                        ufshcd_set_clk_freq(hba, !scale_up);
+               goto out;
        }
 
+       ufshcd_pm_qos_update(hba, scale_up);
+
 out:
        trace_ufshcd_profile_clk_scaling(dev_name(hba->dev),
                        (scale_up ? "up" : "down"),
@@ -5602,7 +5647,6 @@ static void ufshcd_mcq_compl_pending_transfer(struct ufs_hba *hba,
        struct ufshcd_lrb *lrbp;
        struct scsi_cmnd *cmd;
        unsigned long flags;
-       u32 hwq_num, utag;
        int tag;
 
        for (tag = 0; tag < hba->nutrs; tag++) {
@@ -5612,9 +5656,7 @@ static void ufshcd_mcq_compl_pending_transfer(struct ufs_hba *hba,
                    test_bit(SCMD_STATE_COMPLETE, &cmd->state))
                        continue;
 
-               utag = blk_mq_unique_tag(scsi_cmd_to_rq(cmd));
-               hwq_num = blk_mq_unique_tag_to_hwq(utag);
-               hwq = &hba->uhq[hwq_num];
+               hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
 
                if (force_compl) {
                        ufshcd_mcq_compl_all_cqes_lock(hba, hwq);
@@ -7987,11 +8029,13 @@ out:
 
 static inline void ufshcd_blk_pm_runtime_init(struct scsi_device *sdev)
 {
+       struct Scsi_Host *shost = sdev->host;
+
        scsi_autopm_get_device(sdev);
        blk_pm_runtime_init(sdev->request_queue, &sdev->sdev_gendev);
        if (sdev->rpm_autosuspend)
                pm_runtime_set_autosuspend_delay(&sdev->sdev_gendev,
-                                                RPM_AUTOSUSPEND_DELAY_MS);
+                                                shost->rpm_autosuspend_delay);
        scsi_autopm_put_device(sdev);
 }
 
@@ -8801,9 +8845,7 @@ static void ufshcd_config_mcq(struct ufs_hba *hba)
        hba->host->can_queue = hba->nutrs - UFSHCD_NUM_RESERVED;
        hba->reserved_slot = hba->nutrs - UFSHCD_NUM_RESERVED;
 
-       /* Select MCQ mode */
-       ufshcd_writel(hba, ufshcd_readl(hba, REG_UFS_MEM_CFG) | 0x1,
-                     REG_UFS_MEM_CFG);
+       ufshcd_mcq_enable(hba);
        hba->mcq_enabled = true;
 
        dev_info(hba->dev, "MCQ configured, nr_queues=%d, io_queues=%d, read_queue=%d, poll_queues=%d, queue_depth=%d\n",
@@ -9065,7 +9107,6 @@ static const struct scsi_host_template ufshcd_driver_template = {
        .track_queue_depth      = 1,
        .skip_settle_delay      = 1,
        .sdev_groups            = ufshcd_driver_groups,
-       .rpm_autosuspend_delay  = RPM_AUTOSUSPEND_DELAY_MS,
 };
 
 static int ufshcd_config_vreg_load(struct device *dev, struct ufs_vreg *vreg,
@@ -9280,6 +9321,8 @@ static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on)
        if (ret)
                return ret;
 
+       if (!ufshcd_is_clkscaling_supported(hba))
+               ufshcd_pm_qos_update(hba, on);
 out:
        if (ret) {
                list_for_each_entry(clki, head, list) {
@@ -9457,6 +9500,7 @@ out:
 static void ufshcd_hba_exit(struct ufs_hba *hba)
 {
        if (hba->is_powered) {
+               ufshcd_pm_qos_exit(hba);
                ufshcd_exit_clk_scaling(hba);
                ufshcd_exit_clk_gating(hba);
                if (hba->eh_wq)
@@ -9476,7 +9520,17 @@ static int ufshcd_execute_start_stop(struct scsi_device *sdev,
                                     struct scsi_sense_hdr *sshdr)
 {
        const unsigned char cdb[6] = { START_STOP, 0, 0, 0, pwr_mode << 4, 0 };
+       struct scsi_failure failure_defs[] = {
+               {
+                       .allowed = 2,
+                       .result = SCMD_FAILURE_RESULT_ANY,
+               },
+       };
+       struct scsi_failures failures = {
+               .failure_definitions = failure_defs,
+       };
        const struct scsi_exec_args args = {
+               .failures = &failures,
                .sshdr = sshdr,
                .req_flags = BLK_MQ_REQ_PM,
                .scmd_flags = SCMD_FAIL_IF_RECOVERING,
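
For context, this adopts the declarative retry tables added to the SCSI core this cycle: the core itself retries a passthrough command whose result matches a table entry, which is what allows the hand-rolled retry loop to be deleted in the next hunk. An annotated sketch of the policy above (only .allowed, .result and .failure_definitions are taken from this diff; any other scsi_failure matching fields would be assumptions about the API):

	struct scsi_failure failure_defs[] = {
		{
			/* retry a matching failure up to two times */
			.allowed = 2,
			/* match any failing result */
			.result = SCMD_FAILURE_RESULT_ANY,
		},
	};
	struct scsi_failures failures = {
		.failure_definitions = failure_defs,
	};
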
@@ -9502,7 +9556,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
        struct scsi_sense_hdr sshdr;
        struct scsi_device *sdp;
        unsigned long flags;
-       int ret, retries;
+       int ret;
 
        spin_lock_irqsave(hba->host->host_lock, flags);
        sdp = hba->ufs_device_wlun;
@@ -9528,15 +9582,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
         * callbacks, hence set the RQF_PM flag so that it doesn't resume the
         * already suspended children.
         */
-       for (retries = 3; retries > 0; --retries) {
-               ret = ufshcd_execute_start_stop(sdp, pwr_mode, &sshdr);
-               /*
-                * scsi_execute() only returns a negative value if the request
-                * queue is dying.
-                */
-               if (ret <= 0)
-                       break;
-       }
+       ret = ufshcd_execute_start_stop(sdp, pwr_mode, &sshdr);
        if (ret) {
                sdev_printk(KERN_WARNING, sdp,
                            "START_STOP failed for power mode: %d, result %x\n",
@@ -10109,6 +10155,7 @@ static int ufshcd_suspend(struct ufs_hba *hba)
        ufshcd_vreg_set_lpm(hba);
        /* Put the host controller in low power mode if possible */
        ufshcd_hba_vreg_set_lpm(hba);
+       ufshcd_pm_qos_update(hba, false);
        return ret;
 }
 
@@ -10520,6 +10567,10 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
        host->max_cmd_len = UFS_CDB_SIZE;
        host->queuecommand_may_block = !!(hba->caps & UFSHCD_CAP_CLK_GATING);
 
+       /* Use the default RPM delay if the host driver has not set one */
+       if (host->rpm_autosuspend_delay == 0)
+               host->rpm_autosuspend_delay = RPM_AUTOSUSPEND_DELAY_MS;
+
        hba->max_pwr_info.is_valid = false;
 
        /* Initialize work queues */
@@ -10655,6 +10706,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
        ufs_sysfs_add_nodes(hba->dev);
 
        device_enable_async_suspend(dev);
+       ufshcd_pm_qos_init(hba);
        return 0;
 
 free_tmf_queue:
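
Taken together, the ufshcd hunks above wire a single CPU-latency QoS request into the adapter lifecycle. A condensed sketch of the underlying kernel API (all four calls are the existing cpu_latency_qos_* interface from <linux/pm_qos.h>; the function itself is illustrative):

	#include <linux/pm_qos.h>

	static void example_pm_qos_cycle(struct pm_qos_request *req)
	{
		/* register the request with no constraint (probe / sysfs enable) */
		cpu_latency_qos_add_request(req, PM_QOS_DEFAULT_VALUE);

		/* perf vote: cap acceptable CPU wakeup latency at 0 us
		 * (clocks on / scaling up) */
		cpu_latency_qos_update_request(req, 0);

		/* drop back to no constraint (clocks off / suspend) */
		cpu_latency_qos_update_request(req, PM_QOS_DEFAULT_VALUE);

		/* teardown (remove / sysfs disable) */
		if (cpu_latency_qos_request_active(req))
			cpu_latency_qos_remove_request(req);
	}
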
index 776bca4f70c8829c1962b3e71639367fadb30dcf..b8a8801322e2d928343d8669f0dd90e7f64b29cd 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/of_platform.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
-#include <linux/pm_qos.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 #include <linux/soc/mediatek/mtk_sip_svc.h>
@@ -626,21 +625,9 @@ static void ufs_mtk_init_host_caps(struct ufs_hba *hba)
        dev_info(hba->dev, "caps: 0x%x", host->caps);
 }
 
-static void ufs_mtk_boost_pm_qos(struct ufs_hba *hba, bool boost)
-{
-       struct ufs_mtk_host *host = ufshcd_get_variant(hba);
-
-       if (!host || !host->pm_qos_init)
-               return;
-
-       cpu_latency_qos_update_request(&host->pm_qos_req,
-                                      boost ? 0 : PM_QOS_DEFAULT_VALUE);
-}
-
 static void ufs_mtk_scale_perf(struct ufs_hba *hba, bool scale_up)
 {
        ufs_mtk_boost_crypt(hba, scale_up);
-       ufs_mtk_boost_pm_qos(hba, scale_up);
 }
 
 static void ufs_mtk_pwr_ctrl(struct ufs_hba *hba, bool on)
@@ -660,6 +647,45 @@ static void ufs_mtk_pwr_ctrl(struct ufs_hba *hba, bool on)
        }
 }
 
+static void ufs_mtk_mcq_disable_irq(struct ufs_hba *hba)
+{
+       struct ufs_mtk_host *host = ufshcd_get_variant(hba);
+       u32 irq, i;
+
+       if (!is_mcq_enabled(hba))
+               return;
+
+       if (host->mcq_nr_intr == 0)
+               return;
+
+       for (i = 0; i < host->mcq_nr_intr; i++) {
+               irq = host->mcq_intr_info[i].irq;
+               disable_irq(irq);
+       }
+       host->is_mcq_intr_enabled = false;
+}
+
+static void ufs_mtk_mcq_enable_irq(struct ufs_hba *hba)
+{
+       struct ufs_mtk_host *host = ufshcd_get_variant(hba);
+       u32 irq, i;
+
+       if (!is_mcq_enabled(hba))
+               return;
+
+       if (host->mcq_nr_intr == 0)
+               return;
+
+       if (host->is_mcq_intr_enabled)
+               return;
+
+       for (i = 0; i < host->mcq_nr_intr; i++) {
+               irq = host->mcq_intr_info[i].irq;
+               enable_irq(irq);
+       }
+       host->is_mcq_intr_enabled = true;
+}
+
 /**
  * ufs_mtk_setup_clocks - enables/disable clocks
  * @hba: host controller instance
@@ -703,8 +729,10 @@ static int ufs_mtk_setup_clocks(struct ufs_hba *hba, bool on,
 
                if (clk_pwr_off)
                        ufs_mtk_pwr_ctrl(hba, false);
+               ufs_mtk_mcq_disable_irq(hba);
        } else if (on && status == POST_CHANGE) {
                ufs_mtk_pwr_ctrl(hba, true);
+               ufs_mtk_mcq_enable_irq(hba);
        }
 
        return ret;
@@ -893,6 +921,7 @@ static int ufs_mtk_init(struct ufs_hba *hba)
        const struct of_device_id *id;
        struct device *dev = hba->dev;
        struct ufs_mtk_host *host;
+       struct Scsi_Host *shost = hba->host;
        int err = 0;
 
        host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL);
@@ -937,6 +966,9 @@ static int ufs_mtk_init(struct ufs_hba *hba)
        /* Enable clk scaling */
        hba->caps |= UFSHCD_CAP_CLK_SCALING;
 
+       /* Override the default runtime PM autosuspend delay */
+       shost->rpm_autosuspend_delay = MTK_RPM_AUTOSUSPEND_DELAY_MS;
+
        hba->quirks |= UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL;
        hba->quirks |= UFSHCD_QUIRK_MCQ_BROKEN_INTR;
        hba->quirks |= UFSHCD_QUIRK_MCQ_BROKEN_RTC;
@@ -959,10 +991,6 @@ static int ufs_mtk_init(struct ufs_hba *hba)
 
        host->ip_ver = ufshcd_readl(hba, REG_UFS_MTK_IP_VER);
 
-       /* Initialize pm-qos request */
-       cpu_latency_qos_add_request(&host->pm_qos_req, PM_QOS_DEFAULT_VALUE);
-       host->pm_qos_init = true;
-
        goto out;
 
 out_variant_clear:
@@ -1206,25 +1234,29 @@ static int ufs_mtk_link_set_hpm(struct ufs_hba *hba)
                return err;
 
        err = ufshcd_uic_hibern8_exit(hba);
-       if (!err)
-               ufshcd_set_link_active(hba);
-       else
+       if (err)
                return err;
 
-       if (!hba->mcq_enabled) {
-               err = ufshcd_make_hba_operational(hba);
-       } else {
-               ufs_mtk_config_mcq(hba, false);
-               ufshcd_mcq_make_queues_operational(hba);
-               ufshcd_mcq_config_mac(hba, hba->nutrs);
-               /* Enable MCQ mode */
-               ufshcd_writel(hba, ufshcd_readl(hba, REG_UFS_MEM_CFG) | 0x1,
-                             REG_UFS_MEM_CFG);
+       /* Check the link state to make sure the H8 exit succeeded */
+       ufs_mtk_wait_idle_state(hba, 5);
+       err = ufs_mtk_wait_link_state(hba, VS_LINK_UP, 100);
+       if (err) {
+               dev_warn(hba->dev, "exit h8 state fail, err=%d\n", err);
+               return err;
        }
+       ufshcd_set_link_active(hba);
 
+       err = ufshcd_make_hba_operational(hba);
        if (err)
                return err;
 
+       if (is_mcq_enabled(hba)) {
+               ufs_mtk_config_mcq(hba, false);
+               ufshcd_mcq_make_queues_operational(hba);
+               ufshcd_mcq_config_mac(hba, hba->nutrs);
+               ufshcd_mcq_enable(hba);
+       }
+
        return 0;
 }
 
index f76e80d91729c80d0837c5608a8041acea1696a8..fb53882f42ca888c16192e9f91374ae57bbd0069 100644 (file)
@@ -7,7 +7,6 @@
 #define _UFS_MEDIATEK_H
 
 #include <linux/bitops.h>
-#include <linux/pm_qos.h>
 #include <linux/soc/mediatek/mtk_sip_svc.h>
 
 /*
@@ -167,7 +166,6 @@ struct ufs_mtk_mcq_intr_info {
 
 struct ufs_mtk_host {
        struct phy *mphy;
-       struct pm_qos_request pm_qos_req;
        struct regulator *reg_va09;
        struct reset_control *hci_reset;
        struct reset_control *unipro_reset;
@@ -178,7 +176,6 @@ struct ufs_mtk_host {
        struct ufs_mtk_hw_ver hw_ver;
        enum ufs_mtk_host_caps caps;
        bool mphy_powered_on;
-       bool pm_qos_init;
        bool unipro_lpm;
        bool ref_clk_enabled;
        u16 ref_clk_ungating_wait_us;
@@ -186,10 +183,14 @@ struct ufs_mtk_host {
        u32 ip_ver;
 
        bool mcq_set_intr;
+       bool is_mcq_intr_enabled;
        int mcq_nr_intr;
        struct ufs_mtk_mcq_intr_info mcq_intr_info[UFSHCD_MAX_Q_NR];
 };
 
+/* MTK delay of autosuspend: 500 ms */
+#define MTK_RPM_AUTOSUSPEND_DELAY_MS 500
+
 /*
  * Multi-VCC by Numbering
  */
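
Putting the two rpm_autosuspend_delay hunks together: a vendor ->init() runs before ufshcd_init() applies the generic fallback, so the override works as sketched below (illustrative only; both symbols appear in this diff):

	/* in the vendor hba_variant_ops ->init() */
	hba->host->rpm_autosuspend_delay = MTK_RPM_AUTOSUSPEND_DELAY_MS; /* 500 ms */

	/* later, in ufshcd_init(): untouched hosts still get the default */
	if (host->rpm_autosuspend_delay == 0)
		host->rpm_autosuspend_delay = RPM_AUTOSUSPEND_DELAY_MS;
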
index 8fde5204e88b06d1d83a055316064d55126c5de5..8d68bd21ae7332409198b06d7c99d2f7e6faaafe 100644 (file)
@@ -738,8 +738,17 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba,
                 * the second init can program the optimal PHY settings. This allows one to start
                 * the first init with either the minimum or the maximum support gear.
                 */
-               if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-                       host->phy_gear = dev_req_params->gear_tx;
+               if (hba->ufshcd_state == UFSHCD_STATE_RESET) {
+                       /*
+                        * Skip REINIT if the negotiated gear matches with the
+                        * initial phy_gear. Otherwise, update the phy_gear to
+                        * program the optimal gear setting during REINIT.
+                        */
+                       if (host->phy_gear == dev_req_params->gear_tx)
+                               hba->quirks &= ~UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH;
+                       else
+                               host->phy_gear = dev_req_params->gear_tx;
+               }
 
                /* enable the device ref clock before changing to HS mode */
                if (!ufshcd_is_hs_mode(&hba->pwr_info) &&
@@ -843,15 +852,20 @@ static void ufs_qcom_set_phy_gear(struct ufs_qcom_host *host)
        struct ufs_host_params *host_params = &host->host_params;
        u32 val, dev_major;
 
+       /*
+        * Default to powering up the PHY to the max gear possible, which is
+        * backwards compatible with lower gears but not optimal from
+        * a power usage point of view. After device negotiation, if the
+        * gear is lower a reinit will be performed to program the PHY
+        * to the ideal gear for this combo of controller and device.
+        */
        host->phy_gear = host_params->hs_tx_gear;
 
        if (host->hw_ver.major < 0x4) {
                /*
-                * For controllers whose major HW version is < 4, power up the
-                * PHY using minimum supported gear (UFS_HS_G2). Switching to
-                * max gear will be performed during reinit if supported.
-                * For newer controllers, whose major HW version is >= 4, power
-                * up the PHY using max supported gear.
+                * These controllers only have one PHY init sequence,
+                * let's power up the PHY using that (the minimum supported
+                * gear, UFS_HS_G2).
                 */
                host->phy_gear = UFS_HS_G2;
        } else if (host->hw_ver.major >= 0x5) {
index d62fbfff20b827936fae17a35321c63ba9183cb1..82b2afa9b7e3134b37107fb3ba31b0c74f4e5683 100644 (file)
@@ -141,13 +141,14 @@ static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev,
        irq = &vdev->mc_irqs[index];
 
        if (flags & VFIO_IRQ_SET_DATA_NONE) {
-               vfio_fsl_mc_irq_handler(hwirq, irq);
+               if (irq->trigger)
+                       eventfd_signal(irq->trigger);
 
        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                u8 trigger = *(u8 *)data;
 
-               if (trigger)
-                       vfio_fsl_mc_irq_handler(hwirq, irq);
+               if (trigger && irq->trigger)
+                       eventfd_signal(irq->trigger);
        }
 
        return 0;
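
The change above stops faking a hardware interrupt through vfio_fsl_mc_irq_handler() for user-triggered injection and instead signals the armed eventfd directly. A minimal sketch of the pattern (since v6.8 eventfd_signal() takes only the context and always adds 1):

	static void example_inject(struct eventfd_ctx *trigger)
	{
		if (trigger)	/* only if user space armed an eventfd */
			eventfd_signal(trigger);
	}
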
index 7825d83a55f8c2777f64e6afae987e1bb6b7a7cf..b98322966b3ed41d999dc30b7acc11e07cff0aa9 100644 (file)
@@ -40,7 +40,7 @@ static int mdev_match(struct device *dev, struct device_driver *drv)
        return 0;
 }
 
-struct bus_type mdev_bus_type = {
+const struct bus_type mdev_bus_type = {
        .name           = "mdev",
        .probe          = mdev_probe,
        .remove         = mdev_remove,
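
Marking the bus const relies on the driver core having been constified; a hedged sketch of why registration is unchanged (bus_register() accepting a const pointer is the assumption here):

	/* const objects can be registered because the core never writes them */
	static int __init mdev_example_init(void)
	{
		return bus_register(&mdev_bus_type);
	}
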
index af457b27f6074621bb182c80b7657d2b696e6f61..63a1316b08b72aef8b790a0901a0db5ca3f1cd75 100644 (file)
@@ -13,7 +13,7 @@
 int  mdev_bus_register(void);
 void mdev_bus_unregister(void);
 
-extern struct bus_type mdev_bus_type;
+extern const struct bus_type mdev_bus_type;
 extern const struct attribute_group *mdev_device_groups[];
 
 #define to_mdev_type_attr(_attr)       \
index 18c397df566d8dbf4716a583bf45f33faa6c1f0a..15821a2d77d25a243b2ee6cafd3fbbcbb6eb9c25 100644 (file)
@@ -67,4 +67,6 @@ source "drivers/vfio/pci/pds/Kconfig"
 
 source "drivers/vfio/pci/virtio/Kconfig"
 
+source "drivers/vfio/pci/nvgrace-gpu/Kconfig"
+
 endmenu
index 046139a4eca5b58b733bfc62174ac54ec745307d..ce7a61f1d912bf245eaa4acaeddd2da24bcf5687 100644 (file)
@@ -15,3 +15,5 @@ obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/
 obj-$(CONFIG_PDS_VFIO_PCI) += pds/
 
 obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio/
+
+obj-$(CONFIG_NVGRACE_GPU_VFIO_PCI) += nvgrace-gpu/
index 4d27465c8f1a893352bae1498303a5da7e4ebb6d..9a3e97108ace812543cd87eff0b7bca6f190d9dc 100644 (file)
@@ -630,25 +630,11 @@ static void hisi_acc_vf_disable_fds(struct hisi_acc_vf_core_device *hisi_acc_vde
        }
 }
 
-/*
- * This function is called in all state_mutex unlock cases to
- * handle a 'deferred_reset' if exists.
- */
-static void
-hisi_acc_vf_state_mutex_unlock(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+static void hisi_acc_vf_reset(struct hisi_acc_vf_core_device *hisi_acc_vdev)
 {
-again:
-       spin_lock(&hisi_acc_vdev->reset_lock);
-       if (hisi_acc_vdev->deferred_reset) {
-               hisi_acc_vdev->deferred_reset = false;
-               spin_unlock(&hisi_acc_vdev->reset_lock);
-               hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
-               hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
-               hisi_acc_vf_disable_fds(hisi_acc_vdev);
-               goto again;
-       }
-       mutex_unlock(&hisi_acc_vdev->state_mutex);
-       spin_unlock(&hisi_acc_vdev->reset_lock);
+       hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
+       hisi_acc_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
+       hisi_acc_vf_disable_fds(hisi_acc_vdev);
 }
 
 static void hisi_acc_vf_start_device(struct hisi_acc_vf_core_device *hisi_acc_vdev)
@@ -804,8 +790,10 @@ static long hisi_acc_vf_precopy_ioctl(struct file *filp,
 
        info.dirty_bytes = 0;
        info.initial_bytes = migf->total_length - *pos;
+       mutex_unlock(&migf->lock);
+       mutex_unlock(&hisi_acc_vdev->state_mutex);
 
-       ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+       return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
 out:
        mutex_unlock(&migf->lock);
        mutex_unlock(&hisi_acc_vdev->state_mutex);
@@ -1071,7 +1059,7 @@ hisi_acc_vfio_pci_set_device_state(struct vfio_device *vdev,
                        break;
                }
        }
-       hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+       mutex_unlock(&hisi_acc_vdev->state_mutex);
        return res;
 }
 
@@ -1092,7 +1080,7 @@ hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
 
        mutex_lock(&hisi_acc_vdev->state_mutex);
        *curr_state = hisi_acc_vdev->mig_state;
-       hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+       mutex_unlock(&hisi_acc_vdev->state_mutex);
        return 0;
 }
 
@@ -1104,21 +1092,9 @@ static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
                                VFIO_MIGRATION_STOP_COPY)
                return;
 
-       /*
-        * As the higher VFIO layers are holding locks across reset and using
-        * those same locks with the mm_lock we need to prevent ABBA deadlock
-        * with the state_mutex and mm_lock.
-        * In case the state_mutex was taken already we defer the cleanup work
-        * to the unlock flow of the other running context.
-        */
-       spin_lock(&hisi_acc_vdev->reset_lock);
-       hisi_acc_vdev->deferred_reset = true;
-       if (!mutex_trylock(&hisi_acc_vdev->state_mutex)) {
-               spin_unlock(&hisi_acc_vdev->reset_lock);
-               return;
-       }
-       spin_unlock(&hisi_acc_vdev->reset_lock);
-       hisi_acc_vf_state_mutex_unlock(hisi_acc_vdev);
+       mutex_lock(&hisi_acc_vdev->state_mutex);
+       hisi_acc_vf_reset(hisi_acc_vdev);
+       mutex_unlock(&hisi_acc_vdev->state_mutex);
 }
 
 static int hisi_acc_vf_qm_init(struct hisi_acc_vf_core_device *hisi_acc_vdev)
index dcabfeec6ca19d4334e746536c5e15a3572972d5..5bab46602fad261ee602e4524245f6551fa2af15 100644 (file)
@@ -98,8 +98,8 @@ struct hisi_acc_vf_migration_file {
 
 struct hisi_acc_vf_core_device {
        struct vfio_pci_core_device core_device;
-       u8 match_done:1;
-       u8 deferred_reset:1;
+       u8 match_done;
+
        /* For migration state */
        struct mutex state_mutex;
        enum vfio_device_mig_state mig_state;
@@ -109,8 +109,6 @@ struct hisi_acc_vf_core_device {
        struct hisi_qm vf_qm;
        u32 vf_qm_state;
        int vf_id;
-       /* For reset handler */
-       spinlock_t reset_lock;
        struct hisi_acc_vf_migration_file *resuming_migf;
        struct hisi_acc_vf_migration_file *saving_migf;
 };
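
A note on the simplification above: the deferred_reset machinery existed to dodge an ABBA deadlock between state_mutex and mm_lock (taken via copy_to_user() while the mutex was held). With the precopy ioctl now dropping both locks before copy_to_user(), that ordering concern appears to be gone (an inference from the hunks above, not something the diff states), so the reset_done handler can take the mutex directly:

	mutex_lock(&hisi_acc_vdev->state_mutex);
	hisi_acc_vf_reset(hisi_acc_vdev);
	mutex_unlock(&hisi_acc_vdev->state_mutex);
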
index efd1d252cdc95d6fef8b7fd25c4682205ac80a71..41a4b0cf429756b6e72f476ba794ad6b3baaf65e 100644 (file)
@@ -108,8 +108,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
                ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
                if (ret)
                        return ret;
-               if (mvdev->saving_migf->state ==
-                   MLX5_MIGF_STATE_PRE_COPY_ERROR) {
+               /* Upon cleanup, ignore previous pre_copy error state */
+               if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR &&
+                   !(query_flags & MLX5VF_QUERY_CLEANUP)) {
                        /*
                         * In case we had a PRE_COPY error, only query full
                         * image for final image
@@ -121,6 +122,11 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
                        }
                        query_flags &= ~MLX5VF_QUERY_INC;
                }
+               /* Block incremental query which is state-dependent */
+               if (mvdev->saving_migf->state == MLX5_MIGF_STATE_ERROR) {
+                       complete(&mvdev->saving_migf->save_comp);
+                       return -ENODEV;
+               }
        }
 
        MLX5_SET(query_vhca_migration_state_in, in, opcode,
@@ -149,6 +155,12 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
        return 0;
 }
 
+static void set_tracker_change_event(struct mlx5vf_pci_core_device *mvdev)
+{
+       mvdev->tracker.object_changed = true;
+       complete(&mvdev->tracker_comp);
+}
+
 static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev)
 {
        /* Mark the tracker under an error and wake it up if it's running */
@@ -189,7 +201,7 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
        /* Must be done outside the lock to let it progress */
        set_tracker_error(mvdev);
        mutex_lock(&mvdev->state_mutex);
-       mlx5vf_disable_fds(mvdev);
+       mlx5vf_disable_fds(mvdev, NULL);
        _mlx5vf_free_page_tracker_resources(mvdev);
        mlx5vf_state_mutex_unlock(mvdev);
 }
@@ -221,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
        if (!MLX5_CAP_GEN(mvdev->mdev, migration))
                goto end;
 
+       if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
+             MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
+               goto end;
+
        mvdev->vf_id = pci_iov_vf_id(pdev);
        if (mvdev->vf_id < 0)
                goto end;
@@ -250,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
        mvdev->migrate_cap = 1;
        mvdev->core_device.vdev.migration_flags =
                VFIO_MIGRATION_STOP_COPY |
-               VFIO_MIGRATION_P2P;
+               VFIO_MIGRATION_P2P |
+               VFIO_MIGRATION_PRE_COPY;
+
        mvdev->core_device.vdev.mig_ops = mig_ops;
        init_completion(&mvdev->tracker_comp);
        if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
                mvdev->core_device.vdev.log_ops = log_ops;
 
-       if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
-           MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))
-               mvdev->core_device.vdev.migration_flags |=
-                       VFIO_MIGRATION_PRE_COPY;
-
        if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
                mvdev->chunk_mode = 1;
 
@@ -402,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
        kfree(buf);
 }
 
+static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
+                                     unsigned int npages)
+{
+       unsigned int to_alloc = npages;
+       struct page **page_list;
+       unsigned long filled;
+       unsigned int to_fill;
+       int ret;
+
+       to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
+       page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
+       if (!page_list)
+               return -ENOMEM;
+
+       do {
+               filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
+                                               page_list);
+               if (!filled) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+               to_alloc -= filled;
+               ret = sg_alloc_append_table_from_pages(
+                       &buf->table, page_list, filled, 0,
+                       filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
+                       GFP_KERNEL_ACCOUNT);
+
+               if (ret)
+                       goto err;
+               buf->allocated_length += filled * PAGE_SIZE;
+               /* clean input for another bulk allocation */
+               memset(page_list, 0, filled * sizeof(*page_list));
+               to_fill = min_t(unsigned int, to_alloc,
+                               PAGE_SIZE / sizeof(*page_list));
+       } while (to_alloc > 0);
+
+       kvfree(page_list);
+       return 0;
+
+err:
+       kvfree(page_list);
+       return ret;
+}
+
 struct mlx5_vhca_data_buffer *
 mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
                         size_t length,
@@ -608,8 +665,13 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 
 err:
        /* The error flow can't run from an interrupt context */
-       if (status == -EREMOTEIO)
+       if (status == -EREMOTEIO) {
                status = MLX5_GET(save_vhca_state_out, async_data->out, status);
+               /* Failed in FW, print cmd out failure details */
+               mlx5_cmd_out_err(migf->mvdev->mdev, MLX5_CMD_OP_SAVE_VHCA_STATE, 0,
+                                async_data->out);
+       }
+
        async_data->status = status;
        queue_work(migf->mvdev->cb_wq, &async_data->work);
 }
@@ -623,6 +685,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
        u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
        struct mlx5_vhca_data_buffer *header_buf = NULL;
        struct mlx5vf_async_data *async_data;
+       bool pre_copy_cleanup = false;
        int err;
 
        lockdep_assert_held(&mvdev->state_mutex);
@@ -633,6 +696,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
        if (err)
                return err;
 
+       if ((migf->state == MLX5_MIGF_STATE_PRE_COPY ||
+            migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc)
+               pre_copy_cleanup = true;
+
        if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)
                /*
                 * In case we had a PRE_COPY error, SAVE is triggered only for
@@ -651,29 +718,27 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 
        async_data = &migf->async_data;
        async_data->buf = buf;
-       async_data->stop_copy_chunk = !track;
+       async_data->stop_copy_chunk = (!track && !pre_copy_cleanup);
        async_data->out = kvzalloc(out_size, GFP_KERNEL);
        if (!async_data->out) {
                err = -ENOMEM;
                goto err_out;
        }
 
-       if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-               if (async_data->stop_copy_chunk) {
-                       u8 header_idx = buf->stop_copy_chunk_num ?
-                               buf->stop_copy_chunk_num - 1 : 0;
+       if (async_data->stop_copy_chunk) {
+               u8 header_idx = buf->stop_copy_chunk_num ?
+                       buf->stop_copy_chunk_num - 1 : 0;
 
-                       header_buf = migf->buf_header[header_idx];
-                       migf->buf_header[header_idx] = NULL;
-               }
+               header_buf = migf->buf_header[header_idx];
+               migf->buf_header[header_idx] = NULL;
+       }
 
-               if (!header_buf) {
-                       header_buf = mlx5vf_get_data_buffer(migf,
-                               sizeof(struct mlx5_vf_migration_header), DMA_NONE);
-                       if (IS_ERR(header_buf)) {
-                               err = PTR_ERR(header_buf);
-                               goto err_free;
-                       }
+       if (!header_buf) {
+               header_buf = mlx5vf_get_data_buffer(migf,
+                       sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+               if (IS_ERR(header_buf)) {
+                       err = PTR_ERR(header_buf);
+                       goto err_free;
                }
        }
 
@@ -900,6 +965,29 @@ static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev,
        return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 }
 
+static int mlx5vf_cmd_query_tracker(struct mlx5_core_dev *mdev,
+                                   struct mlx5_vhca_page_tracker *tracker)
+{
+       u32 out[MLX5_ST_SZ_DW(query_page_track_obj_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+       void *obj_context;
+       void *cmd_hdr;
+       int err;
+
+       cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr);
+       MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
+       MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
+       MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker->id);
+
+       err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+       if (err)
+               return err;
+
+       obj_context = MLX5_ADDR_OF(query_page_track_obj_out, out, obj_context);
+       tracker->status = MLX5_GET(page_track, obj_context, state);
+       return 0;
+}
+
 static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
                             struct mlx5_vhca_cq_buf *buf, int nent,
                             int cqe_size)
@@ -957,9 +1045,11 @@ static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type,
                mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb);
        struct mlx5vf_pci_core_device *mvdev = container_of(
                tracker, struct mlx5vf_pci_core_device, tracker);
+       struct mlx5_eqe_obj_change *object;
        struct mlx5_eqe *eqe = data;
        u8 event_type = (u8)type;
        u8 queue_type;
+       u32 obj_id;
        int qp_num;
 
        switch (event_type) {
@@ -975,6 +1065,12 @@ static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type,
                        break;
                set_tracker_error(mvdev);
                break;
+       case MLX5_EVENT_TYPE_OBJECT_CHANGE:
+               object = &eqe->data.obj_change;
+               obj_id = be32_to_cpu(object->obj_id);
+               if (obj_id == tracker->id)
+                       set_tracker_change_event(mvdev);
+               break;
        default:
                break;
        }
@@ -1634,6 +1730,11 @@ int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
                goto end;
        }
 
+       if (tracker->is_err) {
+               err = -EIO;
+               goto end;
+       }
+
        mdev = mvdev->mdev;
        err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length,
                                        MLX5_PAGE_TRACK_STATE_REPORTING);
@@ -1652,6 +1753,12 @@ int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
                                                      dirty, &tracker->status);
                        if (poll_err == CQ_EMPTY) {
                                wait_for_completion(&mvdev->tracker_comp);
+                               if (tracker->object_changed) {
+                                       tracker->object_changed = false;
+                                       err = mlx5vf_cmd_query_tracker(mdev, tracker);
+                                       if (err)
+                                               goto end;
+                               }
                                continue;
                        }
                }
index f2c7227fa683a41735f41adc8830594d209878ba..df421dc6de04852c07cbabbf2e8bb78d3669aaa1 100644 (file)
@@ -13,9 +13,6 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
 
-#define MLX5VF_PRE_COPY_SUPP(mvdev) \
-       ((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY)
-
 enum mlx5_vf_migf_state {
        MLX5_MIGF_STATE_ERROR = 1,
        MLX5_MIGF_STATE_PRE_COPY_ERROR,
@@ -25,7 +22,6 @@ enum mlx5_vf_migf_state {
 };
 
 enum mlx5_vf_load_state {
-       MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
        MLX5_VF_LOAD_STATE_READ_HEADER,
        MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
        MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
@@ -162,6 +158,7 @@ struct mlx5_vhca_page_tracker {
        u32 id;
        u32 pdn;
        u8 is_err:1;
+       u8 object_changed:1;
        struct mlx5_uars_page *uar;
        struct mlx5_vhca_cq cq;
        struct mlx5_vhca_qp *host_qp;
@@ -196,6 +193,7 @@ struct mlx5vf_pci_core_device {
 enum {
        MLX5VF_QUERY_INC = (1UL << 0),
        MLX5VF_QUERY_FINAL = (1UL << 1),
+       MLX5VF_QUERY_CLEANUP = (1UL << 2),
 };
 
 int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
@@ -226,12 +224,11 @@ struct mlx5_vhca_data_buffer *
 mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
                       size_t length, enum dma_data_direction dma_dir);
 void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
-int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
-                              unsigned int npages);
 struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
                                       unsigned long offset);
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
-void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
+void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
+                       enum mlx5_vf_migf_state *last_save_state);
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
 void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
                                   u8 chunk_num, size_t next_required_umem_size);
index fe09a8c8af95e8dedac6e08a4fba74379d1c4b5d..61d9b0f9146d1b23c38194bc95aac4c533ed2fc3 100644 (file)
@@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
        return NULL;
 }
 
-int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
-                              unsigned int npages)
-{
-       unsigned int to_alloc = npages;
-       struct page **page_list;
-       unsigned long filled;
-       unsigned int to_fill;
-       int ret;
-
-       to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
-       page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
-       if (!page_list)
-               return -ENOMEM;
-
-       do {
-               filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
-                                               page_list);
-               if (!filled) {
-                       ret = -ENOMEM;
-                       goto err;
-               }
-               to_alloc -= filled;
-               ret = sg_alloc_append_table_from_pages(
-                       &buf->table, page_list, filled, 0,
-                       filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
-                       GFP_KERNEL_ACCOUNT);
-
-               if (ret)
-                       goto err;
-               buf->allocated_length += filled * PAGE_SIZE;
-               /* clean input for another bulk allocation */
-               memset(page_list, 0, filled * sizeof(*page_list));
-               to_fill = min_t(unsigned int, to_alloc,
-                               PAGE_SIZE / sizeof(*page_list));
-       } while (to_alloc > 0);
-
-       kvfree(page_list);
-       return 0;
-
-err:
-       kvfree(page_list);
-       return ret;
-}
-
 static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
 {
        mutex_lock(&migf->lock);
@@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
        return 0;
 }
 
-static int
-mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
-                                  loff_t requested_length,
-                                  const char __user **buf, size_t *len,
-                                  loff_t *pos, ssize_t *done)
-{
-       int ret;
-
-       if (requested_length > MAX_LOAD_SIZE)
-               return -ENOMEM;
-
-       if (vhca_buf->allocated_length < requested_length) {
-               ret = mlx5vf_add_migration_pages(
-                       vhca_buf,
-                       DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
-                                    PAGE_SIZE));
-               if (ret)
-                       return ret;
-       }
-
-       while (*len) {
-               ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
-                                                   done);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
 static ssize_t
 mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
                         struct mlx5_vhca_data_buffer *vhca_buf,
@@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
                        migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
                        break;
                }
-               case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
-                       ret = mlx5vf_resume_read_image_no_header(vhca_buf,
-                                               requested_length,
-                                               &buf, &len, pos, &done);
-                       if (ret)
-                               goto out_unlock;
-                       break;
                case MLX5_VF_LOAD_STATE_READ_IMAGE:
                        ret = mlx5vf_resume_read_image(migf, vhca_buf,
                                                migf->record_size,
@@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
        }
 
        migf->buf[0] = buf;
-       if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-               buf = mlx5vf_alloc_data_buffer(migf,
-                       sizeof(struct mlx5_vf_migration_header), DMA_NONE);
-               if (IS_ERR(buf)) {
-                       ret = PTR_ERR(buf);
-                       goto out_buf;
-               }
-
-               migf->buf_header[0] = buf;
-               migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
-       } else {
-               /* Initial state will be to read the image */
-               migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
+       buf = mlx5vf_alloc_data_buffer(migf,
+               sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+       if (IS_ERR(buf)) {
+               ret = PTR_ERR(buf);
+               goto out_buf;
        }
 
+       migf->buf_header[0] = buf;
+       migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
+
        stream_open(migf->filp->f_inode, migf->filp);
        mutex_init(&migf->lock);
        INIT_LIST_HEAD(&migf->buf_list);
@@ -1146,7 +1060,8 @@ end:
        return ERR_PTR(ret);
 }
 
-void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
+void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
+                       enum mlx5_vf_migf_state *last_save_state)
 {
        if (mvdev->resuming_migf) {
                mlx5vf_disable_fd(mvdev->resuming_migf);
@@ -1157,6 +1072,8 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
        if (mvdev->saving_migf) {
                mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
                cancel_work_sync(&mvdev->saving_migf->async_data.work);
+               if (last_save_state)
+                       *last_save_state = mvdev->saving_migf->state;
                mlx5vf_disable_fd(mvdev->saving_migf);
                wake_up_interruptible(&mvdev->saving_migf->poll_wait);
                mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf);
@@ -1217,12 +1134,34 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
                return migf->filp;
        }
 
-       if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
-           (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
+       if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) {
+               mlx5vf_disable_fds(mvdev, NULL);
+               return NULL;
+       }
+
+       if ((cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
            (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
             new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
-               mlx5vf_disable_fds(mvdev);
-               return NULL;
+               struct mlx5_vf_migration_file *migf = mvdev->saving_migf;
+               struct mlx5_vhca_data_buffer *buf;
+               enum mlx5_vf_migf_state state;
+               size_t size;
+
+               ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &size, NULL,
+                                       MLX5VF_QUERY_INC | MLX5VF_QUERY_CLEANUP);
+               if (ret)
+                       return ERR_PTR(ret);
+               buf = mlx5vf_get_data_buffer(migf, size, DMA_FROM_DEVICE);
+               if (IS_ERR(buf))
+                       return ERR_CAST(buf);
+               /* pre_copy cleanup */
+               ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, false);
+               if (ret) {
+                       mlx5vf_put_data_buffer(buf);
+                       return ERR_PTR(ret);
+               }
+               mlx5vf_disable_fds(mvdev, &state);
+               return (state != MLX5_MIGF_STATE_ERROR) ? NULL : ERR_PTR(-EIO);
        }
 
        if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
@@ -1237,14 +1176,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
        }
 
        if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
-               if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
-                       ret = mlx5vf_cmd_load_vhca_state(mvdev,
-                                                        mvdev->resuming_migf,
-                                                        mvdev->resuming_migf->buf[0]);
-                       if (ret)
-                               return ERR_PTR(ret);
-               }
-               mlx5vf_disable_fds(mvdev);
+               mlx5vf_disable_fds(mvdev, NULL);
                return NULL;
        }
 
@@ -1289,7 +1221,7 @@ again:
                mvdev->deferred_reset = false;
                spin_unlock(&mvdev->reset_lock);
                mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
-               mlx5vf_disable_fds(mvdev);
+               mlx5vf_disable_fds(mvdev, NULL);
                goto again;
        }
        mutex_unlock(&mvdev->state_mutex);
diff --git a/drivers/vfio/pci/nvgrace-gpu/Kconfig b/drivers/vfio/pci/nvgrace-gpu/Kconfig
new file mode 100644 (file)
index 0000000..a7f624b
--- /dev/null
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config NVGRACE_GPU_VFIO_PCI
+       tristate "VFIO support for the GPU in the NVIDIA Grace Hopper Superchip"
+       depends on ARM64 || (COMPILE_TEST && 64BIT)
+       select VFIO_PCI_CORE
+       help
+         VFIO support for the GPU in the NVIDIA Grace Hopper Superchip is
+         required to assign the GPU device to userspace using KVM/qemu/etc.
+
+         If you don't know what to do here, say N.
diff --git a/drivers/vfio/pci/nvgrace-gpu/Makefile b/drivers/vfio/pci/nvgrace-gpu/Makefile
new file mode 100644 (file)
index 0000000..3ca8c18
--- /dev/null
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_NVGRACE_GPU_VFIO_PCI) += nvgrace-gpu-vfio-pci.o
+nvgrace-gpu-vfio-pci-y := main.o
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
new file mode 100644 (file)
index 0000000..a7fd018
--- /dev/null
@@ -0,0 +1,888 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/sizes.h>
+#include <linux/vfio_pci_core.h>
+
+/*
+ * The device memory usable by the workloads running in the VM is cached
+ * and exposed as a 64b device BAR (comprising the BAR4 and BAR5 regions)
+ * to the VM, represented as usemem.
+ * Moreover, the VM GPU device driver needs a non-cacheable region to
+ * support the MIG feature. This region is also exposed as a 64b BAR
+ * (comprising the BAR2 and BAR3 regions) and represented as resmem.
+ */
+#define RESMEM_REGION_INDEX VFIO_PCI_BAR2_REGION_INDEX
+#define USEMEM_REGION_INDEX VFIO_PCI_BAR4_REGION_INDEX
+
+/* Memory size expected to be non-cached and reserved by the VM driver */
+#define RESMEM_SIZE SZ_1G
+
+/* A hardwired and constant ABI value between the GPU FW and VFIO driver. */
+#define MEMBLK_SIZE SZ_512M
+
+/*
+ * The state of the two device memory regions - resmem and usemem - is
+ * saved as struct mem_region.
+ */
+struct mem_region {
+       phys_addr_t memphys;    /* Base physical address of the region */
+       size_t memlength;       /* Region size */
+       size_t bar_size;        /* Reported region BAR size */
+       __le64 bar_val;         /* Emulated BAR offset registers */
+       union {
+               void *memaddr;
+               void __iomem *ioaddr;
+       };                      /* Base virtual address of the region */
+};
+
+struct nvgrace_gpu_pci_core_device {
+       struct vfio_pci_core_device core_device;
+       /* Cached and usable memory for the VM. */
+       struct mem_region usemem;
+       /* Non cached memory carved out from the end of device memory */
+       struct mem_region resmem;
+       /* Lock to control device memory kernel mapping */
+       struct mutex remap_lock;
+};
+
+static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
+{
+       struct nvgrace_gpu_pci_core_device *nvdev =
+               container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
+                            core_device.vdev);
+
+       nvdev->resmem.bar_val = 0;
+       nvdev->usemem.bar_val = 0;
+}
+
+/* Choose the structure corresponding to the fake BAR with a given index. */
+static struct mem_region *
+nvgrace_gpu_memregion(int index,
+                     struct nvgrace_gpu_pci_core_device *nvdev)
+{
+       if (index == USEMEM_REGION_INDEX)
+               return &nvdev->usemem;
+
+       if (index == RESMEM_REGION_INDEX)
+               return &nvdev->resmem;
+
+       return NULL;
+}
+
+static int nvgrace_gpu_open_device(struct vfio_device *core_vdev)
+{
+       struct vfio_pci_core_device *vdev =
+               container_of(core_vdev, struct vfio_pci_core_device, vdev);
+       struct nvgrace_gpu_pci_core_device *nvdev =
+               container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
+                            core_device.vdev);
+       int ret;
+
+       ret = vfio_pci_core_enable(vdev);
+       if (ret)
+               return ret;
+
+       if (nvdev->usemem.memlength) {
+               nvgrace_gpu_init_fake_bar_emu_regs(core_vdev);
+               mutex_init(&nvdev->remap_lock);
+       }
+
+       vfio_pci_core_finish_enable(vdev);
+
+       return 0;
+}
+
+static void nvgrace_gpu_close_device(struct vfio_device *core_vdev)
+{
+       struct nvgrace_gpu_pci_core_device *nvdev =
+               container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
+                            core_device.vdev);
+
+       /* Unmap the mapping to the device memory cached region */
+       if (nvdev->usemem.memaddr) {
+               memunmap(nvdev->usemem.memaddr);
+               nvdev->usemem.memaddr = NULL;
+       }
+
+       /* Unmap the mapping to the device memory non-cached region */
+       if (nvdev->resmem.ioaddr) {
+               iounmap(nvdev->resmem.ioaddr);
+               nvdev->resmem.ioaddr = NULL;
+       }
+
+       mutex_destroy(&nvdev->remap_lock);
+
+       vfio_pci_core_close_device(core_vdev);
+}
+
+static int nvgrace_gpu_mmap(struct vfio_device *core_vdev,
+                           struct vm_area_struct *vma)
+{
+       struct nvgrace_gpu_pci_core_device *nvdev =
+               container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
+                            core_device.vdev);
+       struct mem_region *memregion;
+       unsigned long start_pfn;
+       u64 req_len, pgoff, end;
+       unsigned int index;
+       int ret = 0;
+
+       index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+
+       memregion = nvgrace_gpu_memregion(index, nvdev);
+       if (!memregion)
+               return vfio_pci_core_mmap(core_vdev, vma);
+
+       /*
+        * Request to mmap the BAR. Map to the CPU accessible memory on the
+        * GPU using the memory information gathered from the system ACPI
+        * tables.
+        */
+       pgoff = vma->vm_pgoff &
+               ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+
+       if (check_sub_overflow(vma->vm_end, vma->vm_start, &req_len) ||
+           check_add_overflow(PHYS_PFN(memregion->memphys), pgoff, &start_pfn) ||
+           check_add_overflow(PFN_PHYS(pgoff), req_len, &end))
+               return -EOVERFLOW;
+
+       /*
+        * Check that the mapping request does not go beyond available device
+        * memory size
+        */
+       if (end > memregion->memlength)
+               return -EINVAL;
+
+       /*
+        * The carved out region of the device memory needs the NORMAL_NC
+        * property. Communicate as such to the hypervisor.
+        */
+       if (index == RESMEM_REGION_INDEX) {
+               /*
+                * The nvgrace-gpu module has no issues with uncontained
+                * failures on NORMAL_NC accesses. VM_ALLOW_ANY_UNCACHED is
+                * set to tell KVM to stage-2 map the range as NORMAL_NC.
+                * This opens up guest usage of NORMAL_NC for this mapping.
+                */
+               vm_flags_set(vma, VM_ALLOW_ANY_UNCACHED);
+
+               vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+       }
+
+       /*
+        * Perform a PFN map to the memory and back the device BAR by the
+        * GPU memory.
+        *
+        * The available GPU memory size may not be power-of-2 aligned. The
+        * remainder is only backed by vfio_device_ops read/write handlers.
+        *
+        * During device reset, the GPU is safely disconnected from the CPU
+        * and accesses to the BAR return immediately, preventing
+        * machine checks.
+        */
+       ret = remap_pfn_range(vma, vma->vm_start, start_pfn,
+                             req_len, vma->vm_page_prot);
+       if (ret)
+               return ret;
+
+       vma->vm_pgoff = start_pfn;
+
+       return 0;
+}
+
+static long
+nvgrace_gpu_ioctl_get_region_info(struct vfio_device *core_vdev,
+                                 unsigned long arg)
+{
+       struct nvgrace_gpu_pci_core_device *nvdev =
+               container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
+                            core_device.vdev);
+       unsigned long minsz = offsetofend(struct vfio_region_info, offset);
+       struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+       struct vfio_region_info_cap_sparse_mmap *sparse;
+       struct vfio_region_info info;
+       struct mem_region *memregion;
+       u32 size;
+       int ret;
+
+       if (copy_from_user(&info, (void __user *)arg, minsz))
+               return -EFAULT;
+
+       if (info.argsz < minsz)
+               return -EINVAL;
+
+       /*
+        * Request to determine the BAR region information. Send the
+        * GPU memory information.
+        */
+       memregion = nvgrace_gpu_memregion(info.index, nvdev);
+       if (!memregion)
+               return vfio_pci_core_ioctl(core_vdev,
+                                          VFIO_DEVICE_GET_REGION_INFO, arg);
+
+       size = struct_size(sparse, areas, 1);
+
+       /*
+        * Set up a sparse mapping for the device memory. Only the
+        * available device memory on the hardware is shown as a
+        * mappable region.
+        */
+       sparse = kzalloc(size, GFP_KERNEL);
+       if (!sparse)
+               return -ENOMEM;
+
+       sparse->nr_areas = 1;
+       sparse->areas[0].offset = 0;
+       sparse->areas[0].size = memregion->memlength;
+       sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
+       sparse->header.version = 1;
+
+       ret = vfio_info_add_capability(&caps, &sparse->header, size);
+       kfree(sparse);
+       if (ret)
+               return ret;
+
+       info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+       /*
+        * The region memory size may not be power-of-2 aligned.
+        * Given that the memory is exposed as a BAR and may not be
+        * aligned, round up to the next power-of-2.
+        */
+       info.size = memregion->bar_size;
+       info.flags = VFIO_REGION_INFO_FLAG_READ |
+                    VFIO_REGION_INFO_FLAG_WRITE |
+                    VFIO_REGION_INFO_FLAG_MMAP;
+
+       if (caps.size) {
+               info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
+               if (info.argsz < sizeof(info) + caps.size) {
+                       info.argsz = sizeof(info) + caps.size;
+                       info.cap_offset = 0;
+               } else {
+                       vfio_info_cap_shift(&caps, sizeof(info));
+                       if (copy_to_user((void __user *)arg +
+                                        sizeof(info), caps.buf,
+                                        caps.size)) {
+                               kfree(caps.buf);
+                               return -EFAULT;
+                       }
+                       info.cap_offset = sizeof(info);
+               }
+               kfree(caps.buf);
+       }
+       return copy_to_user((void __user *)arg, &info, minsz) ?
+                           -EFAULT : 0;
+}
+
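A userspace consumer finds the mappable range by walking the capability
chain built above. A hedged sketch of that walk, assuming the standard
linux/vfio.h layout and a char buf[] of info.argsz bytes returned by the
ioctl:

    struct vfio_region_info *info = (void *)buf;
    struct vfio_info_cap_header *hdr;
    __u32 off = info->cap_offset;

    while (off) {
            hdr = (struct vfio_info_cap_header *)(buf + off);
            if (hdr->id == VFIO_REGION_INFO_CAP_SPARSE_MMAP) {
                    struct vfio_region_info_cap_sparse_mmap *sparse =
                            (void *)hdr;
                    /* nvgrace-gpu reports one area: [0, memlength) */
                    for (__u32 i = 0; i < sparse->nr_areas; i++)
                            printf("area %u: offset 0x%llx size 0x%llx\n", i,
                                   (unsigned long long)sparse->areas[i].offset,
                                   (unsigned long long)sparse->areas[i].size);
            }
            off = hdr->next;
    }
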
+static long nvgrace_gpu_ioctl(struct vfio_device *core_vdev,
+                             unsigned int cmd, unsigned long arg)
+{
+       switch (cmd) {
+       case VFIO_DEVICE_GET_REGION_INFO:
+               return nvgrace_gpu_ioctl_get_region_info(core_vdev, arg);
+       case VFIO_DEVICE_IOEVENTFD:
+               return -ENOTTY;
+       case VFIO_DEVICE_RESET:
+               nvgrace_gpu_init_fake_bar_emu_regs(core_vdev);
+               fallthrough;
+       default:
+               return vfio_pci_core_ioctl(core_vdev, cmd, arg);
+       }
+}
+
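+/*
+ * Combine the last value written to the emulated BAR register with the
+ * read-only flag bits, masking off the address bits below the BAR size,
+ * mirroring what a config read of a real BAR register returns.
+ */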
+static __le64
+nvgrace_gpu_get_read_value(size_t bar_size, u64 flags, __le64 val64)
+{
+       u64 tmp_val;
+
+       tmp_val = le64_to_cpu(val64);
+       tmp_val &= ~(bar_size - 1);
+       tmp_val |= flags;
+
+       return cpu_to_le64(tmp_val);
+}
+
+/*
+ * Both the usable (usemem) and the reserved (resmem) device memory regions
+ * are exposed as 64b fake device BARs in the VM. These fake BARs must
+ * respond to accesses at their respective PCI config space offsets.
+ *
+ * resmem BAR owns PCI_BASE_ADDRESS_2 & PCI_BASE_ADDRESS_3.
+ * usemem BAR owns PCI_BASE_ADDRESS_4 & PCI_BASE_ADDRESS_5.
+ */
+static ssize_t
+nvgrace_gpu_read_config_emu(struct vfio_device *core_vdev,
+                           char __user *buf, size_t count, loff_t *ppos)
+{
+       struct nvgrace_gpu_pci_core_device *nvdev =
+               container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
+                            core_device.vdev);
+       u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
+       struct mem_region *memregion = NULL;
+       __le64 val64;
+       size_t register_offset;
+       loff_t copy_offset;
+       size_t copy_count;
+       int ret;
+
+       ret = vfio_pci_core_read(core_vdev, buf, count, ppos);
+       if (ret < 0)
+               return ret;
+
+       if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_2,
+                                               sizeof(val64),
+                                               &copy_offset, &copy_count,
+                                               &register_offset))
+               memregion = nvgrace_gpu_memregion(RESMEM_REGION_INDEX, nvdev);
+       else if (vfio_pci_core_range_intersect_range(pos, count,
+                                                    PCI_BASE_ADDRESS_4,
+                                                    sizeof(val64),
+                                                    &copy_offset, &copy_count,
+                                                    &register_offset))
+               memregion = nvgrace_gpu_memregion(USEMEM_REGION_INDEX, nvdev);
+
+       if (memregion) {
+               val64 = nvgrace_gpu_get_read_value(memregion->bar_size,
+                                                  PCI_BASE_ADDRESS_MEM_TYPE_64 |
+                                                  PCI_BASE_ADDRESS_MEM_PREFETCH,
+                                                  memregion->bar_val);
+               if (copy_to_user(buf + copy_offset,
+                                (void *)&val64 + register_offset, copy_count)) {
+                       /*
+                        * The position has been incremented in
+                        * vfio_pci_core_read. Reset the offset back to the
+                        * starting position.
+                        */
+                       *ppos -= count;
+                       return -EFAULT;
+               }
+       }
+
+       return count;
+}
+
+static ssize_t
+nvgrace_gpu_write_config_emu(struct vfio_device *core_vdev,
+                            const char __user *buf, size_t count, loff_t *ppos)
+{
+       struct nvgrace_gpu_pci_core_device *nvdev =
+               container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
+                            core_device.vdev);
+       u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
+       struct mem_region *memregion = NULL;
+       size_t register_offset;
+       loff_t copy_offset;
+       size_t copy_count;
+
+       if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_2,
+                                               sizeof(u64), &copy_offset,
+                                               &copy_count, &register_offset))
+               memregion = nvgrace_gpu_memregion(RESMEM_REGION_INDEX, nvdev);
+       else if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_4,
+                                                    sizeof(u64), &copy_offset,
+                                                    &copy_count, &register_offset))
+               memregion = nvgrace_gpu_memregion(USEMEM_REGION_INDEX, nvdev);
+
+       if (memregion) {
+               if (copy_from_user((void *)&memregion->bar_val + register_offset,
+                                  buf + copy_offset, copy_count))
+                       return -EFAULT;
+               *ppos += copy_count;
+               return copy_count;
+       }
+
+       return vfio_pci_core_write(core_vdev, buf, count, ppos);
+}
+
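The config emulation above is what makes the standard BAR sizing
handshake work against the fake BARs: the guest writes all 1s, reads the
value back, and derives the size from the zeroed low bits. A worked
sketch of that arithmetic (illustrative values only):

    uint64_t readback;        /* e.g. 0xffffffe00000000c after writing ~0 */

    /* mask out the flag bits (low 4 bits of a 64b prefetchable BAR) */
    uint64_t size = ~(readback & ~0xfULL) + 1;  /* 0x2000000000 = 128 GiB */
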
+/*
+ * Map the device memory into the kernel VA space on demand. This is needed
+ * because vfio does not require the userspace driver to perform accesses
+ * only through mmaps of the vfio-pci BAR regions; such accesses must also
+ * be supported through the vfio_device_ops read/write handlers.
+ *
+ * The usemem region is cacheable memory and hence is memremapped.
+ * The resmem region is non-cached and is mapped using ioremap_wc (NORMAL_NC).
+ */
+static int
+nvgrace_gpu_map_device_mem(int index,
+                          struct nvgrace_gpu_pci_core_device *nvdev)
+{
+       struct mem_region *memregion;
+       int ret = 0;
+
+       memregion = nvgrace_gpu_memregion(index, nvdev);
+       if (!memregion)
+               return -EINVAL;
+
+       mutex_lock(&nvdev->remap_lock);
+
+       if (memregion->memaddr)
+               goto unlock;
+
+       if (index == USEMEM_REGION_INDEX)
+               memregion->memaddr = memremap(memregion->memphys,
+                                             memregion->memlength,
+                                             MEMREMAP_WB);
+       else
+               memregion->ioaddr = ioremap_wc(memregion->memphys,
+                                              memregion->memlength);
+
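+       /*
+        * memaddr and ioaddr overlay each other in struct mem_region, so
+        * checking memaddr covers the ioremap_wc() case as well.
+        */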
+       if (!memregion->memaddr)
+               ret = -ENOMEM;
+
+unlock:
+       mutex_unlock(&nvdev->remap_lock);
+
+       return ret;
+}
+
+/*
+ * Read the data from the device memory (mapped either through ioremap
+ * or memremap) into the user buffer.
+ */
+static int
+nvgrace_gpu_map_and_read(struct nvgrace_gpu_pci_core_device *nvdev,
+                        char __user *buf, size_t mem_count, loff_t *ppos)
+{
+       unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+       u64 offset = *ppos & VFIO_PCI_OFFSET_MASK;
+       int ret;
+
+       if (!mem_count)
+               return 0;
+
+       /*
+        * Handle reads of the BAR regions. Map the target device memory
+        * physical address and copy it into the requested read buffer.
+        */
+       ret = nvgrace_gpu_map_device_mem(index, nvdev);
+       if (ret)
+               return ret;
+
+       if (index == USEMEM_REGION_INDEX) {
+               if (copy_to_user(buf,
+                                (u8 *)nvdev->usemem.memaddr + offset,
+                                mem_count))
+                       ret = -EFAULT;
+       } else {
+               /*
+                * The hardware ensures that the system does not crash when
+                * the device memory is accessed with the memory enable
+                * turned off. It synthesizes ~0 on such reads, so there is
+                * no need to check or emulate BAR disable/enable through
+                * the PCI_COMMAND config space register. Pass the test_mem
+                * flag as false.
+                */
+               ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false,
+                                            nvdev->resmem.ioaddr,
+                                            buf, offset, mem_count,
+                                            0, 0, false);
+       }
+
+       return ret;
+}
+
+/*
+ * Read count bytes from the device memory at an offset. The actual device
+ * memory size (available) may not be a power-of-2. So the driver fakes
+ * the size to a power-of-2 (reported) when exposing to a user space driver.
+ *
+ * Reads starting beyond the reported size fail with -EINVAL; reads extending
+ * beyond the reported size are truncated; the portion of a read extending
+ * beyond the actual device memory size is filled with ~0.
+ */
+static ssize_t
+nvgrace_gpu_read_mem(struct nvgrace_gpu_pci_core_device *nvdev,
+                    char __user *buf, size_t count, loff_t *ppos)
+{
+       u64 offset = *ppos & VFIO_PCI_OFFSET_MASK;
+       unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+       struct mem_region *memregion;
+       size_t mem_count, i;
+       u8 val = 0xFF;
+       int ret;
+
+       /* No NULL check needed; the caller has already verified the region exists. */
+       memregion = nvgrace_gpu_memregion(index, nvdev);
+
+       if (offset >= memregion->bar_size)
+               return -EINVAL;
+
+       /* Clip the read request that extends beyond the reported BAR size */
+       count = min(count, memregion->bar_size - (size_t)offset);
+
+       /*
+        * Determine how many bytes are actually read from the device memory.
+        * The portion of a read beyond the actual device memory size is
+        * filled with ~0; bytes beyond the reported size were clipped above.
+        */
+       if (offset >= memregion->memlength)
+               mem_count = 0;
+       else
+               mem_count = min(count, memregion->memlength - (size_t)offset);
+
+       ret = nvgrace_gpu_map_and_read(nvdev, buf, mem_count, ppos);
+       if (ret)
+               return ret;
+
+       /*
+        * Only the device memory present on the hardware is mapped, which may
+        * not be power-of-2 aligned. A read to an offset beyond the device memory
+        * size is filled with ~0.
+        */
+       for (i = mem_count; i < count; i++) {
+               ret = put_user(val, (unsigned char __user *)(buf + i));
+               if (ret)
+                       return ret;
+       }
+
+       *ppos += count;
+       return count;
+}
+
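As a concrete illustration of the clipping above (hypothetical numbers):
with memlength = 0x1e00000000 (120 GiB) and a reported bar_size of
0x2000000000 (128 GiB), a 16-byte read at offset memlength - 8 returns 8
bytes of device data followed by 8 bytes of 0xff, while a read starting
at or beyond 0x2000000000 fails with -EINVAL.
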
+static ssize_t
+nvgrace_gpu_read(struct vfio_device *core_vdev,
+                char __user *buf, size_t count, loff_t *ppos)
+{
+       unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+       struct nvgrace_gpu_pci_core_device *nvdev =
+               container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
+                            core_device.vdev);
+
+       if (nvgrace_gpu_memregion(index, nvdev))
+               return nvgrace_gpu_read_mem(nvdev, buf, count, ppos);
+
+       if (index == VFIO_PCI_CONFIG_REGION_INDEX)
+               return nvgrace_gpu_read_config_emu(core_vdev, buf, count, ppos);
+
+       return vfio_pci_core_read(core_vdev, buf, count, ppos);
+}
+
+/*
+ * Write the data to the device memory (mapped either through ioremap
+ * or memremap) from the user buffer.
+ */
+static int
+nvgrace_gpu_map_and_write(struct nvgrace_gpu_pci_core_device *nvdev,
+                         const char __user *buf, size_t mem_count,
+                         loff_t *ppos)
+{
+       unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+       loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+       int ret;
+
+       if (!mem_count)
+               return 0;
+
+       ret = nvgrace_gpu_map_device_mem(index, nvdev);
+       if (ret)
+               return ret;
+
+       if (index == USEMEM_REGION_INDEX) {
+               if (copy_from_user((u8 *)nvdev->usemem.memaddr + pos,
+                                  buf, mem_count))
+                       return -EFAULT;
+       } else {
+               /*
+                * The hardware ensures that the system does not crash when
+                * the device memory is accessed with the memory enable
+                * turned off. It drops such writes, so there is no need to
+                * check or emulate BAR disable/enable through the
+                * PCI_COMMAND config space register. Pass the test_mem
+                * flag as false.
+                */
+               ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false,
+                                            nvdev->resmem.ioaddr,
+                                            (char __user *)buf, pos, mem_count,
+                                            0, 0, true);
+       }
+
+       return ret;
+}
+
+/*
+ * Write count bytes to the device memory at a given offset. The actual device
+ * memory size (available) may not be a power-of-2. So the driver fakes the
+ * size to a power-of-2 (reported) when exposing to a user space driver.
+ *
+ * Writes extending beyond the reported size are truncated; writes starting
+ * beyond the reported size generate -EINVAL.
+ */
+static ssize_t
+nvgrace_gpu_write_mem(struct nvgrace_gpu_pci_core_device *nvdev,
+                     size_t count, loff_t *ppos, const char __user *buf)
+{
+       unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+       u64 offset = *ppos & VFIO_PCI_OFFSET_MASK;
+       struct mem_region *memregion;
+       size_t mem_count;
+       int ret = 0;
+
+       /* No NULL check needed; the caller has already verified the region exists. */
+       memregion = nvgrace_gpu_memregion(index, nvdev);
+
+       if (offset >= memregion->bar_size)
+               return -EINVAL;
+
+       /* Clip the write request that extends beyond the reported BAR size */
+       count = min(count, memregion->bar_size - (size_t)offset);
+
+       /*
+        * Determine how many bytes are actually written to the device memory.
+        * Do not write beyond the available device memory size.
+        */
+       if (offset >= memregion->memlength)
+               goto exitfn;
+
+       /*
+        * Only the device memory present on the hardware is mapped, which may
+        * not be power-of-2 aligned. Drop access outside the available device
+        * memory on the hardware.
+        */
+       mem_count = min(count, memregion->memlength - (size_t)offset);
+
+       ret = nvgrace_gpu_map_and_write(nvdev, buf, mem_count, ppos);
+       if (ret)
+               return ret;
+
+exitfn:
+       *ppos += count;
+       return count;
+}
+
+static ssize_t
+nvgrace_gpu_write(struct vfio_device *core_vdev,
+                 const char __user *buf, size_t count, loff_t *ppos)
+{
+       struct nvgrace_gpu_pci_core_device *nvdev =
+               container_of(core_vdev, struct nvgrace_gpu_pci_core_device,
+                            core_device.vdev);
+       unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+       if (nvgrace_gpu_memregion(index, nvdev))
+               return nvgrace_gpu_write_mem(nvdev, count, ppos, buf);
+
+       if (index == VFIO_PCI_CONFIG_REGION_INDEX)
+               return nvgrace_gpu_write_config_emu(core_vdev, buf, count, ppos);
+
+       return vfio_pci_core_write(core_vdev, buf, count, ppos);
+}
+
+static const struct vfio_device_ops nvgrace_gpu_pci_ops = {
+       .name           = "nvgrace-gpu-vfio-pci",
+       .init           = vfio_pci_core_init_dev,
+       .release        = vfio_pci_core_release_dev,
+       .open_device    = nvgrace_gpu_open_device,
+       .close_device   = nvgrace_gpu_close_device,
+       .ioctl          = nvgrace_gpu_ioctl,
+       .device_feature = vfio_pci_core_ioctl_feature,
+       .read           = nvgrace_gpu_read,
+       .write          = nvgrace_gpu_write,
+       .mmap           = nvgrace_gpu_mmap,
+       .request        = vfio_pci_core_request,
+       .match          = vfio_pci_core_match,
+       .bind_iommufd   = vfio_iommufd_physical_bind,
+       .unbind_iommufd = vfio_iommufd_physical_unbind,
+       .attach_ioas    = vfio_iommufd_physical_attach_ioas,
+       .detach_ioas    = vfio_iommufd_physical_detach_ioas,
+};
+
+static const struct vfio_device_ops nvgrace_gpu_pci_core_ops = {
+       .name           = "nvgrace-gpu-vfio-pci-core",
+       .init           = vfio_pci_core_init_dev,
+       .release        = vfio_pci_core_release_dev,
+       .open_device    = nvgrace_gpu_open_device,
+       .close_device   = vfio_pci_core_close_device,
+       .ioctl          = vfio_pci_core_ioctl,
+       .device_feature = vfio_pci_core_ioctl_feature,
+       .read           = vfio_pci_core_read,
+       .write          = vfio_pci_core_write,
+       .mmap           = vfio_pci_core_mmap,
+       .request        = vfio_pci_core_request,
+       .match          = vfio_pci_core_match,
+       .bind_iommufd   = vfio_iommufd_physical_bind,
+       .unbind_iommufd = vfio_iommufd_physical_unbind,
+       .attach_ioas    = vfio_iommufd_physical_attach_ioas,
+       .detach_ioas    = vfio_iommufd_physical_detach_ioas,
+};
+
+static int
+nvgrace_gpu_fetch_memory_property(struct pci_dev *pdev,
+                                 u64 *pmemphys, u64 *pmemlength)
+{
+       int ret;
+
+       /*
+        * The memory information is present in the system ACPI tables as DSD
+        * properties nvidia,gpu-mem-base-pa and nvidia,gpu-mem-size.
+        */
+       ret = device_property_read_u64(&pdev->dev, "nvidia,gpu-mem-base-pa",
+                                      pmemphys);
+       if (ret)
+               return ret;
+
+       if (*pmemphys > type_max(phys_addr_t))
+               return -EOVERFLOW;
+
+       ret = device_property_read_u64(&pdev->dev, "nvidia,gpu-mem-size",
+                                      pmemlength);
+       if (ret)
+               return ret;
+
+       if (*pmemlength > type_max(size_t))
+               return -EOVERFLOW;
+
+       /*
+        * If the C2C link is not up due to an error, the coherent device
+        * memory size is returned as 0. Fail in that case.
+        */
+       if (*pmemlength == 0)
+               return -ENOMEM;
+
+       return ret;
+}
+
+static int
+nvgrace_gpu_init_nvdev_struct(struct pci_dev *pdev,
+                             struct nvgrace_gpu_pci_core_device *nvdev,
+                             u64 memphys, u64 memlength)
+{
+       int ret = 0;
+
+       /*
+        * The VM GPU device driver needs a non-cacheable region to support
+        * the MIG feature. Since the device memory is mapped as NORMAL cached,
+        * carve out a region from the end with the NORMAL_NC property
+        * (called reserved memory and represented as resmem). This region
+        * is exposed as a 64b BAR (regions 2 and 3) to the VM, while the
+        * rest (termed usable memory and represented as usemem) is exposed
+        * as a cacheable 64b BAR (regions 4 and 5).
+        *
+        *               devmem (memlength)
+        * |-------------------------------------------------|
+        * |                                           |
+        * usemem.memphys                              resmem.memphys
+        */
+       nvdev->usemem.memphys = memphys;
+
+       /*
+        * The device memory exposed to the VM is added to the kernel by the
+        * VM driver module in chunks of memory block size. Only the usable
+        * memory (usemem) is added to the kernel for usage by the VM
+        * workloads. Make the usable memory size memblock aligned.
+        */
+       if (check_sub_overflow(memlength, RESMEM_SIZE,
+                              &nvdev->usemem.memlength)) {
+               ret = -EOVERFLOW;
+               goto done;
+       }
+
+       /*
+        * The USEMEM part of the device memory has to be MEMBLK_SIZE
+        * aligned. This is a hardwired ABI value between the GPU FW and
+        * VFIO driver. The VM device driver is also aware of it and makes
+        * use of the value in its calculation of the USEMEM size.
+        */
+       nvdev->usemem.memlength = round_down(nvdev->usemem.memlength,
+                                            MEMBLK_SIZE);
+       if (nvdev->usemem.memlength == 0) {
+               ret = -EINVAL;
+               goto done;
+       }
+
+       if ((check_add_overflow(nvdev->usemem.memphys,
+                               nvdev->usemem.memlength,
+                               &nvdev->resmem.memphys)) ||
+           (check_sub_overflow(memlength, nvdev->usemem.memlength,
+                               &nvdev->resmem.memlength))) {
+               ret = -EOVERFLOW;
+               goto done;
+       }
+
+       /*
+        * The memory regions are exposed as BARs. Calculate and save
+        * the BAR size for them.
+        */
+       nvdev->usemem.bar_size = roundup_pow_of_two(nvdev->usemem.memlength);
+       nvdev->resmem.bar_size = roundup_pow_of_two(nvdev->resmem.memlength);
+done:
+       return ret;
+}
+
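To make the carve-out concrete, a hedged worked example (memphys and
memlength are hypothetical; RESMEM_SIZE and MEMBLK_SIZE are defined
earlier in this file and assumed here to be 1 GiB and 512 MiB):

    /* memphys = 0x400000000000, memlength = 0x1e08000000 */
    usemem.memlength = round_down(0x1e08000000 - SZ_1G, SZ_512M);
                                              /* = 0x1dc0000000 */
    resmem.memphys   = 0x400000000000 + 0x1dc0000000;
    resmem.memlength = 0x1e08000000 - 0x1dc0000000;       /* = 0x48000000 */
    usemem.bar_size  = roundup_pow_of_two(0x1dc0000000);  /* 0x2000000000 */
    resmem.bar_size  = roundup_pow_of_two(0x48000000);    /* 0x80000000 */
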
+static int nvgrace_gpu_probe(struct pci_dev *pdev,
+                            const struct pci_device_id *id)
+{
+       const struct vfio_device_ops *ops = &nvgrace_gpu_pci_core_ops;
+       struct nvgrace_gpu_pci_core_device *nvdev;
+       u64 memphys, memlength;
+       int ret;
+
+       ret = nvgrace_gpu_fetch_memory_property(pdev, &memphys, &memlength);
+       if (!ret)
+               ops = &nvgrace_gpu_pci_ops;
+
+       nvdev = vfio_alloc_device(nvgrace_gpu_pci_core_device, core_device.vdev,
+                                 &pdev->dev, ops);
+       if (IS_ERR(nvdev))
+               return PTR_ERR(nvdev);
+
+       dev_set_drvdata(&pdev->dev, &nvdev->core_device);
+
+       if (ops == &nvgrace_gpu_pci_ops) {
+               /*
+                * Device memory properties are identified in the host ACPI
+                * table. Populate the nvgrace_gpu_pci_core_device structure.
+                */
+               ret = nvgrace_gpu_init_nvdev_struct(pdev, nvdev,
+                                                   memphys, memlength);
+               if (ret)
+                       goto out_put_vdev;
+       }
+
+       ret = vfio_pci_core_register_device(&nvdev->core_device);
+       if (ret)
+               goto out_put_vdev;
+
+       return ret;
+
+out_put_vdev:
+       vfio_put_device(&nvdev->core_device.vdev);
+       return ret;
+}
+
+static void nvgrace_gpu_remove(struct pci_dev *pdev)
+{
+       struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
+
+       vfio_pci_core_unregister_device(core_device);
+       vfio_put_device(&core_device->vdev);
+}
+
+static const struct pci_device_id nvgrace_gpu_vfio_pci_table[] = {
+       /* GH200 120GB */
+       { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2342) },
+       /* GH200 480GB */
+       { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2345) },
+       {}
+};
+
+MODULE_DEVICE_TABLE(pci, nvgrace_gpu_vfio_pci_table);
+
+static struct pci_driver nvgrace_gpu_vfio_pci_driver = {
+       .name = KBUILD_MODNAME,
+       .id_table = nvgrace_gpu_vfio_pci_table,
+       .probe = nvgrace_gpu_probe,
+       .remove = nvgrace_gpu_remove,
+       .err_handler = &vfio_pci_core_err_handlers,
+       .driver_managed_dma = true,
+};
+
+module_pci_driver(nvgrace_gpu_vfio_pci_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ankit Agrawal <ankita@nvidia.com>");
+MODULE_AUTHOR("Aniket Agashe <aniketa@nvidia.com>");
+MODULE_DESCRIPTION("VFIO NVGRACE GPU PF - User Level driver for NVIDIA devices with CPU coherently accessible device memory");
index 8ddf4346fcd5d153ad24b7377edd7f412be28b47..68e8f006dfdbf7faf3790c8b9324055f98f00544 100644 (file)
@@ -607,7 +607,7 @@ int pds_vfio_dma_logging_report(struct vfio_device *vdev, unsigned long iova,
 
        mutex_lock(&pds_vfio->state_mutex);
        err = pds_vfio_dirty_sync(pds_vfio, dirty, iova, length);
-       pds_vfio_state_mutex_unlock(pds_vfio);
+       mutex_unlock(&pds_vfio->state_mutex);
 
        return err;
 }
@@ -624,7 +624,7 @@ int pds_vfio_dma_logging_start(struct vfio_device *vdev,
        mutex_lock(&pds_vfio->state_mutex);
        pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS);
        err = pds_vfio_dirty_enable(pds_vfio, ranges, nnodes, page_size);
-       pds_vfio_state_mutex_unlock(pds_vfio);
+       mutex_unlock(&pds_vfio->state_mutex);
 
        return err;
 }
@@ -637,7 +637,7 @@ int pds_vfio_dma_logging_stop(struct vfio_device *vdev)
 
        mutex_lock(&pds_vfio->state_mutex);
        pds_vfio_dirty_disable(pds_vfio, true);
-       pds_vfio_state_mutex_unlock(pds_vfio);
+       mutex_unlock(&pds_vfio->state_mutex);
 
        return 0;
 }
index 79fe2e66bb498682e46435df5dbc996fe46899b3..6b94cc0bf45b44552dcfa51453724e9a18a42b9a 100644 (file)
@@ -92,8 +92,10 @@ static void pds_vfio_put_lm_file(struct pds_vfio_lm_file *lm_file)
 {
        mutex_lock(&lm_file->lock);
 
+       lm_file->disabled = true;
        lm_file->size = 0;
        lm_file->alloc_size = 0;
+       lm_file->filep->f_pos = 0;
 
        /* Free scatter list of file pages */
        sg_free_table(&lm_file->sg_table);
@@ -183,6 +185,12 @@ static ssize_t pds_vfio_save_read(struct file *filp, char __user *buf,
        pos = &filp->f_pos;
 
        mutex_lock(&lm_file->lock);
+
+       if (lm_file->disabled) {
+               done = -ENODEV;
+               goto out_unlock;
+       }
+
        if (*pos > lm_file->size) {
                done = -EINVAL;
                goto out_unlock;
@@ -283,6 +291,11 @@ static ssize_t pds_vfio_restore_write(struct file *filp, const char __user *buf,
 
        mutex_lock(&lm_file->lock);
 
+       if (lm_file->disabled) {
+               done = -ENODEV;
+               goto out_unlock;
+       }
+
        while (len) {
                size_t page_offset;
                struct page *page;
index 13be893198b743d1fdbf6c8f83ea04c5ad4f1511..9511b1afc6a112abf4ff20d7c1d0d83365015a06 100644 (file)
@@ -27,6 +27,7 @@ struct pds_vfio_lm_file {
        struct scatterlist *last_offset_sg;     /* Iterator */
        unsigned int sg_last_entry;
        unsigned long last_offset;
+       bool disabled;
 };
 
 struct pds_vfio_pci_device;
index a34dda5166293583337372fc0059129c726a9f45..16e93b11ab1b0ef9a3b8e07ac4e838397b26e5ea 100644 (file)
 
 static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio)
 {
-       bool deferred_reset_needed = false;
-
        /*
         * Documentation states that the kernel migration driver must not
         * generate asynchronous device state transitions outside of
         * manipulation by the user or the VFIO_DEVICE_RESET ioctl.
         *
         * Since recovery is an asynchronous event received from the device,
-        * initiate a deferred reset. Issue a deferred reset in the following
-        * situations:
+        * initiate a reset in the following situations:
         *   1. Migration is in progress, which will cause the next step of
         *      the migration to fail.
         *   2. If the device is in a state that will be set to
@@ -42,24 +39,8 @@ static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio)
             pds_vfio->state != VFIO_DEVICE_STATE_ERROR) ||
            (pds_vfio->state == VFIO_DEVICE_STATE_RUNNING &&
             pds_vfio_dirty_is_enabled(pds_vfio)))
-               deferred_reset_needed = true;
+               pds_vfio_reset(pds_vfio, VFIO_DEVICE_STATE_ERROR);
        mutex_unlock(&pds_vfio->state_mutex);
-
-       /*
-        * On the next user initiated state transition, the device will
-        * transition to the VFIO_DEVICE_STATE_ERROR. At this point it's the user's
-        * responsibility to reset the device.
-        *
-        * If a VFIO_DEVICE_RESET is requested post recovery and before the next
-        * state transition, then the deferred reset state will be set to
-        * VFIO_DEVICE_STATE_RUNNING.
-        */
-       if (deferred_reset_needed) {
-               mutex_lock(&pds_vfio->reset_mutex);
-               pds_vfio->deferred_reset = true;
-               pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_ERROR;
-               mutex_unlock(&pds_vfio->reset_mutex);
-       }
 }
 
 static int pds_vfio_pci_notify_handler(struct notifier_block *nb,
@@ -185,7 +166,9 @@ static void pds_vfio_pci_aer_reset_done(struct pci_dev *pdev)
 {
        struct pds_vfio_pci_device *pds_vfio = pds_vfio_pci_drvdata(pdev);
 
-       pds_vfio_reset(pds_vfio);
+       mutex_lock(&pds_vfio->state_mutex);
+       pds_vfio_reset(pds_vfio, VFIO_DEVICE_STATE_RUNNING);
+       mutex_unlock(&pds_vfio->state_mutex);
 }
 
 static const struct pci_error_handlers pds_vfio_pci_err_handlers = {
index 4c351c59d05a939097d969b7e3da69325dc839b0..76a80ae7087b514216a78ec93462fd0bc4311afd 100644 (file)
@@ -26,37 +26,14 @@ struct pds_vfio_pci_device *pds_vfio_pci_drvdata(struct pci_dev *pdev)
                            vfio_coredev);
 }
 
-void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio)
+void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio,
+                   enum vfio_device_mig_state state)
 {
-again:
-       mutex_lock(&pds_vfio->reset_mutex);
-       if (pds_vfio->deferred_reset) {
-               pds_vfio->deferred_reset = false;
-               if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) {
-                       pds_vfio_put_restore_file(pds_vfio);
-                       pds_vfio_put_save_file(pds_vfio);
-                       pds_vfio_dirty_disable(pds_vfio, false);
-               }
-               pds_vfio->state = pds_vfio->deferred_reset_state;
-               pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
-               mutex_unlock(&pds_vfio->reset_mutex);
-               goto again;
-       }
-       mutex_unlock(&pds_vfio->state_mutex);
-       mutex_unlock(&pds_vfio->reset_mutex);
-}
-
-void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio)
-{
-       mutex_lock(&pds_vfio->reset_mutex);
-       pds_vfio->deferred_reset = true;
-       pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
-       if (!mutex_trylock(&pds_vfio->state_mutex)) {
-               mutex_unlock(&pds_vfio->reset_mutex);
-               return;
-       }
-       mutex_unlock(&pds_vfio->reset_mutex);
-       pds_vfio_state_mutex_unlock(pds_vfio);
+       pds_vfio_put_restore_file(pds_vfio);
+       pds_vfio_put_save_file(pds_vfio);
+       if (state == VFIO_DEVICE_STATE_ERROR)
+               pds_vfio_dirty_disable(pds_vfio, false);
+       pds_vfio->state = state;
 }
 
 static struct file *
@@ -97,8 +74,7 @@ pds_vfio_set_device_state(struct vfio_device *vdev,
                        break;
                }
        }
-       pds_vfio_state_mutex_unlock(pds_vfio);
-       /* still waiting on a deferred_reset */
+       mutex_unlock(&pds_vfio->state_mutex);
        if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR)
                res = ERR_PTR(-EIO);
 
@@ -114,7 +90,7 @@ static int pds_vfio_get_device_state(struct vfio_device *vdev,
 
        mutex_lock(&pds_vfio->state_mutex);
        *current_state = pds_vfio->state;
-       pds_vfio_state_mutex_unlock(pds_vfio);
+       mutex_unlock(&pds_vfio->state_mutex);
        return 0;
 }
 
@@ -156,7 +132,6 @@ static int pds_vfio_init_device(struct vfio_device *vdev)
        pds_vfio->vf_id = vf_id;
 
        mutex_init(&pds_vfio->state_mutex);
-       mutex_init(&pds_vfio->reset_mutex);
 
        vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P;
        vdev->mig_ops = &pds_vfio_lm_ops;
@@ -178,7 +153,6 @@ static void pds_vfio_release_device(struct vfio_device *vdev)
                             vfio_coredev.vdev);
 
        mutex_destroy(&pds_vfio->state_mutex);
-       mutex_destroy(&pds_vfio->reset_mutex);
        vfio_pci_core_release_dev(vdev);
 }
 
@@ -194,7 +168,6 @@ static int pds_vfio_open_device(struct vfio_device *vdev)
                return err;
 
        pds_vfio->state = VFIO_DEVICE_STATE_RUNNING;
-       pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
 
        vfio_pci_core_finish_enable(&pds_vfio->vfio_coredev);
 
index e7b01080a1ec3acf255c0a3ee7a330cf40ad3b9f..803d99d69c738ea4697ad5c6782b5e948ff4b6e3 100644 (file)
@@ -18,20 +18,16 @@ struct pds_vfio_pci_device {
        struct pds_vfio_dirty dirty;
        struct mutex state_mutex; /* protect migration state */
        enum vfio_device_mig_state state;
-       struct mutex reset_mutex; /* protect reset_done flow */
-       u8 deferred_reset;
-       enum vfio_device_mig_state deferred_reset_state;
        struct notifier_block nb;
 
        int vf_id;
        u16 client_id;
 };
 
-void pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio);
-
 const struct vfio_device_ops *pds_vfio_ops_info(void);
 struct pds_vfio_pci_device *pds_vfio_pci_drvdata(struct pci_dev *pdev);
-void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio);
+void pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio,
+                   enum vfio_device_mig_state state);
 
 struct pci_dev *pds_vfio_to_pci_dev(struct pds_vfio_pci_device *pds_vfio);
 struct device *pds_vfio_to_dev(struct pds_vfio_pci_device *pds_vfio);
index 7e2e62ab0869cfb42ae786f9c41b343b42209de7..97422aafaa7b5df6803da52fb2acee4c5e3ada57 100644 (file)
@@ -1966,3 +1966,45 @@ ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 
        return done;
 }
+
+/**
+ * vfio_pci_core_range_intersect_range() - Determine overlap between a buffer
+ *                                        and register offset ranges.
+ * @buf_start:         start offset of the buffer
+ * @buf_cnt:           number of buffer bytes
+ * @reg_start:         start register offset
+ * @reg_cnt:           number of register bytes
+ * @buf_offset:        start offset of overlap in the buffer
+ * @intersect_count:   number of overlapping bytes
+ * @register_offset:   start offset of overlap in register
+ *
+ * Returns: true if there is overlap, false if not.
+ * The overlap start and size are returned through the function arguments.
+ */
+bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt,
+                                        loff_t reg_start, size_t reg_cnt,
+                                        loff_t *buf_offset,
+                                        size_t *intersect_count,
+                                        size_t *register_offset)
+{
+       if (buf_start <= reg_start &&
+           buf_start + buf_cnt > reg_start) {
+               *buf_offset = reg_start - buf_start;
+               *intersect_count = min_t(size_t, reg_cnt,
+                                        buf_start + buf_cnt - reg_start);
+               *register_offset = 0;
+               return true;
+       }
+
+       if (buf_start > reg_start &&
+           buf_start < reg_start + reg_cnt) {
+               *buf_offset = 0;
+               *intersect_count = min_t(size_t, buf_cnt,
+                                        reg_start + reg_cnt - buf_start);
+               *register_offset = buf_start - reg_start;
+               return true;
+       }
+
+       return false;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_range_intersect_range);
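A quick sanity check of the two overlap cases handled above
(illustrative values only):

    loff_t buf_off;
    size_t cnt, reg_off;

    /* buffer [0x10, 0x20) starts before register [0x14, 0x1c) */
    vfio_pci_core_range_intersect_range(0x10, 0x10, 0x14, 0x8,
                                        &buf_off, &cnt, &reg_off);
    /* -> true, buf_off = 0x4, cnt = 0x8, reg_off = 0 */

    /* buffer [0x16, 0x26) starts inside register [0x14, 0x1c) */
    vfio_pci_core_range_intersect_range(0x16, 0x10, 0x14, 0x8,
                                        &buf_off, &cnt, &reg_off);
    /* -> true, buf_off = 0, cnt = 0x6, reg_off = 0x2 */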
index 1cbc990d42e07cf41904dc726f83c08c72922b77..d94d61b92c1ac1cf9cd84c76f2e9bcb714f39d08 100644 (file)
@@ -1862,8 +1862,25 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
        /*
         * See remap_pfn_range(), called from vfio_pci_fault() but we can't
         * change vm_flags within the fault handler.  Set them now.
+        *
+        * VM_ALLOW_ANY_UNCACHED: The VMA flag is implemented for ARM64,
+        * allowing KVM stage 2 device mapping attributes to use Normal-NC
+        * rather than DEVICE_nGnRE, which allows guest mappings
+        * supporting write-combining attributes (WC). ARM does not
+        * architecturally guarantee this is safe, and indeed some MMIO
+        * regions like the GICv2 VCPU interface can trigger uncontained
+        * faults if Normal-NC is used.
+        *
+        * To safely use VFIO in KVM the platform must guarantee full
+        * safety in the guest where no action taken against a MMIO
+        * mapping can trigger an uncontained failure. The assumption is
+        * that most VFIO PCI platforms support this for both mapping types,
+        * at least in common flows, based on some expectations of how
+        * PCI IP is integrated. Hence VM_ALLOW_ANY_UNCACHED is set in
+        * the VMA flags.
         */
-       vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+       vm_flags_set(vma, VM_ALLOW_ANY_UNCACHED | VM_IO | VM_PFNMAP |
+                       VM_DONTEXPAND | VM_DONTDUMP);
        vma->vm_ops = &vfio_pci_mmap_ops;
 
        return 0;
@@ -2047,6 +2064,7 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb,
                         pci_name(pdev));
                pdev->driver_override = kasprintf(GFP_KERNEL, "%s",
                                                  vdev->vdev.ops->name);
+               WARN_ON(!pdev->driver_override);
        } else if (action == BUS_NOTIFY_BOUND_DRIVER &&
                   pdev->is_virtfn && physfn == vdev->pdev) {
                struct pci_driver *drv = pci_dev_driver(pdev);
index 237beac8380975110231503e0d30f331ba64e2d5..fb5392b749fff07d248e4eb3afa8392f073366e4 100644 (file)
@@ -90,22 +90,28 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
 
        if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
                struct vfio_pci_irq_ctx *ctx;
+               struct eventfd_ctx *trigger;
 
                ctx = vfio_irq_ctx_get(vdev, 0);
                if (WARN_ON_ONCE(!ctx))
                        return;
-               eventfd_signal(ctx->trigger);
+
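+               /* Pairs with WRITE_ONCE() in vfio_intx_set_signal() */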
+               trigger = READ_ONCE(ctx->trigger);
+               if (likely(trigger))
+                       eventfd_signal(trigger);
        }
 }
 
 /* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
-bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
+static bool __vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
 {
        struct pci_dev *pdev = vdev->pdev;
        struct vfio_pci_irq_ctx *ctx;
        unsigned long flags;
        bool masked_changed = false;
 
+       lockdep_assert_held(&vdev->igate);
+
        spin_lock_irqsave(&vdev->irqlock, flags);
 
        /*
@@ -143,6 +149,17 @@ out_unlock:
        return masked_changed;
 }
 
+bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
+{
+       bool mask_changed;
+
+       mutex_lock(&vdev->igate);
+       mask_changed = __vfio_pci_intx_mask(vdev);
+       mutex_unlock(&vdev->igate);
+
+       return mask_changed;
+}
+
 /*
  * If this is triggered by an eventfd, we can't call eventfd_signal
  * or else we'll deadlock on the eventfd wait queue.  Return >0 when
@@ -194,12 +211,21 @@ out_unlock:
        return ret;
 }
 
-void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
+static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
 {
+       lockdep_assert_held(&vdev->igate);
+
        if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0)
                vfio_send_intx_eventfd(vdev, NULL);
 }
 
+void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
+{
+       mutex_lock(&vdev->igate);
+       __vfio_pci_intx_unmask(vdev);
+       mutex_unlock(&vdev->igate);
+}
+
 static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
 {
        struct vfio_pci_core_device *vdev = dev_id;
@@ -231,97 +257,100 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
        return ret;
 }
 
-static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
+static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
+                           struct eventfd_ctx *trigger)
 {
+       struct pci_dev *pdev = vdev->pdev;
        struct vfio_pci_irq_ctx *ctx;
+       unsigned long irqflags;
+       char *name;
+       int ret;
 
        if (!is_irq_none(vdev))
                return -EINVAL;
 
-       if (!vdev->pdev->irq)
+       if (!pdev->irq)
                return -ENODEV;
 
+       name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
+       if (!name)
+               return -ENOMEM;
+
        ctx = vfio_irq_ctx_alloc(vdev, 0);
        if (!ctx)
                return -ENOMEM;
 
+       ctx->name = name;
+       ctx->trigger = trigger;
+
        /*
-        * If the virtual interrupt is masked, restore it.  Devices
-        * supporting DisINTx can be masked at the hardware level
-        * here, non-PCI-2.3 devices will have to wait until the
-        * interrupt is enabled.
+        * Fill the initial masked state based on virq_disabled.  After
+        * enable, changing the DisINTx bit in vconfig directly changes INTx
+        * masking.  igate prevents races during setup; once running, masked
+        * is protected via irqlock.
+        *
+        * Devices supporting DisINTx also reflect the current mask state in
+        * the physical DisINTx bit, which is not affected during IRQ setup.
+        *
+        * Devices without DisINTx support require an exclusive interrupt.
+        * IRQ masking is performed at the IRQ chip.  Again, igate protects
+        * against races during setup; IRQ handlers and irqfds are not
+        * yet active, therefore masked is stable and can be used to
+        * conditionally auto-enable the IRQ.
+        *
+        * irq_type must be stable while the IRQ handler is registered,
+        * therefore it must be set before request_irq().
         */
        ctx->masked = vdev->virq_disabled;
-       if (vdev->pci_2_3)
-               pci_intx(vdev->pdev, !ctx->masked);
+       if (vdev->pci_2_3) {
+               pci_intx(pdev, !ctx->masked);
+               irqflags = IRQF_SHARED;
+       } else {
+               irqflags = ctx->masked ? IRQF_NO_AUTOEN : 0;
+       }
 
        vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
 
+       ret = request_irq(pdev->irq, vfio_intx_handler,
+                         irqflags, ctx->name, vdev);
+       if (ret) {
+               vdev->irq_type = VFIO_PCI_NUM_IRQS;
+               kfree(name);
+               vfio_irq_ctx_free(vdev, ctx, 0);
+               return ret;
+       }
+
        return 0;
 }
 
-static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
+static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
+                               struct eventfd_ctx *trigger)
 {
        struct pci_dev *pdev = vdev->pdev;
-       unsigned long irqflags = IRQF_SHARED;
        struct vfio_pci_irq_ctx *ctx;
-       struct eventfd_ctx *trigger;
-       unsigned long flags;
-       int ret;
+       struct eventfd_ctx *old;
 
        ctx = vfio_irq_ctx_get(vdev, 0);
        if (WARN_ON_ONCE(!ctx))
                return -EINVAL;
 
-       if (ctx->trigger) {
-               free_irq(pdev->irq, vdev);
-               kfree(ctx->name);
-               eventfd_ctx_put(ctx->trigger);
-               ctx->trigger = NULL;
-       }
-
-       if (fd < 0) /* Disable only */
-               return 0;
+       old = ctx->trigger;
 
-       ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
-                             pci_name(pdev));
-       if (!ctx->name)
-               return -ENOMEM;
+       WRITE_ONCE(ctx->trigger, trigger);
 
-       trigger = eventfd_ctx_fdget(fd);
-       if (IS_ERR(trigger)) {
-               kfree(ctx->name);
-               return PTR_ERR(trigger);
+       /* Releasing an old ctx requires synchronizing in-flight users */
+       if (old) {
+               synchronize_irq(pdev->irq);
+               vfio_virqfd_flush_thread(&ctx->unmask);
+               eventfd_ctx_put(old);
        }
 
-       ctx->trigger = trigger;
-
-       if (!vdev->pci_2_3)
-               irqflags = 0;
-
-       ret = request_irq(pdev->irq, vfio_intx_handler,
-                         irqflags, ctx->name, vdev);
-       if (ret) {
-               ctx->trigger = NULL;
-               kfree(ctx->name);
-               eventfd_ctx_put(trigger);
-               return ret;
-       }
-
-       /*
-        * INTx disable will stick across the new irq setup,
-        * disable_irq won't.
-        */
-       spin_lock_irqsave(&vdev->irqlock, flags);
-       if (!vdev->pci_2_3 && ctx->masked)
-               disable_irq_nosync(pdev->irq);
-       spin_unlock_irqrestore(&vdev->irqlock, flags);
-
        return 0;
 }
 
 static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
 {
+       struct pci_dev *pdev = vdev->pdev;
        struct vfio_pci_irq_ctx *ctx;
 
        ctx = vfio_irq_ctx_get(vdev, 0);
@@ -329,10 +358,13 @@ static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
        if (ctx) {
                vfio_virqfd_disable(&ctx->unmask);
                vfio_virqfd_disable(&ctx->mask);
+               free_irq(pdev->irq, vdev);
+               if (ctx->trigger)
+                       eventfd_ctx_put(ctx->trigger);
+               kfree(ctx->name);
+               vfio_irq_ctx_free(vdev, ctx, 0);
        }
-       vfio_intx_set_signal(vdev, -1);
        vdev->irq_type = VFIO_PCI_NUM_IRQS;
-       vfio_irq_ctx_free(vdev, ctx, 0);
 }
 
 /*
@@ -560,11 +592,11 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
                return -EINVAL;
 
        if (flags & VFIO_IRQ_SET_DATA_NONE) {
-               vfio_pci_intx_unmask(vdev);
+               __vfio_pci_intx_unmask(vdev);
        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                uint8_t unmask = *(uint8_t *)data;
                if (unmask)
-                       vfio_pci_intx_unmask(vdev);
+                       __vfio_pci_intx_unmask(vdev);
        } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
                int32_t fd = *(int32_t *)data;
@@ -591,11 +623,11 @@ static int vfio_pci_set_intx_mask(struct vfio_pci_core_device *vdev,
                return -EINVAL;
 
        if (flags & VFIO_IRQ_SET_DATA_NONE) {
-               vfio_pci_intx_mask(vdev);
+               __vfio_pci_intx_mask(vdev);
        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                uint8_t mask = *(uint8_t *)data;
                if (mask)
-                       vfio_pci_intx_mask(vdev);
+                       __vfio_pci_intx_mask(vdev);
        } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                return -ENOTTY; /* XXX implement me */
        }
@@ -616,19 +648,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
                return -EINVAL;
 
        if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+               struct eventfd_ctx *trigger = NULL;
                int32_t fd = *(int32_t *)data;
                int ret;
 
-               if (is_intx(vdev))
-                       return vfio_intx_set_signal(vdev, fd);
+               if (fd >= 0) {
+                       trigger = eventfd_ctx_fdget(fd);
+                       if (IS_ERR(trigger))
+                               return PTR_ERR(trigger);
+               }
 
-               ret = vfio_intx_enable(vdev);
-               if (ret)
-                       return ret;
+               if (is_intx(vdev))
+                       ret = vfio_intx_set_signal(vdev, trigger);
+               else
+                       ret = vfio_intx_enable(vdev, trigger);
 
-               ret = vfio_intx_set_signal(vdev, fd);
-               if (ret)
-                       vfio_intx_disable(vdev);
+               if (ret && trigger)
+                       eventfd_ctx_put(trigger);
 
                return ret;
        }
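For reference, a hypothetical userspace sketch of driving this path
through VFIO_DEVICE_SET_IRQS with the standard linux/vfio.h UAPI:

    char buf[sizeof(struct vfio_irq_set) + sizeof(__s32)];
    struct vfio_irq_set *set = (struct vfio_irq_set *)buf;

    set->argsz = sizeof(buf);
    set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
    set->index = VFIO_PCI_INTX_IRQ_INDEX;
    set->start = 0;
    set->count = 1;
    *(__s32 *)set->data = eventfd_fd;    /* -1 clears the trigger */

    ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);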
index 07fea08ea8a21340cacef113b238adea5dc8b59d..03b8f7ada1ac24d89956128fb7db2fdcd7f91588 100644 (file)
@@ -96,10 +96,10 @@ VFIO_IOREAD(32)
  * reads with -1.  This is intended for handling MSI-X vector tables and
  * leftover space for ROM BARs.
  */
-static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
-                       void __iomem *io, char __user *buf,
-                       loff_t off, size_t count, size_t x_start,
-                       size_t x_end, bool iswrite)
+ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
+                              void __iomem *io, char __user *buf,
+                              loff_t off, size_t count, size_t x_start,
+                              size_t x_end, bool iswrite)
 {
        ssize_t done = 0;
        int ret;
@@ -201,6 +201,7 @@ static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
 
        return done;
 }
+EXPORT_SYMBOL_GPL(vfio_pci_core_do_io_rw);
 
 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
 {
@@ -279,8 +280,8 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
                x_end = vdev->msix_offset + vdev->msix_size;
        }
 
-       done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
-                       count, x_start, x_end, iswrite);
+       done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
+                                     count, x_start, x_end, iswrite);
 
        if (done >= 0)
                *ppos += done;
@@ -348,7 +349,8 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
         * probing, so we don't currently worry about access in relation
         * to the memory enable bit in the command register.
         */
-       done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);
+       done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
+                                     0, 0, iswrite);
 
        vga_put(vdev->pdev, rsrc);
 
index d5af683837d345eaf7afcb860e3f7774379f9474..b5d3a8c5bbc9aa22ea9f566d4ce441eded0170d7 100644 (file)
@@ -132,33 +132,6 @@ end:
        return ret ? ret : count;
 }
 
-static bool range_intersect_range(loff_t range1_start, size_t count1,
-                                 loff_t range2_start, size_t count2,
-                                 loff_t *start_offset,
-                                 size_t *intersect_count,
-                                 size_t *register_offset)
-{
-       if (range1_start <= range2_start &&
-           range1_start + count1 > range2_start) {
-               *start_offset = range2_start - range1_start;
-               *intersect_count = min_t(size_t, count2,
-                                        range1_start + count1 - range2_start);
-               *register_offset = 0;
-               return true;
-       }
-
-       if (range1_start > range2_start &&
-           range1_start < range2_start + count2) {
-               *start_offset = 0;
-               *intersect_count = min_t(size_t, count1,
-                                        range2_start + count2 - range1_start);
-               *register_offset = range1_start - range2_start;
-               return true;
-       }
-
-       return false;
-}
-
 static ssize_t virtiovf_pci_read_config(struct vfio_device *core_vdev,
                                        char __user *buf, size_t count,
                                        loff_t *ppos)
@@ -178,16 +151,18 @@ static ssize_t virtiovf_pci_read_config(struct vfio_device *core_vdev,
        if (ret < 0)
                return ret;
 
-       if (range_intersect_range(pos, count, PCI_DEVICE_ID, sizeof(val16),
-                                 &copy_offset, &copy_count, &register_offset)) {
+       if (vfio_pci_core_range_intersect_range(pos, count, PCI_DEVICE_ID,
+                                               sizeof(val16), &copy_offset,
+                                               &copy_count, &register_offset)) {
                val16 = cpu_to_le16(VIRTIO_TRANS_ID_NET);
                if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, copy_count))
                        return -EFAULT;
        }
 
        if ((le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO) &&
-           range_intersect_range(pos, count, PCI_COMMAND, sizeof(val16),
-                                 &copy_offset, &copy_count, &register_offset)) {
+           vfio_pci_core_range_intersect_range(pos, count, PCI_COMMAND,
+                                               sizeof(val16), &copy_offset,
+                                               &copy_count, &register_offset)) {
                if (copy_from_user((void *)&val16 + register_offset, buf + copy_offset,
                                   copy_count))
                        return -EFAULT;
@@ -197,16 +172,18 @@ static ssize_t virtiovf_pci_read_config(struct vfio_device *core_vdev,
                        return -EFAULT;
        }
 
-       if (range_intersect_range(pos, count, PCI_REVISION_ID, sizeof(val8),
-                                 &copy_offset, &copy_count, &register_offset)) {
+       if (vfio_pci_core_range_intersect_range(pos, count, PCI_REVISION_ID,
+                                               sizeof(val8), &copy_offset,
+                                               &copy_count, &register_offset)) {
                /* Transitional needs to have revision 0 */
                val8 = 0;
                if (copy_to_user(buf + copy_offset, &val8, copy_count))
                        return -EFAULT;
        }
 
-       if (range_intersect_range(pos, count, PCI_BASE_ADDRESS_0, sizeof(val32),
-                                 &copy_offset, &copy_count, &register_offset)) {
+       if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_0,
+                                               sizeof(val32), &copy_offset,
+                                               &copy_count, &register_offset)) {
                u32 bar_mask = ~(virtvdev->bar0_virtual_buf_size - 1);
                u32 pci_base_addr_0 = le32_to_cpu(virtvdev->pci_base_addr_0);
 
@@ -215,8 +192,9 @@ static ssize_t virtiovf_pci_read_config(struct vfio_device *core_vdev,
                        return -EFAULT;
        }
 
-       if (range_intersect_range(pos, count, PCI_SUBSYSTEM_ID, sizeof(val16),
-                                 &copy_offset, &copy_count, &register_offset)) {
+       if (vfio_pci_core_range_intersect_range(pos, count, PCI_SUBSYSTEM_ID,
+                                               sizeof(val16), &copy_offset,
+                                               &copy_count, &register_offset)) {
                /*
                 * Transitional devices use the PCI subsystem device id as
                 * virtio device id, same as legacy driver always did.
@@ -227,8 +205,9 @@ static ssize_t virtiovf_pci_read_config(struct vfio_device *core_vdev,
                        return -EFAULT;
        }
 
-       if (range_intersect_range(pos, count, PCI_SUBSYSTEM_VENDOR_ID, sizeof(val16),
-                                 &copy_offset, &copy_count, &register_offset)) {
+       if (vfio_pci_core_range_intersect_range(pos, count, PCI_SUBSYSTEM_VENDOR_ID,
+                                               sizeof(val16), &copy_offset,
+                                               &copy_count, &register_offset)) {
                val16 = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET);
                if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset,
                                 copy_count))
@@ -270,19 +249,20 @@ static ssize_t virtiovf_pci_write_config(struct vfio_device *core_vdev,
        loff_t copy_offset;
        size_t copy_count;
 
-       if (range_intersect_range(pos, count, PCI_COMMAND, sizeof(virtvdev->pci_cmd),
-                                 &copy_offset, &copy_count,
-                                 &register_offset)) {
+       if (vfio_pci_core_range_intersect_range(pos, count, PCI_COMMAND,
+                                               sizeof(virtvdev->pci_cmd),
+                                               &copy_offset, &copy_count,
+                                               &register_offset)) {
                if (copy_from_user((void *)&virtvdev->pci_cmd + register_offset,
                                   buf + copy_offset,
                                   copy_count))
                        return -EFAULT;
        }
 
-       if (range_intersect_range(pos, count, PCI_BASE_ADDRESS_0,
-                                 sizeof(virtvdev->pci_base_addr_0),
-                                 &copy_offset, &copy_count,
-                                 &register_offset)) {
+       if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_0,
+                                               sizeof(virtvdev->pci_base_addr_0),
+                                               &copy_offset, &copy_count,
+                                               &register_offset)) {
                if (copy_from_user((void *)&virtvdev->pci_base_addr_0 + register_offset,
                                   buf + copy_offset,
                                   copy_count))
index 6464b3939ebcfb5304511d42aedc9bb5d9a3393a..485c6f9161a91be0276ed273602b9c158c823fad 100644 (file)
@@ -122,16 +122,16 @@ static const struct vfio_device_ops vfio_amba_ops = {
        .detach_ioas    = vfio_iommufd_physical_detach_ioas,
 };
 
-static const struct amba_id pl330_ids[] = {
+static const struct amba_id vfio_amba_ids[] = {
        { 0, 0 },
 };
 
-MODULE_DEVICE_TABLE(amba, pl330_ids);
+MODULE_DEVICE_TABLE(amba, vfio_amba_ids);
 
 static struct amba_driver vfio_amba_driver = {
        .probe = vfio_amba_probe,
        .remove = vfio_amba_remove,
-       .id_table = pl330_ids,
+       .id_table = vfio_amba_ids,
        .drv = {
                .name = "vfio-amba",
                .owner = THIS_MODULE,
index 8cf22fa65baa229404fc133cad75240e113f81b8..42d1462c5e19daf852777d395dced89e0d31fbd7 100644 (file)
@@ -85,14 +85,13 @@ static void vfio_platform_release_dev(struct vfio_device *core_vdev)
        vfio_platform_release_common(vdev);
 }
 
-static int vfio_platform_remove(struct platform_device *pdev)
+static void vfio_platform_remove(struct platform_device *pdev)
 {
        struct vfio_platform_device *vdev = dev_get_drvdata(&pdev->dev);
 
        vfio_unregister_group_dev(&vdev->vdev);
        pm_runtime_disable(vdev->device);
        vfio_put_device(&vdev->vdev);
-       return 0;
 }
 
 static const struct vfio_device_ops vfio_platform_ops = {
@@ -113,7 +112,7 @@ static const struct vfio_device_ops vfio_platform_ops = {
 
 static struct platform_driver vfio_platform_driver = {
        .probe          = vfio_platform_probe,
-       .remove         = vfio_platform_remove,
+       .remove_new     = vfio_platform_remove,
        .driver = {
                .name   = "vfio-platform",
        },
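
The platform bus discards the int returned from .remove, so converting
to the void-returning .remove_new callback (the transitional name for
the new prototype) makes that contract explicit. The converted shape,
with hypothetical names:

    static void foo_remove(struct platform_device *pdev)
    {
            /* teardown only; there is no error to report */
    }

    static struct platform_driver foo_driver = {
            .probe      = foo_probe,
            .remove_new = foo_remove,
    };
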
index 61a1bfb68ac7864b84c712c121e61190f230dcfa..ef41ecef83af113b1ecbc9c034e1150c696fae39 100644 (file)
@@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev,
        return 0;
 }
 
+/*
+ * The trigger eventfd is guaranteed valid in the interrupt path
+ * and protected by the igate mutex when triggered via ioctl.
+ */
+static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx)
+{
+       if (likely(irq_ctx->trigger))
+               eventfd_signal(irq_ctx->trigger);
+}
+
 static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
 {
        struct vfio_platform_irq *irq_ctx = dev_id;
@@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
        spin_unlock_irqrestore(&irq_ctx->lock, flags);
 
        if (ret == IRQ_HANDLED)
-               eventfd_signal(irq_ctx->trigger);
+               vfio_send_eventfd(irq_ctx);
 
        return ret;
 }
@@ -164,52 +174,40 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id)
 {
        struct vfio_platform_irq *irq_ctx = dev_id;
 
-       eventfd_signal(irq_ctx->trigger);
+       vfio_send_eventfd(irq_ctx);
 
        return IRQ_HANDLED;
 }
 
 static int vfio_set_trigger(struct vfio_platform_device *vdev, int index,
-                           int fd, irq_handler_t handler)
+                           int fd)
 {
        struct vfio_platform_irq *irq = &vdev->irqs[index];
        struct eventfd_ctx *trigger;
-       int ret;
 
        if (irq->trigger) {
-               irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN);
-               free_irq(irq->hwirq, irq);
-               kfree(irq->name);
+               disable_irq(irq->hwirq);
                eventfd_ctx_put(irq->trigger);
                irq->trigger = NULL;
        }
 
        if (fd < 0) /* Disable only */
                return 0;
-       irq->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-irq[%d](%s)",
-                             irq->hwirq, vdev->name);
-       if (!irq->name)
-               return -ENOMEM;
 
        trigger = eventfd_ctx_fdget(fd);
-       if (IS_ERR(trigger)) {
-               kfree(irq->name);
+       if (IS_ERR(trigger))
                return PTR_ERR(trigger);
-       }
 
        irq->trigger = trigger;
 
-       irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN);
-       ret = request_irq(irq->hwirq, handler, 0, irq->name, irq);
-       if (ret) {
-               kfree(irq->name);
-               eventfd_ctx_put(trigger);
-               irq->trigger = NULL;
-               return ret;
-       }
-
-       if (!irq->masked)
-               enable_irq(irq->hwirq);
+       /*
+        * irq->masked effectively provides nested disables within the overall
+        * enable relative to trigger.  Specifically request_irq() is called
+        * with NO_AUTOEN, therefore the IRQ is initially disabled.  The user
+        * may only further disable the IRQ with a MASK operation because
+        * irq->masked is initially false.
+        */
+       enable_irq(irq->hwirq);
 
        return 0;
 }
@@ -228,7 +226,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev,
                handler = vfio_irq_handler;
 
        if (!count && (flags & VFIO_IRQ_SET_DATA_NONE))
-               return vfio_set_trigger(vdev, index, -1, handler);
+               return vfio_set_trigger(vdev, index, -1);
 
        if (start != 0 || count != 1)
                return -EINVAL;
@@ -236,7 +234,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev,
        if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
                int32_t fd = *(int32_t *)data;
 
-               return vfio_set_trigger(vdev, index, fd, handler);
+               return vfio_set_trigger(vdev, index, fd);
        }
 
        if (flags & VFIO_IRQ_SET_DATA_NONE) {
@@ -260,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
                    unsigned start, unsigned count, uint32_t flags,
                    void *data) = NULL;
 
+       /*
+        * For compatibility, errors from request_irq() are local to the
+        * SET_IRQS path and reflected in the name pointer.  This allows,
+        * for example, polling mode fallback for an exclusive IRQ failure.
+        */
+       if (IS_ERR(vdev->irqs[index].name))
+               return PTR_ERR(vdev->irqs[index].name);
+
        switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
        case VFIO_IRQ_SET_ACTION_MASK:
                func = vfio_platform_set_irq_mask;
@@ -280,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
 
 int vfio_platform_irq_init(struct vfio_platform_device *vdev)
 {
-       int cnt = 0, i;
+       int cnt = 0, i, ret = 0;
 
        while (vdev->get_irq(vdev, cnt) >= 0)
                cnt++;
@@ -292,37 +298,70 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev)
 
        for (i = 0; i < cnt; i++) {
                int hwirq = vdev->get_irq(vdev, i);
+               irq_handler_t handler = vfio_irq_handler;
 
-               if (hwirq < 0)
+               if (hwirq < 0) {
+                       ret = -EINVAL;
                        goto err;
+               }
 
                spin_lock_init(&vdev->irqs[i].lock);
 
                vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD;
 
-               if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK)
+               if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) {
                        vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE
                                                | VFIO_IRQ_INFO_AUTOMASKED;
+                       handler = vfio_automasked_irq_handler;
+               }
 
                vdev->irqs[i].count = 1;
                vdev->irqs[i].hwirq = hwirq;
                vdev->irqs[i].masked = false;
+               vdev->irqs[i].name = kasprintf(GFP_KERNEL_ACCOUNT,
+                                              "vfio-irq[%d](%s)", hwirq,
+                                              vdev->name);
+               if (!vdev->irqs[i].name) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN,
+                                 vdev->irqs[i].name, &vdev->irqs[i]);
+               if (ret) {
+                       kfree(vdev->irqs[i].name);
+                       vdev->irqs[i].name = ERR_PTR(ret);
+               }
        }
 
        vdev->num_irqs = cnt;
 
        return 0;
 err:
+       for (--i; i >= 0; i--) {
+               if (!IS_ERR(vdev->irqs[i].name)) {
+                       free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]);
+                       kfree(vdev->irqs[i].name);
+               }
+       }
        kfree(vdev->irqs);
-       return -EINVAL;
+       return ret;
 }
 
 void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev)
 {
        int i;
 
-       for (i = 0; i < vdev->num_irqs; i++)
-               vfio_set_trigger(vdev, i, -1, NULL);
+       for (i = 0; i < vdev->num_irqs; i++) {
+               vfio_virqfd_disable(&vdev->irqs[i].mask);
+               vfio_virqfd_disable(&vdev->irqs[i].unmask);
+               if (!IS_ERR(vdev->irqs[i].name)) {
+                       free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]);
+                       if (vdev->irqs[i].trigger)
+                               eventfd_ctx_put(vdev->irqs[i].trigger);
+                       kfree(vdev->irqs[i].name);
+               }
+       }
 
        vdev->num_irqs = 0;
        kfree(vdev->irqs);
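
Taken together, these hunks make the request_irq()/free_irq() pair
persistent for the life of the device instead of tracking each SET_IRQS
eventfd. A sketch of the resulting lifecycle, using only the behavior
visible above:

    /*
     * init:       request_irq(hwirq, handler, IRQF_NO_AUTOEN, name, irq);
     *             - handler installed, line left disabled
     * set fd:     irq->trigger = eventfd_ctx_fdget(fd);
     *             enable_irq(hwirq);
     * clear/swap: disable_irq(hwirq);        - quiesce the handler
     *             eventfd_ctx_put(irq->trigger);
     *             irq->trigger = NULL;
     * cleanup:    free_irq(hwirq, irq);      - torn down only here
     */

The vfio_send_eventfd() wrapper exists precisely because the handler
can now fire while no trigger is attached; the likely(irq_ctx->trigger)
check covers that window.
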
index bde84ad344e50181685f5fbc2620c20b7b33f5a0..50128da18bcaf95f44f933018a59c1450f4d0d07 100644 (file)
@@ -434,7 +434,7 @@ static inline void vfio_virqfd_exit(void)
 }
 #endif
 
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
 void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm);
 void vfio_device_put_kvm(struct vfio_device *device);
 #else
index b2854d7939ce02ddf2be186e483c0f8c3f094a26..b5c15fe8f9fcf91911dcb391031713e517588297 100644 (file)
@@ -567,18 +567,6 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
        ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
                                    pages, NULL);
        if (ret > 0) {
-               int i;
-
-               /*
-                * The zero page is always resident, we don't need to pin it
-                * and it falls into our invalid/reserved test so we don't
-                * unpin in put_pfn().  Unpin all zero pages in the batch here.
-                */
-               for (i = 0 ; i < ret; i++) {
-                       if (unlikely(is_zero_pfn(page_to_pfn(pages[i]))))
-                               unpin_user_page(pages[i]);
-               }
-
                *pfn = page_to_pfn(pages[0]);
                goto done;
        }
index 1cc93aac99a290d903819635284860b48600ab5d..e97d796a54fbaf8da0fc3860b8bbb5ca5039acce 100644 (file)
@@ -16,7 +16,7 @@
 #include <linux/fs.h>
 #include <linux/idr.h>
 #include <linux/iommu.h>
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
 #include <linux/kvm_host.h>
 #endif
 #include <linux/list.h>
@@ -385,7 +385,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
 }
 EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
 
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
 void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
 {
        void (*pfn)(struct kvm *kvm);
index 29c564b7a6e13e40c690ce3894f7873ca55bb240..53226913380197e38e021bb2efa7fc7979c323c7 100644 (file)
@@ -101,6 +101,13 @@ static void virqfd_inject(struct work_struct *work)
                virqfd->thread(virqfd->opaque, virqfd->data);
 }
 
+static void virqfd_flush_inject(struct work_struct *work)
+{
+       struct virqfd *virqfd = container_of(work, struct virqfd, flush_inject);
+
+       flush_work(&virqfd->inject);
+}
+
 int vfio_virqfd_enable(void *opaque,
                       int (*handler)(void *, void *),
                       void (*thread)(void *, void *),
@@ -124,6 +131,7 @@ int vfio_virqfd_enable(void *opaque,
 
        INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
        INIT_WORK(&virqfd->inject, virqfd_inject);
+       INIT_WORK(&virqfd->flush_inject, virqfd_flush_inject);
 
        irqfd = fdget(fd);
        if (!irqfd.file) {
@@ -213,3 +221,16 @@ void vfio_virqfd_disable(struct virqfd **pvirqfd)
        flush_workqueue(vfio_irqfd_cleanup_wq);
 }
 EXPORT_SYMBOL_GPL(vfio_virqfd_disable);
+
+void vfio_virqfd_flush_thread(struct virqfd **pvirqfd)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&virqfd_lock, flags);
+       if (*pvirqfd && (*pvirqfd)->thread)
+               queue_work(vfio_irqfd_cleanup_wq, &(*pvirqfd)->flush_inject);
+       spin_unlock_irqrestore(&virqfd_lock, flags);
+
+       flush_workqueue(vfio_irqfd_cleanup_wq);
+}
+EXPORT_SYMBOL_GPL(vfio_virqfd_flush_thread);
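
vfio_virqfd_flush_thread() reuses the shutdown workqueue: queuing
flush_inject and then flushing the whole workqueue guarantees that any
previously queued inject work, and hence the ->thread callback, has
completed before the caller proceeds. A hedged usage sketch with a
hypothetical caller:

    /* drain any in-flight virqfd->thread() callback for this virqfd */
    vfio_virqfd_flush_thread(&ctx->unmask);
    /* now safe to tear down state that the thread callback touches */
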
index 264c8cedba159aadb6f5f3adb9a05d86ffd3ec88..c3bc5b78b749a295883dfcdea3c923ebef11fff4 100644 (file)
@@ -670,7 +670,7 @@ static void __exit hgafb_exit(void)
  *
  * ------------------------------------------------------------------------- */
 
-MODULE_AUTHOR("Ferenc Bakonyi (fero@drama.obuda.kando.hu)");
+MODULE_AUTHOR("Ferenc Bakonyi <fero@drama.obuda.kando.hu>");
 MODULE_DESCRIPTION("FBDev driver for Hercules Graphics Adaptor");
 MODULE_LICENSE("GPL");
 
index 138dc8d8ca3d8a6b4ae4114137a6bc28cb3f09a7..ae30e394d176e5714ee58b336843485f338db5a6 100644 (file)
@@ -378,11 +378,36 @@ static void hpwdt_exit(struct pci_dev *dev)
        pci_disable_device(dev);
 }
 
+static int hpwdt_suspend(struct device *dev)
+{
+       if (watchdog_active(&hpwdt_dev))
+               hpwdt_stop();
+
+       return 0;
+}
+
+static int hpwdt_resume(struct device *dev)
+{
+       if (watchdog_active(&hpwdt_dev))
+               hpwdt_start(&hpwdt_dev);
+
+       return 0;
+}
+
+static const struct dev_pm_ops hpwdt_pm_ops = {
+       LATE_SYSTEM_SLEEP_PM_OPS(hpwdt_suspend, hpwdt_resume)
+};
+
 static struct pci_driver hpwdt_driver = {
        .name = "hpwdt",
        .id_table = hpwdt_devices,
        .probe = hpwdt_init_one,
        .remove = hpwdt_exit,
+
+       .driver = {
+               .name = "hpwdt",
+               .pm = &hpwdt_pm_ops,
+       }
 };
 
 MODULE_AUTHOR("Tom Mingarelli");
index fb7fae750181bf3738299408241ee0db2f5f8e27..8d71f6a2236bfa7dfd864f17e32e11baa57e30ea 100644 (file)
@@ -9,15 +9,20 @@
  *      Contact: David Cohen <david.a.cohen@linux.intel.com>
  */
 
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/math.h>
 #include <linux/module.h>
-#include <linux/nmi.h>
+#include <linux/panic.h>
 #include <linux/platform_device.h>
+#include <linux/types.h>
 #include <linux/watchdog.h>
+
 #include <linux/platform_data/intel-mid_wdt.h>
 
 #include <asm/intel_scu_ipc.h>
-#include <asm/intel-mid.h>
 
 #define IPC_WATCHDOG 0xf8
 
@@ -122,7 +127,7 @@ static int mid_wdt_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct watchdog_device *wdt_dev;
-       struct intel_mid_wdt_pdata *pdata = dev->platform_data;
+       struct intel_mid_wdt_pdata *pdata = dev_get_platdata(dev);
        struct mid_wdt *mid;
        int ret;
 
index 9297a5891912748562f49921a7ff7dd925550304..3e8c15138eddadf5ff880788bad8dd1ebc3e483a 100644 (file)
@@ -213,12 +213,16 @@ static int wdt_stop(struct watchdog_device *wdd)
 
 /**
  *     wdt_set_timeout - set a new timeout value with watchdog ioctl
+ *     @wdd: pointer to the watchdog_device structure
  *     @t: timeout value in seconds
  *
 *     The hardware device has an 8 or 16 bit watchdog timer (depending on
 *     chip version) that can be configured to count seconds or minutes.
  *
  *     Used within WDIOC_SETTIMEOUT watchdog device ioctl.
+ *
+ *     Return: 0 if the timeout was set successfully, or a negative error code on
+ *     failure.
  */
 
 static int wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
index 9e790f0c2096cdbd5ab349690b036670a0e17216..006f9c61aa64fd2b4ee9db493aeb54c8fafac818 100644 (file)
@@ -41,6 +41,7 @@ static const u32 reg_offset_data_kpss[] = {
 struct qcom_wdt_match_data {
        const u32 *offset;
        bool pretimeout;
+       u32 max_tick_count;
 };
 
 struct qcom_wdt {
@@ -177,11 +178,13 @@ static const struct watchdog_info qcom_wdt_pt_info = {
 static const struct qcom_wdt_match_data match_data_apcs_tmr = {
        .offset = reg_offset_data_apcs_tmr,
        .pretimeout = false,
+       .max_tick_count = 0x10000000U,
 };
 
 static const struct qcom_wdt_match_data match_data_kpss = {
        .offset = reg_offset_data_kpss,
        .pretimeout = true,
+       .max_tick_count = 0xFFFFFU,
 };
 
 static int qcom_wdt_probe(struct platform_device *pdev)
@@ -236,7 +239,7 @@ static int qcom_wdt_probe(struct platform_device *pdev)
         */
        wdt->rate = clk_get_rate(clk);
        if (wdt->rate == 0 ||
-           wdt->rate > 0x10000000U) {
+           wdt->rate > data->max_tick_count) {
                dev_err(dev, "invalid clock rate\n");
                return -EINVAL;
        }
@@ -260,7 +263,7 @@ static int qcom_wdt_probe(struct platform_device *pdev)
 
        wdt->wdd.ops = &qcom_wdt_ops;
        wdt->wdd.min_timeout = 1;
-       wdt->wdd.max_timeout = 0x10000000U / wdt->rate;
+       wdt->wdd.max_timeout = data->max_tick_count / wdt->rate;
        wdt->wdd.parent = dev;
        wdt->layout = data->offset;
 
index 2756ed54ca3d55c2d4d25b9b9828101d73244e86..109e2e37e8f09e6f8afd61375f52316664bc744d 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/moduleparam.h>
 #include <linux/pm.h>
 #include <linux/property.h>
+#include <linux/reset.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
@@ -232,6 +233,7 @@ static int
 sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id)
 {
        struct sp805_wdt *wdt;
+       struct reset_control *rst;
        u64 rate = 0;
        int ret = 0;
 
@@ -264,6 +266,12 @@ sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id)
                return -ENODEV;
        }
 
+       rst = devm_reset_control_get_optional_exclusive(&adev->dev, NULL);
+       if (IS_ERR(rst))
+               return dev_err_probe(&adev->dev, PTR_ERR(rst), "Can not get reset\n");
+
+       reset_control_deassert(rst);
+
        wdt->adev = adev;
        wdt->wdd.info = &wdt_info;
        wdt->wdd.ops = &wdt_ops;
index e28ead24c520e19a40dffc833d76757f3a8c9430..b4b059883618b94e8b8d77d9170b5f8cd71c7419 100644 (file)
@@ -494,8 +494,13 @@ static int starfive_wdt_probe(struct platform_device *pdev)
        if (ret)
                goto err_exit;
 
-       if (!early_enable)
-               pm_runtime_put_sync(&pdev->dev);
+       if (!early_enable) {
+               if (pm_runtime_enabled(&pdev->dev)) {
+                       ret = pm_runtime_put_sync(&pdev->dev);
+                       if (ret)
+                               goto err_exit;
+               }
+       }
 
        return 0;
 
@@ -554,7 +559,10 @@ static int starfive_wdt_resume(struct device *dev)
        starfive_wdt_set_reload_count(wdt, wdt->reload);
        starfive_wdt_lock(wdt);
 
-       return starfive_wdt_start(wdt);
+       if (watchdog_active(&wdt->wdd))
+               return starfive_wdt_start(wdt);
+
+       return 0;
 }
 
 static int starfive_wdt_runtime_suspend(struct device *dev)
index d9fd50df9802ce036b9c49d249da043fc4cef232..5404e0387620218d38be644aa70a7e21f2d57edc 100644 (file)
@@ -20,6 +20,8 @@
 #include <linux/platform_device.h>
 #include <linux/watchdog.h>
 
+#define DEFAULT_TIMEOUT 10
+
 /* IWDG registers */
 #define IWDG_KR                0x00 /* Key register */
 #define IWDG_PR                0x04 /* Prescaler Register */
@@ -248,6 +250,7 @@ static int stm32_iwdg_probe(struct platform_device *pdev)
        wdd->parent = dev;
        wdd->info = &stm32_iwdg_info;
        wdd->ops = &stm32_iwdg_ops;
+       wdd->timeout = DEFAULT_TIMEOUT;
        wdd->min_timeout = DIV_ROUND_UP((RLR_MIN + 1) * PR_MIN, wdt->rate);
        wdd->max_hw_heartbeat_ms = ((RLR_MAX + 1) * wdt->data->max_prescaler *
                                    1000) / wdt->rate;
index 5b55ccae06d4b02a32637c4cd4662d7f1917863b..aff2c3912ead69cae51fe8228b9f568ab2a61b41 100644 (file)
@@ -260,12 +260,12 @@ static int __watchdog_register_device(struct watchdog_device *wdd)
        if (wdd->parent) {
                ret = of_alias_get_id(wdd->parent->of_node, "watchdog");
                if (ret >= 0)
-                       id = ida_simple_get(&watchdog_ida, ret,
-                                           ret + 1, GFP_KERNEL);
+                       id = ida_alloc_range(&watchdog_ida, ret, ret,
+                                            GFP_KERNEL);
        }
 
        if (id < 0)
-               id = ida_simple_get(&watchdog_ida, 0, MAX_DOGS, GFP_KERNEL);
+               id = ida_alloc_max(&watchdog_ida, MAX_DOGS - 1, GFP_KERNEL);
 
        if (id < 0)
                return id;
@@ -273,19 +273,20 @@ static int __watchdog_register_device(struct watchdog_device *wdd)
 
        ret = watchdog_dev_register(wdd);
        if (ret) {
-               ida_simple_remove(&watchdog_ida, id);
+               ida_free(&watchdog_ida, id);
                if (!(id == 0 && ret == -EBUSY))
                        return ret;
 
                /* Retry in case a legacy watchdog module exists */
-               id = ida_simple_get(&watchdog_ida, 1, MAX_DOGS, GFP_KERNEL);
+               id = ida_alloc_range(&watchdog_ida, 1, MAX_DOGS - 1,
+                                    GFP_KERNEL);
                if (id < 0)
                        return id;
                wdd->id = id;
 
                ret = watchdog_dev_register(wdd);
                if (ret) {
-                       ida_simple_remove(&watchdog_ida, id);
+                       ida_free(&watchdog_ida, id);
                        return ret;
                }
        }
@@ -309,7 +310,7 @@ static int __watchdog_register_device(struct watchdog_device *wdd)
                                pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
                                        wdd->id, ret);
                                watchdog_dev_unregister(wdd);
-                               ida_simple_remove(&watchdog_ida, id);
+                               ida_free(&watchdog_ida, id);
                                return ret;
                        }
                }
@@ -382,7 +383,7 @@ static void __watchdog_unregister_device(struct watchdog_device *wdd)
                unregister_reboot_notifier(&wdd->reboot_nb);
 
        watchdog_dev_unregister(wdd);
-       ida_simple_remove(&watchdog_ida, wdd->id);
+       ida_free(&watchdog_ida, wdd->id);
 }
 
 /**
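
The ida_simple_*() calls are converted to the current IDA API. The one
semantic wrinkle is that ida_simple_get() treated its end argument as
exclusive while ida_alloc_range() takes an inclusive max, which is why
ret + 1 becomes ret and MAX_DOGS becomes MAX_DOGS - 1:

    /*
     * ida_simple_get(ida, start, end, gfp)
     *   == ida_alloc_range(ida, start, (end) - 1, gfp)
     * ida_simple_remove(ida, id)
     *   == ida_free(ida, id)
     */
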
index 698c43dd5dc867984e5072d0c43897080c1177b1..9defa12208f98a715e5b894119f044fca50b3dc5 100644 (file)
@@ -179,16 +179,13 @@ extern int v9fs_vfs_rename(struct mnt_idmap *idmap,
                           struct inode *old_dir, struct dentry *old_dentry,
                           struct inode *new_dir, struct dentry *new_dentry,
                           unsigned int flags);
-extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
-                                        struct p9_fid *fid,
-                                        struct super_block *sb, int new);
+extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid);
 extern const struct inode_operations v9fs_dir_inode_operations_dotl;
 extern const struct inode_operations v9fs_file_inode_operations_dotl;
 extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
 extern const struct netfs_request_ops v9fs_req_ops;
-extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
-                                             struct p9_fid *fid,
-                                             struct super_block *sb, int new);
+extern struct inode *v9fs_fid_iget_dotl(struct super_block *sb,
+                                       struct p9_fid *fid);
 
 /* other default globals */
 #define V9FS_PORT      564
@@ -230,27 +227,9 @@ v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
                        struct super_block *sb)
 {
        if (v9fs_proto_dotl(v9ses))
-               return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 0);
+               return v9fs_fid_iget_dotl(sb, fid);
        else
-               return v9fs_inode_from_fid(v9ses, fid, sb, 0);
-}
-
-/**
- * v9fs_get_new_inode_from_fid - Helper routine to populate an inode by
- * issuing a attribute request
- * @v9ses: session information
- * @fid: fid to issue attribute request for
- * @sb: superblock on which to create inode
- *
- */
-static inline struct inode *
-v9fs_get_new_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
-                           struct super_block *sb)
-{
-       if (v9fs_proto_dotl(v9ses))
-               return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 1);
-       else
-               return v9fs_inode_from_fid(v9ses, fid, sb, 1);
+               return v9fs_fid_iget(sb, fid);
 }
 
 #endif
index 0e8418066a482f5ce6332372b3af1259ef02237a..7923c3c347cbd54475d45f255a67dc5fc30df162 100644 (file)
@@ -40,13 +40,16 @@ extern struct kmem_cache *v9fs_inode_cache;
 
 struct inode *v9fs_alloc_inode(struct super_block *sb);
 void v9fs_free_inode(struct inode *inode);
-struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode,
-                            dev_t rdev);
 void v9fs_set_netfs_context(struct inode *inode);
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-                   struct inode *inode, umode_t mode, dev_t rdev);
+                   struct inode *inode, struct p9_qid *qid, umode_t mode, dev_t rdev);
 void v9fs_evict_inode(struct inode *inode);
-ino_t v9fs_qid2ino(struct p9_qid *qid);
+#if (BITS_PER_LONG == 32)
+#define QID2INO(q) ((ino_t) (((q)->path+2) ^ (((q)->path) >> 32)))
+#else
+#define QID2INO(q) ((ino_t) ((q)->path+2))
+#endif
+
 void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
                      struct super_block *sb, unsigned int flags);
 void v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
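
QID2INO() folds the 64-bit qid path into an ino_t, XORing the high word
into the low word on 32-bit so that paths differing only above bit 31
still tend to map to distinct inode numbers (the +2 presumably keeps
the result away from the reserved value 0). A worked example for a
32-bit ino_t:

    /*
     * path = 0x0000000100000003:
     *   (path + 2) ^ (path >> 32) = 0x100000005 ^ 0x1 = 0x100000004
     *   truncated to 32 bits      = 0x00000004
     * path = 0x0000000000000003 maps to 0x00000005,
     * so the two stay distinct after truncation.
     */
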
index 4102759a5cb56df30ccc72c627f2f8844f7ccf57..e0d34e4e9076e3b1a6c5ed07a3e009a50c9fa2a9 100644 (file)
@@ -127,7 +127,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
                        }
 
                        over = !dir_emit(ctx, st.name, strlen(st.name),
-                                        v9fs_qid2ino(&st.qid), dt_type(&st));
+                                       QID2INO(&st.qid), dt_type(&st));
                        p9stat_free(&st);
                        if (over)
                                return 0;
@@ -184,7 +184,7 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx)
 
                        if (!dir_emit(ctx, curdirent.d_name,
                                      strlen(curdirent.d_name),
-                                     v9fs_qid2ino(&curdirent.qid),
+                                     QID2INO(&curdirent.qid),
                                      curdirent.d_type))
                                return 0;
 
index 32572982f72e68a6db3967d9ab9ba9d51c8bae9c..360a5304ec03ce53e5bab61e2177a5f325569ebf 100644 (file)
@@ -253,9 +253,12 @@ void v9fs_set_netfs_context(struct inode *inode)
 }
 
 int v9fs_init_inode(struct v9fs_session_info *v9ses,
-                   struct inode *inode, umode_t mode, dev_t rdev)
+                   struct inode *inode, struct p9_qid *qid, umode_t mode, dev_t rdev)
 {
        int err = 0;
+       struct v9fs_inode *v9inode = V9FS_I(inode);
+
+       memcpy(&v9inode->qid, qid, sizeof(struct p9_qid));
 
        inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
        inode->i_blocks = 0;
@@ -331,36 +334,6 @@ error:
 
 }
 
-/**
- * v9fs_get_inode - helper function to setup an inode
- * @sb: superblock
- * @mode: mode to setup inode with
- * @rdev: The device numbers to set
- */
-
-struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev)
-{
-       int err;
-       struct inode *inode;
-       struct v9fs_session_info *v9ses = sb->s_fs_info;
-
-       p9_debug(P9_DEBUG_VFS, "super block: %p mode: %ho\n", sb, mode);
-
-       inode = new_inode(sb);
-       if (!inode) {
-               pr_warn("%s (%d): Problem allocating inode\n",
-                       __func__, task_pid_nr(current));
-               return ERR_PTR(-ENOMEM);
-       }
-       err = v9fs_init_inode(v9ses, inode, mode, rdev);
-       if (err) {
-               iput(inode);
-               return ERR_PTR(err);
-       }
-       v9fs_set_netfs_context(inode);
-       return inode;
-}
-
 /**
  * v9fs_evict_inode - Remove an inode from the inode cache
  * @inode: inode to release
@@ -384,82 +357,40 @@ void v9fs_evict_inode(struct inode *inode)
 #endif
 }
 
-static int v9fs_test_inode(struct inode *inode, void *data)
-{
-       int umode;
-       dev_t rdev;
-       struct v9fs_inode *v9inode = V9FS_I(inode);
-       struct p9_wstat *st = (struct p9_wstat *)data;
-       struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-
-       umode = p9mode2unixmode(v9ses, st, &rdev);
-       /* don't match inode of different type */
-       if (inode_wrong_type(inode, umode))
-               return 0;
-
-       /* compare qid details */
-       if (memcmp(&v9inode->qid.version,
-                  &st->qid.version, sizeof(v9inode->qid.version)))
-               return 0;
-
-       if (v9inode->qid.type != st->qid.type)
-               return 0;
-
-       if (v9inode->qid.path != st->qid.path)
-               return 0;
-       return 1;
-}
-
-static int v9fs_test_new_inode(struct inode *inode, void *data)
-{
-       return 0;
-}
-
-static int v9fs_set_inode(struct inode *inode,  void *data)
-{
-       struct v9fs_inode *v9inode = V9FS_I(inode);
-       struct p9_wstat *st = (struct p9_wstat *)data;
-
-       memcpy(&v9inode->qid, &st->qid, sizeof(st->qid));
-       return 0;
-}
-
-static struct inode *v9fs_qid_iget(struct super_block *sb,
-                                  struct p9_qid *qid,
-                                  struct p9_wstat *st,
-                                  int new)
+struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid)
 {
        dev_t rdev;
        int retval;
        umode_t umode;
-       unsigned long i_ino;
        struct inode *inode;
+       struct p9_wstat *st;
        struct v9fs_session_info *v9ses = sb->s_fs_info;
-       int (*test)(struct inode *inode, void *data);
 
-       if (new)
-               test = v9fs_test_new_inode;
-       else
-               test = v9fs_test_inode;
-
-       i_ino = v9fs_qid2ino(qid);
-       inode = iget5_locked(sb, i_ino, test, v9fs_set_inode, st);
-       if (!inode)
+       inode = iget_locked(sb, QID2INO(&fid->qid));
+       if (unlikely(!inode))
                return ERR_PTR(-ENOMEM);
        if (!(inode->i_state & I_NEW))
                return inode;
+
        /*
         * initialize the inode with the stat info
         * FIXME!! we may need support for stale inodes
         * later.
         */
-       inode->i_ino = i_ino;
+       st = p9_client_stat(fid);
+       if (IS_ERR(st)) {
+               retval = PTR_ERR(st);
+               goto error;
+       }
+
        umode = p9mode2unixmode(v9ses, st, &rdev);
-       retval = v9fs_init_inode(v9ses, inode, umode, rdev);
+       retval = v9fs_init_inode(v9ses, inode, &fid->qid, umode, rdev);
+       v9fs_stat2inode(st, inode, sb, 0);
+       p9stat_free(st);
+       kfree(st);
        if (retval)
                goto error;
 
-       v9fs_stat2inode(st, inode, sb, 0);
        v9fs_set_netfs_context(inode);
        v9fs_cache_inode_get_cookie(inode);
        unlock_new_inode(inode);
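
v9fs_fid_iget() now follows the stock iget_locked() pattern: look the
inode up by number, return it straight away if it was already cached,
and only issue the 9P stat when I_NEW shows the inode must be filled
in. The shape of that pattern, for reference:

    inode = iget_locked(sb, ino);
    if (!inode)
            return ERR_PTR(-ENOMEM);
    if (!(inode->i_state & I_NEW))
            return inode;            /* cached and initialized */
    /* ... populate the new inode from the server ... */
    unlock_new_inode(inode);         /* clears I_NEW, wakes waiters */

This is what lets the old test/set callbacks and the new-versus-cached
distinction go away: inode identity is now purely the QID2INO()-derived
inode number.
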
@@ -470,23 +401,6 @@ error:
 
 }
 
-struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
-                   struct super_block *sb, int new)
-{
-       struct p9_wstat *st;
-       struct inode *inode = NULL;
-
-       st = p9_client_stat(fid);
-       if (IS_ERR(st))
-               return ERR_CAST(st);
-
-       inode = v9fs_qid_iget(sb, &st->qid, st, new);
-       p9stat_free(st);
-       kfree(st);
-       return inode;
-}
-
 /**
  * v9fs_at_to_dotl_flags- convert Linux specific AT flags to
  * plan 9 AT flag.
@@ -633,7 +547,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
                /*
                 * instantiate inode and assign the unopened fid to the dentry
                 */
-               inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
+               inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
                if (IS_ERR(inode)) {
                        err = PTR_ERR(inode);
                        p9_debug(P9_DEBUG_VFS,
@@ -761,10 +675,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
                inode = NULL;
        else if (IS_ERR(fid))
                inode = ERR_CAST(fid);
-       else if (v9ses->cache & (CACHE_META|CACHE_LOOSE))
-               inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
        else
-               inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
+               inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
        /*
         * If we had a rename on the server and a parallel lookup
         * for the new name, then make sure we instantiate with
@@ -1186,26 +1098,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
        v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
 }
 
-/**
- * v9fs_qid2ino - convert qid into inode number
- * @qid: qid to hash
- *
- * BUG: potential for inode number collisions?
- */
-
-ino_t v9fs_qid2ino(struct p9_qid *qid)
-{
-       u64 path = qid->path + 2;
-       ino_t i = 0;
-
-       if (sizeof(ino_t) == sizeof(path))
-               memcpy(&i, &path, sizeof(ino_t));
-       else
-               i = (ino_t) (path ^ (path >> 32));
-
-       return i;
-}
-
 /**
  * v9fs_vfs_get_link - follow a symlink path
  * @dentry: dentry for symlink
index 3505227e170402be03b2df40ff8f3ba2994a07d2..ef9db3e035062b189be8e408c15372e0dd11b3c6 100644 (file)
@@ -52,78 +52,33 @@ static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
        return current_fsgid();
 }
 
-static int v9fs_test_inode_dotl(struct inode *inode, void *data)
-{
-       struct v9fs_inode *v9inode = V9FS_I(inode);
-       struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
-
-       /* don't match inode of different type */
-       if (inode_wrong_type(inode, st->st_mode))
-               return 0;
-
-       if (inode->i_generation != st->st_gen)
-               return 0;
-
-       /* compare qid details */
-       if (memcmp(&v9inode->qid.version,
-                  &st->qid.version, sizeof(v9inode->qid.version)))
-               return 0;
-
-       if (v9inode->qid.type != st->qid.type)
-               return 0;
-
-       if (v9inode->qid.path != st->qid.path)
-               return 0;
-       return 1;
-}
-
-/* Always get a new inode */
-static int v9fs_test_new_inode_dotl(struct inode *inode, void *data)
-{
-       return 0;
-}
-
-static int v9fs_set_inode_dotl(struct inode *inode,  void *data)
-{
-       struct v9fs_inode *v9inode = V9FS_I(inode);
-       struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
-
-       memcpy(&v9inode->qid, &st->qid, sizeof(st->qid));
-       inode->i_generation = st->st_gen;
-       return 0;
-}
-
-static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
-                                       struct p9_qid *qid,
-                                       struct p9_fid *fid,
-                                       struct p9_stat_dotl *st,
-                                       int new)
+struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid)
 {
        int retval;
-       unsigned long i_ino;
        struct inode *inode;
+       struct p9_stat_dotl *st;
        struct v9fs_session_info *v9ses = sb->s_fs_info;
-       int (*test)(struct inode *inode, void *data);
-
-       if (new)
-               test = v9fs_test_new_inode_dotl;
-       else
-               test = v9fs_test_inode_dotl;
 
-       i_ino = v9fs_qid2ino(qid);
-       inode = iget5_locked(sb, i_ino, test, v9fs_set_inode_dotl, st);
-       if (!inode)
+       inode = iget_locked(sb, QID2INO(&fid->qid));
+       if (unlikely(!inode))
                return ERR_PTR(-ENOMEM);
        if (!(inode->i_state & I_NEW))
                return inode;
+
        /*
         * initialize the inode with the stat info
         * FIXME!! we may need support for stale inodes
         * later.
         */
-       inode->i_ino = i_ino;
-       retval = v9fs_init_inode(v9ses, inode,
+       st = p9_client_getattr_dotl(fid, P9_STATS_BASIC | P9_STATS_GEN);
+       if (IS_ERR(st)) {
+               retval = PTR_ERR(st);
+               goto error;
+       }
+
+       retval = v9fs_init_inode(v9ses, inode, &fid->qid,
                                 st->st_mode, new_decode_dev(st->st_rdev));
+       kfree(st);
        if (retval)
                goto error;
 
@@ -135,6 +90,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
                goto error;
 
        unlock_new_inode(inode);
+
        return inode;
 error:
        iget_failed(inode);
@@ -142,22 +98,6 @@ error:
 
 }
 
-struct inode *
-v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
-                        struct super_block *sb, int new)
-{
-       struct p9_stat_dotl *st;
-       struct inode *inode = NULL;
-
-       st = p9_client_getattr_dotl(fid, P9_STATS_BASIC | P9_STATS_GEN);
-       if (IS_ERR(st))
-               return ERR_CAST(st);
-
-       inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st, new);
-       kfree(st);
-       return inode;
-}
-
 struct dotl_openflag_map {
        int open_flag;
        int dotl_flag;
@@ -307,7 +247,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
                p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
                goto out;
        }
-       inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
+       inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -402,32 +342,17 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap,
        }
 
        /* instantiate inode and assign the unopened fid to the dentry */
-       if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
-               inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
-               if (IS_ERR(inode)) {
-                       err = PTR_ERR(inode);
-                       p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
-                                err);
-                       goto error;
-               }
-               v9fs_fid_add(dentry, &fid);
-               v9fs_set_create_acl(inode, fid, dacl, pacl);
-               d_instantiate(dentry, inode);
-               err = 0;
-       } else {
-               /*
-                * Not in cached mode. No need to populate
-                * inode with stat. We need to get an inode
-                * so that we can set the acl with dentry
-                */
-               inode = v9fs_get_inode(dir->i_sb, mode, 0);
-               if (IS_ERR(inode)) {
-                       err = PTR_ERR(inode);
-                       goto error;
-               }
-               v9fs_set_create_acl(inode, fid, dacl, pacl);
-               d_instantiate(dentry, inode);
+       inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+       if (IS_ERR(inode)) {
+               err = PTR_ERR(inode);
+               p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
+                        err);
+               goto error;
        }
+       v9fs_fid_add(dentry, &fid);
+       v9fs_set_create_acl(inode, fid, dacl, pacl);
+       d_instantiate(dentry, inode);
+       err = 0;
        inc_nlink(dir);
        v9fs_invalidate_inode_attr(dir);
 error:
@@ -709,14 +634,11 @@ v9fs_vfs_symlink_dotl(struct mnt_idmap *idmap, struct inode *dir,
        kgid_t gid;
        const unsigned char *name;
        struct p9_qid qid;
-       struct inode *inode;
        struct p9_fid *dfid;
        struct p9_fid *fid = NULL;
-       struct v9fs_session_info *v9ses;
 
        name = dentry->d_name.name;
        p9_debug(P9_DEBUG_VFS, "%lu,%s,%s\n", dir->i_ino, name, symname);
-       v9ses = v9fs_inode2v9ses(dir);
 
        dfid = v9fs_parent_fid(dentry);
        if (IS_ERR(dfid)) {
@@ -736,36 +658,6 @@ v9fs_vfs_symlink_dotl(struct mnt_idmap *idmap, struct inode *dir,
        }
 
        v9fs_invalidate_inode_attr(dir);
-       if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
-               /* Now walk from the parent so we can get an unopened fid. */
-               fid = p9_client_walk(dfid, 1, &name, 1);
-               if (IS_ERR(fid)) {
-                       err = PTR_ERR(fid);
-                       p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
-                                err);
-                       goto error;
-               }
-
-               /* instantiate inode and assign the unopened fid to dentry */
-               inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
-               if (IS_ERR(inode)) {
-                       err = PTR_ERR(inode);
-                       p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
-                                err);
-                       goto error;
-               }
-               v9fs_fid_add(dentry, &fid);
-               d_instantiate(dentry, inode);
-               err = 0;
-       } else {
-               /* Not in cached mode. No need to populate inode with stat */
-               inode = v9fs_get_inode(dir->i_sb, S_IFLNK, 0);
-               if (IS_ERR(inode)) {
-                       err = PTR_ERR(inode);
-                       goto error;
-               }
-               d_instantiate(dentry, inode);
-       }
 
 error:
        p9_fid_put(fid);
@@ -888,33 +780,17 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir,
                         err);
                goto error;
        }
-
-       /* instantiate inode and assign the unopened fid to the dentry */
-       if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
-               inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
-               if (IS_ERR(inode)) {
-                       err = PTR_ERR(inode);
-                       p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
-                                err);
-                       goto error;
-               }
-               v9fs_set_create_acl(inode, fid, dacl, pacl);
-               v9fs_fid_add(dentry, &fid);
-               d_instantiate(dentry, inode);
-               err = 0;
-       } else {
-               /*
-                * Not in cached mode. No need to populate inode with stat.
-                * socket syscall returns a fd, so we need instantiate
-                */
-               inode = v9fs_get_inode(dir->i_sb, mode, rdev);
-               if (IS_ERR(inode)) {
-                       err = PTR_ERR(inode);
-                       goto error;
-               }
-               v9fs_set_create_acl(inode, fid, dacl, pacl);
-               d_instantiate(dentry, inode);
+       inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+       if (IS_ERR(inode)) {
+               err = PTR_ERR(inode);
+               p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
+                        err);
+               goto error;
        }
+       v9fs_set_create_acl(inode, fid, dacl, pacl);
+       v9fs_fid_add(dentry, &fid);
+       d_instantiate(dentry, inode);
+       err = 0;
 error:
        p9_fid_put(fid);
        v9fs_put_acl(dacl, pacl);
index 941f7d0e0bfa27e67aa34a9c7eebec3a65fc6f99..4236058c7bbd18b726925e808e73bcb6c08edfeb 100644 (file)
@@ -110,7 +110,6 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
        struct inode *inode = NULL;
        struct dentry *root = NULL;
        struct v9fs_session_info *v9ses = NULL;
-       umode_t mode = 0777 | S_ISVTX;
        struct p9_fid *fid;
        int retval = 0;
 
@@ -140,7 +139,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
        else
                sb->s_d_op = &v9fs_dentry_operations;
 
-       inode = v9fs_get_inode(sb, S_IFDIR | mode, 0);
+       inode = v9fs_get_inode_from_fid(v9ses, fid, sb);
        if (IS_ERR(inode)) {
                retval = PTR_ERR(inode);
                goto release_sb;
@@ -152,32 +151,6 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                goto release_sb;
        }
        sb->s_root = root;
-       if (v9fs_proto_dotl(v9ses)) {
-               struct p9_stat_dotl *st = NULL;
-
-               st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
-               if (IS_ERR(st)) {
-                       retval = PTR_ERR(st);
-                       goto release_sb;
-               }
-               d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
-               v9fs_stat2inode_dotl(st, d_inode(root), 0);
-               kfree(st);
-       } else {
-               struct p9_wstat *st = NULL;
-
-               st = p9_client_stat(fid);
-               if (IS_ERR(st)) {
-                       retval = PTR_ERR(st);
-                       goto release_sb;
-               }
-
-               d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
-               v9fs_stat2inode(st, d_inode(root), sb, 0);
-
-               p9stat_free(st);
-               kfree(st);
-       }
        retval = v9fs_get_acl(inode, fid);
        if (retval)
                goto release_sb;
@@ -271,21 +244,6 @@ done:
        return res;
 }
 
-static int v9fs_drop_inode(struct inode *inode)
-{
-       struct v9fs_session_info *v9ses;
-
-       v9ses = v9fs_inode2v9ses(inode);
-       if (v9ses->cache & (CACHE_META|CACHE_LOOSE))
-               return generic_drop_inode(inode);
-       /*
-        * in case of non cached mode always drop the
-        * inode because we want the inode attribute
-        * to always match that on the server.
-        */
-       return 1;
-}
-
 static int v9fs_write_inode(struct inode *inode,
                            struct writeback_control *wbc)
 {
@@ -320,7 +278,6 @@ static const struct super_operations v9fs_super_ops_dotl = {
        .alloc_inode = v9fs_alloc_inode,
        .free_inode = v9fs_free_inode,
        .statfs = v9fs_statfs,
-       .drop_inode = v9fs_drop_inode,
        .evict_inode = v9fs_evict_inode,
        .show_options = v9fs_show_options,
        .umount_begin = v9fs_umount_begin,
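
With the iget conversion, the mount path no longer stats the root fid
by hand; the generic helper fetches attributes on demand. The resulting
call flow, as a sketch:

    /*
     * v9fs_mount()
     *   -> v9fs_get_inode_from_fid(v9ses, fid, sb)
     *      -> v9fs_fid_iget[_dotl](sb, fid)
     *         -> iget_locked(sb, QID2INO(&fid->qid))
     *         -> p9_client_stat() / p9_client_getattr_dotl()  (I_NEW only)
     */
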
index ea2f77446080ef9f22ed4f4e860b77b2f4ce8167..a46b0cbc4d8f61a69d427368300582a18f285f74 100644 (file)
@@ -60,7 +60,6 @@ endif # BLOCK
 config FS_DAX
        bool "File system based Direct Access (DAX) support"
        depends on MMU
-       depends on !(ARM || MIPS || SPARC)
        depends on ZONE_DEVICE || FS_DAX_LIMITED
        select FS_IOMAP
        select DAX
@@ -261,6 +260,7 @@ menuconfig HUGETLBFS
        depends on X86 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN
        depends on (SYSFS || SYSCTL)
        select MEMFD_CREATE
+       select PADATA if SMP
        help
          hugetlbfs is a filesystem backing for HugeTLB pages, based on
          ramfs. For architectures that support it, say Y here and read
index 8a67fc427e748a0840d9e92c1c0e8e4a3d7c4fdc..67afe68972d5c7e62a5db77f41d5b81916eb432d 100644 (file)
@@ -474,16 +474,6 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
                        continue;
                }
 
-               /* Don't expose silly rename entries to userspace. */
-               if (nlen > 6 &&
-                   dire->u.name[0] == '.' &&
-                   ctx->actor != afs_lookup_filldir &&
-                   ctx->actor != afs_lookup_one_filldir &&
-                   memcmp(dire->u.name, ".__afs", 6) == 0) {
-                       ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
-                       continue;
-               }
-
                /* found the next entry */
                if (!dir_emit(ctx, dire->u.name, nlen,
                              ntohl(dire->u.vnode),
index 700a27bc8c257e920714359ceb168d7c6d7d6055..ed04bd1eeae89793219aacd1ada6efb61aa6ba03 100644 (file)
@@ -602,6 +602,8 @@ iterate_address:
                goto wait_for_more_probe_results;
 
        alist = op->estate->addresses;
+       best_prio = -1;
+       addr_index = 0;
        for (i = 0; i < alist->nr_addrs; i++) {
                if (alist->addrs[i].prio > best_prio) {
                        addr_index = i;
@@ -609,9 +611,7 @@ iterate_address:
                }
        }
 
-       addr_index = READ_ONCE(alist->preferred);
-       if (!test_bit(addr_index, &set))
-               addr_index = __ffs(set);
+       alist->preferred = addr_index;
 
        op->addr_index = addr_index;
        set_bit(addr_index, &op->addr_tried);
@@ -656,12 +656,6 @@ wait_for_more_probe_results:
 next_server:
        trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
        _debug("next");
-       ASSERT(op->estate);
-       alist = op->estate->addresses;
-       if (op->call_responded &&
-           op->addr_index != READ_ONCE(alist->preferred) &&
-           test_bit(alist->preferred, &op->addr_tried))
-               WRITE_ONCE(alist->preferred, op->addr_index);
        op->estate = NULL;
        goto pick_server;
 
@@ -690,14 +684,7 @@ no_more_servers:
 failed:
        trace_afs_rotate(op, afs_rotate_trace_failed, 0);
        op->flags |= AFS_OPERATION_STOP;
-       if (op->estate) {
-               alist = op->estate->addresses;
-               if (op->call_responded &&
-                   op->addr_index != READ_ONCE(alist->preferred) &&
-                   test_bit(alist->preferred, &op->addr_tried))
-                       WRITE_ONCE(alist->preferred, op->addr_index);
-               op->estate = NULL;
-       }
+       op->estate = NULL;
        _leave(" = f [failed %d]", afs_op_error(op));
        return false;
 }
index 46b37f2cce7d907562ef5014762b5cd37e99775a..32a53fc8dfb26b292c0817155a7c9d387d66064d 100644 (file)
@@ -122,6 +122,9 @@ bool afs_check_validity(const struct afs_vnode *vnode)
        const struct afs_volume *volume = vnode->volume;
        time64_t deadline = ktime_get_real_seconds() + 10;
 
+       if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+               return true;
+
        if (atomic_read(&volume->cb_v_check) != atomic_read(&volume->cb_v_break) ||
            atomic64_read(&vnode->cb_expires_at)  <= deadline ||
            volume->cb_expires_at <= deadline ||
@@ -389,12 +392,17 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
               key_serial(key));
 
        if (afs_check_validity(vnode))
-               return 0;
+               return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0;
 
        ret = down_write_killable(&vnode->validate_lock);
        if (ret < 0)
                goto error;
 
+       if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+               ret = -ESTALE;
+               goto error_unlock;
+       }
+
        /* Validate a volume after the v_break has changed or the volume
         * callback expired.  We only want to do this once per volume per
         * v_break change.  The actual work will be done when parsing the
@@ -448,12 +456,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
        vnode->cb_ro_snapshot = cb_ro_snapshot;
        vnode->cb_scrub = cb_scrub;
 
-       if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-               _debug("file already deleted");
-               ret = -ESTALE;
-               goto error_unlock;
-       }
-
        /* if the vnode's data version number changed then its contents are
         * different */
        zap |= test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
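
The net effect of relocating the deleted-vnode checks, as a sketch of
the two entry points:

    /*
     * afs_check_validity() - returns true at once if AFS_VNODE_DELETED,
     *                        since there is nothing left to revalidate
     * afs_validate()       - returns -ESTALE for a deleted vnode, both
     *                        on the fast path and again under
     *                        validate_lock
     */

so callers fail fast with -ESTALE instead of fetching status for a
vnode the server has already removed.
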
index 1a05cecda7cc5c47695911e7d715aa215f26688d..b02796c8a595339a7127a4e96a90c4927ef85e60 100644 (file)
@@ -82,6 +82,7 @@ bcachefs-y            :=      \
        super-io.o              \
        sysfs.o                 \
        tests.o                 \
+       time_stats.o            \
        thread_with_file.o      \
        trace.o                 \
        two_state_shared_lock.o \
@@ -90,3 +91,6 @@ bcachefs-y            :=      \
        xattr.o
 
 obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST)   += mean_and_variance_test.o
+
+# Silence "note: xyz changed in GCC X.X" messages
+subdir-ccflags-y += $(call cc-disable-warning, psabi)
index fd3e175d83423261d68124cd26fc0351488ad05e..c47f72f2bd586f6c15bc42e019cc97edc0f6030a 100644 (file)
@@ -29,6 +29,8 @@
 #include <linux/sched/task.h>
 #include <linux/sort.h>
 
+static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket);
+
 /* Persistent alloc info: */
 
 static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
@@ -860,23 +862,28 @@ int bch2_trigger_alloc(struct btree_trans *trans,
                        *bucket_gen(ca, new.k->p.offset) = new_a->gen;
 
                bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
+               percpu_up_read(&c->mark_lock);
+
+#define eval_state(_a, expr)           ({ const struct bch_alloc_v4 *a = _a; expr; })
+#define statechange(expr)              !eval_state(old_a, expr) && eval_state(new_a, expr)
+#define bucket_flushed(a)              (!a->journal_seq || a->journal_seq <= c->journal.flushed_seq_ondisk)
 
-               if (new_a->data_type == BCH_DATA_free &&
-                   (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk))
+               if (statechange(a->data_type == BCH_DATA_free) &&
+                   bucket_flushed(new_a))
                        closure_wake_up(&c->freelist_wait);
 
-               if (new_a->data_type == BCH_DATA_need_discard &&
-                   (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk))
-                       bch2_do_discards(c);
+               if (statechange(a->data_type == BCH_DATA_need_discard) &&
+                   !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
+                   bucket_flushed(new_a))
+                       bch2_discard_one_bucket_fast(c, new.k->p);
 
-               if (old_a->data_type != BCH_DATA_cached &&
-                   new_a->data_type == BCH_DATA_cached &&
+               if (statechange(a->data_type == BCH_DATA_cached) &&
+                   !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
                    should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
                        bch2_do_invalidates(c);
 
-               if (new_a->data_type == BCH_DATA_need_gc_gens)
+               if (statechange(a->data_type == BCH_DATA_need_gc_gens))
                        bch2_do_gc_gens(c);
-               percpu_up_read(&c->mark_lock);
        }
 
        if ((flags & BTREE_TRIGGER_GC) &&
@@ -1045,14 +1052,13 @@ int bch2_check_alloc_key(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       if (k.k->type != discard_key_type &&
-           (c->opts.reconstruct_alloc ||
-            fsck_err(c, need_discard_key_wrong,
-                     "incorrect key in need_discard btree (got %s should be %s)\n"
-                     "  %s",
-                     bch2_bkey_types[k.k->type],
-                     bch2_bkey_types[discard_key_type],
-                     (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
+       if (fsck_err_on(k.k->type != discard_key_type,
+                       c, need_discard_key_wrong,
+                       "incorrect key in need_discard btree (got %s should be %s)\n"
+                       "  %s",
+                       bch2_bkey_types[k.k->type],
+                       bch2_bkey_types[discard_key_type],
+                       (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
                struct bkey_i *update =
                        bch2_trans_kmalloc(trans, sizeof(*update));
 
@@ -1076,15 +1082,14 @@ int bch2_check_alloc_key(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       if (k.k->type != freespace_key_type &&
-           (c->opts.reconstruct_alloc ||
-            fsck_err(c, freespace_key_wrong,
-                     "incorrect key in freespace btree (got %s should be %s)\n"
-                     "  %s",
-                     bch2_bkey_types[k.k->type],
-                     bch2_bkey_types[freespace_key_type],
-                     (printbuf_reset(&buf),
-                      bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
+       if (fsck_err_on(k.k->type != freespace_key_type,
+                       c, freespace_key_wrong,
+                       "incorrect key in freespace btree (got %s should be %s)\n"
+                       "  %s",
+                       bch2_bkey_types[k.k->type],
+                       bch2_bkey_types[freespace_key_type],
+                       (printbuf_reset(&buf),
+                        bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
                struct bkey_i *update =
                        bch2_trans_kmalloc(trans, sizeof(*update));
 
@@ -1108,14 +1113,13 @@ int bch2_check_alloc_key(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       if (a->gen != alloc_gen(k, gens_offset) &&
-           (c->opts.reconstruct_alloc ||
-            fsck_err(c, bucket_gens_key_wrong,
-                     "incorrect gen in bucket_gens btree (got %u should be %u)\n"
-                     "  %s",
-                     alloc_gen(k, gens_offset), a->gen,
-                     (printbuf_reset(&buf),
-                      bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)))) {
+       if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
+                       c, bucket_gens_key_wrong,
+                       "incorrect gen in bucket_gens btree (got %u should be %u)\n"
+                       "  %s",
+                       alloc_gen(k, gens_offset), a->gen,
+                       (printbuf_reset(&buf),
+                        bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
                struct bkey_i_bucket_gens *g =
                        bch2_trans_kmalloc(trans, sizeof(*g));
 
@@ -1167,14 +1171,13 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
 
        *end = bkey_min(k.k->p, *end);
 
-       if (k.k->type != KEY_TYPE_set &&
-           (c->opts.reconstruct_alloc ||
-            fsck_err(c, freespace_hole_missing,
-                     "hole in alloc btree missing in freespace btree\n"
-                     "  device %llu buckets %llu-%llu",
-                     freespace_iter->pos.inode,
-                     freespace_iter->pos.offset,
-                     end->offset))) {
+       if (fsck_err_on(k.k->type != KEY_TYPE_set,
+                       c, freespace_hole_missing,
+                       "hole in alloc btree missing in freespace btree\n"
+                       "  device %llu buckets %llu-%llu",
+                       freespace_iter->pos.inode,
+                       freespace_iter->pos.offset,
+                       end->offset)) {
                struct bkey_i *update =
                        bch2_trans_kmalloc(trans, sizeof(*update));
 
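The four hunks above (need_discard, freespace, bucket_gens, and the freespace-hole check) all apply the same mechanical conversion: the open-coded "condition && (c->opts.reconstruct_alloc || fsck_err(...))" pattern collapses into a single fsck_err_on() call, with the reconstruct_alloc handling folded into the fsck machinery. A rough behavioural model of the converted call sites (hedged; the real macro carries extra fix/ignore flags):

    /*
     * Simplified model, not the real macro: when the condition holds,
     * report the error through fsck and return true iff the caller
     * should go ahead and repair it.
     */
    #define fsck_err_on(cond, c, err_type, ...)                             \
            ((cond) ? fsck_err(c, err_type, __VA_ARGS__) : false)
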
@@ -1604,6 +1607,36 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
        return ret;
 }
 
+static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
+{
+       int ret;
+
+       mutex_lock(&c->discard_buckets_in_flight_lock);
+       darray_for_each(c->discard_buckets_in_flight, i)
+               if (bkey_eq(*i, bucket)) {
+                       ret = -EEXIST;
+                       goto out;
+               }
+
+       ret = darray_push(&c->discard_buckets_in_flight, bucket);
+out:
+       mutex_unlock(&c->discard_buckets_in_flight_lock);
+       return ret;
+}
+
+static void discard_in_flight_remove(struct bch_fs *c, struct bpos bucket)
+{
+       mutex_lock(&c->discard_buckets_in_flight_lock);
+       darray_for_each(c->discard_buckets_in_flight, i)
+               if (bkey_eq(*i, bucket)) {
+                       darray_remove_item(&c->discard_buckets_in_flight, i);
+                       goto found;
+               }
+       BUG();
+found:
+       mutex_unlock(&c->discard_buckets_in_flight_lock);
+}
+
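+/*
+ * Together these two helpers implement a small mutex-protected set on top
+ * of a darray: discard_in_flight_add() returns -EEXIST when the bucket is
+ * already claimed, and discard_in_flight_remove() BUG()s if asked to drop
+ * a bucket that was never added. An illustrative caller, not from the
+ * patch (dev_idx/bucket_nr are placeholders):
+ *
+ *     static int example_discard(struct bch_fs *c, unsigned dev_idx, u64 bucket_nr)
+ *     {
+ *             struct bpos bucket = POS(dev_idx, bucket_nr);
+ *
+ *             if (discard_in_flight_add(c, bucket))
+ *                     return 0;       // already in flight: someone else owns it
+ *
+ *             // ... issue the discard for this bucket ...
+ *
+ *             discard_in_flight_remove(c, bucket);
+ *             return 0;
+ *     }
+ */
+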
 struct discard_buckets_state {
        u64             seen;
        u64             open;
@@ -1642,6 +1675,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
        struct bch_dev *ca;
        struct bkey_i_alloc_v4 *a;
        struct printbuf buf = PRINTBUF;
+       bool discard_locked = false;
        int ret = 0;
 
        ca = bch_dev_bkey_exists(c, pos.inode);
@@ -1709,6 +1743,11 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
                goto out;
        }
 
+       if (discard_in_flight_add(c, SPOS(iter.pos.inode, iter.pos.offset, true)))
+               goto out;
+
+       discard_locked = true;
+
        if (!bkey_eq(*discard_pos_done, iter.pos) &&
            ca->mi.discard && !c->opts.nochanges) {
                /*
@@ -1740,6 +1779,8 @@ write:
        count_event(c, bucket_discard);
        s->discarded++;
 out:
+       if (discard_locked)
+               discard_in_flight_remove(c, iter.pos);
        s->seen++;
        bch2_trans_iter_exit(trans, &iter);
        percpu_ref_put(&ca->io_ref);
@@ -1779,6 +1820,93 @@ void bch2_do_discards(struct bch_fs *c)
                bch2_write_ref_put(c, BCH_WRITE_REF_discard);
 }
 
+static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpos bucket)
+{
+       struct btree_iter iter;
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, bucket, BTREE_ITER_INTENT);
+       struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
+       int ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut(trans, k);
+       ret = PTR_ERR_OR_ZERO(a);
+       if (ret)
+               goto err;
+
+       SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
+       a->v.data_type = alloc_data_type(a->v, a->v.data_type);
+
+       ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+err:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+static void bch2_do_discards_fast_work(struct work_struct *work)
+{
+       struct bch_fs *c = container_of(work, struct bch_fs, discard_fast_work);
+
+       while (1) {
+               bool got_bucket = false;
+               struct bpos bucket;
+               struct bch_dev *ca;
+
+               mutex_lock(&c->discard_buckets_in_flight_lock);
+               darray_for_each(c->discard_buckets_in_flight, i) {
+                       if (i->snapshot)
+                               continue;
+
+                       ca = bch_dev_bkey_exists(c, i->inode);
+
+                       if (!percpu_ref_tryget(&ca->io_ref)) {
+                               darray_remove_item(&c->discard_buckets_in_flight, i);
+                               continue;
+                       }
+
+                       got_bucket = true;
+                       bucket = *i;
+                       i->snapshot = true;
+                       break;
+               }
+               mutex_unlock(&c->discard_buckets_in_flight_lock);
+
+               if (!got_bucket)
+                       break;
+
+               if (ca->mi.discard && !c->opts.nochanges)
+                       blkdev_issue_discard(ca->disk_sb.bdev,
+                                            bucket.offset * ca->mi.bucket_size,
+                                            ca->mi.bucket_size,
+                                            GFP_KERNEL);
+
+               int ret = bch2_trans_do(c, NULL, NULL,
+                                       BCH_WATERMARK_btree|
+                                       BCH_TRANS_COMMIT_no_enospc,
+                                       bch2_clear_bucket_needs_discard(trans, bucket));
+               bch_err_fn(c, ret);
+
+               percpu_ref_put(&ca->io_ref);
+               discard_in_flight_remove(c, bucket);
+
+               if (ret)
+                       break;
+       }
+
+       bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+}
+
+static void bch2_discard_one_bucket_fast(struct bch_fs *c, struct bpos bucket)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
+
+       if (!percpu_ref_is_dying(&ca->io_ref) &&
+           !discard_in_flight_add(c, bucket) &&
+           bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) &&
+           !queue_work(c->write_ref_wq, &c->discard_fast_work))
+               bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
+}
+
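
bch2_discard_one_bucket_fast() follows a recurring bcachefs idiom for kicking off background work: take a filesystem write ref, queue the work item, and drop the ref immediately if queue_work() reports the item was already pending; otherwise the worker drops it on exit, as bch2_do_discards_fast_work() does above. The idiom stripped to its essentials (placeholder function name):

    /*
     * Exactly one write ref is held for as long as the work item is
     * queued or running; queue_work() returning false means the item
     * was already pending, so the ref taken here is redundant.
     */
    static void example_kick_work(struct bch_fs *c)
    {
            if (bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast) &&
                !queue_work(c->write_ref_wq, &c->discard_fast_work))
                    bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
    }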
 static int invalidate_one_bucket(struct btree_trans *trans,
                                 struct btree_iter *lru_iter,
                                 struct bkey_s_c lru_k,
@@ -2210,9 +2338,16 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
                        set_bit(ca->dev_idx, c->rw_devs[i].d);
 }
 
+void bch2_fs_allocator_background_exit(struct bch_fs *c)
+{
+       darray_exit(&c->discard_buckets_in_flight);
+}
+
 void bch2_fs_allocator_background_init(struct bch_fs *c)
 {
        spin_lock_init(&c->freelist_lock);
+       mutex_init(&c->discard_buckets_in_flight_lock);
        INIT_WORK(&c->discard_work, bch2_do_discards_work);
+       INIT_WORK(&c->discard_fast_work, bch2_do_discards_fast_work);
        INIT_WORK(&c->invalidate_work, bch2_do_invalidates_work);
 }
index e7f7e842ee1b725f1373e4782cc34e1c9b83afa7..052b2fac25d693c7dddba5077fc9caeec2d246dd 100644
@@ -269,6 +269,7 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *);
 void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
 
+void bch2_fs_allocator_background_exit(struct bch_fs *);
 void bch2_fs_allocator_background_init(struct bch_fs *);
 
 #endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
index 633d3223b353f83e83501601024dd262952236c6..ca58193dd90279b6d6081f06954690f214ba3a42 100644
@@ -236,8 +236,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
                if (cl)
                        closure_wait(&c->open_buckets_wait, cl);
 
-               track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket],
-                                  &c->blocked_allocate_open_bucket, true);
+               track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true);
                spin_unlock(&c->freelist_lock);
                return ERR_PTR(-BCH_ERR_open_buckets_empty);
        }
@@ -263,11 +262,8 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
        ca->nr_open_buckets++;
        bch2_open_bucket_hash_add(c, ob);
 
-       track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket],
-                          &c->blocked_allocate_open_bucket, false);
-
-       track_event_change(&c->times[BCH_TIME_blocked_allocate],
-                          &c->blocked_allocate, false);
+       track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], false);
+       track_event_change(&c->times[BCH_TIME_blocked_allocate], false);
 
        spin_unlock(&c->freelist_lock);
        return ob;
@@ -555,8 +551,7 @@ again:
                        goto again;
                }
 
-               track_event_change(&c->times[BCH_TIME_blocked_allocate],
-                                  &c->blocked_allocate, true);
+               track_event_change(&c->times[BCH_TIME_blocked_allocate], true);
 
                ob = ERR_PTR(-BCH_ERR_freelist_empty);
                goto err;
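
These hunks mirror a signature change to track_event_change() made elsewhere in the series: with the standalone blocked_allocate/blocked_allocate_open_bucket counters removed from struct bch_fs (see the bch_fs hunk further below), the helper presumably tracks blocked/unblocked transitions entirely inside struct bch2_time_stats. A hypothetical sketch of the two-argument form, not the series' actual definition (assumes a last_event_start field):

    /*
     * Record the duration of a "blocked" episode directly in the
     * time_stats when the state flips back off.
     */
    static inline void track_event_change(struct bch2_time_stats *stats, bool v)
    {
            if (v != !!stats->last_event_start) {
                    if (v) {
                            stats->last_event_start = local_clock();
                    } else {
                            bch2_time_stats_update(stats, stats->last_event_start);
                            stats->last_event_start = 0;
                    }
            }
    }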
index 569b97904da42eec8975e8662dd78895d41d62fe..8cb35ea572cb95ce13a956b21a4ec3a40ac39cdb 100644
@@ -131,8 +131,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
        printbuf_exit(&buf);
 
        if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
-               bch2_inconsistent_error(c);
-               return -EIO;
+               return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
        } else {
                return 0;
        }
@@ -478,8 +477,7 @@ missing:
        prt_printf(&buf, "\nbp pos ");
        bch2_bpos_to_text(&buf, bp_iter.pos);
 
-       if (c->opts.reconstruct_alloc ||
-           fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
+       if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
                ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
 
        goto out;
@@ -555,60 +553,61 @@ static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
        };
 }
 
-static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+static u64 mem_may_pin_bytes(struct bch_fs *c)
 {
        struct sysinfo i;
-       u64 mem_bytes;
-
        si_meminfo(&i);
-       mem_bytes = i.totalram * i.mem_unit;
-       return div_u64(mem_bytes >> 1, c->opts.btree_node_size);
+
+       u64 mem_bytes = i.totalram * i.mem_unit;
+       return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
+}
+
+static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
+{
+       return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
 }
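
The replacement sizes the pin budget as a configurable percentage of total RAM (via the fsck_memory_usage_percent option this series adds) rather than a hard-coded half. Worked numbers under assumed inputs, 16 GiB of RAM, a setting of 50, and 256 KiB btree nodes:

    /* Worked example with assumed inputs, not real probing code. */
    u64 mem_bytes   = 16ULL << 30;                          /* 16 GiB total RAM */
    u64 may_pin     = div_u64(mem_bytes * 50, 100);         /* 8 GiB budget */
    size_t nodes    = div_u64(may_pin, 256 << 10);          /* 32768 nodes */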
 
 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
-                                       unsigned btree_leaf_mask,
-                                       unsigned btree_interior_mask,
+                                       u64 btree_leaf_mask,
+                                       u64 btree_interior_mask,
                                        struct bbpos start, struct bbpos *end)
 {
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
-       enum btree_id btree;
+       struct bch_fs *c = trans->c;
+       s64 mem_may_pin = mem_may_pin_bytes(c);
        int ret = 0;
 
-       for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
-               unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
+       btree_interior_mask |= btree_leaf_mask;
+
+       c->btree_cache.pinned_nodes_leaf_mask           = btree_leaf_mask;
+       c->btree_cache.pinned_nodes_interior_mask       = btree_interior_mask;
+       c->btree_cache.pinned_nodes_start               = start;
+       c->btree_cache.pinned_nodes_end                 = *end = BBPOS_MAX;
+
+       for (enum btree_id btree = start.btree;
+            btree < BTREE_ID_NR && !ret;
+            btree++) {
+               unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
+               struct btree_iter iter;
+               struct btree *b;
 
                if (!((1U << btree) & btree_leaf_mask) &&
                    !((1U << btree) & btree_interior_mask))
                        continue;
 
-               bch2_trans_node_iter_init(trans, &iter, btree,
-                                         btree == start.btree ? start.pos : POS_MIN,
-                                         0, depth, 0);
-               /*
-                * for_each_btree_key_continue() doesn't check the return value
-                * from bch2_btree_iter_advance(), which is needed when
-                * iterating over interior nodes where we'll see keys at
-                * SPOS_MAX:
-                */
-               do {
-                       k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
-                       ret = bkey_err(k);
-                       if (!k.k || ret)
-                               break;
-
-                       --btree_nodes;
-                       if (!btree_nodes) {
-                               *end = BBPOS(btree, k.k->p);
+               __for_each_btree_node(trans, iter, btree,
+                                     btree == start.btree ? start.pos : POS_MIN,
+                                     0, depth, BTREE_ITER_PREFETCH, b, ret) {
+                       mem_may_pin -= btree_buf_bytes(b);
+                       if (mem_may_pin <= 0) {
+                               c->btree_cache.pinned_nodes_end = *end =
+                                       BBPOS(btree, b->key.k.p);
                                bch2_trans_iter_exit(trans, &iter);
                                return 0;
                        }
-               } while (bch2_btree_iter_advance(&iter));
+               }
                bch2_trans_iter_exit(trans, &iter);
        }
 
-       *end = BBPOS_MAX;
        return ret;
 }
 
@@ -666,62 +665,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
        return 0;
 }
 
-static struct bpos bucket_pos_to_bp_safe(const struct bch_fs *c,
-                                        struct bpos bucket)
-{
-       return bch2_dev_exists2(c, bucket.inode)
-               ? bucket_pos_to_bp(c, bucket, 0)
-               : bucket;
-}
-
-static int bch2_get_alloc_in_memory_pos(struct btree_trans *trans,
-                                       struct bpos start, struct bpos *end)
-{
-       struct btree_iter alloc_iter;
-       struct btree_iter bp_iter;
-       struct bkey_s_c alloc_k, bp_k;
-       size_t btree_nodes = btree_nodes_fit_in_ram(trans->c);
-       bool alloc_end = false, bp_end = false;
-       int ret = 0;
-
-       bch2_trans_node_iter_init(trans, &alloc_iter, BTREE_ID_alloc,
-                                 start, 0, 1, 0);
-       bch2_trans_node_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
-                                 bucket_pos_to_bp_safe(trans->c, start), 0, 1, 0);
-       while (1) {
-               alloc_k = !alloc_end
-                       ? __bch2_btree_iter_peek_and_restart(trans, &alloc_iter, 0)
-                       : bkey_s_c_null;
-               bp_k = !bp_end
-                       ? __bch2_btree_iter_peek_and_restart(trans, &bp_iter, 0)
-                       : bkey_s_c_null;
-
-               ret = bkey_err(alloc_k) ?: bkey_err(bp_k);
-               if ((!alloc_k.k && !bp_k.k) || ret) {
-                       *end = SPOS_MAX;
-                       break;
-               }
-
-               --btree_nodes;
-               if (!btree_nodes) {
-                       *end = alloc_k.k ? alloc_k.k->p : SPOS_MAX;
-                       break;
-               }
-
-               if (bpos_lt(alloc_iter.pos, SPOS_MAX) &&
-                   bpos_lt(bucket_pos_to_bp_safe(trans->c, alloc_iter.pos), bp_iter.pos)) {
-                       if (!bch2_btree_iter_advance(&alloc_iter))
-                               alloc_end = true;
-               } else {
-                       if (!bch2_btree_iter_advance(&bp_iter))
-                               bp_end = true;
-               }
-       }
-       bch2_trans_iter_exit(trans, &bp_iter);
-       bch2_trans_iter_exit(trans, &alloc_iter);
-       return ret;
-}
-
 int bch2_check_extents_to_backpointers(struct bch_fs *c)
 {
        struct btree_trans *trans = bch2_trans_get(c);
@@ -732,10 +675,16 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
        bkey_init(&s.last_flushed.k->k);
 
        while (1) {
-               ret = bch2_get_alloc_in_memory_pos(trans, s.bucket_start, &s.bucket_end);
+               struct bbpos end;
+               ret = bch2_get_btree_in_memory_pos(trans,
+                               BIT_ULL(BTREE_ID_backpointers),
+                               BIT_ULL(BTREE_ID_backpointers),
+                               BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
                if (ret)
                        break;
 
+               s.bucket_end = end.pos;
+
                if ( bpos_eq(s.bucket_start, POS_MIN) &&
                    !bpos_eq(s.bucket_end, SPOS_MAX))
                        bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
@@ -763,6 +712,9 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
        bch2_trans_put(trans);
        bch2_bkey_buf_exit(&s.last_flushed, c);
 
+       c->btree_cache.pinned_nodes_leaf_mask = 0;
+       c->btree_cache.pinned_nodes_interior_mask = 0;
+
        bch_err_fn(c, ret);
        return ret;
 }
@@ -868,6 +820,9 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
        }
        bch2_trans_put(trans);
 
+       c->btree_cache.pinned_nodes_leaf_mask = 0;
+       c->btree_cache.pinned_nodes_interior_mask = 0;
+
        bch_err_fn(c, ret);
        return ret;
 }
index 5198e94cf3b89c09f88c1304bc1aa2ff5f7cc35a..f63893344f80aa721554ba3f95124cfc824edbee 100644
@@ -13,6 +13,6 @@ static inline struct bbpos BBPOS(enum btree_id btree, struct bpos pos)
 }
 
 #define BBPOS_MIN      BBPOS(0, POS_MIN)
-#define BBPOS_MAX      BBPOS(BTREE_ID_NR - 1, POS_MAX)
+#define BBPOS_MAX      BBPOS(BTREE_ID_NR - 1, SPOS_MAX)
 
 #endif /* _BCACHEFS_BBPOS_TYPES_H */
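
The switch from POS_MAX to SPOS_MAX matters because bbpos ranges are now compared against positions carrying live snapshot fields; under the usual bcachefs definitions (assumed here, not shown in this diff), SPOS_MAX also saturates the snapshot field, so BBPOS_MAX again sorts at or after every real position:

    /* Assumed definitions from bcachefs_format.h: */
    #define POS_MAX         SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, 0)
    #define SPOS_MAX        SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX)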
index 69d0d60d50e366edf9e56ba101dda047536f5338..339dc3e1dcd39939b5f021db2665190ea07ceee9 100644
 #include "recovery_types.h"
 #include "sb-errors_types.h"
 #include "seqmutex.h"
+#include "time_stats.h"
 #include "util.h"
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -265,6 +266,9 @@ do {                                                                        \
 
 #define bch2_fmt(_c, fmt)              bch2_log_msg(_c, fmt "\n")
 
+__printf(2, 3)
+void bch2_print_opts(struct bch_opts *, const char *, ...);
+
 __printf(2, 3)
 void __bch2_print(struct bch_fs *c, const char *fmt, ...);
 
@@ -504,6 +508,7 @@ enum gc_phase {
        GC_PHASE_BTREE_deleted_inodes,
        GC_PHASE_BTREE_logged_ops,
        GC_PHASE_BTREE_rebalance_work,
+       GC_PHASE_BTREE_subvolume_children,
 
        GC_PHASE_PENDING_DELETE,
 };
@@ -593,7 +598,7 @@ struct bch_dev {
 
        /* The rest of this all shows up in sysfs */
        atomic64_t              cur_latency[2];
-       struct bch2_time_stats  io_latency[2];
+       struct bch2_time_stats_quantiles io_latency[2];
 
 #define CONGESTED_MAX          1024
        atomic_t                congested;
@@ -663,6 +668,8 @@ struct journal_seq_blacklist_table {
 };
 
 struct journal_keys {
+       /* must match layout in darray_types.h */
+       size_t                  nr, size;
        struct journal_key {
                u64             journal_seq;
                u32             journal_offset;
@@ -671,15 +678,13 @@ struct journal_keys {
                bool            allocated;
                bool            overwritten;
                struct bkey_i   *k;
-       }                       *d;
+       }                       *data;
        /*
         * Gap buffer: instead of all the empty space in the array being at the
         * end of the buffer - from @nr to @size - the empty space is at @gap.
         * This means that sequential insertions are O(n) instead of O(n^2).
         */
        size_t                  gap;
-       size_t                  nr;
-       size_t                  size;
        atomic_t                ref;
        bool                    initial_ref_held;
 };
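
Moving nr and size to the top of struct journal_keys (and renaming d to data) is what the "must match layout in darray_types.h" comment is about: with the fields in that order the structure is layout-compatible with a DARRAY(struct journal_key), so the generic darray helpers can operate on it directly. Assumed shape of the darray generator (hedged sketch of darray_types.h):

    /*
     * Any struct that leads with nr/size followed by the element pointer
     * is layout-compatible with DARRAY(T) of the matching element type.
     */
    #define DARRAY(_type)                                                   \
    struct {                                                                \
            size_t nr, size;                                                \
            _type *data;                                                    \
    }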
@@ -703,6 +708,7 @@ struct btree_trans_buf {
        x(reflink)                                                      \
        x(fallocate)                                                    \
        x(discard)                                                      \
+       x(discard_fast)                                                 \
        x(invalidate)                                                   \
        x(delete_dead_snapshots)                                        \
        x(snapshot_delete_pagecache)                                    \
@@ -919,8 +925,6 @@ struct bch_fs {
        /* ALLOCATOR */
        spinlock_t              freelist_lock;
        struct closure_waitlist freelist_wait;
-       u64                     blocked_allocate;
-       u64                     blocked_allocate_open_bucket;
 
        open_bucket_idx_t       open_buckets_freelist;
        open_bucket_idx_t       open_buckets_nr_free;
@@ -940,8 +944,11 @@ struct bch_fs {
        unsigned                write_points_nr;
 
        struct buckets_waiting_for_journal buckets_waiting_for_journal;
-       struct work_struct      discard_work;
        struct work_struct      invalidate_work;
+       struct work_struct      discard_work;
+       struct mutex            discard_buckets_in_flight_lock;
+       DARRAY(struct bpos)     discard_buckets_in_flight;
+       struct work_struct      discard_fast_work;
 
        /* GARBAGE COLLECTION */
        struct task_struct      *gc_thread;
index 0668b682a21ca8e035cae73f73e6774c99eaeb94..bff8750ac0d743aa22f2cbea9effbf77bf6be725 100644
@@ -189,7 +189,11 @@ struct bversion {
        __u32           hi;
        __u64           lo;
 #endif
-} __packed __aligned(4);
+} __packed
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+__aligned(4)
+#endif
+;
 
 struct bkey {
        /* Size of combined key and value, in u64s */
@@ -222,7 +226,36 @@ struct bkey {
 
        __u8            pad[1];
 #endif
-} __packed __aligned(8);
+} __packed
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+/*
+ * The big-endian version of bkey can't be compiled by rustc with the "aligned"
+ * attr since it doesn't allow types to have both "packed" and "aligned" attrs.
+ * So for Rust compatibility, don't include this. It can be included in the LE
+ * version because the "packed" attr is redundant in that case.
+ *
+ * History: (quoting Kent)
+ *
+ * Specifically, when I was designing bkey, I wanted the header to be no
+ * bigger than necessary so that bkey_packed could use the rest. That means that
+ * decently often extent keys will fit into only 8 bytes, instead of spilling over
+ * to 16.
+ *
+ * But packed_bkey treats the part after the header - the packed section -
+ * as a single multi word, variable length integer. And bkey, the unpacked
+ * version, is just a special case version of a bkey_packed; all the packed
+ * bkey code will work on keys in any packed format, the in-memory
+ * representation of an unpacked key also is just one type of packed key...
+ *
+ * So that constrains the key part of a big endian bkey to start right
+ * after the header.
+ *
+ * If we ever do a bkey_v2 and need to expand the header by another byte for
+ * some reason - that will clean up this wart.
+ */
+__aligned(8)
+#endif
+;
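
With the __aligned(8) attribute now conditional on byte order, a compile-time check can pin the expectation down. A hypothetical sanity check, not part of the patch:

    /* Hypothetical checks: LE builds must keep the documented alignment. */
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    _Static_assert(_Alignof(struct bkey) == 8, "bkey alignment changed");
    _Static_assert(_Alignof(struct bversion) == 4, "bversion alignment changed");
    #endif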
 
 struct bkey_packed {
        __u64           _data[0];
@@ -840,7 +873,9 @@ struct bch_sb_field_downgrade {
        x(snapshot_skiplists,           BCH_VERSION(1,  1))             \
        x(deleted_inodes,               BCH_VERSION(1,  2))             \
        x(rebalance_work,               BCH_VERSION(1,  3))             \
-       x(member_seq,                   BCH_VERSION(1,  4))
+       x(member_seq,                   BCH_VERSION(1,  4))             \
+       x(subvolume_fs_parent,          BCH_VERSION(1,  5))             \
+       x(btree_subvolume_children,     BCH_VERSION(1,  6))
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
@@ -1275,7 +1310,8 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
        x(dev_usage,            8)              \
        x(log,                  9)              \
        x(overwrite,            10)             \
-       x(write_buffer_keys,    11)
+       x(write_buffer_keys,    11)             \
+       x(datetime,             12)
 
 enum {
 #define x(f, nr)       BCH_JSET_ENTRY_##f      = nr,
@@ -1376,6 +1412,11 @@ struct jset_entry_log {
        u8                      d[];
 } __packed __aligned(8);
 
+struct jset_entry_datetime {
+       struct jset_entry       entry;
+       __le64                  seconds;
+} __packed __aligned(8);
+
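
The new datetime journal-entry type carries a wall-clock timestamp alongside the journal sequence numbers. A hedged sketch of how a writer might fill one in (illustrative helper; entry-header fields other than type are elided, and ktime_get_real_seconds() is the standard kernel API):

    /* Sketch: stamp a journal entry with the current wall-clock time. */
    static void example_fill_datetime(struct jset_entry_datetime *d)
    {
            d->entry.type   = BCH_JSET_ENTRY_datetime;
            d->seconds      = cpu_to_le64(ktime_get_real_seconds());
    }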
 /*
  * On disk format for a journal entry:
  * seq is monotonically increasing; every journal entry has its own unique
@@ -1482,7 +1523,9 @@ enum btree_id_flags {
          BIT_ULL(KEY_TYPE_logged_op_truncate)|                                 \
          BIT_ULL(KEY_TYPE_logged_op_finsert))                                  \
        x(rebalance_work,       18,     BTREE_ID_SNAPSHOT_FIELD,                \
-         BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))
+         BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie))                       \
+       x(subvolume_children,   19,     0,                                      \
+         BIT_ULL(KEY_TYPE_set))
 
 enum btree_id {
 #define x(name, nr, ...) BTREE_ID_##name = nr,
index 831be01809f2c9271d4db159377decd1b8686bb6..cf23ff47bed8be588593a7fb193ee21ca8298c65 100644
@@ -4,7 +4,7 @@
 
 #include <linux/bug.h>
 #include "bcachefs_format.h"
-
+#include "bkey_types.h"
 #include "btree_types.h"
 #include "util.h"
 #include "vstructs.h"
@@ -31,57 +31,6 @@ void bch2_bkey_packed_to_binary_text(struct printbuf *,
                                     const struct bkey_format *,
                                     const struct bkey_packed *);
 
-/* bkey with split value, const */
-struct bkey_s_c {
-       const struct bkey       *k;
-       const struct bch_val    *v;
-};
-
-/* bkey with split value */
-struct bkey_s {
-       union {
-       struct {
-               struct bkey     *k;
-               struct bch_val  *v;
-       };
-       struct bkey_s_c         s_c;
-       };
-};
-
-#define bkey_p_next(_k)                vstruct_next(_k)
-
-static inline struct bkey_i *bkey_next(struct bkey_i *k)
-{
-       return (struct bkey_i *) ((u64 *) k->_data + k->k.u64s);
-}
-
-#define bkey_val_u64s(_k)      ((_k)->u64s - BKEY_U64s)
-
-static inline size_t bkey_val_bytes(const struct bkey *k)
-{
-       return bkey_val_u64s(k) * sizeof(u64);
-}
-
-static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s)
-{
-       unsigned u64s = BKEY_U64s + val_u64s;
-
-       BUG_ON(u64s > U8_MAX);
-       k->u64s = u64s;
-}
-
-static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
-{
-       set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64)));
-}
-
-#define bkey_val_end(_k)       ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))
-
-#define bkey_deleted(_k)       ((_k)->type == KEY_TYPE_deleted)
-
-#define bkey_whiteout(_k)                              \
-       ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)
-
 enum bkey_lr_packed {
        BKEY_PACKED_BOTH,
        BKEY_PACKED_RIGHT,
@@ -362,10 +311,7 @@ static inline struct bpos bkey_start_pos(const struct bkey *k)
 static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
                                      const struct bkey_packed *k)
 {
-       unsigned ret = bkey_packed(k) ? format->key_u64s : BKEY_U64s;
-
-       EBUG_ON(k->u64s < ret);
-       return ret;
+       return bkey_packed(k) ? format->key_u64s : BKEY_U64s;
 }
 
 static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
@@ -553,155 +499,6 @@ static inline void bkey_reassemble(struct bkey_i *dst,
        memcpy_u64s_small(&dst->v, src.v, bkey_val_u64s(src.k));
 }
 
-#define bkey_s_null            ((struct bkey_s)   { .k = NULL })
-#define bkey_s_c_null          ((struct bkey_s_c) { .k = NULL })
-
-#define bkey_s_err(err)                ((struct bkey_s)   { .k = ERR_PTR(err) })
-#define bkey_s_c_err(err)      ((struct bkey_s_c) { .k = ERR_PTR(err) })
-
-static inline struct bkey_s bkey_to_s(struct bkey *k)
-{
-       return (struct bkey_s) { .k = k, .v = NULL };
-}
-
-static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k)
-{
-       return (struct bkey_s_c) { .k = k, .v = NULL };
-}
-
-static inline struct bkey_s bkey_i_to_s(struct bkey_i *k)
-{
-       return (struct bkey_s) { .k = &k->k, .v = &k->v };
-}
-
-static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k)
-{
-       return (struct bkey_s_c) { .k = &k->k, .v = &k->v };
-}
-
-/*
- * For a given type of value (e.g. struct bch_extent), generates the types for
- * bkey + bch_extent - inline, split, split const - and also all the conversion
- * functions, which also check that the value is of the correct type.
- *
- * We use anonymous unions for upcasting - e.g. converting from e.g. a
- * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion
- * functions.
- */
-#define x(name, ...)                                   \
-struct bkey_i_##name {                                                 \
-       union {                                                         \
-               struct bkey             k;                              \
-               struct bkey_i           k_i;                            \
-       };                                                              \
-       struct bch_##name               v;                              \
-};                                                                     \
-                                                                       \
-struct bkey_s_c_##name {                                               \
-       union {                                                         \
-       struct {                                                        \
-               const struct bkey       *k;                             \
-               const struct bch_##name *v;                             \
-       };                                                              \
-       struct bkey_s_c                 s_c;                            \
-       };                                                              \
-};                                                                     \
-                                                                       \
-struct bkey_s_##name {                                                 \
-       union {                                                         \
-       struct {                                                        \
-               struct bkey             *k;                             \
-               struct bch_##name       *v;                             \
-       };                                                              \
-       struct bkey_s_c_##name          c;                              \
-       struct bkey_s                   s;                              \
-       struct bkey_s_c                 s_c;                            \
-       };                                                              \
-};                                                                     \
-                                                                       \
-static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \
-{                                                                      \
-       EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);    \
-       return container_of(&k->k, struct bkey_i_##name, k);            \
-}                                                                      \
-                                                                       \
-static inline const struct bkey_i_##name *                             \
-bkey_i_to_##name##_c(const struct bkey_i *k)                           \
-{                                                                      \
-       EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);    \
-       return container_of(&k->k, struct bkey_i_##name, k);            \
-}                                                                      \
-                                                                       \
-static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k)   \
-{                                                                      \
-       EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name);  \
-       return (struct bkey_s_##name) {                                 \
-               .k = k.k,                                               \
-               .v = container_of(k.v, struct bch_##name, v),           \
-       };                                                              \
-}                                                                      \
-                                                                       \
-static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\
-{                                                                      \
-       EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name);  \
-       return (struct bkey_s_c_##name) {                               \
-               .k = k.k,                                               \
-               .v = container_of(k.v, struct bch_##name, v),           \
-       };                                                              \
-}                                                                      \
-                                                                       \
-static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\
-{                                                                      \
-       return (struct bkey_s_##name) {                                 \
-               .k = &k->k,                                             \
-               .v = &k->v,                                             \
-       };                                                              \
-}                                                                      \
-                                                                       \
-static inline struct bkey_s_c_##name                                   \
-name##_i_to_s_c(const struct bkey_i_##name *k)                         \
-{                                                                      \
-       return (struct bkey_s_c_##name) {                               \
-               .k = &k->k,                                             \
-               .v = &k->v,                                             \
-       };                                                              \
-}                                                                      \
-                                                                       \
-static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k)        \
-{                                                                      \
-       EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);    \
-       return (struct bkey_s_##name) {                                 \
-               .k = &k->k,                                             \
-               .v = container_of(&k->v, struct bch_##name, v),         \
-       };                                                              \
-}                                                                      \
-                                                                       \
-static inline struct bkey_s_c_##name                                   \
-bkey_i_to_s_c_##name(const struct bkey_i *k)                           \
-{                                                                      \
-       EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);    \
-       return (struct bkey_s_c_##name) {                               \
-               .k = &k->k,                                             \
-               .v = container_of(&k->v, struct bch_##name, v),         \
-       };                                                              \
-}                                                                      \
-                                                                       \
-static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
-{                                                                      \
-       struct bkey_i_##name *k =                                       \
-               container_of(&_k->k, struct bkey_i_##name, k);          \
-                                                                       \
-       bkey_init(&k->k);                                               \
-       memset(&k->v, 0, sizeof(k->v));                                 \
-       k->k.type = KEY_TYPE_##name;                                    \
-       set_bkey_val_bytes(&k->k, sizeof(k->v));                        \
-                                                                       \
-       return k;                                                       \
-}
-
-BCH_BKEY_TYPES();
-#undef x
-
 /* byte order helpers */
 
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
diff --git a/fs/bcachefs/bkey_types.h b/fs/bcachefs/bkey_types.h
new file mode 100644
index 0000000..c9ae9e4
--- /dev/null
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_TYPES_H
+#define _BCACHEFS_BKEY_TYPES_H
+
+#include "bcachefs_format.h"
+
+/*
+ * bkey_i      - bkey with inline value
+ * bkey_s      - bkey with split value
+ * bkey_s_c    - bkey with split value, const
+ */
+
+#define bkey_p_next(_k)                vstruct_next(_k)
+
+static inline struct bkey_i *bkey_next(struct bkey_i *k)
+{
+       return (struct bkey_i *) ((u64 *) k->_data + k->k.u64s);
+}
+
+#define bkey_val_u64s(_k)      ((_k)->u64s - BKEY_U64s)
+
+static inline size_t bkey_val_bytes(const struct bkey *k)
+{
+       return bkey_val_u64s(k) * sizeof(u64);
+}
+
+static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s)
+{
+       unsigned u64s = BKEY_U64s + val_u64s;
+
+       BUG_ON(u64s > U8_MAX);
+       k->u64s = u64s;
+}
+
+static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
+{
+       set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64)));
+}
+
+#define bkey_val_end(_k)       ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))
+
+#define bkey_deleted(_k)       ((_k)->type == KEY_TYPE_deleted)
+
+#define bkey_whiteout(_k)                              \
+       ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout)
+
+/* bkey with split value, const */
+struct bkey_s_c {
+       const struct bkey       *k;
+       const struct bch_val    *v;
+};
+
+/* bkey with split value */
+struct bkey_s {
+       union {
+       struct {
+               struct bkey     *k;
+               struct bch_val  *v;
+       };
+       struct bkey_s_c         s_c;
+       };
+};
+
+#define bkey_s_null            ((struct bkey_s)   { .k = NULL })
+#define bkey_s_c_null          ((struct bkey_s_c) { .k = NULL })
+
+#define bkey_s_err(err)                ((struct bkey_s)   { .k = ERR_PTR(err) })
+#define bkey_s_c_err(err)      ((struct bkey_s_c) { .k = ERR_PTR(err) })
+
+static inline struct bkey_s bkey_to_s(struct bkey *k)
+{
+       return (struct bkey_s) { .k = k, .v = NULL };
+}
+
+static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k)
+{
+       return (struct bkey_s_c) { .k = k, .v = NULL };
+}
+
+static inline struct bkey_s bkey_i_to_s(struct bkey_i *k)
+{
+       return (struct bkey_s) { .k = &k->k, .v = &k->v };
+}
+
+static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k)
+{
+       return (struct bkey_s_c) { .k = &k->k, .v = &k->v };
+}
+
+/*
+ * For a given type of value (e.g. struct bch_extent), generates the types for
+ * bkey + bch_extent - inline, split, split const - and also all the conversion
+ * functions, which also check that the value is of the correct type.
+ *
+ * We use anonymous unions for upcasting - e.g. converting from e.g. a
+ * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion
+ * functions.
+ */
+#define x(name, ...)                                   \
+struct bkey_i_##name {                                                 \
+       union {                                                         \
+               struct bkey             k;                              \
+               struct bkey_i           k_i;                            \
+       };                                                              \
+       struct bch_##name               v;                              \
+};                                                                     \
+                                                                       \
+struct bkey_s_c_##name {                                               \
+       union {                                                         \
+       struct {                                                        \
+               const struct bkey       *k;                             \
+               const struct bch_##name *v;                             \
+       };                                                              \
+       struct bkey_s_c                 s_c;                            \
+       };                                                              \
+};                                                                     \
+                                                                       \
+struct bkey_s_##name {                                                 \
+       union {                                                         \
+       struct {                                                        \
+               struct bkey             *k;                             \
+               struct bch_##name       *v;                             \
+       };                                                              \
+       struct bkey_s_c_##name          c;                              \
+       struct bkey_s                   s;                              \
+       struct bkey_s_c                 s_c;                            \
+       };                                                              \
+};                                                                     \
+                                                                       \
+static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \
+{                                                                      \
+       EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);    \
+       return container_of(&k->k, struct bkey_i_##name, k);            \
+}                                                                      \
+                                                                       \
+static inline const struct bkey_i_##name *                             \
+bkey_i_to_##name##_c(const struct bkey_i *k)                           \
+{                                                                      \
+       EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);    \
+       return container_of(&k->k, struct bkey_i_##name, k);            \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k)   \
+{                                                                      \
+       EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name);  \
+       return (struct bkey_s_##name) {                                 \
+               .k = k.k,                                               \
+               .v = container_of(k.v, struct bch_##name, v),           \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\
+{                                                                      \
+       EBUG_ON(!IS_ERR_OR_NULL(k.k) && k.k->type != KEY_TYPE_##name);  \
+       return (struct bkey_s_c_##name) {                               \
+               .k = k.k,                                               \
+               .v = container_of(k.v, struct bch_##name, v),           \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\
+{                                                                      \
+       return (struct bkey_s_##name) {                                 \
+               .k = &k->k,                                             \
+               .v = &k->v,                                             \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_c_##name                                   \
+name##_i_to_s_c(const struct bkey_i_##name *k)                         \
+{                                                                      \
+       return (struct bkey_s_c_##name) {                               \
+               .k = &k->k,                                             \
+               .v = &k->v,                                             \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k)        \
+{                                                                      \
+       EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);    \
+       return (struct bkey_s_##name) {                                 \
+               .k = &k->k,                                             \
+               .v = container_of(&k->v, struct bch_##name, v),         \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_s_c_##name                                   \
+bkey_i_to_s_c_##name(const struct bkey_i *k)                           \
+{                                                                      \
+       EBUG_ON(!IS_ERR_OR_NULL(k) && k->k.type != KEY_TYPE_##name);    \
+       return (struct bkey_s_c_##name) {                               \
+               .k = &k->k,                                             \
+               .v = container_of(&k->v, struct bch_##name, v),         \
+       };                                                              \
+}                                                                      \
+                                                                       \
+static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
+{                                                                      \
+       struct bkey_i_##name *k =                                       \
+               container_of(&_k->k, struct bkey_i_##name, k);          \
+                                                                       \
+       bkey_init(&k->k);                                               \
+       memset(&k->v, 0, sizeof(k->v));                                 \
+       k->k.type = KEY_TYPE_##name;                                    \
+       set_bkey_val_bytes(&k->k, sizeof(k->v));                        \
+                                                                       \
+       return k;                                                       \
+}
+
+BCH_BKEY_TYPES();
+#undef x
+
+#endif /* _BCACHEFS_BKEY_TYPES_H */
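
The per-type structs and conversion helpers in this new header are generated with the classic X-macro pattern: BCH_BKEY_TYPES() expands x(name, ...) once per key type, and the #define x(...) above stamps out the full family of typed wrappers for each. An illustrative use, assuming the type list includes x(extent, ...):

    /* Illustrative: checked downcast generated by the x-macro above. */
    void example(struct bkey_i *k)
    {
            if (k->k.type == KEY_TYPE_extent) {
                    struct bkey_i_extent *e = bkey_i_to_extent(k);
                    (void) e;
            }
    }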
index d7c81beac14afae7ee44f11f28eb424f1b54a063..562561a9a510e8ce55cdee26a9b064d4c07cf02d 100644
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bbpos.h"
 #include "bkey_buf.h"
 #include "btree_cache.h"
 #include "btree_io.h"
@@ -60,7 +61,7 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
 
        clear_btree_node_just_written(b);
 
-       kvpfree(b->data, btree_buf_bytes(b));
+       kvfree(b->data);
        b->data = NULL;
 #ifdef __KERNEL__
        kvfree(b->aux_data);
@@ -94,7 +95,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
 {
        BUG_ON(b->data || b->aux_data);
 
-       b->data = kvpmalloc(btree_buf_bytes(b), gfp);
+       b->data = kvmalloc(btree_buf_bytes(b), gfp);
        if (!b->data)
                return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
 #ifdef __KERNEL__
@@ -107,7 +108,7 @@ static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)
                b->aux_data = NULL;
 #endif
        if (!b->aux_data) {
-               kvpfree(b->data, btree_buf_bytes(b));
+               kvfree(b->data);
                b->data = NULL;
                return -BCH_ERR_ENOMEM_btree_node_mem_alloc;
        }
@@ -208,6 +209,18 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
        int ret = 0;
 
        lockdep_assert_held(&bc->lock);
+
+       struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
+
+       u64 mask = b->c.level
+               ? bc->pinned_nodes_interior_mask
+               : bc->pinned_nodes_leaf_mask;
+
+       if ((mask & BIT_ULL(b->c.btree_id)) &&
+           bbpos_cmp(bc->pinned_nodes_start, pos) < 0 &&
+           bbpos_cmp(bc->pinned_nodes_end, pos) >= 0)
+               return -BCH_ERR_ENOMEM_btree_node_reclaim;
+
 wait_on_io:
        if (b->flags & ((1U << BTREE_NODE_dirty)|
                        (1U << BTREE_NODE_read_in_flight)|
@@ -408,7 +421,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
        if (c->verify_data)
                list_move(&c->verify_data->list, &bc->live);
 
-       kvpfree(c->verify_ondisk, c->opts.btree_node_size);
+       kvfree(c->verify_ondisk);
 
        for (i = 0; i < btree_id_nr_alive(c); i++) {
                struct btree_root *r = bch2_btree_id_root(c, i);
@@ -711,6 +724,9 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
        b = bch2_btree_node_mem_alloc(trans, level != 0);
 
        if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) {
+               if (!path)
+                       return b;
+
                trans->memory_allocation_failure = true;
                trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path);
                return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
@@ -760,8 +776,9 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
        }
 
        if (!six_relock_type(&b->c.lock, lock_type, seq)) {
-               if (path)
-                       trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
+               BUG_ON(!path);
+
+               trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
                return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
        }
 
@@ -901,7 +918,7 @@ retry:
 
        if (unlikely(btree_node_read_error(b))) {
                six_unlock_type(&b->c.lock, lock_type);
-               return ERR_PTR(-EIO);
+               return ERR_PTR(-BCH_ERR_btree_node_read_error);
        }
 
        EBUG_ON(b->c.btree_id != path->btree_id);
@@ -992,7 +1009,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
 
        if (unlikely(btree_node_read_error(b))) {
                six_unlock_type(&b->c.lock, lock_type);
-               return ERR_PTR(-EIO);
+               return ERR_PTR(-BCH_ERR_btree_node_read_error);
        }
 
        EBUG_ON(b->c.btree_id != path->btree_id);
@@ -1075,7 +1092,7 @@ lock_node:
 
        if (unlikely(btree_node_read_error(b))) {
                six_unlock_read(&b->c.lock);
-               b = ERR_PTR(-EIO);
+               b = ERR_PTR(-BCH_ERR_btree_node_read_error);
                goto out;
        }
 
@@ -1096,7 +1113,7 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
 
-       BUG_ON(trans && !btree_node_locked(path, level + 1));
+       BUG_ON(path && !btree_node_locked(path, level + 1));
        BUG_ON(level >= BTREE_MAX_DEPTH);
 
        b = btree_cache_find(bc, k);
index 1102995643b137c3a8a9fe5f12f0cce95edfafeb..584aee7010deaa79df36edd60e5d7e59bcb7a7e3 100644
@@ -389,7 +389,8 @@ again:
        have_child = dropped_children = false;
        bch2_bkey_buf_init(&prev_k);
        bch2_bkey_buf_init(&cur_k);
-       bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+       bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
+       iter.prefetch = true;
 
        while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                BUG_ON(bpos_lt(k.k->p, b->data->min_key));
@@ -406,7 +407,7 @@ again:
                printbuf_reset(&buf);
                bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur_k.k));
 
-               if (mustfix_fsck_err_on(ret == -EIO, c,
+               if (mustfix_fsck_err_on(bch2_err_matches(ret, EIO), c,
                                btree_node_unreadable,
                                "Topology repair: unreadable btree node at btree %s level %u:\n"
                                "  %s",
@@ -478,7 +479,8 @@ again:
                goto err;
 
        bch2_btree_and_journal_iter_exit(&iter);
-       bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+       bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
+       iter.prefetch = true;
 
        while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                bch2_bkey_buf_reassemble(&cur_k, c, k);
@@ -591,16 +593,15 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
                struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
                enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
 
-               if (!g->gen_valid &&
-                   (c->opts.reconstruct_alloc ||
-                    fsck_err(c, ptr_to_missing_alloc_key,
-                             "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
-                             "while marking %s",
-                             p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-                             bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
-                             p.ptr.gen,
-                             (printbuf_reset(&buf),
-                              bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
+               if (fsck_err_on(!g->gen_valid,
+                               c, ptr_to_missing_alloc_key,
+                               "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n"
+                               "while marking %s",
+                               p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+                               bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
+                               p.ptr.gen,
+                               (printbuf_reset(&buf),
+                                bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
                        if (!p.ptr.cached) {
                                g->gen_valid            = true;
                                g->gen                  = p.ptr.gen;
@@ -609,16 +610,15 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
                        }
                }
 
-               if (gen_cmp(p.ptr.gen, g->gen) > 0 &&
-                   (c->opts.reconstruct_alloc ||
-                    fsck_err(c, ptr_gen_newer_than_bucket_gen,
-                             "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
-                             "while marking %s",
-                             p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-                             bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
-                             p.ptr.gen, g->gen,
-                             (printbuf_reset(&buf),
-                              bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))) {
+               if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0,
+                               c, ptr_gen_newer_than_bucket_gen,
+                               "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n"
+                               "while marking %s",
+                               p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+                               bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
+                               p.ptr.gen, g->gen,
+                               (printbuf_reset(&buf),
+                                bch2_bkey_val_to_text(&buf, c, *k), buf.buf))) {
                        if (!p.ptr.cached) {
                                g->gen_valid            = true;
                                g->gen                  = p.ptr.gen;
@@ -631,28 +631,26 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
                        }
                }
 
-               if (gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX &&
-                   (c->opts.reconstruct_alloc ||
-                    fsck_err(c, ptr_gen_newer_than_bucket_gen,
-                             "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
-                             "while marking %s",
-                             p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
-                             bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
-                             p.ptr.gen,
-                             (printbuf_reset(&buf),
-                              bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
+               if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX,
+                               c, ptr_gen_newer_than_bucket_gen,
+                               "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n"
+                               "while marking %s",
+                               p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen,
+                               bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
+                               p.ptr.gen,
+                               (printbuf_reset(&buf),
+                                bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
                        do_update = true;
 
-               if (!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0 &&
-                   (c->opts.reconstruct_alloc ||
-                    fsck_err(c, stale_dirty_ptr,
-                             "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
-                             "while marking %s",
-                             p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
-                             bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
-                             p.ptr.gen, g->gen,
-                             (printbuf_reset(&buf),
-                              bch2_bkey_val_to_text(&buf, c, *k), buf.buf))))
+               if (fsck_err_on(!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0,
+                               c, stale_dirty_ptr,
+                               "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n"
+                               "while marking %s",
+                               p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
+                               bch2_data_type_str(ptr_data_type(k->k, &p.ptr)),
+                               p.ptr.gen, g->gen,
+                               (printbuf_reset(&buf),
+                                bch2_bkey_val_to_text(&buf, c, *k), buf.buf)))
                        do_update = true;
 
                if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
@@ -931,7 +929,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+       bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
        bch2_bkey_buf_init(&prev);
        bch2_bkey_buf_init(&cur);
        bkey_init(&prev.k->k);
@@ -963,7 +961,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
 
        if (b->c.level > target_depth) {
                bch2_btree_and_journal_iter_exit(&iter);
-               bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
+               bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
+               iter.prefetch = true;
 
                while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                        struct btree *child;
@@ -976,7 +975,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
                                                false);
                        ret = PTR_ERR_OR_ZERO(child);
 
-                       if (ret == -EIO) {
+                       if (bch2_err_matches(ret, EIO)) {
                                bch2_topology_error(c);
 
                                if (__fsck_err(c,
@@ -1190,9 +1189,7 @@ static void bch2_gc_free(struct bch_fs *c)
        genradix_free(&c->gc_stripes);
 
        for_each_member_device(c, ca) {
-               kvpfree(rcu_dereference_protected(ca->buckets_gc, 1),
-                       sizeof(struct bucket_array) +
-                       ca->mi.nbuckets * sizeof(struct bucket));
+               kvfree(rcu_dereference_protected(ca->buckets_gc, 1));
                ca->buckets_gc = NULL;
 
                free_percpu(ca->usage_gc);
@@ -1365,11 +1362,10 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
-       struct bucket gc, *b;
+       struct bucket old_gc, gc, *b;
        struct bkey_i_alloc_v4 *a;
        struct bch_alloc_v4 old_convert, new;
        const struct bch_alloc_v4 *old;
-       enum bch_data_type type;
        int ret;
 
        old = bch2_alloc_to_v4(k, &old_convert);
@@ -1377,30 +1373,31 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 
        percpu_down_read(&c->mark_lock);
        b = gc_bucket(ca, iter->pos.offset);
+       old_gc = *b;
+
+       if ((old->data_type == BCH_DATA_sb ||
+            old->data_type == BCH_DATA_journal) &&
+           !bch2_dev_is_online(ca)) {
+               b->data_type = old->data_type;
+               b->dirty_sectors = old->dirty_sectors;
+       }
 
        /*
         * b->data_type doesn't yet include need_discard & need_gc_gen states -
         * fix that here:
         */
-       type = __alloc_data_type(b->dirty_sectors,
-                                b->cached_sectors,
-                                b->stripe,
-                                *old,
-                                b->data_type);
-       if (b->data_type != type) {
-               struct bch_dev_usage *u;
-
-               preempt_disable();
-               u = this_cpu_ptr(ca->usage_gc);
-               u->d[b->data_type].buckets--;
-               b->data_type = type;
-               u->d[b->data_type].buckets++;
-               preempt_enable();
-       }
-
+       b->data_type = __alloc_data_type(b->dirty_sectors,
+                                        b->cached_sectors,
+                                        b->stripe,
+                                        *old,
+                                        b->data_type);
        gc = *b;
        percpu_up_read(&c->mark_lock);
 
+       if (gc.data_type != old_gc.data_type ||
+           gc.dirty_sectors != old_gc.dirty_sectors)
+               bch2_dev_usage_update_m(c, ca, &old_gc, &gc);
+
        if (metadata_only &&
            gc.data_type != BCH_DATA_sb &&
            gc.data_type != BCH_DATA_journal &&
@@ -1410,8 +1407,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
        if (gen_after(old->gen, gc.gen))
                return 0;
 
-       if (c->opts.reconstruct_alloc ||
-           fsck_err_on(new.data_type != gc.data_type, c,
+       if (fsck_err_on(new.data_type != gc.data_type, c,
                        alloc_key_data_type_wrong,
                        "bucket %llu:%llu gen %u has wrong data_type"
                        ": got %s, should be %s",
@@ -1422,8 +1418,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
                new.data_type = gc.data_type;
 
 #define copy_bucket_field(_errtype, _f)                                        \
-       if (c->opts.reconstruct_alloc ||                                \
-           fsck_err_on(new._f != gc._f, c, _errtype,                   \
+       if (fsck_err_on(new._f != gc._f, c, _errtype,                   \
                        "bucket %llu:%llu gen %u data type %s has wrong " #_f   \
                        ": got %u, should be %u",                       \
                        iter->pos.inode, iter->pos.offset,              \
@@ -1491,7 +1486,7 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
 static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
 {
        for_each_member_device(c, ca) {
-               struct bucket_array *buckets = kvpmalloc(sizeof(struct bucket_array) +
+               struct bucket_array *buckets = kvmalloc(sizeof(struct bucket_array) +
                                ca->mi.nbuckets * sizeof(struct bucket),
                                GFP_KERNEL|__GFP_ZERO);
                if (!buckets) {
@@ -1585,8 +1580,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
                        "  should be %u",
                        (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
                        r->refcount)) {
-               struct bkey_i *new = bch2_bkey_make_mut(trans, iter, &k, 0);
-
+               struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
                ret = PTR_ERR_OR_ZERO(new);
                if (ret)
                        return ret;
@@ -1595,6 +1589,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
                        new->k.type = KEY_TYPE_deleted;
                else
                        *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
+               ret = bch2_trans_update(trans, iter, new, 0);
        }
 fsck_err:
        printbuf_exit(&buf);
@@ -1817,10 +1812,10 @@ out:
        if (!ret) {
                bch2_journal_block(&c->journal);
 
-               ret   = bch2_gc_stripes_done(c, metadata_only) ?:
-                       bch2_gc_reflink_done(c, metadata_only) ?:
-                       bch2_gc_alloc_done(c, metadata_only) ?:
-                       bch2_gc_done(c, initial, metadata_only);
+               ret   = bch2_gc_alloc_done(c, metadata_only) ?:
+                       bch2_gc_done(c, initial, metadata_only) ?:
+                       bch2_gc_stripes_done(c, metadata_only) ?:
+                       bch2_gc_reflink_done(c, metadata_only);
 
                bch2_journal_unblock(&c->journal);
        }
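
    The repeated pattern "cond && (c->opts.reconstruct_alloc || fsck_err(...))"
    collapses into fsck_err_on(cond, ...): the reconstruct_alloc special case
    is handled once inside the error machinery instead of at every call site.
    A simplified sketch of how such a macro can fold the option check in (the
    helper and struct are stand-ins for the real fsck_err plumbing):

        #include <stdbool.h>
        #include <stdio.h>

        struct fs { bool reconstruct_alloc; };

        /* report an inconsistency; returns true if the caller should repair.
         * The real __fsck_err() consults fix_errors policy, ratelimits, etc. */
        static bool __fsck_err(struct fs *c, const char *msg)
        {
            printf("fsck: %s\n", msg);
            return true;
        }

        /* true iff the inconsistency exists and should be repaired; with
         * reconstruct_alloc we repair silently rather than log every key */
        #define fsck_err_on(_cond, _c, _msg) \
            ((_cond) && ((_c)->reconstruct_alloc || __fsck_err((_c), (_msg))))

        int main(void)
        {
            struct fs c = { .reconstruct_alloc = false };
            bool gen_valid = false;

            if (fsck_err_on(!gen_valid, &c, "ptr gen missing in alloc btree"))
                gen_valid = true;       /* the do_update path above */
            return 0;
        }
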
index aa9b6cbe3226909626411b886731a8bb8648a558..624c8287deb43191d39b130c842875f7aa1a9ff1 100644 (file)
@@ -103,7 +103,7 @@ static void btree_bounce_free(struct bch_fs *c, size_t size,
        if (used_mempool)
                mempool_free(p, &c->btree_bounce_pool);
        else
-               vpfree(p, size);
+               kvfree(p);
 }
 
 static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
@@ -115,7 +115,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
        BUG_ON(size > c->opts.btree_node_size);
 
        *used_mempool = false;
-       p = vpmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
+       p = kvmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
        if (!p) {
                *used_mempool = true;
                p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
@@ -581,8 +581,7 @@ static int __btree_err(int ret,
                break;
        case -BCH_ERR_btree_node_read_err_bad_node:
                bch2_print_string_as_lines(KERN_ERR, out.buf);
-               bch2_topology_error(c);
-               ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: -EIO;
+               ret = bch2_topology_error(c);
                break;
        case -BCH_ERR_btree_node_read_err_incompatible:
                bch2_print_string_as_lines(KERN_ERR, out.buf);
@@ -840,6 +839,9 @@ static bool __bkey_valid(struct bch_fs *c, struct btree *b,
        if (k->format > KEY_FORMAT_CURRENT)
                return false;
 
+       if (k->u64s < bkeyp_key_u64s(&b->format, k))
+               return false;
+
        struct printbuf buf = PRINTBUF;
        struct bkey tmp;
        struct bkey_s u = __bkey_disassemble(b, k, &tmp);
@@ -881,7 +883,13 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
                                 "invalid bkey format %u", k->format))
                        goto drop_this_key;
 
-               /* XXX: validate k->u64s */
+               if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k),
+                                -BCH_ERR_btree_node_read_err_fixable,
+                                c, NULL, b, i,
+                                btree_node_bkey_bad_u64s,
+                                "k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k)))
+                       goto drop_this_key;
+
                if (!write)
                        bch2_bkey_compat(b->c.level, b->c.btree_id, version,
                                    BSET_BIG_ENDIAN(i), write,
@@ -1737,7 +1745,7 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id,
                list_move(&b->list, &c->btree_cache.freeable);
                mutex_unlock(&c->btree_cache.lock);
 
-               ret = -EIO;
+               ret = -BCH_ERR_btree_node_read_error;
                goto err;
        }
 
@@ -1841,7 +1849,7 @@ static void btree_node_write_work(struct work_struct *work)
                bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
 
        if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) {
-               ret = -BCH_ERR_btree_write_all_failed;
+               ret = -BCH_ERR_btree_node_write_all_failed;
                goto err;
        }
 
index 3ef338df82f5e46228f583a85a7cacdba233a64b..51bcdc6c6d1cda83be21b43b54d0d11f320a0471 100644 (file)
@@ -891,7 +891,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
        struct bkey_s_c k;
        int ret = 0;
 
-       __bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
+       __bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos);
 
        k = bch2_btree_and_journal_iter_peek(&jiter);
 
@@ -1146,7 +1146,7 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
        path = &trans->paths[path_idx];
 
        if (unlikely(path->level >= BTREE_MAX_DEPTH))
-               goto out;
+               goto out_uptodate;
 
        path->level = btree_path_up_until_good_node(trans, path, 0);
 
@@ -1179,7 +1179,7 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
                        goto out;
                }
        }
-
+out_uptodate:
        path->uptodate = BTREE_ITER_UPTODATE;
 out:
        if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
@@ -1520,7 +1520,7 @@ static noinline void btree_paths_realloc(struct btree_trans *trans)
 {
        unsigned nr = trans->nr_paths * 2;
 
-       void *p = kzalloc(BITS_TO_LONGS(nr) * sizeof(unsigned long) +
+       void *p = kvzalloc(BITS_TO_LONGS(nr) * sizeof(unsigned long) +
                          sizeof(struct btree_trans_paths) +
                          nr * sizeof(struct btree_path) +
                          nr * sizeof(btree_path_idx_t) + 8 +
@@ -1729,7 +1729,9 @@ bch2_btree_iter_traverse(struct btree_iter *iter)
        if (ret)
                return ret;
 
-       btree_path_set_should_be_locked(trans->paths + iter->path);
+       struct btree_path *path = btree_iter_path(trans, iter);
+       if (btree_path_node(path, path->level))
+               btree_path_set_should_be_locked(path);
        return 0;
 }
 
@@ -2305,7 +2307,7 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
                btree_iter_path(trans, iter)->level);
 
        if (iter->flags & BTREE_ITER_WITH_JOURNAL)
-               return bkey_s_c_err(-EIO);
+               return bkey_s_c_err(-BCH_ERR_btree_iter_with_journal_not_supported);
 
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
@@ -2503,6 +2505,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                        k = bch2_btree_iter_peek_upto(&iter2, end);
 
                        if (k.k && !bkey_err(k)) {
+                               swap(iter->key_cache_path, iter2.key_cache_path);
                                iter->k = iter2.k;
                                k.k = &iter->k;
                        }
@@ -2762,6 +2765,9 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
        struct btree_trans *trans = src->trans;
 
        *dst = *src;
+#ifdef TRACK_PATH_ALLOCATED
+       dst->ip_allocated = _RET_IP_;
+#endif
        if (src->path)
                __btree_path_get(trans->paths + src->path, src->flags & BTREE_ITER_INTENT);
        if (src->update_path)
@@ -3085,7 +3091,7 @@ void bch2_trans_put(struct btree_trans *trans)
        trans->paths            = NULL;
 
        if (paths_allocated != trans->_paths_allocated)
-               kfree_rcu_mightsleep(paths_allocated);
+               kvfree_rcu_mightsleep(paths_allocated);
 
        if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
                mempool_free(trans->mem, &c->btree_trans_mem_pool);
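
    btree_paths_realloc() sizes one allocation to hold the allocated-bitmap,
    the paths themselves and the sorted-index array, then carves it up; with
    nr doubling each time, the kzalloc -> kvzalloc switch (and the matching
    kvfree_rcu_mightsleep()) lets large transactions fall back to vmalloc
    rather than fail a high-order page allocation. A userspace sketch of the
    carving, with placeholder types:

        #include <stdlib.h>

        struct path { int level; };

        struct paths {
            unsigned long *allocated;   /* bitmap of live paths */
            struct path   *paths;
            unsigned char *sorted;      /* path indices in key order */
        };

        static int paths_alloc(struct paths *out, unsigned nr)
        {
            size_t bitmap = (nr + 63) / 64 * sizeof(unsigned long);
            char *p = calloc(1, bitmap +
                                nr * sizeof(struct path) +
                                nr * sizeof(unsigned char));

            if (!p)
                return -1;

            out->allocated = (unsigned long *) p;
            out->paths     = (struct path *) (p + bitmap);
            out->sorted    = (unsigned char *) (out->paths + nr);
            return 0;
        }
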
index 719a94a84950b7fe2d179b4860c2eed727044417..50e04356d72c8ea0f920545561b4797875afc03f 100644 (file)
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "bkey_buf.h"
 #include "bset.h"
+#include "btree_cache.h"
 #include "btree_journal_iter.h"
 #include "journal_io.h"
 
@@ -40,7 +42,7 @@ static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx)
 
 static inline struct journal_key *idx_to_key(struct journal_keys *keys, size_t idx)
 {
-       return keys->d + idx_to_pos(keys, idx);
+       return keys->data + idx_to_pos(keys, idx);
 }
 
 static size_t __bch2_journal_key_search(struct journal_keys *keys,
@@ -180,10 +182,10 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
        BUG_ON(test_bit(BCH_FS_rw, &c->flags));
 
        if (idx < keys->size &&
-           journal_key_cmp(&n, &keys->d[idx]) == 0) {
-               if (keys->d[idx].allocated)
-                       kfree(keys->d[idx].k);
-               keys->d[idx] = n;
+           journal_key_cmp(&n, &keys->data[idx]) == 0) {
+               if (keys->data[idx].allocated)
+                       kfree(keys->data[idx].k);
+               keys->data[idx] = n;
                return 0;
        }
 
@@ -196,17 +198,17 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
                        .size                   = max_t(size_t, keys->size, 8) * 2,
                };
 
-               new_keys.d = kvmalloc_array(new_keys.size, sizeof(new_keys.d[0]), GFP_KERNEL);
-               if (!new_keys.d) {
+               new_keys.data = kvmalloc_array(new_keys.size, sizeof(new_keys.data[0]), GFP_KERNEL);
+               if (!new_keys.data) {
                        bch_err(c, "%s: error allocating new key array (size %zu)",
                                __func__, new_keys.size);
                        return -BCH_ERR_ENOMEM_journal_key_insert;
                }
 
                /* Since @keys was full, there was no gap: */
-               memcpy(new_keys.d, keys->d, sizeof(keys->d[0]) * keys->nr);
-               kvfree(keys->d);
-               keys->d         = new_keys.d;
+               memcpy(new_keys.data, keys->data, sizeof(keys->data[0]) * keys->nr);
+               kvfree(keys->data);
+               keys->data      = new_keys.data;
                keys->nr        = new_keys.nr;
                keys->size      = new_keys.size;
 
@@ -216,11 +218,10 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
 
        journal_iters_move_gap(c, keys->gap, idx);
 
-       move_gap(keys->d, keys->nr, keys->size, keys->gap, idx);
-       keys->gap = idx;
+       move_gap(keys, idx);
 
        keys->nr++;
-       keys->d[keys->gap++] = n;
+       keys->data[keys->gap++] = n;
 
        journal_iters_fix(c);
 
@@ -267,10 +268,10 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
        size_t idx = bch2_journal_key_search(keys, btree, level, pos);
 
        if (idx < keys->size &&
-           keys->d[idx].btree_id       == btree &&
-           keys->d[idx].level          == level &&
-           bpos_eq(keys->d[idx].k->k.p, pos))
-               keys->d[idx].overwritten = true;
+           keys->data[idx].btree_id    == btree &&
+           keys->data[idx].level       == level &&
+           bpos_eq(keys->data[idx].k->k.p, pos))
+               keys->data[idx].overwritten = true;
 }
 
 static void bch2_journal_iter_advance(struct journal_iter *iter)
@@ -284,16 +285,16 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
 
 static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
 {
-       struct journal_key *k = iter->keys->d + iter->idx;
+       struct journal_key *k = iter->keys->data + iter->idx;
 
-       while (k < iter->keys->d + iter->keys->size &&
+       while (k < iter->keys->data + iter->keys->size &&
               k->btree_id      == iter->btree_id &&
               k->level         == iter->level) {
                if (!k->overwritten)
                        return bkey_i_to_s_c(k->k);
 
                bch2_journal_iter_advance(iter);
-               k = iter->keys->d + iter->idx;
+               k = iter->keys->data + iter->idx;
        }
 
        return bkey_s_c_null;
@@ -334,9 +335,38 @@ void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
                iter->pos = bpos_successor(iter->pos);
 }
 
+static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter)
+{
+       struct btree_and_journal_iter iter = *_iter;
+       struct bch_fs *c = iter.trans->c;
+       unsigned level = iter.journal.level;
+       struct bkey_buf tmp;
+       unsigned nr = test_bit(BCH_FS_started, &c->flags)
+               ? (level > 1 ? 0 :  2)
+               : (level > 1 ? 1 : 16);
+
+       iter.prefetch = false;
+       bch2_bkey_buf_init(&tmp);
+
+       while (nr--) {
+               bch2_btree_and_journal_iter_advance(&iter);
+               struct bkey_s_c k = bch2_btree_and_journal_iter_peek(&iter);
+               if (!k.k)
+                       break;
+
+               bch2_bkey_buf_reassemble(&tmp, c, k);
+               bch2_btree_node_prefetch(iter.trans, NULL, tmp.k, iter.journal.btree_id, level - 1);
+       }
+
+       bch2_bkey_buf_exit(&tmp, c);
+}
+
 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
 {
        struct bkey_s_c btree_k, journal_k, ret;
+
+       if (iter->prefetch && iter->journal.level)
+               btree_and_journal_iter_prefetch(iter);
 again:
        if (iter->at_end)
                return bkey_s_c_null;
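
    btree_and_journal_iter_prefetch() works on a copy of the iterator: it
    advances the copy up to nr positions, kicking off reads for the child
    nodes it sees, while the caller's position stays untouched; clearing
    prefetch on the copy keeps the inner peeks from recursing. The shape of
    the pattern as a userspace sketch (nonnegative ints stand in for keys,
    -1 for end):

        #include <stdbool.h>
        #include <stdio.h>

        struct iter { const int *pos, *end; bool prefetch; };

        static void start_read(int key)   /* would queue an async node read */
        {
            printf("prefetch %d\n", key);
        }

        static int iter_peek(struct iter *iter);

        static void iter_prefetch(const struct iter *_iter)
        {
            struct iter iter = *_iter;    /* private copy; caller unmoved */
            int nr = 16;                  /* readahead distance */

            iter.prefetch = false;        /* peeks on the copy won't recurse */

            while (nr-- && iter.pos < iter.end) {
                int k;

                iter.pos++;               /* advance the copy ... */
                k = iter_peek(&iter);     /* ... and peek ahead */
                if (k < 0)
                    break;
                start_read(k);
            }
        }

        static int iter_peek(struct iter *iter)
        {
            if (iter->prefetch)
                iter_prefetch(iter);
            return iter->pos < iter->end ? *iter->pos : -1;
        }
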
@@ -376,17 +406,18 @@ void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
        bch2_journal_iter_exit(&iter->journal);
 }
 
-void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
-                                                 struct bch_fs *c,
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
+                                                 struct btree_and_journal_iter *iter,
                                                  struct btree *b,
                                                  struct btree_node_iter node_iter,
                                                  struct bpos pos)
 {
        memset(iter, 0, sizeof(*iter));
 
+       iter->trans = trans;
        iter->b = b;
        iter->node_iter = node_iter;
-       bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos);
+       bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
        INIT_LIST_HEAD(&iter->journal.list);
        iter->pos = b->data->min_key;
        iter->at_end = false;
@@ -396,15 +427,15 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter
  * this version is used by btree_gc before filesystem has gone RW and
  * multithreaded, so uses the journal_iters list:
  */
-void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
-                                               struct bch_fs *c,
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
+                                               struct btree_and_journal_iter *iter,
                                                struct btree *b)
 {
        struct btree_node_iter node_iter;
 
        bch2_btree_node_iter_init_from_start(&node_iter, b);
-       __bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key);
-       list_add(&iter->journal.list, &c->journal_iters);
+       __bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
+       list_add(&iter->journal.list, &trans->c->journal_iters);
 }
 
 /* sort and dedup all keys in the journal: */
@@ -415,9 +446,7 @@ void bch2_journal_entries_free(struct bch_fs *c)
        struct genradix_iter iter;
 
        genradix_for_each(&c->journal_entries, iter, i)
-               if (*i)
-                       kvpfree(*i, offsetof(struct journal_replay, j) +
-                               vstruct_bytes(&(*i)->j));
+               kvfree(*i);
        genradix_free(&c->journal_entries);
 }
 
@@ -437,22 +466,20 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
 void bch2_journal_keys_put(struct bch_fs *c)
 {
        struct journal_keys *keys = &c->journal_keys;
-       struct journal_key *i;
 
        BUG_ON(atomic_read(&keys->ref) <= 0);
 
        if (!atomic_dec_and_test(&keys->ref))
                return;
 
-       move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
-       keys->gap = keys->nr;
+       move_gap(keys, keys->nr);
 
-       for (i = keys->d; i < keys->d + keys->nr; i++)
+       darray_for_each(*keys, i)
                if (i->allocated)
                        kfree(i->k);
 
-       kvfree(keys->d);
-       keys->d = NULL;
+       kvfree(keys->data);
+       keys->data = NULL;
        keys->nr = keys->gap = keys->size = 0;
 
        bch2_journal_entries_free(c);
@@ -460,83 +487,38 @@ void bch2_journal_keys_put(struct bch_fs *c)
 
 static void __journal_keys_sort(struct journal_keys *keys)
 {
-       struct journal_key *src, *dst;
+       sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL);
 
-       sort(keys->d, keys->nr, sizeof(keys->d[0]), journal_sort_key_cmp, NULL);
+       struct journal_key *dst = keys->data;
 
-       src = dst = keys->d;
-       while (src < keys->d + keys->nr) {
-               while (src + 1 < keys->d + keys->nr &&
-                      !journal_key_cmp(src, src + 1))
-                       src++;
+       darray_for_each(*keys, src) {
+               if (src + 1 < &darray_top(*keys) &&
+                   !journal_key_cmp(src, src + 1))
+                       continue;
 
-               *dst++ = *src++;
+               *dst++ = *src;
        }
 
-       keys->nr = dst - keys->d;
+       keys->nr = dst - keys->data;
 }
 
 int bch2_journal_keys_sort(struct bch_fs *c)
 {
        struct genradix_iter iter;
        struct journal_replay *i, **_i;
-       struct jset_entry *entry;
-       struct bkey_i *k;
        struct journal_keys *keys = &c->journal_keys;
-       size_t nr_keys = 0, nr_read = 0;
-
-       genradix_for_each(&c->journal_entries, iter, _i) {
-               i = *_i;
-
-               if (!i || i->ignore)
-                       continue;
-
-               for_each_jset_key(k, entry, &i->j)
-                       nr_keys++;
-       }
-
-       if (!nr_keys)
-               return 0;
-
-       keys->size = roundup_pow_of_two(nr_keys);
-
-       keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
-       if (!keys->d) {
-               bch_err(c, "Failed to allocate buffer for sorted journal keys (%zu keys); trying slowpath",
-                       nr_keys);
-
-               do {
-                       keys->size >>= 1;
-                       keys->d = kvmalloc_array(keys->size, sizeof(keys->d[0]), GFP_KERNEL);
-               } while (!keys->d && keys->size > nr_keys / 8);
-
-               if (!keys->d) {
-                       bch_err(c, "Failed to allocate %zu size buffer for sorted journal keys; exiting",
-                               keys->size);
-                       return -BCH_ERR_ENOMEM_journal_keys_sort;
-               }
-       }
+       size_t nr_read = 0;
 
        genradix_for_each(&c->journal_entries, iter, _i) {
                i = *_i;
 
-               if (!i || i->ignore)
+               if (journal_replay_ignore(i))
                        continue;
 
                cond_resched();
 
                for_each_jset_key(k, entry, &i->j) {
-                       if (keys->nr == keys->size) {
-                               __journal_keys_sort(keys);
-
-                               if (keys->nr > keys->size * 7 / 8) {
-                                       bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu/%zu",
-                                               keys->nr, keys->size, nr_read, nr_keys);
-                                       return -BCH_ERR_ENOMEM_journal_keys_sort;
-                               }
-                       }
-
-                       keys->d[keys->nr++] = (struct journal_key) {
+                       struct journal_key n = (struct journal_key) {
                                .btree_id       = entry->btree_id,
                                .level          = entry->level,
                                .k              = k,
@@ -544,6 +526,18 @@ int bch2_journal_keys_sort(struct bch_fs *c)
                                .journal_offset = k->_data - i->j._data,
                        };
 
+                       if (darray_push(keys, n)) {
+                               __journal_keys_sort(keys);
+
+                               if (keys->nr * 8 > keys->size * 7) {
+                                       bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu",
+                                               keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq));
+                                       return -BCH_ERR_ENOMEM_journal_keys_sort;
+                               }
+
+                               BUG_ON(darray_push(keys, n));
+                       }
+
                        nr_read++;
                }
        }
@@ -551,6 +545,6 @@ int bch2_journal_keys_sort(struct bch_fs *c)
        __journal_keys_sort(keys);
        keys->gap = keys->nr;
 
-       bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_keys, keys->nr);
+       bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_read, keys->nr);
        return 0;
 }
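
    journal_keys is now a bcachefs darray (hence keys->d becoming keys->data,
    and darray_push()/darray_for_each() replacing the hand-rolled grow and
    dedup loops), with one twist kept from before: the unused slots form a gap
    at keys->gap, so repeated inserts near the same position only memmove
    once. A userspace sketch of that gap-buffer move, with ints standing in
    for journal keys:

        #include <stddef.h>
        #include <string.h>

        struct gapbuf {
            int    *data;
            size_t nr;     /* live elements */
            size_t size;   /* capacity; size - nr unused slots form the gap */
            size_t gap;    /* logical index where the gap sits */
        };

        /* slide elements so the gap starts at idx (cf. move_gap() above) */
        static void move_gap(struct gapbuf *b, size_t idx)
        {
            size_t gap_len = b->size - b->nr;

            if (idx < b->gap)
                memmove(b->data + idx + gap_len, b->data + idx,
                        (b->gap - idx) * sizeof(*b->data));
            else if (idx > b->gap)
                memmove(b->data + b->gap, b->data + b->gap + gap_len,
                        (idx - b->gap) * sizeof(*b->data));
            b->gap = idx;
        }

        /* caller checks capacity; darray_push() would grow the array instead */
        static void insert_at(struct gapbuf *b, size_t idx, int v)
        {
            move_gap(b, idx);
            b->nr++;
            b->data[b->gap++] = v;
        }
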
index 8ca4c100b2e3e413d7adbb8dd5599d9f42de6d30..c9d19da3ea04803a360a683fa0e01a2838f2433f 100644 (file)
@@ -15,6 +15,7 @@ struct journal_iter {
  */
 
 struct btree_and_journal_iter {
+       struct btree_trans      *trans;
        struct btree            *b;
        struct btree_node_iter  node_iter;
        struct bkey             unpacked;
@@ -22,6 +23,7 @@ struct btree_and_journal_iter {
        struct journal_iter     journal;
        struct bpos             pos;
        bool                    at_end;
+       bool                    prefetch;
 };
 
 struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id,
@@ -29,6 +31,9 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *, enum btree_id,
 struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *, enum btree_id,
                                           unsigned, struct bpos);
 
+int bch2_btree_and_journal_iter_prefetch(struct btree_trans *, struct btree_path *,
+                                        struct btree_and_journal_iter *);
+
 int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
                                 unsigned, struct bkey_i *);
 int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
@@ -42,12 +47,11 @@ void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
 
 void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
-void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
-                               struct bch_fs *, struct btree *,
+void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *,
+                               struct btree_and_journal_iter *, struct btree *,
                                struct btree_node_iter, struct bpos);
-void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
-                                               struct bch_fs *,
-                                               struct btree *);
+void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *,
+                               struct btree_and_journal_iter *, struct btree *);
 
 void bch2_journal_keys_put(struct bch_fs *);
 
index 74e52fd28abe584617d2d7ccd2c09b8a46db1603..8a71d43444b9425808ff1db86ec0fd296c9900ff 100644 (file)
@@ -380,9 +380,11 @@ static int btree_key_cache_fill(struct btree_trans *trans,
        struct bkey_i *new_k = NULL;
        int ret;
 
-       k = bch2_bkey_get_iter(trans, &iter, ck->key.btree_id, ck->key.pos,
-                              BTREE_ITER_KEY_CACHE_FILL|
-                              BTREE_ITER_CACHED_NOFILL);
+       bch2_trans_iter_init(trans, &iter, ck->key.btree_id, ck->key.pos,
+                            BTREE_ITER_KEY_CACHE_FILL|
+                            BTREE_ITER_CACHED_NOFILL);
+       iter.flags &= ~BTREE_ITER_WITH_JOURNAL;
+       k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
index 6843974423381029e7a8cf24fd4cd5c6c33627cd..b9b151e693ed60ecc3dc9147cc34902643cfc7aa 100644 (file)
@@ -747,7 +747,8 @@ void bch2_trans_downgrade(struct btree_trans *trans)
                return;
 
        trans_for_each_path(trans, path, i)
-               bch2_btree_path_downgrade(trans, path);
+               if (path->ref)
+                       bch2_btree_path_downgrade(trans, path);
 }
 
 int bch2_trans_relock(struct btree_trans *trans)
index 4a5a64499eb76698743ae7f20b4e47eaca09b868..9404d96c38f3b368726a6603b601b241b5106100 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 
+#include "bbpos_types.h"
 #include "btree_key_cache_types.h"
 #include "buckets_types.h"
 #include "darray.h"
@@ -173,6 +174,11 @@ struct btree_cache {
         */
        struct task_struct      *alloc_lock;
        struct closure_waitlist alloc_wait;
+
+       struct bbpos            pinned_nodes_start;
+       struct bbpos            pinned_nodes_end;
+       u64                     pinned_nodes_leaf_mask;
+       u64                     pinned_nodes_interior_mask;
 };
 
 struct btree_node_iter {
@@ -654,6 +660,7 @@ const char *bch2_btree_node_type_str(enum btree_node_type);
         BIT_ULL(BKEY_TYPE_inodes)|                     \
         BIT_ULL(BKEY_TYPE_stripes)|                    \
         BIT_ULL(BKEY_TYPE_reflink)|                    \
+        BIT_ULL(BKEY_TYPE_subvolumes)|                 \
         BIT_ULL(BKEY_TYPE_btree))
 
 #define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS            \
@@ -727,7 +734,7 @@ struct btree_root {
        __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
        u8                      level;
        u8                      alive;
-       s8                      error;
+       s16                     error;
 };
 
 enum btree_gc_coalesce_fail_reason {
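
    The new btree_cache fields describe a pinned range: a bbpos is a
    (btree id, position) pair, and the two masks say which btrees' leaf and
    interior nodes within [pinned_nodes_start, pinned_nodes_end] the cache
    shrinker should leave alone. A guess at the membership test, with
    simplified types (the real check lives in the btree cache shrinker):

        #include <stdbool.h>
        #include <stdint.h>

        struct bbpos { uint32_t btree; uint64_t pos; };

        static int bbpos_cmp(struct bbpos l, struct bbpos r)
        {
            if (l.btree != r.btree)
                return l.btree < r.btree ? -1 : 1;
            return l.pos == r.pos ? 0 : (l.pos < r.pos ? -1 : 1);
        }

        static bool node_is_pinned(struct bbpos node, unsigned level,
                                   struct bbpos start, struct bbpos end,
                                   uint64_t leaf_mask, uint64_t interior_mask)
        {
            uint64_t mask = level ? interior_mask : leaf_mask;

            return (mask & (1ULL << node.btree)) &&
                   bbpos_cmp(node, start) >= 0 &&
                   bbpos_cmp(node, end) <= 0;
        }
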
index c3ff365acce9afeae894c69003d247bef9c8e955..a4b40c1656a54b0a13c9f562d337827387a44b15 100644 (file)
@@ -452,7 +452,7 @@ bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
         * the key cache - but the key has to exist in the btree for that to
         * work:
         */
-       if (path->cached && bkey_deleted(&i->old_k))
+       if (path->cached && !i->old_btree_u64s)
                return flush_new_cached_update(trans, i, flags, ip);
 
        return 0;
@@ -787,6 +787,27 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
 
 int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
                       struct bpos pos, bool set)
+{
+       struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
+       int ret = PTR_ERR_OR_ZERO(k);
+       if (ret)
+               return ret;
+
+       bkey_init(&k->k);
+       k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
+       k->k.p = pos;
+
+       struct btree_iter iter;
+       bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_INTENT);
+
+       ret   = bch2_btree_iter_traverse(&iter) ?:
+               bch2_trans_update(trans, &iter, k, 0);
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
+                               struct bpos pos, bool set)
 {
        struct bkey_i k;
 
index b9382b7b288b6a6189d191886511a3ee57187634..cc7c53e83f89dd5cdc8ccccab214ae3cbfd3fb72 100644 (file)
@@ -63,11 +63,12 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
                            struct bpos, struct bpos, unsigned, u64 *);
 
 int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
+int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool);
 
 static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
                                                enum btree_id btree, struct bpos pos)
 {
-       return bch2_btree_bit_mod(trans, btree, pos, false);
+       return bch2_btree_bit_mod_buffered(trans, btree, pos, false);
 }
 
 int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
index 4530b14ff2c3717ec15e92615385c04e185e28e1..642213ef9f798e477bc902e4977be9cb813aab56 100644 (file)
@@ -25,8 +25,7 @@
 #include <linux/random.h>
 
 static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
-                                 btree_path_idx_t, struct btree *,
-                                 struct keylist *, unsigned);
+                                 btree_path_idx_t, struct btree *, struct keylist *);
 static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
 
 static btree_path_idx_t get_unlocked_mut_path(struct btree_trans *trans,
@@ -1208,10 +1207,6 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
        mutex_unlock(&c->btree_cache.lock);
 
        mutex_lock(&c->btree_root_lock);
-       BUG_ON(btree_node_root(c, b) &&
-              (b->c.level < btree_node_root(c, b)->c.level ||
-               !btree_node_dying(btree_node_root(c, b))));
-
        bch2_btree_id_root(c, b->c.btree_id)->b = b;
        mutex_unlock(&c->btree_root_lock);
 
@@ -1477,7 +1472,7 @@ static void btree_split_insert_keys(struct btree_update *as,
 
 static int btree_split(struct btree_update *as, struct btree_trans *trans,
                       btree_path_idx_t path, struct btree *b,
-                      struct keylist *keys, unsigned flags)
+                      struct keylist *keys)
 {
        struct bch_fs *c = as->c;
        struct btree *parent = btree_node_parent(trans->paths + path, b);
@@ -1578,7 +1573,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
 
        if (parent) {
                /* Split a non root node */
-               ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+               ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys);
                if (ret)
                        goto err;
        } else if (n3) {
@@ -1673,7 +1668,6 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
  * @path_idx:          path that points to current node
  * @b:                 node to insert keys into
  * @keys:              list of keys to insert
- * @flags:             transaction commit flags
  *
  * Returns: 0 on success, typically transaction restart error on failure
  *
@@ -1683,7 +1677,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
  */
 static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
                                  btree_path_idx_t path_idx, struct btree *b,
-                                 struct keylist *keys, unsigned flags)
+                                 struct keylist *keys)
 {
        struct bch_fs *c = as->c;
        struct btree_path *path = trans->paths + path_idx;
@@ -1739,7 +1733,7 @@ split:
                return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
        }
 
-       return btree_split(as, trans, path_idx, b, keys, flags);
+       return btree_split(as, trans, path_idx, b, keys);
 }
 
 int bch2_btree_split_leaf(struct btree_trans *trans,
@@ -1747,7 +1741,6 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
                          unsigned flags)
 {
        /* btree_split & merge may both cause paths array to be reallocated */
-
        struct btree *b = path_l(trans->paths + path)->b;
        struct btree_update *as;
        unsigned l;
@@ -1759,7 +1752,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
        if (IS_ERR(as))
                return PTR_ERR(as);
 
-       ret = btree_split(as, trans, path, b, NULL, flags);
+       ret = btree_split(as, trans, path, b, NULL);
        if (ret) {
                bch2_btree_update_free(as, trans);
                return ret;
@@ -1775,6 +1768,60 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
        return ret;
 }
 
+static void __btree_increase_depth(struct btree_update *as, struct btree_trans *trans,
+                                  btree_path_idx_t path_idx)
+{
+       struct bch_fs *c = as->c;
+       struct btree_path *path = trans->paths + path_idx;
+       struct btree *n, *b = bch2_btree_id_root(c, path->btree_id)->b;
+
+       BUG_ON(!btree_node_locked(path, b->c.level));
+
+       n = __btree_root_alloc(as, trans, b->c.level + 1);
+
+       bch2_btree_update_add_new_node(as, n);
+       six_unlock_write(&n->c.lock);
+
+       path->locks_want++;
+       BUG_ON(btree_node_locked(path, n->c.level));
+       six_lock_increment(&n->c.lock, SIX_LOCK_intent);
+       mark_btree_node_locked(trans, path, n->c.level, BTREE_NODE_INTENT_LOCKED);
+       bch2_btree_path_level_init(trans, path, n);
+
+       n->sib_u64s[0] = U16_MAX;
+       n->sib_u64s[1] = U16_MAX;
+
+       bch2_keylist_add(&as->parent_keys, &b->key);
+       btree_split_insert_keys(as, trans, path_idx, n, &as->parent_keys);
+
+       bch2_btree_set_root(as, trans, path, n);
+       bch2_btree_update_get_open_buckets(as, n);
+       bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
+       bch2_trans_node_add(trans, path, n);
+       six_unlock_intent(&n->c.lock);
+
+       mutex_lock(&c->btree_cache.lock);
+       list_add_tail(&b->list, &c->btree_cache.live);
+       mutex_unlock(&c->btree_cache.lock);
+
+       bch2_trans_verify_locks(trans);
+}
+
+int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path, unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       struct btree *b = bch2_btree_id_root(c, trans->paths[path].btree_id)->b;
+       struct btree_update *as =
+               bch2_btree_update_start(trans, trans->paths + path,
+                                       b->c.level, true, flags);
+       if (IS_ERR(as))
+               return PTR_ERR(as);
+
+       __btree_increase_depth(as, trans, path);
+       bch2_btree_update_done(as, trans);
+       return 0;
+}
+
 int __bch2_foreground_maybe_merge(struct btree_trans *trans,
                                  btree_path_idx_t path,
                                  unsigned level,
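
    Growing the tree is the dual of splitting the root: allocate an empty node
    one level above the current root, insert the old root's key as its single
    child, and make it the new root, so a subsequent split has a parent to
    insert into. A toy pointer-tree sketch of the same move:

        #include <stdlib.h>

        struct node {
            int         level;
            int         nr;
            struct node *child[16];
        };

        static struct node *increase_depth(struct node *root)
        {
            struct node *n = calloc(1, sizeof(*n));

            if (!n)
                return root;            /* keep the old root on failure */

            n->level = root->level + 1;
            n->child[n->nr++] = root;   /* as btree_split_insert_keys()
                                         * does with &b->key above */
            return n;
        }
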
@@ -1845,8 +1892,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
                        __func__, buf1.buf, buf2.buf);
                printbuf_exit(&buf1);
                printbuf_exit(&buf2);
-               bch2_topology_error(c);
-               ret = -EIO;
+               ret = bch2_topology_error(c);
                goto err;
        }
 
@@ -1916,7 +1962,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
 
        bch2_trans_verify_paths(trans);
 
-       ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+       ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys);
        if (ret)
                goto err_free_update;
 
@@ -1987,8 +2033,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
 
        if (parent) {
                bch2_keylist_add(&as->parent_keys, &n->key);
-               ret = bch2_btree_insert_node(as, trans, iter->path,
-                                            parent, &as->parent_keys, flags);
+               ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys);
                if (ret)
                        goto err;
        } else {
@@ -2485,7 +2530,7 @@ void bch2_fs_btree_interior_update_init_early(struct bch_fs *c)
 int bch2_fs_btree_interior_update_init(struct bch_fs *c)
 {
        c->btree_interior_update_worker =
-               alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
+               alloc_workqueue("btree_update", WQ_UNBOUND|WQ_MEM_RECLAIM, 8);
        if (!c->btree_interior_update_worker)
                return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;
 
index c593c925d1e3b03cfae5b4e7fdf0f7bc4b99df5c..3439b03719c7b505bf43e5f76bf703f6778c9899 100644 (file)
@@ -119,6 +119,8 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
 
 int bch2_btree_split_leaf(struct btree_trans *, btree_path_idx_t, unsigned);
 
+int bch2_btree_increase_depth(struct btree_trans *, btree_path_idx_t, unsigned);
+
 int __bch2_foreground_maybe_merge(struct btree_trans *, btree_path_idx_t,
                                  unsigned, unsigned, enum btree_node_sibling);
 
index ac7844861966368cdce41efd9e27c898fe8ad6e7..b77e7b382b66660f8b02925c13657118dc28fed1 100644 (file)
@@ -574,8 +574,6 @@ void bch2_journal_keys_to_write_buffer_end(struct bch_fs *c, struct journal_keys
 static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_buf *buf)
 {
        struct journal_keys_to_wb dst;
-       struct jset_entry *entry;
-       struct bkey_i *k;
        int ret = 0;
 
        bch2_journal_keys_to_write_buffer_start(c, &dst, le64_to_cpu(buf->data->seq));
@@ -590,7 +588,9 @@ static int bch2_journal_keys_to_write_buffer(struct bch_fs *c, struct journal_bu
                entry->type = BCH_JSET_ENTRY_btree_keys;
        }
 
+       spin_lock(&c->journal.lock);
        buf->need_flush_to_write_buffer = false;
+       spin_unlock(&c->journal.lock);
 out:
        bch2_journal_keys_to_write_buffer_end(c, &dst);
        return ret;
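
    Clearing need_flush_to_write_buffer now happens under journal.lock,
    pairing with readers that test it under the same lock; an unlocked store
    could race with the journal-write path deciding whether the buffer still
    needs these keys. The pattern in pthread form:

        #include <pthread.h>
        #include <stdbool.h>

        static pthread_mutex_t journal_lock = PTHREAD_MUTEX_INITIALIZER;
        static bool need_flush_to_write_buffer = true;

        static void mark_drained(void)
        {
            pthread_mutex_lock(&journal_lock);
            need_flush_to_write_buffer = false;
            pthread_mutex_unlock(&journal_lock);
        }

        static bool must_flush(void)
        {
            bool ret;

            pthread_mutex_lock(&journal_lock);
            ret = need_flush_to_write_buffer;
            pthread_mutex_unlock(&journal_lock);
            return ret;
        }
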
index 54f7826ac49874d46b08330678ea0b2565ecc491..c2f46b267b3ad50c796690320f0a700411940931 100644 (file)
@@ -1053,7 +1053,8 @@ int bch2_trigger_extent(struct btree_trans *trans,
                          (int) bch2_bkey_needs_rebalance(c, old);
 
                if (mod) {
-                       int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0);
+                       int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
+                                                             new.k->p, mod > 0);
                        if (ret)
                                return ret;
                }
@@ -1335,7 +1336,7 @@ static void bucket_gens_free_rcu(struct rcu_head *rcu)
        struct bucket_gens *buckets =
                container_of(rcu, struct bucket_gens, rcu);
 
-       kvpfree(buckets, sizeof(*buckets) + buckets->nbuckets);
+       kvfree(buckets);
 }
 
 int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
@@ -1345,16 +1346,16 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
        bool resize = ca->bucket_gens != NULL;
        int ret;
 
-       if (!(bucket_gens       = kvpmalloc(sizeof(struct bucket_gens) + nbuckets,
-                                           GFP_KERNEL|__GFP_ZERO))) {
+       if (!(bucket_gens       = kvmalloc(sizeof(struct bucket_gens) + nbuckets,
+                                          GFP_KERNEL|__GFP_ZERO))) {
                ret = -BCH_ERR_ENOMEM_bucket_gens;
                goto err;
        }
 
        if ((c->opts.buckets_nouse &&
-            !(buckets_nouse    = kvpmalloc(BITS_TO_LONGS(nbuckets) *
-                                           sizeof(unsigned long),
-                                           GFP_KERNEL|__GFP_ZERO)))) {
+            !(buckets_nouse    = kvmalloc(BITS_TO_LONGS(nbuckets) *
+                                          sizeof(unsigned long),
+                                          GFP_KERNEL|__GFP_ZERO)))) {
                ret = -BCH_ERR_ENOMEM_buckets_nouse;
                goto err;
        }
@@ -1397,8 +1398,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 
        ret = 0;
 err:
-       kvpfree(buckets_nouse,
-               BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
+       kvfree(buckets_nouse);
        if (bucket_gens)
                call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu);
 
@@ -1407,27 +1407,21 @@ err:
 
 void bch2_dev_buckets_free(struct bch_dev *ca)
 {
-       unsigned i;
-
-       kvpfree(ca->buckets_nouse,
-               BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
-       kvpfree(rcu_dereference_protected(ca->bucket_gens, 1),
-               sizeof(struct bucket_gens) + ca->mi.nbuckets);
+       kvfree(ca->buckets_nouse);
+       kvfree(rcu_dereference_protected(ca->bucket_gens, 1));
 
-       for (i = 0; i < ARRAY_SIZE(ca->usage); i++)
+       for (unsigned i = 0; i < ARRAY_SIZE(ca->usage); i++)
                free_percpu(ca->usage[i]);
        kfree(ca->usage_base);
 }
 
 int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
 {
-       unsigned i;
-
        ca->usage_base = kzalloc(sizeof(struct bch_dev_usage), GFP_KERNEL);
        if (!ca->usage_base)
                return -BCH_ERR_ENOMEM_usage_init;
 
-       for (i = 0; i < ARRAY_SIZE(ca->usage); i++) {
+       for (unsigned i = 0; i < ARRAY_SIZE(ca->usage); i++) {
                ca->usage[i] = alloc_percpu(struct bch_dev_usage);
                if (!ca->usage[i])
                        return -BCH_ERR_ENOMEM_usage_init;
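
    The kvpmalloc()/kvpfree() pairs become plain kvmalloc()/kvfree():
    kvfree() tells vmalloc memory from kmalloc memory by the address itself,
    so all the size bookkeeping at free time, and the NULL checks, drop out.
    Roughly what kvfree() does internally (a sketch; the real one lives in
    mm/util.c):

        #include <linux/mm.h>
        #include <linux/slab.h>

        static void kvfree_sketch(const void *p)
        {
            if (is_vmalloc_addr(p))
                vfree(p);       /* vmalloc tracks its own sizes */
            else
                kfree(p);       /* slab likewise; NULL is a no-op */
        }
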
index 226b39c176673a374f50ab06ad5f6d3e0a4858d8..38defa19d52d701762fa95a02cb1e22e7a0c182c 100644 (file)
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
-__must_check
-static int copy_to_user_errcode(void __user *to, const void *from, unsigned long n)
-{
-       return copy_to_user(to, from, n) ? -EFAULT : 0;
-}
-
 /* returns with ref on ca->ref */
 static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
                                          unsigned flags)
@@ -155,19 +149,35 @@ static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
        kfree(thr);
 }
 
-static int bch2_fsck_offline_thread_fn(void *arg)
+static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
 {
-       struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
+       struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
        struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
 
-       thr->thr.thr.ret = PTR_ERR_OR_ZERO(c);
-       if (!thr->thr.thr.ret)
-               bch2_fs_stop(c);
+       if (IS_ERR(c))
+               return PTR_ERR(c);
 
-       thread_with_stdio_done(&thr->thr);
-       return 0;
+       int ret = 0;
+       if (test_bit(BCH_FS_errors_fixed, &c->flags))
+               ret |= 1;
+       if (test_bit(BCH_FS_error, &c->flags))
+               ret |= 4;
+
+       bch2_fs_stop(c);
+
+       if (ret & 1)
+               bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name);
+       if (ret & 4)
+               bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name);
+
+       return ret;
 }
 
+static const struct thread_with_stdio_ops bch2_offline_fsck_ops = {
+       .exit           = bch2_fsck_thread_exit,
+       .fn             = bch2_fsck_offline_thread_fn,
+};
+
 static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
 {
        struct bch_ioctl_fsck_offline arg;
@@ -220,9 +230,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
 
        opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
 
-       ret = bch2_run_thread_with_stdio(&thr->thr,
-                       bch2_fsck_thread_exit,
-                       bch2_fsck_offline_thread_fn);
+       ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops);
 err:
        if (ret < 0) {
                if (thr)
@@ -763,9 +771,9 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
        return ret;
 }
 
-static int bch2_fsck_online_thread_fn(void *arg)
+static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio)
 {
-       struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
+       struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
        struct bch_fs *c = thr->c;
 
        c->stdio_filter = current;
@@ -793,13 +801,16 @@ static int bch2_fsck_online_thread_fn(void *arg)
        c->stdio_filter = NULL;
        c->opts.fix_errors = old_fix_errors;
 
-       thread_with_stdio_done(&thr->thr);
-
        up(&c->online_fsck_mutex);
        bch2_ro_ref_put(c);
-       return 0;
+       return ret;
 }
 
+static const struct thread_with_stdio_ops bch2_online_fsck_ops = {
+       .exit           = bch2_fsck_thread_exit,
+       .fn             = bch2_fsck_online_thread_fn,
+};
+
 static long bch2_ioctl_fsck_online(struct bch_fs *c,
                                   struct bch_ioctl_fsck_online arg)
 {
@@ -840,9 +851,7 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
                        goto err;
        }
 
-       ret = bch2_run_thread_with_stdio(&thr->thr,
-                       bch2_fsck_thread_exit,
-                       bch2_fsck_online_thread_fn);
+       ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_online_fsck_ops);
 err:
        if (ret < 0) {
                bch_err_fn(c, ret);
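
  The hunks above are part of converting thread_with_stdio to a vtable:
  instead of passing an exit callback and a thread function as separate
  arguments at every call site, callers now hand
  bch2_run_thread_with_stdio() one static const ops table, and the
  thread function returns its exit code directly instead of stashing it
  and calling thread_with_stdio_done(). A minimal standalone sketch of
  the pattern (the bodies below are illustrative stubs, not the kernel
  code):

        struct thread_with_stdio;                       /* opaque here */

        struct thread_with_stdio_ops {
                void    (*exit)(struct thread_with_stdio *);
                int     (*fn)(struct thread_with_stdio *);
        };

        static void demo_exit(struct thread_with_stdio *thr) { (void) thr; }
        static int  demo_fn(struct thread_with_stdio *thr)   { (void) thr; return 0; }

        /* One table per user: adding a callback later means touching the
         * struct and its users, not every run_thread_with_stdio() call: */
        static const struct thread_with_stdio_ops demo_ops = {
                .exit   = demo_exit,
                .fn     = demo_fn,
        };

        static int run_thread_with_stdio(struct thread_with_stdio *thr,
                                         const struct thread_with_stdio_ops *ops)
        {
                int ret = ops->fn(thr);         /* thread body */
                ops->exit(thr);                 /* cleanup hook */
                return ret;
        }
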
index 3c761ad6b1c8ef1fef8781dc10f393ccfee9997e..4701457f6381ca820e17a12707009c272ed5b4ac 100644 (file)
@@ -558,7 +558,7 @@ got_key:
        return 0;
 }
 
-#include "../crypto.h"
+#include "crypto.h"
 #endif
 
 int bch2_request_key(struct bch_sb *sb, struct bch_key *key)
index 33df8cf86bd8f83bbf42d45944d0632da404fd71..1410365a889156450c78da9165bdb146872370ed 100644 (file)
@@ -601,13 +601,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
                return 0;
 
        if (!mempool_initialized(&c->compression_bounce[READ]) &&
-           mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
-                                       1, c->opts.encoded_extent_max))
+           mempool_init_kvmalloc_pool(&c->compression_bounce[READ],
+                                      1, c->opts.encoded_extent_max))
                return -BCH_ERR_ENOMEM_compression_bounce_read_init;
 
        if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
-           mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
-                                       1, c->opts.encoded_extent_max))
+           mempool_init_kvmalloc_pool(&c->compression_bounce[WRITE],
+                                      1, c->opts.encoded_extent_max))
                return -BCH_ERR_ENOMEM_compression_bounce_write_init;
 
        for (i = compression_types;
@@ -622,15 +622,15 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
                if (mempool_initialized(&c->compress_workspace[i->type]))
                        continue;
 
-               if (mempool_init_kvpmalloc_pool(
+               if (mempool_init_kvmalloc_pool(
                                &c->compress_workspace[i->type],
                                1, i->compress_workspace))
                        return -BCH_ERR_ENOMEM_compression_workspace_init;
        }
 
        if (!mempool_initialized(&c->decompress_workspace) &&
-           mempool_init_kvpmalloc_pool(&c->decompress_workspace,
-                                       1, decompress_workspace_size))
+           mempool_init_kvmalloc_pool(&c->decompress_workspace,
+                                      1, decompress_workspace_size))
                return -BCH_ERR_ENOMEM_decompression_workspace_init;
 
        return 0;
index 7bdba8507fc93cdfdecc29de3e70e5589cf8177b..b1f147e6be4d5cdd0ab491932db9c625b763e29e 100644 (file)
@@ -137,7 +137,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
        mutex_lock(&c->verify_lock);
 
        if (!c->verify_ondisk) {
-               c->verify_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL);
+               c->verify_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL);
                if (!c->verify_ondisk)
                        goto out;
        }
@@ -199,7 +199,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
                return;
        }
 
-       n_ondisk = kvpmalloc(btree_buf_bytes(b), GFP_KERNEL);
+       n_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL);
        if (!n_ondisk) {
                prt_printf(out, "memory allocation failure\n");
                goto out;
@@ -293,7 +293,7 @@ void bch2_btree_node_ondisk_to_text(struct printbuf *out, struct bch_fs *c,
 out:
        if (bio)
                bio_put(bio);
-       kvpfree(n_ondisk, btree_buf_bytes(b));
+       kvfree(n_ondisk);
        percpu_ref_put(&ca->io_ref);
 }
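
  These kvpmalloc()/kvpfree() conversions (also in the compression,
  erasure-coding and fifo hunks elsewhere in this series) are
  mechanical: the stock kvfree() can free both kmalloc and vmalloc
  memory without being told the size, so the size argument to every
  free simply drops out. A sketch of the resulting idiom:

        #include <linux/mm.h>
        #include <linux/slab.h>         /* kvmalloc(), kvfree() */

        /* kvmalloc() tries kmalloc() first and falls back to vmalloc()
         * for large or fragmented allocations; kvfree() dispatches on
         * the address, so no size is threaded through to the free: */
        static void *verify_buf_alloc(size_t bytes)
        {
                return kvmalloc(bytes, GFP_KERNEL);
        }

        static void verify_buf_free(void *buf)
        {
                kvfree(buf);    /* safe for NULL, kmalloc and vmalloc memory */
        }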
 
index 4ae1e9f002a09b9c7ea3bed1709334f35373b061..d37bd07afbfe4088ebb9b92feb56ff86127ae1ab 100644 (file)
@@ -144,19 +144,21 @@ fsck_err:
        return ret;
 }
 
-void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
-                        struct bkey_s_c k)
+void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
 {
        struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
        struct qstr d_name = bch2_dirent_get_name(d);
 
-       prt_printf(out, "%.*s -> %llu type %s",
-              d_name.len,
-              d_name.name,
-              d.v->d_type != DT_SUBVOL
-              ? le64_to_cpu(d.v->d_inum)
-              : le32_to_cpu(d.v->d_child_subvol),
-              bch2_d_type_str(d.v->d_type));
+       prt_printf(out, "%.*s -> ", d_name.len, d_name.name);
+
+       if (d.v->d_type != DT_SUBVOL)
+               prt_printf(out, "%llu", le64_to_cpu(d.v->d_inum));
+       else
+               prt_printf(out, "%u -> %u",
+                          le32_to_cpu(d.v->d_parent_subvol),
+                          le32_to_cpu(d.v->d_child_subvol));
+
+       prt_printf(out, " type %s", bch2_d_type_str(d.v->d_type));
 }
 
 static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
@@ -199,17 +201,17 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
 }
 
 int bch2_dirent_create_snapshot(struct btree_trans *trans,
-                       u64 dir, u32 snapshot,
+                       u32 dir_subvol, u64 dir, u32 snapshot,
                        const struct bch_hash_info *hash_info,
                        u8 type, const struct qstr *name, u64 dst_inum,
                        u64 *dir_offset,
                        bch_str_hash_flags_t str_hash_flags)
 {
-       subvol_inum zero_inum = { 0 };
+       subvol_inum dir_inum = { .subvol = dir_subvol, .inum = dir };
        struct bkey_i_dirent *dirent;
        int ret;
 
-       dirent = dirent_create_key(trans, zero_inum, type, name, dst_inum);
+       dirent = dirent_create_key(trans, dir_inum, type, name, dst_inum);
        ret = PTR_ERR_OR_ZERO(dirent);
        if (ret)
                return ret;
@@ -217,10 +219,10 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
        dirent->k.p.inode       = dir;
        dirent->k.p.snapshot    = snapshot;
 
-       ret = bch2_hash_set_snapshot(trans, bch2_dirent_hash_desc, hash_info,
-                                    zero_inum, snapshot,
-                                    &dirent->k_i, str_hash_flags,
-                                    BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+       ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
+                                       dir_inum, snapshot,
+                                       &dirent->k_i, str_hash_flags,
+                                       BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
        *dir_offset = dirent->k.p.offset;
 
        return ret;
@@ -291,12 +293,10 @@ int bch2_dirent_rename(struct btree_trans *trans,
        struct bkey_i_dirent *new_src = NULL, *new_dst = NULL;
        struct bpos dst_pos =
                POS(dst_dir.inum, bch2_dirent_hash(dst_hash, dst_name));
-       unsigned src_type = 0, dst_type = 0, src_update_flags = 0;
+       unsigned src_update_flags = 0;
+       bool delete_src, delete_dst;
        int ret = 0;
 
-       if (src_dir.subvol != dst_dir.subvol)
-               return -EXDEV;
-
        memset(src_inum, 0, sizeof(*src_inum));
        memset(dst_inum, 0, sizeof(*dst_inum));
 
@@ -317,12 +317,6 @@ int bch2_dirent_rename(struct btree_trans *trans,
        if (ret)
                goto out;
 
-       src_type = bkey_s_c_to_dirent(old_src).v->d_type;
-
-       if (src_type == DT_SUBVOL && mode == BCH_RENAME_EXCHANGE)
-               return -EOPNOTSUPP;
-
-
        /* Lookup dst: */
        if (mode == BCH_RENAME) {
                /*
@@ -350,11 +344,6 @@ int bch2_dirent_rename(struct btree_trans *trans,
                                bkey_s_c_to_dirent(old_dst), dst_inum);
                if (ret)
                        goto out;
-
-               dst_type = bkey_s_c_to_dirent(old_dst).v->d_type;
-
-               if (dst_type == DT_SUBVOL)
-                       return -EOPNOTSUPP;
        }
 
        if (mode != BCH_RENAME_EXCHANGE)
@@ -424,28 +413,55 @@ int bch2_dirent_rename(struct btree_trans *trans,
                }
        }
 
+       if (new_dst->v.d_type == DT_SUBVOL)
+               new_dst->v.d_parent_subvol = cpu_to_le32(dst_dir.subvol);
+
+       if ((mode == BCH_RENAME_EXCHANGE) &&
+           new_src->v.d_type == DT_SUBVOL)
+               new_src->v.d_parent_subvol = cpu_to_le32(src_dir.subvol);
+
        ret = bch2_trans_update(trans, &dst_iter, &new_dst->k_i, 0);
        if (ret)
                goto out;
 out_set_src:
-
        /*
-        * If we're deleting a subvolume, we need to really delete the dirent,
-        * not just emit a whiteout in the current snapshot:
+        * If we're deleting a subvolume we need to really delete the dirent,
+        * not just emit a whiteout in the current snapshot - there can only be
+        * a single dirent that points to a given subvolume.
+        *
+        * IOW, we don't maintain multiple versions in different snapshots of
+        * dirents that point to subvolumes - dirents that point to subvolumes
+        * are only visible in one particular subvolume so it's not necessary,
+        * and it would be particularly confusing for fsck to have to deal with.
         */
-       if (src_type == DT_SUBVOL) {
-               bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
-               ret = bch2_btree_iter_traverse(&src_iter);
+       delete_src = bkey_s_c_to_dirent(old_src).v->d_type == DT_SUBVOL &&
+               new_src->k.p.snapshot != old_src.k->p.snapshot;
+
+       delete_dst = old_dst.k &&
+               bkey_s_c_to_dirent(old_dst).v->d_type == DT_SUBVOL &&
+               new_dst->k.p.snapshot != old_dst.k->p.snapshot;
+
+       if (!delete_src || !bkey_deleted(&new_src->k)) {
+               ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
                if (ret)
                        goto out;
+       }
 
-               new_src->k.p = src_iter.pos;
-               src_update_flags |= BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE;
+       if (delete_src) {
+               bch2_btree_iter_set_snapshot(&src_iter, old_src.k->p.snapshot);
+               ret =   bch2_btree_iter_traverse(&src_iter) ?:
+                       bch2_btree_delete_at(trans, &src_iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+               if (ret)
+                       goto out;
        }
 
-       ret = bch2_trans_update(trans, &src_iter, &new_src->k_i, src_update_flags);
-       if (ret)
-               goto out;
+       if (delete_dst) {
+               bch2_btree_iter_set_snapshot(&dst_iter, old_dst.k->p.snapshot);
+               ret =   bch2_btree_iter_traverse(&dst_iter) ?:
+                       bch2_btree_delete_at(trans, &dst_iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+               if (ret)
+                       goto out;
+       }
 
        if (mode == BCH_RENAME_EXCHANGE)
                *src_offset = new_src->k.p.offset;
@@ -456,41 +472,29 @@ out:
        return ret;
 }
 
-int __bch2_dirent_lookup_trans(struct btree_trans *trans,
-                              struct btree_iter *iter,
-                              subvol_inum dir,
-                              const struct bch_hash_info *hash_info,
-                              const struct qstr *name, subvol_inum *inum,
-                              unsigned flags)
+int bch2_dirent_lookup_trans(struct btree_trans *trans,
+                            struct btree_iter *iter,
+                            subvol_inum dir,
+                            const struct bch_hash_info *hash_info,
+                            const struct qstr *name, subvol_inum *inum,
+                            unsigned flags)
 {
-       struct bkey_s_c k;
-       struct bkey_s_c_dirent d;
-       u32 snapshot;
-       int ret;
-
-       ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
-       if (ret)
-               return ret;
-
-       ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
-                              hash_info, dir, name, flags);
+       int ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
+                                  hash_info, dir, name, flags);
        if (ret)
                return ret;
 
-       k = bch2_btree_iter_peek_slot(iter);
+       struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
        ret = bkey_err(k);
        if (ret)
                goto err;
 
-       d = bkey_s_c_to_dirent(k);
-
-       ret = bch2_dirent_read_target(trans, dir, d, inum);
+       ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), inum);
        if (ret > 0)
                ret = -ENOENT;
 err:
        if (ret)
                bch2_trans_iter_exit(trans, iter);
-
        return ret;
 }
 
@@ -502,13 +506,13 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir,
        struct btree_iter iter = { NULL };
 
        int ret = lockrestart_do(trans,
-               __bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
+               bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0));
        bch2_trans_iter_exit(trans, &iter);
        bch2_trans_put(trans);
        return ret;
 }
 
-int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot)
+int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 subvol, u32 snapshot)
 {
        struct btree_iter iter;
        struct bkey_s_c k;
@@ -518,7 +522,10 @@ int bch2_empty_dir_snapshot(struct btree_trans *trans, u64 dir, u32 snapshot)
                           SPOS(dir, 0, snapshot),
                           POS(dir, U64_MAX), 0, k, ret)
                if (k.k->type == KEY_TYPE_dirent) {
-                       ret = -ENOTEMPTY;
+                       struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+                       if (d.v->d_type == DT_SUBVOL && le32_to_cpu(d.v->d_parent_subvol) != subvol)
+                               continue;
+                       ret = -BCH_ERR_ENOTEMPTY_dir_not_empty;
                        break;
                }
        bch2_trans_iter_exit(trans, &iter);
@@ -531,7 +538,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
        u32 snapshot;
 
        return bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?:
-               bch2_empty_dir_snapshot(trans, dir.inum, snapshot);
+               bch2_empty_dir_snapshot(trans, dir.inum, dir.subvol, snapshot);
 }
 
 int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct dir_context *ctx)
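
  Two invariants drive the dirent changes above: a dirent pointing at a
  subvolume now records d_parent_subvol, and there is only ever one
  such dirent per subvolume, so rename must really delete a displaced
  subvolume dirent rather than leave a snapshot whiteout behind. A
  condensed, standalone restatement of the delete_src/delete_dst rule
  (hypothetical helper, not the kernel code):

        #include <stdbool.h>
        #include <stdint.h>

        /* A dirent that points at a subvolume must stay unique across
         * snapshots: if the replacement key was written into a
         * different snapshot than the old key lives in, the old key
         * has to be deleted outright instead of whited out. */
        static bool must_really_delete(bool points_at_subvol,
                                       uint32_t old_key_snapshot,
                                       uint32_t new_key_snapshot)
        {
                return points_at_subvol &&
                        old_key_snapshot != new_key_snapshot;
        }
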
index 21ffeb78f02ee3a750a39512f2fb353b594567b5..bee55cca2aa0dd303599a095753508137708fc90 100644 (file)
@@ -35,7 +35,7 @@ static inline unsigned dirent_val_u64s(unsigned len)
 int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
                            struct bkey_s_c_dirent, subvol_inum *);
 
-int bch2_dirent_create_snapshot(struct btree_trans *, u64, u32,
+int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
                        const struct bch_hash_info *, u8,
                        const struct qstr *, u64, u64 *,
                        bch_str_hash_flags_t);
@@ -62,14 +62,14 @@ int bch2_dirent_rename(struct btree_trans *,
                       const struct qstr *, subvol_inum *, u64 *,
                       enum bch_rename_mode);
 
-int __bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *,
+int bch2_dirent_lookup_trans(struct btree_trans *, struct btree_iter *,
                               subvol_inum, const struct bch_hash_info *,
                               const struct qstr *, subvol_inum *, unsigned);
 u64 bch2_dirent_lookup(struct bch_fs *, subvol_inum,
                       const struct bch_hash_info *,
                       const struct qstr *, subvol_inum *);
 
-int bch2_empty_dir_snapshot(struct btree_trans *, u64, u32);
+int bch2_empty_dir_snapshot(struct btree_trans *, u64, u32, u32);
 int bch2_empty_dir_trans(struct btree_trans *, subvol_inum);
 int bch2_readdir(struct bch_fs *, subvol_inum, struct dir_context *);
 
index d503af2700247d8aa1257962c37df9b042ee55ec..b98e2c2b8bf06f59fa70cfe23873e51529a917b8 100644 (file)
@@ -504,7 +504,7 @@ static void ec_stripe_buf_exit(struct ec_stripe_buf *buf)
                unsigned i;
 
                for (i = 0; i < s->v.nr_blocks; i++) {
-                       kvpfree(buf->data[i], buf->size << 9);
+                       kvfree(buf->data[i]);
                        buf->data[i] = NULL;
                }
        }
@@ -531,7 +531,7 @@ static int ec_stripe_buf_init(struct ec_stripe_buf *buf,
        memset(buf->valid, 0xFF, sizeof(buf->valid));
 
        for (i = 0; i < v->nr_blocks; i++) {
-               buf->data[i] = kvpmalloc(buf->size << 9, GFP_KERNEL);
+               buf->data[i] = kvmalloc(buf->size << 9, GFP_KERNEL);
                if (!buf->data[i])
                        goto err;
        }
index d260ff9bbfeb7b9121f222a4362f37f95c927977..43557bebd0f8549f5e0bc7c23229687d2f4c102f 100644 (file)
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "errcode.h"
+#include "trace.h"
 
 #include <linux/errname.h>
 
@@ -49,15 +50,17 @@ bool __bch2_err_matches(int err, int class)
        return err == class;
 }
 
-int __bch2_err_class(int err)
+int __bch2_err_class(int bch_err)
 {
-       err = -err;
-       BUG_ON((unsigned) err >= BCH_ERR_MAX);
+       int std_err = -bch_err;
+       BUG_ON((unsigned) std_err >= BCH_ERR_MAX);
 
-       while (err >= BCH_ERR_START && bch2_errcode_parents[err - BCH_ERR_START])
-               err = bch2_errcode_parents[err - BCH_ERR_START];
+       while (std_err >= BCH_ERR_START && bch2_errcode_parents[std_err - BCH_ERR_START])
+               std_err = bch2_errcode_parents[std_err - BCH_ERR_START];
+
+       trace_error_downcast(bch_err, std_err, _RET_IP_);
 
-       return -err;
+       return -std_err;
 }
 
 const char *bch2_blk_status_to_str(blk_status_t status)
index 8c40c2067a0471e2dde6c3dcbcdeb709565732a7..af25d8ec60f221d9d935a0ef4ad7aef3641a9e3d 100644 (file)
@@ -5,6 +5,10 @@
 #define BCH_ERRCODES()                                                         \
        x(ERANGE,                       ERANGE_option_too_small)                \
        x(ERANGE,                       ERANGE_option_too_big)                  \
+       x(EINVAL,                       mount_option)                           \
+       x(BCH_ERR_mount_option,         option_name)                            \
+       x(BCH_ERR_mount_option,         option_value)                           \
+       x(BCH_ERR_mount_option,         option_not_bool)                        \
        x(ENOMEM,                       ENOMEM_stripe_buf)                      \
        x(ENOMEM,                       ENOMEM_replicas_table)                  \
        x(ENOMEM,                       ENOMEM_cpu_replicas)                    \
@@ -78,6 +82,7 @@
        x(ENOMEM,                       ENOMEM_fs_name_alloc)                   \
        x(ENOMEM,                       ENOMEM_fs_other_alloc)                  \
        x(ENOMEM,                       ENOMEM_dev_alloc)                       \
+       x(ENOMEM,                       ENOMEM_disk_accounting)                 \
        x(ENOSPC,                       ENOSPC_disk_reservation)                \
        x(ENOSPC,                       ENOSPC_bucket_alloc)                    \
        x(ENOSPC,                       ENOSPC_disk_label_add)                  \
        x(ENOENT,                       ENOENT_dirent_doesnt_match_inode)       \
        x(ENOENT,                       ENOENT_dev_not_found)                   \
        x(ENOENT,                       ENOENT_dev_idx_not_found)               \
+       x(ENOTEMPTY,                    ENOTEMPTY_dir_not_empty)                \
+       x(ENOTEMPTY,                    ENOTEMPTY_subvol_not_empty)             \
        x(0,                            open_buckets_empty)                     \
        x(0,                            freelist_empty)                         \
        x(BCH_ERR_freelist_empty,       no_buckets_found)                       \
        x(EINVAL,                       invalid)                                \
        x(EINVAL,                       internal_fsck_err)                      \
        x(EINVAL,                       opt_parse_error)                        \
+       x(EINVAL,                       remove_with_metadata_missing_unimplemented)\
+       x(EINVAL,                       remove_would_lose_data)                 \
+       x(EINVAL,                       btree_iter_with_journal_not_supported)  \
        x(EROFS,                        erofs_trans_commit)                     \
        x(EROFS,                        erofs_no_writes)                        \
        x(EROFS,                        erofs_journal_err)                      \
        x(BCH_ERR_operation_blocked,    nocow_lock_blocked)                     \
        x(EIO,                          btree_node_read_err)                    \
        x(EIO,                          sb_not_downgraded)                      \
-       x(EIO,                          btree_write_all_failed)                 \
+       x(EIO,                          btree_node_write_all_failed)            \
+       x(EIO,                          btree_node_read_error)                  \
+       x(EIO,                          btree_node_read_validate_error)         \
+       x(EIO,                          btree_need_topology_repair)             \
        x(BCH_ERR_btree_node_read_err,  btree_node_read_err_fixable)            \
        x(BCH_ERR_btree_node_read_err,  btree_node_read_err_want_retry)         \
        x(BCH_ERR_btree_node_read_err,  btree_node_read_err_must_retry)         \
        x(BCH_ERR_nopromote,            nopromote_congested)                    \
        x(BCH_ERR_nopromote,            nopromote_in_flight)                    \
        x(BCH_ERR_nopromote,            nopromote_no_writes)                    \
-       x(BCH_ERR_nopromote,            nopromote_enomem)
+       x(BCH_ERR_nopromote,            nopromote_enomem)                       \
+       x(0,                            need_inode_lock)
 
 enum bch_errcode {
        BCH_ERR_START           = 2048,
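
  BCH_ERRCODES() above is an x-macro: each x(parent, name) entry
  expands once into the enum and once into a parent table, which is
  what lets __bch2_err_class() (in the error-code hunk earlier) walk a
  fine-grained private code back down to the errno userspace expects.
  A self-contained miniature of the technique:

        #include <errno.h>
        #include <stdio.h>

        /* Two-entry stand-in for BCH_ERRCODES(); x(parent, name) both
         * names a private code and records which code it refines: */
        #define ERRCODES()                                      \
                x(EINVAL,               mount_option)           \
                x(ERR_mount_option,     option_not_bool)

        enum {
                ERR_START = 2048,
        #define x(parent, name) ERR_##name,
                ERRCODES()
        #undef x
                ERR_MAX
        };

        static const int err_parents[] = {
        #define x(parent, name) [ERR_##name - ERR_START] = parent,
                ERRCODES()
        #undef x
        };

        /* Walk a private code down to the outermost standard errno: */
        static int err_class(int err)
        {
                while (err >= ERR_START && err_parents[err - ERR_START])
                        err = err_parents[err - ERR_START];
                return err;
        }

        int main(void)
        {
                printf("%d -> %d (EINVAL == %d)\n", ERR_option_not_bool,
                       err_class(ERR_option_not_bool), EINVAL);
                return 0;
        }
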
index d32c8bebe46c32f7abc1a11ad49ee80752f2a623..043431206799d80a6e3eab43bd635947fa48db9f 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "error.h"
+#include "recovery.h"
 #include "super.h"
 #include "thread_with_file.h"
 
@@ -25,11 +26,16 @@ bool bch2_inconsistent_error(struct bch_fs *c)
        }
 }
 
-void bch2_topology_error(struct bch_fs *c)
+int bch2_topology_error(struct bch_fs *c)
 {
        set_bit(BCH_FS_topology_error, &c->flags);
-       if (!test_bit(BCH_FS_fsck_running, &c->flags))
+       if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
                bch2_inconsistent_error(c);
+               return -BCH_ERR_btree_need_topology_repair;
+       } else {
+               return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?:
+                       -BCH_ERR_btree_node_read_validate_error;
+       }
 }
 
 void bch2_fatal_error(struct bch_fs *c)
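
  bch2_topology_error() returning int instead of void lets callers
  propagate a precise errcode rather than inventing one: outside fsck
  it flags the fs inconsistent and returns
  -BCH_ERR_btree_need_topology_repair; during fsck it schedules the
  check_topology recovery pass. The a ?: b chain it uses is the GNU C
  "elvis" operator (a if nonzero, else b, with a evaluated once) that
  bcachefs leans on for "first error wins" sequences. In miniature,
  with stand-in values rather than real errcodes:

        static int schedule_recovery_pass(void)
        {
                return 0;                       /* stub: pass scheduled OK */
        }

        static int topology_error(int fsck_running)
        {
                return !fsck_running
                        ? -1                    /* need_topology_repair stand-in */
                        : (schedule_recovery_pass() ?: -2 /* validate_error stand-in */);
        }
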
index fec17d1353d18042ca77132106d2e2318de2be01..94491190e09e9d5085ca1ef87c5764c4192d6a24 100644 (file)
@@ -30,7 +30,7 @@ struct work_struct;
 
 bool bch2_inconsistent_error(struct bch_fs *);
 
-void bch2_topology_error(struct bch_fs *);
+int bch2_topology_error(struct bch_fs *);
 
 #define bch2_fs_inconsistent(c, ...)                                   \
 ({                                                                     \
index 6bf839d69e84e6e24ed3bf2bf611177fc04676e1..6219f2c08e4c737abd477588419c0f0dbeecbc38 100644 (file)
@@ -43,6 +43,11 @@ enum bkey_invalid_flags;
 #define extent_entry_next(_entry)                                      \
        ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry)))
 
+#define extent_entry_next_safe(_entry, _end)                           \
+       (likely(__extent_entry_type(_entry) < BCH_EXTENT_ENTRY_MAX)     \
+        ? extent_entry_next(_entry)                                    \
+        : _end)
+
 static inline unsigned
 __extent_entry_type(const union bch_extent_entry *e)
 {
@@ -280,7 +285,7 @@ static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k)
 #define __bkey_extent_entry_for_each_from(_start, _end, _entry)                \
        for ((_entry) = (_start);                                       \
             (_entry) < (_end);                                         \
-            (_entry) = extent_entry_next(_entry))
+            (_entry) = extent_entry_next_safe(_entry, _end))
 
 #define __bkey_ptr_next(_ptr, _end)                                    \
 ({                                                                     \
@@ -318,7 +323,7 @@ static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k)
        (_ptr).has_ec   = false;                                        \
                                                                        \
        __bkey_extent_entry_for_each_from(_entry, _end, _entry)         \
-               switch (extent_entry_type(_entry)) {                    \
+               switch (__extent_entry_type(_entry)) {                  \
                case BCH_EXTENT_ENTRY_ptr:                              \
                        (_ptr).ptr              = _entry->ptr;          \
                        goto out;                                       \
@@ -344,7 +349,7 @@ out:                                                                        \
        for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL),             \
             (_entry) = _start;                                         \
             __bkey_ptr_next_decode(_k, _end, _ptr, _entry);            \
-            (_entry) = extent_entry_next(_entry))
+            (_entry) = extent_entry_next_safe(_entry, _end))
 
 #define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry)                 \
        __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end,            \
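
  extent_entry_next() computes its stride from the entry's own type
  field, so a corrupt type could previously send the _for_each loops
  above walking off the end of the key. extent_entry_next_safe() clamps
  the step to _end whenever the type is out of range, turning
  corruption into a clean loop exit that validation can then report. A
  simplified standalone illustration of the same guard over a generic
  variable-length record stream:

        #include <stddef.h>
        #include <stdint.h>

        struct rec {
                uint8_t type;   /* bogus type implies a bogus size */
                uint8_t len;    /* payload bytes following the header */
        };

        #define REC_TYPE_MAX    4

        /* Only step by the self-described size when the type is
         * plausible; otherwise clamp to 'end' so iteration terminates
         * instead of reading out of bounds: */
        static const struct rec *rec_next_safe(const struct rec *r,
                                               const struct rec *end)
        {
                return r->type < REC_TYPE_MAX
                        ? (const struct rec *) ((const uint8_t *) r +
                                                sizeof(*r) + r->len)
                        : end;
        }

        static size_t count_recs(const struct rec *r, const struct rec *end)
        {
                size_t n = 0;

                while (r < end) {
                        n++;
                        r = rec_next_safe(r, end);
                }
                return n;
        }
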
index 66b945be10c2309a9e758b228b146047b20674e2..d8153fe27037ef46d1b2b220430f78fae78f2e35 100644 (file)
@@ -24,12 +24,12 @@ struct {                                                            \
        (fifo)->mask    = (fifo)->size                                  \
                ? roundup_pow_of_two((fifo)->size) - 1                  \
                : 0;                                                    \
-       (fifo)->data    = kvpmalloc(fifo_buf_size(fifo), (_gfp));       \
+       (fifo)->data    = kvmalloc(fifo_buf_size(fifo), (_gfp));        \
 })
 
 #define free_fifo(fifo)                                                        \
 do {                                                                   \
-       kvpfree((fifo)->data, fifo_buf_size(fifo));                     \
+       kvfree((fifo)->data);                                           \
        (fifo)->data = NULL;                                            \
 } while (0)
 
index 1c1ea0f0c692a6fdd4c262ef184bbcdda32d154f..624e6f963240f82f56d4a111f5041e6fb1d9daa9 100644 (file)
@@ -107,6 +107,7 @@ int bch2_create_trans(struct btree_trans *trans,
                u32 new_subvol, dir_snapshot;
 
                ret = bch2_subvolume_create(trans, new_inode->bi_inum,
+                                           dir.subvol,
                                            snapshot_src.subvol,
                                            &new_subvol, &snapshot,
                                            (flags & BCH_CREATE_SNAPSHOT_RO) != 0);
@@ -242,7 +243,7 @@ int bch2_unlink_trans(struct btree_trans *trans,
                      struct bch_inode_unpacked *dir_u,
                      struct bch_inode_unpacked *inode_u,
                      const struct qstr *name,
-                     bool deleting_snapshot)
+                     bool deleting_subvol)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter dir_iter = { NULL };
@@ -260,8 +261,8 @@ int bch2_unlink_trans(struct btree_trans *trans,
 
        dir_hash = bch2_hash_info_init(c, dir_u);
 
-       ret = __bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
-                                        name, &inum, BTREE_ITER_INTENT);
+       ret = bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
+                                      name, &inum, BTREE_ITER_INTENT);
        if (ret)
                goto err;
 
@@ -270,18 +271,25 @@ int bch2_unlink_trans(struct btree_trans *trans,
        if (ret)
                goto err;
 
-       if (!deleting_snapshot && S_ISDIR(inode_u->bi_mode)) {
+       if (!deleting_subvol && S_ISDIR(inode_u->bi_mode)) {
                ret = bch2_empty_dir_trans(trans, inum);
                if (ret)
                        goto err;
        }
 
-       if (deleting_snapshot && !inode_u->bi_subvol) {
+       if (deleting_subvol && !inode_u->bi_subvol) {
                ret = -BCH_ERR_ENOENT_not_subvol;
                goto err;
        }
 
-       if (deleting_snapshot || inode_u->bi_subvol) {
+       if (inode_u->bi_subvol) {
+               /* Recursive subvolume destroy not allowed (yet?) */
+               ret = bch2_subvol_has_children(trans, inode_u->bi_subvol);
+               if (ret)
+                       goto err;
+       }
+
+       if (deleting_subvol || inode_u->bi_subvol) {
                ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
                if (ret)
                        goto err;
@@ -349,6 +357,22 @@ bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
        return ret;
 }
 
+static int subvol_update_parent(struct btree_trans *trans, u32 subvol, u32 new_parent)
+{
+       struct btree_iter iter;
+       struct bkey_i_subvolume *s =
+               bch2_bkey_get_mut_typed(trans, &iter,
+                       BTREE_ID_subvolumes, POS(0, subvol),
+                       BTREE_ITER_CACHED, subvolume);
+       int ret = PTR_ERR_OR_ZERO(s);
+       if (ret)
+               return ret;
+
+       s->v.fs_path_parent = cpu_to_le32(new_parent);
+       bch2_trans_iter_exit(trans, &iter);
+       return 0;
+}
+
 int bch2_rename_trans(struct btree_trans *trans,
                      subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u,
                      subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u,
@@ -410,6 +434,36 @@ int bch2_rename_trans(struct btree_trans *trans,
                        goto err;
        }
 
+       if (src_inode_u->bi_subvol &&
+           dst_dir.subvol != src_inode_u->bi_parent_subvol) {
+               ret = subvol_update_parent(trans, src_inode_u->bi_subvol, dst_dir.subvol);
+               if (ret)
+                       goto err;
+       }
+
+       if (mode == BCH_RENAME_EXCHANGE &&
+           dst_inode_u->bi_subvol &&
+           src_dir.subvol != dst_inode_u->bi_parent_subvol) {
+               ret = subvol_update_parent(trans, dst_inode_u->bi_subvol, src_dir.subvol);
+               if (ret)
+                       goto err;
+       }
+
+       /* Can't move across subvolumes, unless it's a subvolume root: */
+       if (src_dir.subvol != dst_dir.subvol &&
+           (!src_inode_u->bi_subvol ||
+            (dst_inum.inum && !dst_inode_u->bi_subvol))) {
+               ret = -EXDEV;
+               goto err;
+       }
+
+       if (src_inode_u->bi_parent_subvol)
+               src_inode_u->bi_parent_subvol = dst_dir.subvol;
+
+       if ((mode == BCH_RENAME_EXCHANGE) &&
+           dst_inode_u->bi_parent_subvol)
+               dst_inode_u->bi_parent_subvol = src_dir.subvol;
+
        src_inode_u->bi_dir             = dst_dir_u->bi_inum;
        src_inode_u->bi_dir_offset      = dst_offset;
 
@@ -432,10 +486,10 @@ int bch2_rename_trans(struct btree_trans *trans,
                        goto err;
                }
 
-               if (S_ISDIR(dst_inode_u->bi_mode) &&
-                   bch2_empty_dir_trans(trans, dst_inum)) {
-                       ret = -ENOTEMPTY;
-                       goto err;
+               if (S_ISDIR(dst_inode_u->bi_mode)) {
+                       ret = bch2_empty_dir_trans(trans, dst_inum);
+                       if (ret)
+                               goto err;
                }
        }
 
index 27710cdd5710ec5bba9ff9a11cad92f7cf14bc09..39292e7ef342c53f842b88372cd79d6bfece6bed 100644 (file)
@@ -810,7 +810,8 @@ static noinline void folios_trunc(folios *fs, struct folio **fi)
 static int __bch2_buffered_write(struct bch_inode_info *inode,
                                 struct address_space *mapping,
                                 struct iov_iter *iter,
-                                loff_t pos, unsigned len)
+                                loff_t pos, unsigned len,
+                                bool inode_locked)
 {
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch2_folio_reservation res;
@@ -835,6 +836,15 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
 
        BUG_ON(!fs.nr);
 
+       /*
+        * If we're not using the inode lock, we need to lock all the folios for
+        * atomicity of writes vs. other writes:
+        */
+       if (!inode_locked && folio_end_pos(darray_last(fs)) < end) {
+               ret = -BCH_ERR_need_inode_lock;
+               goto out;
+       }
+
        f = darray_first(fs);
        if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
                ret = bch2_read_single_folio(f, mapping);
@@ -929,8 +939,10 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
        end = pos + copied;
 
        spin_lock(&inode->v.i_lock);
-       if (end > inode->v.i_size)
+       if (end > inode->v.i_size) {
+               BUG_ON(!inode_locked);
                i_size_write(&inode->v, end);
+       }
        spin_unlock(&inode->v.i_lock);
 
        f_pos = pos;
@@ -974,12 +986,68 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        struct bch_inode_info *inode = file_bch_inode(file);
-       loff_t pos = iocb->ki_pos;
-       ssize_t written = 0;
-       int ret = 0;
+       loff_t pos;
+       bool inode_locked = false;
+       ssize_t written = 0, written2 = 0, ret = 0;
+
+       /*
+        * We don't take the inode lock unless i_size will be changing. Folio
+        * locks provide exclusion with other writes, and the pagecache add lock
+        * provides exclusion with truncate and hole punching.
+        *
+        * There is one nasty corner case where atomicity would be broken
+        * without great care: when copying data from userspace to the page
+        * cache, we do that with faults disabled - a page fault would recurse
+        * back into the filesystem, taking filesystem locks again, and
+        * deadlock; so it's done with faults disabled, and we fault in the user
+        * buffer when we aren't holding locks.
+        *
+        * If we do part of the write, but then race and the pages backing the
+        * userspace buffer have been evicted and are no longer resident, we
+        * have to drop our folio locks to re-fault them in, breaking write atomicity.
+        *
+        * To fix this, we restart the write from the start, if we weren't
+        * holding the inode lock.
+        *
+        * There is another wrinkle after that; if we restart the write from the
+        * start, and then get an unrecoverable error, we _cannot_ claim to
+        * userspace that we did not write data we actually did - so we must
+        * track (written2) the most we ever wrote.
+        */
+
+       if ((iocb->ki_flags & IOCB_APPEND) ||
+           (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) {
+               inode_lock(&inode->v);
+               inode_locked = true;
+       }
+
+       ret = generic_write_checks(iocb, iter);
+       if (ret <= 0)
+               goto unlock;
+
+       ret = file_remove_privs_flags(file, !inode_locked ? IOCB_NOWAIT : 0);
+       if (ret) {
+               if (!inode_locked) {
+                       inode_lock(&inode->v);
+                       inode_locked = true;
+                       ret = file_remove_privs_flags(file, 0);
+               }
+               if (ret)
+                       goto unlock;
+       }
+
+       ret = file_update_time(file);
+       if (ret)
+               goto unlock;
+
+       pos = iocb->ki_pos;
 
        bch2_pagecache_add_get(inode);
 
+       if (!inode_locked &&
+           (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v)))
+               goto get_inode_lock;
+
        do {
                unsigned offset = pos & (PAGE_SIZE - 1);
                unsigned bytes = iov_iter_count(iter);
@@ -1004,12 +1072,17 @@ again:
                        }
                }
 
+               if (unlikely(bytes != iov_iter_count(iter) && !inode_locked))
+                       goto get_inode_lock;
+
                if (unlikely(fatal_signal_pending(current))) {
                        ret = -EINTR;
                        break;
                }
 
-               ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
+               ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes, inode_locked);
+               if (ret == -BCH_ERR_need_inode_lock)
+                       goto get_inode_lock;
                if (unlikely(ret < 0))
                        break;
 
@@ -1030,50 +1103,46 @@ again:
                }
                pos += ret;
                written += ret;
+               written2 = max(written, written2);
+
+               if (ret != bytes && !inode_locked)
+                       goto get_inode_lock;
                ret = 0;
 
                balance_dirty_pages_ratelimited(mapping);
-       } while (iov_iter_count(iter));
 
+               if (0) {
+get_inode_lock:
+                       bch2_pagecache_add_put(inode);
+                       inode_lock(&inode->v);
+                       inode_locked = true;
+                       bch2_pagecache_add_get(inode);
+
+                       iov_iter_revert(iter, written);
+                       pos -= written;
+                       written = 0;
+                       ret = 0;
+               }
+       } while (iov_iter_count(iter));
        bch2_pagecache_add_put(inode);
+unlock:
+       if (inode_locked)
+               inode_unlock(&inode->v);
+
+       iocb->ki_pos += written;
 
-       return written ? written : ret;
+       ret = max(written, written2) ?: ret;
+       if (ret > 0)
+               ret = generic_write_sync(iocb, ret);
+       return ret;
 }
 
-ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
-       struct file *file = iocb->ki_filp;
-       struct bch_inode_info *inode = file_bch_inode(file);
-       ssize_t ret;
-
-       if (iocb->ki_flags & IOCB_DIRECT) {
-               ret = bch2_direct_write(iocb, from);
-               goto out;
-       }
-
-       inode_lock(&inode->v);
-
-       ret = generic_write_checks(iocb, from);
-       if (ret <= 0)
-               goto unlock;
-
-       ret = file_remove_privs(file);
-       if (ret)
-               goto unlock;
-
-       ret = file_update_time(file);
-       if (ret)
-               goto unlock;
-
-       ret = bch2_buffered_write(iocb, from);
-       if (likely(ret > 0))
-               iocb->ki_pos += ret;
-unlock:
-       inode_unlock(&inode->v);
+       ssize_t ret = iocb->ki_flags & IOCB_DIRECT
+               ? bch2_direct_write(iocb, iter)
+               : bch2_buffered_write(iocb, iter);
 
-       if (ret > 0)
-               ret = generic_write_sync(iocb, ret);
-out:
        return bch2_err_class(ret);
 }
 
index 8cbaba6565b4493695d679fe41553c197468c752..828c3d7c8f1993129ae54db922f4bf9a649f6b6b 100644 (file)
@@ -51,13 +51,10 @@ enum bch_folio_sector_state {
 
 struct bch_folio_sector {
        /* Uncompressed, fully allocated replicas (or on disk reservation): */
-       unsigned                nr_replicas:4;
-
+       u8                      nr_replicas:4,
        /* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */
-       unsigned                replicas_reserved:4;
-
-       /* i_sectors: */
-       enum bch_folio_sector_state state:8;
+                               replicas_reserved:4;
+       u8                      state;
 };
 
 struct bch_folio {
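
  The bch_folio_sector rework packs both 4-bit counts into one u8 and
  replaces the 8-bit enum bitfield with a plain u8, shrinking the
  per-sector bookkeeping from (typically) four bytes to two - and there
  is one bch_folio_sector per 512-byte sector of every cached folio.
  An illustration of the two layouts (sizes assume a typical ABI where
  an unsigned bitfield occupies an int-sized unit):

        #include <stdint.h>
        #include <stdio.h>

        struct sector_old {             /* int-sized allocation unit */
                unsigned        nr_replicas:4;
                unsigned        replicas_reserved:4;
                int             state:8;        /* stands in for enum ...:8 */
        };

        struct sector_new {             /* two bytes, byte-aligned */
                uint8_t         nr_replicas:4,
                                replicas_reserved:4;
                uint8_t         state;
        };

        int main(void)
        {
                printf("old %zu bytes, new %zu bytes\n",
                       sizeof(struct sector_old), sizeof(struct sector_new));
                return 0;
        }
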
index 77ae65542db9166a4168a78a55064295bb1d9ebf..3f073845bbd77391306a55c6ac7a87771f7e5890 100644 (file)
@@ -176,45 +176,88 @@ static unsigned bch2_inode_hash(subvol_inum inum)
        return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
 }
 
-struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
+static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode)
 {
-       struct bch_inode_unpacked inode_u;
-       struct bch_inode_info *inode;
-       struct btree_trans *trans;
-       struct bch_subvolume subvol;
-       int ret;
+       subvol_inum inum = inode_inum(inode);
+       struct bch_inode_info *old = to_bch_ei(inode_insert5(&inode->v,
+                                     bch2_inode_hash(inum),
+                                     bch2_iget5_test,
+                                     bch2_iget5_set,
+                                     &inum));
+       BUG_ON(!old);
 
-       inode = to_bch_ei(iget5_locked(c->vfs_sb,
-                                      bch2_inode_hash(inum),
-                                      bch2_iget5_test,
-                                      bch2_iget5_set,
-                                      &inum));
-       if (unlikely(!inode))
-               return ERR_PTR(-ENOMEM);
-       if (!(inode->v.i_state & I_NEW))
-               return &inode->v;
+       if (unlikely(old != inode)) {
+               discard_new_inode(&inode->v);
+               inode = old;
+       } else {
+               mutex_lock(&c->vfs_inodes_lock);
+               list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
+               mutex_unlock(&c->vfs_inodes_lock);
+               /*
+                * we really don't want insert_inode_locked2() to be setting
+                * I_NEW...
+                */
+               unlock_new_inode(&inode->v);
+       }
 
-       trans = bch2_trans_get(c);
-       ret = lockrestart_do(trans,
-               bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
-               bch2_inode_find_by_inum_trans(trans, inum, &inode_u));
+       return inode;
+}
 
-       if (!ret)
-               bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
-       bch2_trans_put(trans);
+#define memalloc_flags_do(_flags, _do)                                         \
+({                                                                             \
+       unsigned _saved_flags = memalloc_flags_save(_flags);                    \
+       typeof(_do) _ret = _do;                                                 \
+       memalloc_flags_restore(_saved_flags);                                   \
+       _ret;                                                                   \
+})
 
-       if (ret) {
-               iget_failed(&inode->v);
-               return ERR_PTR(bch2_err_class(ret));
+/*
+ * Allocate a new inode, dropping/retaking btree locks if necessary:
+ */
+static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
+{
+       struct bch_fs *c = trans->c;
+
+       struct bch_inode_info *inode =
+               memalloc_flags_do(PF_MEMALLOC_NORECLAIM|PF_MEMALLOC_NOWARN,
+                                 to_bch_ei(new_inode(c->vfs_sb)));
+
+       if (unlikely(!inode)) {
+               int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM);
+               if (ret && inode)
+                       discard_new_inode(&inode->v);
+               if (ret)
+                       return ERR_PTR(ret);
        }
 
-       mutex_lock(&c->vfs_inodes_lock);
-       list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
-       mutex_unlock(&c->vfs_inodes_lock);
+       return inode;
+}
 
-       unlock_new_inode(&inode->v);
+struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)
+{
+       struct bch_inode_info *inode =
+               to_bch_ei(ilookup5_nowait(c->vfs_sb,
+                                         bch2_inode_hash(inum),
+                                         bch2_iget5_test,
+                                         &inum));
+       if (inode)
+               return &inode->v;
 
-       return &inode->v;
+       struct btree_trans *trans = bch2_trans_get(c);
+
+       struct bch_inode_unpacked inode_u;
+       struct bch_subvolume subvol;
+       int ret = lockrestart_do(trans,
+               bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
+               bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?:
+               PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
+       if (!ret) {
+               bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
+               inode = bch2_inode_insert(c, inode);
+       }
+       bch2_trans_put(trans);
+
+       return ret ? ERR_PTR(ret) : &inode->v;
 }
 
 struct bch_inode_info *
@@ -226,7 +269,7 @@ __bch2_create(struct mnt_idmap *idmap,
        struct bch_fs *c = dir->v.i_sb->s_fs_info;
        struct btree_trans *trans;
        struct bch_inode_unpacked dir_u;
-       struct bch_inode_info *inode, *old;
+       struct bch_inode_info *inode;
        struct bch_inode_unpacked inode_u;
        struct posix_acl *default_acl = NULL, *acl = NULL;
        subvol_inum inum;
@@ -293,7 +336,6 @@ err_before_quota:
                mutex_unlock(&dir->ei_update_lock);
        }
 
-       bch2_iget5_set(&inode->v, &inum);
        bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
 
        set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
@@ -304,36 +346,7 @@ err_before_quota:
         * bch2_trans_exit() and dropping locks, else we could race with another
         * thread pulling the inode in and modifying it:
         */
-
-       inode->v.i_state |= I_CREATING;
-
-       old = to_bch_ei(inode_insert5(&inode->v,
-                                     bch2_inode_hash(inum),
-                                     bch2_iget5_test,
-                                     bch2_iget5_set,
-                                     &inum));
-       BUG_ON(!old);
-
-       if (unlikely(old != inode)) {
-               /*
-                * We raced, another process pulled the new inode into cache
-                * before us:
-                */
-               make_bad_inode(&inode->v);
-               iput(&inode->v);
-
-               inode = old;
-       } else {
-               mutex_lock(&c->vfs_inodes_lock);
-               list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
-               mutex_unlock(&c->vfs_inodes_lock);
-               /*
-                * we really don't want insert_inode_locked2() to be setting
-                * I_NEW...
-                */
-               unlock_new_inode(&inode->v);
-       }
-
+       inode = bch2_inode_insert(c, inode);
        bch2_trans_put(trans);
 err:
        posix_acl_release(default_acl);
@@ -352,23 +365,78 @@ err_trans:
 
 /* methods */
 
+static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans,
+                       subvol_inum dir, struct bch_hash_info *dir_hash_info,
+                       const struct qstr *name)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter dirent_iter = {};
+       subvol_inum inum = {};
+
+       int ret = bch2_hash_lookup(trans, &dirent_iter, bch2_dirent_hash_desc,
+                                  dir_hash_info, dir, name, 0);
+       if (ret)
+               return ERR_PTR(ret);
+
+       struct bkey_s_c k = bch2_btree_iter_peek_slot(&dirent_iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       ret = bch2_dirent_read_target(trans, dir, bkey_s_c_to_dirent(k), &inum);
+       if (ret > 0)
+               ret = -ENOENT;
+       if (ret)
+               goto err;
+
+       struct bch_inode_info *inode =
+               to_bch_ei(ilookup5_nowait(c->vfs_sb,
+                                         bch2_inode_hash(inum),
+                                         bch2_iget5_test,
+                                         &inum));
+       if (inode)
+               goto out;
+
+       struct bch_subvolume subvol;
+       struct bch_inode_unpacked inode_u;
+       ret =   bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?:
+               bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?:
+               PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));
+       if (bch2_err_matches(ret, ENOENT)) {
+               struct printbuf buf = PRINTBUF;
+
+               bch2_bkey_val_to_text(&buf, c, k);
+               bch_err(c, "%s points to missing inode", buf.buf);
+               printbuf_exit(&buf);
+       }
+       if (ret)
+               goto err;
+
+       bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);
+       inode = bch2_inode_insert(c, inode);
+out:
+       bch2_trans_iter_exit(trans, &dirent_iter);
+       return inode;
+err:
+       inode = ERR_PTR(ret);
+       goto out;
+}
+
 static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
                                  unsigned int flags)
 {
        struct bch_fs *c = vdir->i_sb->s_fs_info;
        struct bch_inode_info *dir = to_bch_ei(vdir);
        struct bch_hash_info hash = bch2_hash_info_init(c, &dir->ei_inode);
-       struct inode *vinode = NULL;
-       subvol_inum inum = { .subvol = 1 };
-       int ret;
 
-       ret = bch2_dirent_lookup(c, inode_inum(dir), &hash,
-                                &dentry->d_name, &inum);
-
-       if (!ret)
-               vinode = bch2_vfs_inode_get(c, inum);
+       struct bch_inode_info *inode;
+       bch2_trans_do(c, NULL, NULL, 0,
+               PTR_ERR_OR_ZERO(inode = bch2_lookup_trans(trans, inode_inum(dir),
+                                                         &hash, &dentry->d_name)));
+       if (IS_ERR(inode))
+               inode = NULL;
 
-       return d_splice_alias(vinode, dentry);
+       return d_splice_alias(&inode->v, dentry);
 }
 
 static int bch2_mknod(struct mnt_idmap *idmap,
@@ -1372,6 +1440,7 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
                                struct bch_inode_unpacked *bi,
                                struct bch_subvolume *subvol)
 {
+       bch2_iget5_set(&inode->v, &inum);
        bch2_inode_update_after_write(trans, inode, bi, ~0);
 
        if (BCH_SUBVOLUME_SNAP(subvol))
@@ -1572,7 +1641,6 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
         * number:
         */
        u64 avail_inodes = ((usage.capacity - usage.used) << 3);
-       u64 fsid;
 
        buf->f_type     = BCACHEFS_STATFS_MAGIC;
        buf->f_bsize    = sb->s_blocksize;
@@ -1583,10 +1651,7 @@ static int bch2_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_files    = usage.nr_inodes + avail_inodes;
        buf->f_ffree    = avail_inodes;
 
-       fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
-              le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
-       buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
-       buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+       buf->f_fsid     = uuid_to_fsid(c->sb.user_uuid.b);
        buf->f_namelen  = BCH_NAME_MAX;
 
        return 0;
@@ -1805,8 +1870,10 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
        opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
 
        ret = bch2_parse_mount_opts(NULL, &opts, data);
-       if (ret)
+       if (ret) {
+               ret = bch2_err_class(ret);
                return ERR_PTR(ret);
+       }
 
        if (!dev_name || strlen(dev_name) == 0)
                return ERR_PTR(-EINVAL);
@@ -1882,6 +1949,7 @@ got_sb:
        sb->s_time_gran         = c->sb.nsec_per_time_unit;
        sb->s_time_min          = div_s64(S64_MIN, c->sb.time_units_per_sec) + 1;
        sb->s_time_max          = div_s64(S64_MAX, c->sb.time_units_per_sec);
+       sb->s_uuid              = c->sb.user_uuid;
        c->vfs_sb               = sb;
        strscpy(sb->s_id, c->name, sizeof(sb->s_id));
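
  bch2_vfs_inode_get() is now a two-phase lookup: ilookup5_nowait()
  checks the VFS inode hash without touching the btree, and only on a
  miss does it start a transaction, allocate (dropping btree locks if
  the allocation would block, per bch2_new_inode() above) and publish
  via bch2_inode_insert(), which must cope with losing the race to a
  concurrent inserter. The shape of the idiom, reduced to a toy
  single-slot cache with hypothetical names:

        #include <stdio.h>
        #include <stdlib.h>

        struct obj { int key; };

        static struct obj *slot;        /* toy one-entry "inode hash" */

        static struct obj *cache_lookup(int key)        /* ilookup5_nowait() analog */
        {
                return slot && slot->key == key ? slot : NULL;
        }

        static struct obj *cache_insert_or_get(struct obj *n) /* bch2_inode_insert() analog */
        {
                if (slot && slot->key == n->key)
                        return slot;    /* raced: someone published first */
                slot = n;
                return n;
        }

        static struct obj *obj_get(int key)
        {
                struct obj *o = cache_lookup(key); /* fast path, no transaction */
                if (o)
                        return o;

                struct obj *n = malloc(sizeof(*n)); /* slow path: create... */
                if (!n)
                        return NULL;
                n->key = key;

                o = cache_insert_or_get(n);     /* ...then publish */
                if (o != n)
                        free(n);                /* lost the race; use winner */
                return o;
        }

        int main(void)
        {
                printf("%d %d\n", obj_get(1)->key, obj_get(1)->key);
                return 0;
        }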
 
index 6a760777bafb06d08b449ee0db4308a77b54b11e..f48033be3f6b1fda3da6105b982403b607158c32 100644 (file)
@@ -100,8 +100,8 @@ err:
 }
 
 static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
-                         struct bch_inode_unpacked *inode,
-                         u32 *snapshot)
+                       struct bch_inode_unpacked *inode,
+                       u32 *snapshot)
 {
        struct btree_iter iter;
        struct bkey_s_c k;
@@ -142,34 +142,6 @@ static int lookup_dirent_in_snapshot(struct btree_trans *trans,
        return 0;
 }
 
-static int __write_inode(struct btree_trans *trans,
-                        struct bch_inode_unpacked *inode,
-                        u32 snapshot)
-{
-       struct bkey_inode_buf *inode_p =
-               bch2_trans_kmalloc(trans, sizeof(*inode_p));
-
-       if (IS_ERR(inode_p))
-               return PTR_ERR(inode_p);
-
-       bch2_inode_pack(inode_p, inode);
-       inode_p->inode.k.p.snapshot = snapshot;
-
-       return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
-                               &inode_p->inode.k_i,
-                               BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-}
-
-static int fsck_write_inode(struct btree_trans *trans,
-                           struct bch_inode_unpacked *inode,
-                           u32 snapshot)
-{
-       int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
-                           __write_inode(trans, inode, snapshot));
-       bch_err_fn(trans->c, ret);
-       return ret;
-}
-
 static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
 {
        struct bch_fs *c = trans->c;
@@ -280,7 +252,7 @@ create_lostfound:
                goto err;
 
        ret =   bch2_dirent_create_snapshot(trans,
-                               root_inode.bi_inum, snapshot, &root_hash_info,
+                               0, root_inode.bi_inum, snapshot, &root_hash_info,
                                mode_to_type(lostfound->bi_mode),
                                &lostfound_str,
                                lostfound->bi_inum,
@@ -303,30 +275,47 @@ static int reattach_inode(struct btree_trans *trans,
        char name_buf[20];
        struct qstr name;
        u64 dir_offset = 0;
+       u32 dirent_snapshot = inode_snapshot;
        int ret;
 
-       ret = lookup_lostfound(trans, inode_snapshot, &lostfound);
+       if (inode->bi_subvol) {
+               inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;
+
+               u64 root_inum;
+               ret = subvol_lookup(trans, inode->bi_parent_subvol,
+                                   &dirent_snapshot, &root_inum);
+               if (ret)
+                       return ret;
+
+               snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
+       } else {
+               snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
+       }
+
+       ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
        if (ret)
                return ret;
 
        if (S_ISDIR(inode->bi_mode)) {
                lostfound.bi_nlink++;
 
-               ret = __write_inode(trans, &lostfound, U32_MAX);
+               ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX);
                if (ret)
                        return ret;
        }
 
        dir_hash = bch2_hash_info_init(trans->c, &lostfound);
 
-       snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
        name = (struct qstr) QSTR(name_buf);
 
        ret = bch2_dirent_create_snapshot(trans,
-                               lostfound.bi_inum, inode_snapshot,
+                               inode->bi_parent_subvol, lostfound.bi_inum,
+                               dirent_snapshot,
                                &dir_hash,
                                inode_d_type(inode),
-                               &name, inode->bi_inum, &dir_offset,
+                               &name,
+                               inode->bi_subvol ?: inode->bi_inum,
+                               &dir_offset,
                                BCH_HASH_SET_MUST_CREATE);
        if (ret)
                return ret;
@@ -334,7 +323,7 @@ static int reattach_inode(struct btree_trans *trans,
        inode->bi_dir           = lostfound.bi_inum;
        inode->bi_dir_offset    = dir_offset;
 
-       return __write_inode(trans, inode, inode_snapshot);
+       return __bch2_fsck_write_inode(trans, inode, inode_snapshot);
 }
 
 static int remove_backpointer(struct btree_trans *trans,
@@ -353,6 +342,27 @@ static int remove_backpointer(struct btree_trans *trans,
        return ret;
 }
 
+static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s)
+{
+       struct bch_fs *c = trans->c;
+
+       struct bch_inode_unpacked inode;
+       int ret = bch2_inode_find_by_inum_trans(trans,
+                               (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
+                               &inode);
+       if (ret)
+               return ret;
+
+       ret = remove_backpointer(trans, &inode);
+       bch_err_msg(c, ret, "removing dirent");
+       if (ret)
+               return ret;
+
+       ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot));
+       bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
+       return ret;
+}
+
 struct snapshots_seen_entry {
        u32                             id;
        u32                             equiv;
@@ -592,13 +602,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
 }
 
 static struct inode_walker_entry *
-lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w,
-                         u32 snapshot, bool is_whiteout)
+lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
 {
-       struct inode_walker_entry *i;
-
-       snapshot = bch2_snapshot_equiv(c, snapshot);
+       bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
+       u32 snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
 
+       struct inode_walker_entry *i;
        __darray_for_each(w->inodes, i)
                if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
                        goto found;
@@ -609,20 +618,24 @@ found:
 
        if (snapshot != i->snapshot && !is_whiteout) {
                struct inode_walker_entry new = *i;
-               size_t pos;
-               int ret;
 
                new.snapshot = snapshot;
                new.count = 0;
 
-               bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
-                        w->last_pos.inode, snapshot, i->snapshot);
+               struct printbuf buf = PRINTBUF;
+               bch2_bkey_val_to_text(&buf, c, k);
+
+               bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
+                        "unexpected because we should always update the inode when we update a key in that inode\n"
+                        "%s",
+                        w->last_pos.inode, snapshot, i->snapshot, buf.buf);
+               printbuf_exit(&buf);
 
                while (i > w->inodes.data && i[-1].snapshot > snapshot)
                        --i;
 
-               pos = i - w->inodes.data;
-               ret = darray_insert_item(&w->inodes, pos, new);
+               size_t pos = i - w->inodes.data;
+               int ret = darray_insert_item(&w->inodes, pos, new);
                if (ret)
                        return ERR_PTR(ret);
 
@@ -633,21 +646,21 @@ found:
 }
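
lookup_inode_for_snapshot() now takes the key itself and derives both the
snapshot and the whiteout flag from it. When a key lives in a child
snapshot of the inode that was found, a copy of the walker entry is
inserted for the child so per-snapshot counts stay separate. A rough
userspace sketch of that ordered insert (types assumed; mirrors what
darray_insert_item() does for w->inodes):

        #include <string.h>

        struct entry { unsigned snapshot; unsigned long long count; };

        /* insert 'new' at 'pos', shifting the tail up one slot */
        static void insert_at(struct entry *e, size_t *nr, size_t pos,
                              struct entry new)
        {
                memmove(&e[pos + 1], &e[pos], (*nr - pos) * sizeof(*e));
                e[pos] = new;
                (*nr)++;
        }
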
 
 static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
-                                            struct inode_walker *w, struct bpos pos,
-                                            bool is_whiteout)
+                                            struct inode_walker *w,
+                                            struct bkey_s_c k)
 {
-       if (w->last_pos.inode != pos.inode) {
-               int ret = get_inodes_all_snapshots(trans, w, pos.inode);
+       if (w->last_pos.inode != k.k->p.inode) {
+               int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
                if (ret)
                        return ERR_PTR(ret);
-       } else if (bkey_cmp(w->last_pos, pos)) {
+       } else if (bkey_cmp(w->last_pos, k.k->p)) {
                darray_for_each(w->inodes, i)
                        i->seen_this_pos = false;
        }
 
-       w->last_pos = pos;
+       w->last_pos = k.k->p;
 
-       return lookup_inode_for_snapshot(trans->c, w, pos.snapshot, is_whiteout);
+       return lookup_inode_for_snapshot(trans->c, w, k);
 }
 
 static int __get_visible_inodes(struct btree_trans *trans,
@@ -722,7 +735,7 @@ static int hash_redo_key(struct btree_trans *trans,
        delete->k.p = k_iter->pos;
        return  bch2_btree_iter_traverse(k_iter) ?:
                bch2_trans_update(trans, k_iter, delete, 0) ?:
-               bch2_hash_set_snapshot(trans, desc, hash_info,
+               bch2_hash_set_in_snapshot(trans, desc, hash_info,
                                       (subvol_inum) { 0, k.k->p.inode },
                                       k.k->p.snapshot, tmp,
                                       BCH_HASH_SET_MUST_CREATE,
@@ -795,16 +808,93 @@ fsck_err:
        goto out;
 }
 
+static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
+                                               struct btree_iter *iter,
+                                               struct bpos pos)
+{
+       return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
+}
+
+static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
+                                              struct btree_iter *iter,
+                                              struct bch_inode_unpacked *inode,
+                                              u32 *snapshot)
+{
+       if (inode->bi_subvol) {
+               u64 inum;
+               int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum);
+               if (ret)
+                       return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) });
+       }
+
+       return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
+}
+
+static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
+                                  struct bkey_s_c_dirent d)
+{
+       return  inode->bi_dir           == d.k->p.inode &&
+               inode->bi_dir_offset    == d.k->p.offset;
+}
+
+static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
+                                  struct bch_inode_unpacked *inode)
+{
+       return d.v->d_type == DT_SUBVOL
+               ? le32_to_cpu(d.v->d_child_subvol)      == inode->bi_subvol
+               : le64_to_cpu(d.v->d_inum)              == inode->bi_inum;
+}
+
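
These two predicates are hoisted up here so check_inode() can use them;
together they state the invariant fsck enforces: the inode's
(bi_dir, bi_dir_offset) backpointer must name the dirent's position, and
the dirent must name the inode back, by subvolume id for DT_SUBVOL entries
and by inode number otherwise. As a standalone sketch (stub types, not the
kernel structures):

        struct ino  { unsigned long long bi_dir, bi_dir_offset, bi_inum;
                      unsigned bi_subvol; };
        struct dent { unsigned long long pos_inode, pos_offset, d_inum;
                      unsigned d_child_subvol; _Bool is_subvol; };

        static _Bool linked_both_ways(const struct ino *i,
                                      const struct dent *d)
        {
                return i->bi_dir == d->pos_inode &&
                       i->bi_dir_offset == d->pos_offset &&
                       (d->is_subvol ? d->d_child_subvol == i->bi_subvol
                                     : d->d_inum == i->bi_inum);
        }
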
 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
 {
        struct btree_iter iter;
        struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0);
-       int ret = bkey_err(k);
-       if (ret)
+       int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set;
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
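
Note the changed return convention: check_inode_deleted_list() now returns
a negative error, 0 if the inode is absent from the deleted list, or 1 if
present, by chaining with the GNU "?:" operator; that is why the caller
below flips its test to fsck_err_on(!ret, ...). The idiom in isolation
(GCC/clang extension):

        /* a ?: b  evaluates a once; equivalent to  a ? a : b */
        static int err_or_present(int err, int present)
        {
                return err ?: present;
        }
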
+static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k,
+                                   struct bch_inode_unpacked *inode,
+                                   u32 inode_snapshot, bool *write_inode)
+{
+       struct bch_fs *c = trans->c;
+       struct printbuf buf = PRINTBUF;
+
+       struct btree_iter dirent_iter = {};
+       struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
+       int ret = bkey_err(d);
+       if (ret && !bch2_err_matches(ret, ENOENT))
                return ret;
 
-       bch2_trans_iter_exit(trans, &iter);
-       return k.k->type == KEY_TYPE_set;
+       if (fsck_err_on(ret,
+                       c, inode_points_to_missing_dirent,
+                       "inode points to missing dirent\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) ||
+           fsck_err_on(!ret && !dirent_points_to_inode(d, inode),
+                       c, inode_points_to_wrong_dirent,
+                       "inode points to dirent that does not point back:\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, inode_k),
+                        prt_newline(&buf),
+                        bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+               /*
+                * We just clear the backpointer fields for now. If we find a
+                * dirent that points to this inode in check_dirents(), we'll
+                * update it then; then when we get to check_path() if the
+                * backpointer is still 0 we'll reattach it.
+                */
+               inode->bi_dir = 0;
+               inode->bi_dir_offset = 0;
+               inode->bi_flags &= ~BCH_INODE_backptr_untrusted;
+               *write_inode = true;
+       }
+
+       ret = 0;
+fsck_err:
+       bch2_trans_iter_exit(trans, &dirent_iter);
+       printbuf_exit(&buf);
+       bch_err_fn(c, ret);
+       return ret;
 }
 
 static int check_inode(struct btree_trans *trans,
@@ -861,7 +951,8 @@ static int check_inode(struct btree_trans *trans,
 
                u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked;
 
-               ret = __write_inode(trans, &u, iter->pos.snapshot);
+               ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
+
                bch_err_msg(c, ret, "in fsck updating inode");
                if (ret)
                        return ret;
@@ -876,7 +967,7 @@ static int check_inode(struct btree_trans *trans,
                if (ret < 0)
                        return ret;
 
-               fsck_err_on(ret, c, unlinked_inode_not_on_deleted_list,
+               fsck_err_on(!ret, c, unlinked_inode_not_on_deleted_list,
                            "inode %llu:%u unlinked, but not on deleted list",
                            u.bi_inum, k.k->p.snapshot);
                ret = 0;
@@ -950,8 +1041,49 @@ static int check_inode(struct btree_trans *trans,
                do_update = true;
        }
 
+       if (u.bi_dir || u.bi_dir_offset) {
+               ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
+               if (ret)
+                       goto err;
+       }
+
+       if (fsck_err_on(u.bi_parent_subvol &&
+                       (u.bi_subvol == 0 ||
+                        u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
+                       c, inode_bi_parent_nonzero,
+                       "inode %llu:%u has subvol %u but nonzero parent subvol %u",
+                       u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
+               u.bi_parent_subvol = 0;
+               do_update = true;
+       }
+
+       if (u.bi_subvol) {
+               struct bch_subvolume s;
+
+               ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s);
+               if (ret && !bch2_err_matches(ret, ENOENT))
+                       goto err;
+
+               if (fsck_err_on(ret,
+                               c, inode_bi_subvol_missing,
+                               "inode %llu:%u bi_subvol points to missing subvolume %u",
+                               u.bi_inum, k.k->p.snapshot, u.bi_subvol) ||
+                   fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum ||
+                               !bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot),
+                                                          k.k->p.snapshot),
+                               c, inode_bi_subvol_wrong,
+                               "inode %llu:%u points to subvol %u, but subvol points to %llu:%u",
+                               u.bi_inum, k.k->p.snapshot, u.bi_subvol,
+                               le64_to_cpu(s.inode),
+                               le32_to_cpu(s.snapshot))) {
+                       u.bi_subvol = 0;
+                       u.bi_parent_subvol = 0;
+                       do_update = true;
+               }
+       }
+
        if (do_update) {
-               ret = __write_inode(trans, &u, iter->pos.snapshot);
+               ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
                bch_err_msg(c, ret, "in fsck updating inode");
                if (ret)
                        return ret;
@@ -982,28 +1114,6 @@ int bch2_check_inodes(struct bch_fs *c)
        return ret;
 }
 
-static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
-                                               struct btree_iter *iter,
-                                               struct bpos pos)
-{
-       return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
-}
-
-static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
-                                  struct bkey_s_c_dirent d)
-{
-       return  inode->bi_dir           == d.k->p.inode &&
-               inode->bi_dir_offset    == d.k->p.offset;
-}
-
-static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
-                                  struct bch_inode_unpacked *inode)
-{
-       return d.v->d_type == DT_SUBVOL
-               ? le32_to_cpu(d.v->d_child_subvol)      == inode->bi_subvol
-               : le64_to_cpu(d.v->d_inum)              == inode->bi_inum;
-}
-
 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
 {
        struct bch_fs *c = trans->c;
@@ -1032,7 +1142,7 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
                                w->last_pos.inode, i->snapshot,
                                i->inode.bi_sectors, i->count)) {
                        i->inode.bi_sectors = i->count;
-                       ret = fsck_write_inode(trans, &i->inode, i->snapshot);
+                       ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot);
                        if (ret)
                                break;
                }
@@ -1312,7 +1422,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                        goto err;
        }
 
-       i = walk_inode(trans, inode, equiv, k.k->type == KEY_TYPE_whiteout);
+       i = walk_inode(trans, inode, k);
        ret = PTR_ERR_OR_ZERO(i);
        if (ret)
                goto err;
@@ -1481,7 +1591,7 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
                                "directory %llu:%u with wrong i_nlink: got %u, should be %llu",
                                w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
                        i->inode.bi_nlink = i->count;
-                       ret = fsck_write_inode(trans, &i->inode, i->snapshot);
+                       ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot);
                        if (ret)
                                break;
                }
@@ -1491,86 +1601,106 @@ fsck_err:
        return ret ?: trans_was_restarted(trans, restart_count);
 }
 
-static int check_dirent_target(struct btree_trans *trans,
-                              struct btree_iter *iter,
-                              struct bkey_s_c_dirent d,
-                              struct bch_inode_unpacked *target,
-                              u32 target_snapshot)
+static int check_dirent_inode_dirent(struct btree_trans *trans,
+                                  struct btree_iter *iter,
+                                  struct bkey_s_c_dirent d,
+                                  struct bch_inode_unpacked *target,
+                                  u32 target_snapshot)
 {
        struct bch_fs *c = trans->c;
-       struct bkey_i_dirent *n;
        struct printbuf buf = PRINTBUF;
-       struct btree_iter bp_iter = { NULL };
        int ret = 0;
 
+       if (inode_points_to_dirent(target, d))
+               return 0;
+
        if (!target->bi_dir &&
            !target->bi_dir_offset) {
                target->bi_dir          = d.k->p.inode;
                target->bi_dir_offset   = d.k->p.offset;
-
-               ret = __write_inode(trans, target, target_snapshot);
-               if (ret)
-                       goto err;
+               return __bch2_fsck_write_inode(trans, target, target_snapshot);
        }
 
-       if (!inode_points_to_dirent(target, d)) {
-               struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
-                                     SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
-               ret = bkey_err(bp_dirent);
-               if (ret && !bch2_err_matches(ret, ENOENT))
-                       goto err;
+       struct btree_iter bp_iter = { NULL };
+       struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
+                             SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
+       ret = bkey_err(bp_dirent);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               goto err;
 
-               bool backpointer_exists = !ret;
-               ret = 0;
+       bool backpointer_exists = !ret;
+       ret = 0;
+
+       if (fsck_err_on(!backpointer_exists,
+                       c, inode_wrong_backpointer,
+                       "inode %llu:%u has wrong backpointer:\n"
+                       "got       %llu:%llu\n"
+                       "should be %llu:%llu",
+                       target->bi_inum, target_snapshot,
+                       target->bi_dir,
+                       target->bi_dir_offset,
+                       d.k->p.inode,
+                       d.k->p.offset)) {
+               target->bi_dir          = d.k->p.inode;
+               target->bi_dir_offset   = d.k->p.offset;
+               ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
+               goto out;
+       }
 
-               bch2_bkey_val_to_text(&buf, c, d.s_c);
-               prt_newline(&buf);
-               if (backpointer_exists)
-                       bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
+       bch2_bkey_val_to_text(&buf, c, d.s_c);
+       prt_newline(&buf);
+       if (backpointer_exists)
+               bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);
+
+       if (fsck_err_on(backpointer_exists &&
+                       (S_ISDIR(target->bi_mode) ||
+                        target->bi_subvol),
+                       c, inode_dir_multiple_links,
+                       "%s %llu:%u with multiple links\n%s",
+                       S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
+                       target->bi_inum, target_snapshot, buf.buf)) {
+               ret = __remove_dirent(trans, d.k->p);
+               goto out;
+       }
 
-               if (fsck_err_on(S_ISDIR(target->bi_mode) && backpointer_exists,
-                               c, inode_dir_multiple_links,
-                               "directory %llu:%u with multiple links\n%s",
-                               target->bi_inum, target_snapshot, buf.buf)) {
-                       ret = __remove_dirent(trans, d.k->p);
-                       goto out;
-               }
+       /*
+        * hardlinked file with nlink 0:
+        * We're just adjusting nlink here so check_nlinks() will pick
+        * it up; it ignores inodes with nlink 0
+        */
+       if (fsck_err_on(backpointer_exists && !target->bi_nlink,
+                       c, inode_multiple_links_but_nlink_0,
+                       "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
+                       target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
+               target->bi_nlink++;
+               target->bi_flags &= ~BCH_INODE_unlinked;
+               ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
+               if (ret)
+                       goto err;
+       }
+out:
+err:
+fsck_err:
+       bch2_trans_iter_exit(trans, &bp_iter);
+       printbuf_exit(&buf);
+       bch_err_fn(c, ret);
+       return ret;
+}
 
-               /*
-                * hardlinked file with nlink 0:
-                * We're just adjusting nlink here so check_nlinks() will pick
-                * it up, it ignores inodes with nlink 0
-                */
-               if (fsck_err_on(backpointer_exists && !target->bi_nlink,
-                               c, inode_multiple_links_but_nlink_0,
-                               "inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
-                               target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
-                       target->bi_nlink++;
-                       target->bi_flags &= ~BCH_INODE_unlinked;
-
-                       ret = __write_inode(trans, target, target_snapshot);
-                       if (ret)
-                               goto err;
-               }
+static int check_dirent_target(struct btree_trans *trans,
+                              struct btree_iter *iter,
+                              struct bkey_s_c_dirent d,
+                              struct bch_inode_unpacked *target,
+                              u32 target_snapshot)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_i_dirent *n;
+       struct printbuf buf = PRINTBUF;
+       int ret = 0;
 
-               if (fsck_err_on(!backpointer_exists,
-                               c, inode_wrong_backpointer,
-                               "inode %llu:%u has wrong backpointer:\n"
-                               "got       %llu:%llu\n"
-                               "should be %llu:%llu",
-                               target->bi_inum, target_snapshot,
-                               target->bi_dir,
-                               target->bi_dir_offset,
-                               d.k->p.inode,
-                               d.k->p.offset)) {
-                       target->bi_dir          = d.k->p.inode;
-                       target->bi_dir_offset   = d.k->p.offset;
-
-                       ret = __write_inode(trans, target, target_snapshot);
-                       if (ret)
-                               goto err;
-               }
-       }
+       ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot);
+       if (ret)
+               goto err;
 
        if (fsck_err_on(d.v->d_type != inode_d_type(target),
                        c, dirent_d_type_wrong,
@@ -1586,6 +1716,12 @@ static int check_dirent_target(struct btree_trans *trans,
 
                bkey_reassemble(&n->k_i, d.s_c);
                n->v.d_type = inode_d_type(target);
+               if (n->v.d_type == DT_SUBVOL) {
+                       n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
+                       n->v.d_child_subvol = cpu_to_le32(target->bi_subvol);
+               } else {
+                       n->v.d_inum = cpu_to_le64(target->bi_inum);
+               }
 
                ret = bch2_trans_update(trans, iter, &n->k_i, 0);
                if (ret)
@@ -1593,33 +1729,134 @@ static int check_dirent_target(struct btree_trans *trans,
 
                d = dirent_i_to_s_c(n);
        }
+err:
+fsck_err:
+       printbuf_exit(&buf);
+       bch_err_fn(c, ret);
+       return ret;
+}
 
-       if (fsck_err_on(d.v->d_type == DT_SUBVOL &&
-                       target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol),
-                       c, dirent_d_parent_subvol_wrong,
-                       "dirent has wrong d_parent_subvol field: got %u, should be %u",
-                       le32_to_cpu(d.v->d_parent_subvol),
-                       target->bi_parent_subvol)) {
-               n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
-               ret = PTR_ERR_OR_ZERO(n);
+/* find a subvolume that's a descendant of @snapshot: */
+static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       int ret;
+
+       for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) {
+               if (k.k->type != KEY_TYPE_subvolume)
+                       continue;
+
+               struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
+               if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) {
+                       bch2_trans_iter_exit(trans, &iter);
+                       *subvolid = k.k->p.offset;
+                       goto found;
+               }
+       }
+       if (!ret)
+               ret = -ENOENT;
+found:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter,
+                                 struct bkey_s_c_dirent d)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter subvol_iter = {};
+       struct bch_inode_unpacked subvol_root;
+       u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
+       u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
+       u32 parent_snapshot;
+       u64 parent_inum;
+       struct printbuf buf = PRINTBUF;
+       int ret = 0;
+
+       ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               return ret;
+
+       if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
+                       "dirent parent_subvol points to missing subvolume\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
+           fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot),
+                       c, dirent_not_visible_in_parent_subvol,
+                       "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
+                       parent_snapshot,
+                       (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
+               u32 new_parent_subvol;
+               ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
                if (ret)
                        goto err;
 
-               bkey_reassemble(&n->k_i, d.s_c);
-               n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
+               struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
+               ret = PTR_ERR_OR_ZERO(new_dirent);
+               if (ret)
+                       goto err;
 
-               ret = bch2_trans_update(trans, iter, &n->k_i, 0);
+               new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol);
+       }
+
+       struct bkey_s_c_subvolume s =
+               bch2_bkey_get_iter_typed(trans, &subvol_iter,
+                                        BTREE_ID_subvolumes, POS(0, target_subvol),
+                                        0, subvolume);
+       ret = bkey_err(s.s_c);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               return ret;
+
+       if (ret) {
+               if (fsck_err(c, dirent_to_missing_subvol,
+                            "dirent points to missing subvolume\n%s",
+                            (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
+                       return __remove_dirent(trans, d.k->p);
+               ret = 0;
+               goto out;
+       }
+
+       if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol,
+                       c, subvol_fs_path_parent_wrong,
+                       "subvol with wrong fs_path_parent, should be %u\n%s",
+                       parent_subvol,
+                       (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
+               struct bkey_i_subvolume *n =
+                       bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
+               ret = PTR_ERR_OR_ZERO(n);
                if (ret)
                        goto err;
 
-               d = dirent_i_to_s_c(n);
+               n->v.fs_path_parent = cpu_to_le32(parent_subvol);
        }
+
+       u64 target_inum = le64_to_cpu(s.v->inode);
+       u32 target_snapshot = le32_to_cpu(s.v->snapshot);
+
+       ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               return ret;
+
+       if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
+                       c, inode_bi_parent_wrong,
+                       "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
+                       target_inum,
+                       subvol_root.bi_parent_subvol, parent_subvol)) {
+               subvol_root.bi_parent_subvol = parent_subvol;
+               ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
+               if (ret)
+                       return ret;
+       }
+
+       ret = check_dirent_target(trans, iter, d, &subvol_root,
+                                 target_snapshot);
+       if (ret)
+               return ret;
 out:
 err:
 fsck_err:
-       bch2_trans_iter_exit(trans, &bp_iter);
+       bch2_trans_iter_exit(trans, &subvol_iter);
        printbuf_exit(&buf);
-       bch_err_fn(c, ret);
        return ret;
 }
 
@@ -1661,7 +1898,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 
        BUG_ON(!btree_iter_path(trans, iter)->should_be_locked);
 
-       i = walk_inode(trans, dir, equiv, k.k->type == KEY_TYPE_whiteout);
+       i = walk_inode(trans, dir, k);
        ret = PTR_ERR_OR_ZERO(i);
        if (ret < 0)
                goto err;
@@ -1707,50 +1944,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
        d = bkey_s_c_to_dirent(k);
 
        if (d.v->d_type == DT_SUBVOL) {
-               struct bch_inode_unpacked subvol_root;
-               u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
-               u32 target_snapshot;
-               u64 target_inum;
-
-               ret = subvol_lookup(trans, target_subvol,
-                                     &target_snapshot, &target_inum);
-               if (ret && !bch2_err_matches(ret, ENOENT))
-                       goto err;
-
-               if (fsck_err_on(ret, c, dirent_to_missing_subvol,
-                               "dirent points to missing subvolume %u",
-                               le32_to_cpu(d.v->d_child_subvol))) {
-                       ret = __remove_dirent(trans, d.k->p);
-                       goto err;
-               }
-
-               ret = lookup_inode(trans, target_inum,
-                                  &subvol_root, &target_snapshot);
-               if (ret && !bch2_err_matches(ret, ENOENT))
-                       goto err;
-
-               if (fsck_err_on(ret, c, subvol_to_missing_root,
-                               "subvolume %u points to missing subvolume root %llu",
-                               target_subvol,
-                               target_inum)) {
-                       bch_err(c, "repair not implemented yet");
-                       ret = -EINVAL;
-                       goto err;
-               }
-
-               if (fsck_err_on(subvol_root.bi_subvol != target_subvol,
-                               c, subvol_root_wrong_bi_subvol,
-                               "subvol root %llu has wrong bi_subvol field: got %u, should be %u",
-                               target_inum,
-                               subvol_root.bi_subvol, target_subvol)) {
-                       subvol_root.bi_subvol = target_subvol;
-                       ret = __write_inode(trans, &subvol_root, target_snapshot);
-                       if (ret)
-                               goto err;
-               }
-
-               ret = check_dirent_target(trans, iter, d, &subvol_root,
-                                         target_snapshot);
+               ret = check_dirent_to_subvol(trans, iter, d);
                if (ret)
                        goto err;
        } else {
@@ -1776,12 +1970,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
                        if (ret)
                                goto err;
                }
-       }
-
-       if (d.v->d_type == DT_DIR)
-               for_each_visible_inode(c, s, dir, equiv.snapshot, i)
-                       i->count++;
 
+               if (d.v->d_type == DT_DIR)
+                       for_each_visible_inode(c, s, dir, equiv.snapshot, i)
+                               i->count++;
+       }
 out:
 err:
 fsck_err:
@@ -1832,7 +2025,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
        if (ret)
                return ret;
 
-       i = walk_inode(trans, inode, k.k->p, k.k->type == KEY_TYPE_whiteout);
+       i = walk_inode(trans, inode, k);
        ret = PTR_ERR_OR_ZERO(i);
        if (ret)
                return ret;
@@ -1919,7 +2112,7 @@ static int check_root_trans(struct btree_trans *trans)
                                0, NULL);
                root_inode.bi_inum = inum;
 
-               ret = __write_inode(trans, &root_inode, snapshot);
+               ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot);
                bch_err_msg(c, ret, "writing root inode");
        }
 err:
@@ -1936,6 +2129,107 @@ int bch2_check_root(struct bch_fs *c)
        return ret;
 }
 
+typedef DARRAY(u32) darray_u32;
+
+static bool darray_u32_has(darray_u32 *d, u32 v)
+{
+       darray_for_each(*d, i)
+               if (*i == v)
+                       return true;
+       return false;
+}
+
+/*
+ * We've checked that inode backpointers point to valid dirents; here, it's
+ * sufficient to check that the subvolume root has a dirent:
+ */
+static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s)
+{
+       struct bch_inode_unpacked inode;
+       int ret = bch2_inode_find_by_inum_trans(trans,
+                               (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
+                               &inode);
+       if (ret)
+               return ret;
+
+       return inode.bi_dir != 0;
+}
+
+static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter parent_iter = {};
+       darray_u32 subvol_path = {};
+       struct printbuf buf = PRINTBUF;
+       int ret = 0;
+
+       if (k.k->type != KEY_TYPE_subvolume)
+               return 0;
+
+       while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) {
+               ret = darray_push(&subvol_path, k.k->p.offset);
+               if (ret)
+                       goto err;
+
+               struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
+
+               ret = subvol_has_dirent(trans, s);
+               if (ret < 0)
+                       break;
+
+               if (fsck_err_on(!ret,
+                               c, subvol_unreachable,
+                               "unreachable subvolume %s",
+                               (bch2_bkey_val_to_text(&buf, c, s.s_c),
+                                buf.buf))) {
+                       ret = reattach_subvol(trans, s);
+                       break;
+               }
+
+               u32 parent = le32_to_cpu(s.v->fs_path_parent);
+
+               if (darray_u32_has(&subvol_path, parent)) {
+                       if (fsck_err(c, subvol_loop, "subvolume loop"))
+                               ret = reattach_subvol(trans, s);
+                       break;
+               }
+
+               bch2_trans_iter_exit(trans, &parent_iter);
+               bch2_trans_iter_init(trans, &parent_iter,
+                                    BTREE_ID_subvolumes, POS(0, parent), 0);
+               k = bch2_btree_iter_peek_slot(&parent_iter);
+               ret = bkey_err(k);
+               if (ret)
+                       goto err;
+
+               if (fsck_err_on(k.k->type != KEY_TYPE_subvolume,
+                               c, subvol_unreachable,
+                               "unreachable subvolume %s",
+                               (bch2_bkey_val_to_text(&buf, c, s.s_c),
+                                buf.buf))) {
+                       ret = reattach_subvol(trans, s);
+                       break;
+               }
+       }
+fsck_err:
+err:
+       printbuf_exit(&buf);
+       darray_exit(&subvol_path);
+       bch2_trans_iter_exit(trans, &parent_iter);
+       return ret;
+}
+
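
check_subvol_path() walks fs_path_parent links from each subvolume toward
the root subvolume, pushing every visited id onto a darray: seeing an id
twice means a subvolume loop, and a dangling parent means the subvolume is
unreachable; both are repaired by reattaching under lost+found. The
detection pattern, as a self-contained sketch (userspace, with an assumed
parent[] map):

        #include <stdbool.h>
        #include <stddef.h>

        static bool seen(const unsigned *path, size_t nr, unsigned v)
        {
                for (size_t i = 0; i < nr; i++)
                        if (path[i] == v)
                                return true;
                return false;
        }

        /* true if following parent[] from 'node' cycles before 'root' */
        static bool has_loop(const unsigned *parent, unsigned node,
                             unsigned root, unsigned *path, size_t max)
        {
                size_t nr = 0;

                while (node != root) {
                        if (nr == max || seen(path, nr, node))
                                return true;
                        path[nr++] = node;
                        node = parent[node];
                }
                return false;
        }
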
+int bch2_check_subvolume_structure(struct bch_fs *c)
+{
+       int ret = bch2_trans_run(c,
+               for_each_btree_key_commit(trans, iter,
+                               BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
+                               NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+                       check_subvol_path(trans, &iter, k)));
+       bch_err_fn(c, ret);
+       return ret;
+}
+
 struct pathbuf_entry {
        u64     inum;
        u32     snapshot;
@@ -1952,89 +2246,71 @@ static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
        return false;
 }
 
-static int path_down(struct bch_fs *c, pathbuf *p,
-                    u64 inum, u32 snapshot)
-{
-       int ret = darray_push(p, ((struct pathbuf_entry) {
-               .inum           = inum,
-               .snapshot       = snapshot,
-       }));
-
-       if (ret)
-               bch_err(c, "fsck: error allocating memory for pathbuf, size %zu",
-                       p->size);
-       return ret;
-}
-
 /*
- * Check that a given inode is reachable from the root:
+ * Check that a given inode is reachable from its subvolume root - we already
+ * verified subvolume connectivity:
  *
  * XXX: we should also be verifying that inodes are in the right subvolumes
  */
-static int check_path(struct btree_trans *trans,
-                     pathbuf *p,
-                     struct bch_inode_unpacked *inode,
-                     u32 snapshot)
+static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
 {
        struct bch_fs *c = trans->c;
+       struct btree_iter inode_iter = {};
+       struct bch_inode_unpacked inode;
+       struct printbuf buf = PRINTBUF;
+       u32 snapshot = bch2_snapshot_equiv(c, inode_k.k->p.snapshot);
        int ret = 0;
 
-       snapshot = bch2_snapshot_equiv(c, snapshot);
        p->nr = 0;
 
-       while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
-                inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)) {
+       BUG_ON(bch2_inode_unpack(inode_k, &inode));
+
+       while (!inode.bi_subvol) {
                struct btree_iter dirent_iter;
                struct bkey_s_c_dirent d;
                u32 parent_snapshot = snapshot;
 
-               if (inode->bi_subvol) {
-                       u64 inum;
-
-                       ret = subvol_lookup(trans, inode->bi_parent_subvol,
-                                           &parent_snapshot, &inum);
-                       if (ret)
-                               break;
-               }
-
-               d = dirent_get_by_pos(trans, &dirent_iter,
-                                     SPOS(inode->bi_dir, inode->bi_dir_offset,
-                                          parent_snapshot));
+               d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
                ret = bkey_err(d.s_c);
                if (ret && !bch2_err_matches(ret, ENOENT))
                        break;
 
-               if (!ret && !dirent_points_to_inode(d, inode)) {
+               if (!ret && !dirent_points_to_inode(d, &inode)) {
                        bch2_trans_iter_exit(trans, &dirent_iter);
                        ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
                }
 
                if (bch2_err_matches(ret, ENOENT)) {
-                       if (fsck_err(c,  inode_unreachable,
-                                    "unreachable inode %llu:%u, type %s nlink %u backptr %llu:%llu",
-                                    inode->bi_inum, snapshot,
-                                    bch2_d_type_str(inode_d_type(inode)),
-                                    inode->bi_nlink,
-                                    inode->bi_dir,
-                                    inode->bi_dir_offset))
-                               ret = reattach_inode(trans, inode, snapshot);
-                       break;
+                       ret = 0;
+                       if (fsck_err(c, inode_unreachable,
+                                    "unreachable inode\n%s",
+                                    (printbuf_reset(&buf),
+                                     bch2_bkey_val_to_text(&buf, c, inode_k),
+                                     buf.buf)))
+                               ret = reattach_inode(trans, &inode, snapshot);
+                       goto out;
                }
 
                bch2_trans_iter_exit(trans, &dirent_iter);
 
-               if (!S_ISDIR(inode->bi_mode))
+               if (!S_ISDIR(inode.bi_mode))
                        break;
 
-               ret = path_down(c, p, inode->bi_inum, snapshot);
-               if (ret) {
-                       bch_err(c, "memory allocation failure");
+               ret = darray_push(p, ((struct pathbuf_entry) {
+                       .inum           = inode.bi_inum,
+                       .snapshot       = snapshot,
+               }));
+               if (ret)
                        return ret;
-               }
 
                snapshot = parent_snapshot;
 
-               ret = lookup_inode(trans, inode->bi_dir, inode, &snapshot);
+               bch2_trans_iter_exit(trans, &inode_iter);
+               inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
+                                            SPOS(0, inode.bi_dir, snapshot), 0);
+               ret = bkey_err(inode_k) ?:
+                       !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
+                       : bch2_inode_unpack(inode_k, &inode);
                if (ret) {
                        /* Should have been caught in dirents pass */
                        if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -2042,30 +2318,32 @@ static int check_path(struct btree_trans *trans,
                        break;
                }
 
-               if (path_is_dup(p, inode->bi_inum, snapshot)) {
+               snapshot = inode_k.k->p.snapshot;
+
+               if (path_is_dup(p, inode.bi_inum, snapshot)) {
                        /* XXX print path */
                        bch_err(c, "directory structure loop");
 
                        darray_for_each(*p, i)
                                pr_err("%llu:%u", i->inum, i->snapshot);
-                       pr_err("%llu:%u", inode->bi_inum, snapshot);
-
-                       if (!fsck_err(c, dir_loop, "directory structure loop"))
-                               return 0;
+                       pr_err("%llu:%u", inode.bi_inum, snapshot);
 
-                       ret = remove_backpointer(trans, inode);
-                       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+                       if (fsck_err(c, dir_loop, "directory structure loop")) {
+                               ret = remove_backpointer(trans, &inode);
                                bch_err_msg(c, ret, "removing dirent");
-                       if (ret)
-                               break;
+                               if (ret)
+                                       break;
 
-                       ret = reattach_inode(trans, inode, snapshot);
-                       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
-                               bch_err_msg(c, ret, "reattaching inode %llu", inode->bi_inum);
+                               ret = reattach_inode(trans, &inode, snapshot);
+                               bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
+                       }
                        break;
                }
        }
+out:
 fsck_err:
+       bch2_trans_iter_exit(trans, &inode_iter);
+       printbuf_exit(&buf);
        bch_err_fn(c, ret);
        return ret;
 }
@@ -2077,7 +2355,6 @@ fsck_err:
  */
 int bch2_check_directory_structure(struct bch_fs *c)
 {
-       struct bch_inode_unpacked u;
        pathbuf path = { 0, };
        int ret;
 
@@ -2090,12 +2367,10 @@ int bch2_check_directory_structure(struct bch_fs *c)
                        if (!bkey_is_inode(k.k))
                                continue;
 
-                       BUG_ON(bch2_inode_unpack(k, &u));
-
-                       if (u.bi_flags & BCH_INODE_unlinked)
+                       if (bch2_inode_flags(k) & BCH_INODE_unlinked)
                                continue;
 
-                       check_path(trans, &path, &u, iter.pos.snapshot);
+                       check_path(trans, &path, k);
                })));
        darray_exit(&path);
 
@@ -2291,7 +2566,7 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite
                        u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
                        bch2_inode_nlink_get(&u), link->count)) {
                bch2_inode_nlink_set(&u, link->count);
-               ret = __write_inode(trans, &u, k.k->p.snapshot);
+               ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot);
        }
 fsck_err:
        return ret;
index da991e8cf27eb493ed5aac5a3e3da606ae089968..a4ef9427178433bda33d500fe5f7551ff8fd1638 100644 (file)
@@ -8,6 +8,7 @@ int bch2_check_indirect_extents(struct bch_fs *);
 int bch2_check_dirents(struct bch_fs *);
 int bch2_check_xattrs(struct bch_fs *);
 int bch2_check_root(struct bch_fs *);
+int bch2_check_subvolume_structure(struct bch_fs *);
 int bch2_check_directory_structure(struct bch_fs *);
 int bch2_check_nlinks(struct bch_fs *);
 int bch2_fix_reflink_p(struct bch_fs *);
index 086f0090b03a4015388dce49388ba5951940cb0a..2b5e06770ab39ea0844342d6298b5ab37f26667d 100644 (file)
@@ -324,7 +324,7 @@ int bch2_inode_unpack(struct bkey_s_c k,
        return bch2_inode_unpack_slowpath(k, unpacked);
 }
 
-static int bch2_inode_peek_nowarn(struct btree_trans *trans,
+int bch2_inode_peek_nowarn(struct btree_trans *trans,
                    struct btree_iter *iter,
                    struct bch_inode_unpacked *inode,
                    subvol_inum inum, unsigned flags)
@@ -384,6 +384,34 @@ int bch2_inode_write_flags(struct btree_trans *trans,
        return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags);
 }
 
+int __bch2_fsck_write_inode(struct btree_trans *trans,
+                        struct bch_inode_unpacked *inode,
+                        u32 snapshot)
+{
+       struct bkey_inode_buf *inode_p =
+               bch2_trans_kmalloc(trans, sizeof(*inode_p));
+
+       if (IS_ERR(inode_p))
+               return PTR_ERR(inode_p);
+
+       bch2_inode_pack(inode_p, inode);
+       inode_p->inode.k.p.snapshot = snapshot;
+
+       return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes,
+                               &inode_p->inode.k_i,
+                               BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+}
+
+int bch2_fsck_write_inode(struct btree_trans *trans,
+                           struct bch_inode_unpacked *inode,
+                           u32 snapshot)
+{
+       int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+                           __bch2_fsck_write_inode(trans, inode, snapshot));
+       bch_err_fn(trans->c, ret);
+       return ret;
+}
+
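
This pairing follows the usual bcachefs convention: the __-prefixed
variant only queues the update within the caller's transaction, while the
plain variant wraps it in commit_do() with BCH_TRANS_COMMIT_no_enospc and
logs any error itself. Usage, as at the fsck call sites in this patch:

        /* inside a larger transaction; the caller commits: */
        ret = __bch2_fsck_write_inode(trans, &inode, snapshot);

        /* standalone update; commit and error logging included: */
        ret = bch2_fsck_write_inode(trans, &inode, snapshot);
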
 struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k)
 {
        struct bch_inode_unpacked u;
@@ -592,7 +620,8 @@ int bch2_trigger_inode(struct btree_trans *trans,
                bool old_deleted = bkey_is_deleted_inode(old);
                bool new_deleted = bkey_is_deleted_inode(new.s_c);
                if (old_deleted != new_deleted) {
-                       int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted);
+                       int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes,
+                                                             new.k->p, new_deleted);
                        if (ret)
                                return ret;
                }
@@ -1088,8 +1117,9 @@ static int may_delete_deleted_inode(struct btree_trans *trans,
                goto out;
 
        if (S_ISDIR(inode.bi_mode)) {
-               ret = bch2_empty_dir_snapshot(trans, pos.offset, pos.snapshot);
-               if (fsck_err_on(ret == -ENOTEMPTY, c, deleted_inode_is_dir,
+               ret = bch2_empty_dir_snapshot(trans, pos.offset, 0, pos.snapshot);
+               if (fsck_err_on(bch2_err_matches(ret, ENOTEMPTY),
+                               c, deleted_inode_is_dir,
                                "non empty directory %llu:%u in deleted_inodes btree",
                                pos.offset, pos.snapshot))
                        goto delete;
@@ -1141,7 +1171,7 @@ fsck_err:
        bch2_trans_iter_exit(trans, &inode_iter);
        return ret;
 delete:
-       ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, pos, false);
+       ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false);
        goto out;
 }
 
@@ -1151,6 +1181,15 @@ int bch2_delete_dead_inodes(struct bch_fs *c)
        bool need_another_pass;
        int ret;
 again:
+       /*
+        * if we ran check_inodes(), unlinked inodes will already have been
+        * cleaned up but the write buffer will be out of sync; therefore we
+        * always need a write buffer flush
+        */
+       ret = bch2_btree_write_buffer_flush_sync(trans);
+       if (ret)
+               goto err;
+
        need_another_pass = false;
 
        /*
@@ -1183,12 +1222,8 @@ again:
                ret;
        }));
 
-       if (!ret && need_another_pass) {
-               ret = bch2_btree_write_buffer_flush_sync(trans);
-               if (ret)
-                       goto err;
+       if (!ret && need_another_pass)
                goto again;
-       }
 err:
        bch2_trans_put(trans);
        return ret;
index b63f312581cfa5ea9975fae6fdcd2d1518d13d54..056298050550f9ecf4ce1e000cb32ce501f5bc62 100644 (file)
@@ -95,6 +95,8 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *);
 
 void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *);
 
+int bch2_inode_peek_nowarn(struct btree_trans *, struct btree_iter *,
+                   struct bch_inode_unpacked *, subvol_inum, unsigned);
 int bch2_inode_peek(struct btree_trans *, struct btree_iter *,
                    struct bch_inode_unpacked *, subvol_inum, unsigned);
 
@@ -108,6 +110,9 @@ static inline int bch2_inode_write(struct btree_trans *trans,
        return bch2_inode_write_flags(trans, iter, inode, 0);
 }
 
+int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32);
+int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32);
+
 void bch2_inode_init_early(struct bch_fs *,
                           struct bch_inode_unpacked *);
 void bch2_inode_init_late(struct bch_inode_unpacked *, u64,
@@ -172,6 +177,20 @@ static inline u8 inode_d_type(struct bch_inode_unpacked *inode)
        return inode->bi_subvol ? DT_SUBVOL : mode_to_type(inode->bi_mode);
 }
 
+static inline u32 bch2_inode_flags(struct bkey_s_c k)
+{
+       switch (k.k->type) {
+       case KEY_TYPE_inode:
+               return le32_to_cpu(bkey_s_c_to_inode(k).v->bi_flags);
+       case KEY_TYPE_inode_v2:
+               return le64_to_cpu(bkey_s_c_to_inode_v2(k).v->bi_flags);
+       case KEY_TYPE_inode_v3:
+               return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_flags);
+       default:
+               return 0;
+       }
+}
+
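
bch2_inode_flags() reads bi_flags straight out of the packed key for any
of the three on-disk inode versions, which is what lets
bch2_check_directory_structure() above drop its bch2_inode_unpack() call
when all it needs is one flag test:

        if (!bkey_is_inode(k.k))
                continue;
        if (bch2_inode_flags(k) & BCH_INODE_unlinked)
                continue;
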
 /* i_nlink: */
 
 static inline unsigned nlink_bias(umode_t mode)
index 3c574d8873a1e209dc7f7f48faacf9928f8a1272..8a556e6d1ab6f2080342a0f232c94cf160600fe1 100644 (file)
@@ -174,7 +174,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
        if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
                return ERR_PTR(-BCH_ERR_nopromote_no_writes);
 
-       op = kzalloc(sizeof(*op) + sizeof(struct bio_vec) * pages, GFP_KERNEL);
+       op = kzalloc(struct_size(op, bi_inline_vecs, pages), GFP_KERNEL);
        if (!op) {
                ret = -BCH_ERR_nopromote_enomem;
                goto err;
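
struct_size() replaces open-coded sizeof arithmetic for allocating a
struct with a trailing flexible array; unlike the expression it replaces,
the kernel macro (include/linux/overflow.h) saturates instead of wrapping
if the multiplication overflows. The shape of the allocation, approximated
in standalone C (stub types; no overflow check in this sketch):

        #include <stdlib.h>

        struct vec_stub { void *page; unsigned len, offset; };

        struct op_stub {
                int n;                            /* fixed fields */
                struct vec_stub bi_inline_vecs[]; /* flexible array */
        };

        static struct op_stub *op_alloc(size_t pages)
        {
                /* struct_size(op, bi_inline_vecs, pages) computes this
                 * size, with overflow checking: */
                return calloc(1, sizeof(struct op_stub) +
                                 pages * sizeof(struct vec_stub));
        }
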
index 2c098ac017b30b6a4b5d016e9f5dde93ee258f2f..f137252bccc575b42a012a7876f8c81ddee28a21 100644 (file)
@@ -88,7 +88,7 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
 
        bch2_congested_acct(ca, io_latency, now, rw);
 
-       __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now);
+       __bch2_time_stats_update(&ca->io_latency[rw].stats, submit_time, now);
 }
 
 #endif
@@ -530,7 +530,8 @@ static void __bch2_write_index(struct bch_write_op *op)
 
                        bch_err_inum_offset_ratelimited(c,
                                insert->k.p.inode, insert->k.p.offset << 9,
-                               "write error while doing btree update: %s",
+                               "%s write error while doing btree update: %s",
+                               op->flags & BCH_WRITE_MOVE ? "move" : "user",
                                bch2_err_str(ret));
                }
 
@@ -1067,7 +1068,8 @@ do_write:
        *_dst = dst;
        return more;
 csum_err:
-       bch_err(c, "error verifying existing checksum while rewriting existing data (memory corruption?)");
+       bch_err(c, "%s write error: error verifying existing checksum while rewriting existing data (memory corruption?)",
+               op->flags & BCH_WRITE_MOVE ? "move" : "user");
        ret = -EIO;
 err:
        if (to_wbio(dst)->bounce)
@@ -1169,7 +1171,8 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
 
                        bch_err_inum_offset_ratelimited(c,
                                insert->k.p.inode, insert->k.p.offset << 9,
-                               "write error while doing btree update: %s",
+                               "%s write error while doing btree update: %s",
+                               op->flags & BCH_WRITE_MOVE ? "move" : "user",
                                bch2_err_str(ret));
                }
 
@@ -1449,7 +1452,9 @@ err:
                                        bch_err_inum_offset_ratelimited(c,
                                                op->pos.inode,
                                                op->pos.offset << 9,
-                                               "%s(): error: %s", __func__, bch2_err_str(ret));
+                                               "%s(): %s error: %s", __func__,
+                                               op->flags & BCH_WRITE_MOVE ? "move" : "user",
+                                               bch2_err_str(ret));
                                op->error = ret;
                                break;
                        }
@@ -1573,7 +1578,8 @@ CLOSURE_CALLBACK(bch2_write)
                bch_err_inum_offset_ratelimited(c,
                        op->pos.inode,
                        op->pos.offset << 9,
-                       "misaligned write");
+                       "%s write error: misaligned write",
+                       op->flags & BCH_WRITE_MOVE ? "move" : "user");
                op->error = -EIO;
                goto err;
        }
index bc890776eb57933a5931edd2a2f07570f52b7ab3..f314b2e78ec368718e671651a99752de374a838f 100644 (file)
@@ -27,33 +27,71 @@ static const char * const bch2_journal_errors[] = {
        NULL
 };
 
+static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
+{
+       return seq > j->seq_ondisk;
+}
+
+static bool __journal_entry_is_open(union journal_res_state state)
+{
+       return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
+}
+
+static inline unsigned nr_unwritten_journal_entries(struct journal *j)
+{
+       return atomic64_read(&j->seq) - j->seq_ondisk;
+}
+
+static bool journal_entry_is_open(struct journal *j)
+{
+       return __journal_entry_is_open(j->reservations);
+}
+
 static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u64 seq)
 {
        union journal_res_state s = READ_ONCE(j->reservations);
        unsigned i = seq & JOURNAL_BUF_MASK;
        struct journal_buf *buf = j->buf + i;
 
-       prt_printf(out, "seq:");
+       prt_str(out, "seq:");
        prt_tab(out);
        prt_printf(out, "%llu", seq);
        prt_newline(out);
        printbuf_indent_add(out, 2);
 
-       prt_printf(out, "refcount:");
+       prt_str(out, "refcount:");
        prt_tab(out);
        prt_printf(out, "%u", journal_state_count(s, i));
        prt_newline(out);
 
-       prt_printf(out, "size:");
+       prt_str(out, "size:");
        prt_tab(out);
        prt_human_readable_u64(out, vstruct_bytes(buf->data));
        prt_newline(out);
 
-       prt_printf(out, "expires");
+       prt_str(out, "expires:");
        prt_tab(out);
        prt_printf(out, "%li jiffies", buf->expires - jiffies);
        prt_newline(out);
 
+       prt_str(out, "flags:");
+       prt_tab(out);
+       if (buf->noflush)
+               prt_str(out, "noflush ");
+       if (buf->must_flush)
+               prt_str(out, "must_flush ");
+       if (buf->separate_flush)
+               prt_str(out, "separate_flush ");
+       if (buf->need_flush_to_write_buffer)
+               prt_str(out, "need_flush_to_write_buffer ");
+       if (buf->write_started)
+               prt_str(out, "write_started ");
+       if (buf->write_allocated)
+               prt_str(out, "write_allocated ");
+       if (buf->write_done)
+               prt_str(out, "write_done");
+       prt_newline(out);
+
        printbuf_indent_sub(out, 2);
 }
 
@@ -66,26 +104,7 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j)
             seq <= journal_cur_seq(j);
             seq++)
                bch2_journal_buf_to_text(out, j, seq);
-}
-
-static inline bool journal_seq_unwritten(struct journal *j, u64 seq)
-{
-       return seq > j->seq_ondisk;
-}
-
-static bool __journal_entry_is_open(union journal_res_state state)
-{
-       return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
-}
-
-static inline unsigned nr_unwritten_journal_entries(struct journal *j)
-{
-       return atomic64_read(&j->seq) - j->seq_ondisk;
-}
-
-static bool journal_entry_is_open(struct journal *j)
-{
-       return __journal_entry_is_open(j->reservations);
+       prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? "open" : "closed");
 }
 
 static inline struct journal_buf *
@@ -174,21 +193,40 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags)
        return stuck;
 }
 
+void bch2_journal_do_writes(struct journal *j)
+{
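+       /*
+        * Journal writes are kicked off and complete in sequence order: stop
+        * at the first buffer whose write is still being set up, and start
+        * at most one new write per call:
+        */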
+       for (u64 seq = journal_last_unwritten_seq(j);
+            seq <= journal_cur_seq(j);
+            seq++) {
+               unsigned idx = seq & JOURNAL_BUF_MASK;
+               struct journal_buf *w = j->buf + idx;
+
+               if (w->write_started && !w->write_allocated)
+                       break;
+               if (w->write_started)
+                       continue;
+
+               if (!journal_state_count(j->reservations, idx)) {
+                       w->write_started = true;
+                       closure_call(&w->io, bch2_journal_write, j->wq, NULL);
+               }
+
+               break;
+       }
+}
+
 /*
  * Final processing when the last reference of a journal buffer has been
  * dropped. Drop the pin list reference acquired at journal entry open and write
  * the buffer, if requested.
  */
-void bch2_journal_buf_put_final(struct journal *j, u64 seq, bool write)
+void bch2_journal_buf_put_final(struct journal *j, u64 seq)
 {
-       struct bch_fs *c = container_of(j, struct bch_fs, journal);
-
        lockdep_assert_held(&j->lock);
 
        if (__bch2_journal_pin_put(j, seq))
                bch2_journal_reclaim_fast(j);
-       if (write)
-               closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
+       bch2_journal_do_writes(j);
 }
 
 /*
@@ -380,11 +418,14 @@ static int journal_entry_open(struct journal *j)
        BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf);
 
        bkey_extent_init(&buf->key);
-       buf->noflush    = false;
-       buf->must_flush = false;
-       buf->separate_flush = false;
-       buf->flush_time = 0;
+       buf->noflush            = false;
+       buf->must_flush         = false;
+       buf->separate_flush     = false;
+       buf->flush_time         = 0;
        buf->need_flush_to_write_buffer = true;
+       buf->write_started      = false;
+       buf->write_allocated    = false;
+       buf->write_done         = false;
 
        memset(buf->data, 0, sizeof(*buf->data));
        buf->data->seq  = cpu_to_le64(journal_cur_seq(j));
@@ -418,9 +459,10 @@ static int journal_entry_open(struct journal *j)
        } while ((v = atomic64_cmpxchg(&j->reservations.counter,
                                       old.v, new.v)) != old.v);
 
-       mod_delayed_work(c->io_complete_wq,
-                        &j->write_work,
-                        msecs_to_jiffies(c->opts.journal_flush_delay));
+       if (nr_unwritten_journal_entries(j) == 1)
+               mod_delayed_work(j->wq,
+                                &j->write_work,
+                                msecs_to_jiffies(c->opts.journal_flush_delay));
        journal_wake(j);
 
        if (j->early_journal_entries.nr)
@@ -445,20 +487,16 @@ static void journal_quiesce(struct journal *j)
 static void journal_write_work(struct work_struct *work)
 {
        struct journal *j = container_of(work, struct journal, write_work.work);
-       struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       long delta;
 
        spin_lock(&j->lock);
-       if (!__journal_entry_is_open(j->reservations))
-               goto unlock;
-
-       delta = journal_cur_buf(j)->expires - jiffies;
+       if (__journal_entry_is_open(j->reservations)) {
+               long delta = journal_cur_buf(j)->expires - jiffies;
 
-       if (delta > 0)
-               mod_delayed_work(c->io_complete_wq, &j->write_work, delta);
-       else
-               __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
-unlock:
+               if (delta > 0)
+                       mod_delayed_work(j->wq, &j->write_work, delta);
+               else
+                       __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true);
+       }
        spin_unlock(&j->lock);
 }
 
@@ -473,33 +511,32 @@ retry:
        if (journal_res_get_fast(j, res, flags))
                return 0;
 
-       if (bch2_journal_error(j))
-               return -BCH_ERR_erofs_journal_err;
+       if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
+               ret = JOURNAL_ERR_journal_full;
+               can_discard = j->can_discard;
+               goto out;
+       }
 
-       spin_lock(&j->lock);
+       if (j->blocked)
+               return -BCH_ERR_journal_res_get_blocked;
 
-       /* check once more in case somebody else shut things down... */
-       if (bch2_journal_error(j)) {
-               spin_unlock(&j->lock);
+       if (bch2_journal_error(j))
                return -BCH_ERR_erofs_journal_err;
+
+       if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf) && !journal_entry_is_open(j)) {
+               ret = JOURNAL_ERR_max_in_flight;
+               goto out;
        }
 
+       spin_lock(&j->lock);
+
        /*
         * Recheck after taking the lock: if another thread just did
         * journal_entry_open(), we don't want to call
         * bch2_journal_entry_close() unnecessarily
         */
        if (journal_res_get_fast(j, res, flags)) {
-               spin_unlock(&j->lock);
-               return 0;
-       }
-
-       if ((flags & BCH_WATERMARK_MASK) < j->watermark) {
-               /*
-                * Don't want to close current journal entry, just need to
-                * invoke reclaim:
-                */
-               ret = JOURNAL_ERR_journal_full;
+               ret = 0;
                goto unlock;
        }
 
@@ -515,30 +552,30 @@ retry:
                j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
 
        __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, false);
-       ret = journal_entry_open(j);
-
-       if (ret == JOURNAL_ERR_max_in_flight) {
-               track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
-                                  &j->max_in_flight_start, true);
-               if (trace_journal_entry_full_enabled()) {
-                       struct printbuf buf = PRINTBUF;
-                       buf.atomic++;
-
-                       bch2_journal_bufs_to_text(&buf, j);
-                       trace_journal_entry_full(c, buf.buf);
-                       printbuf_exit(&buf);
-               }
-               count_event(c, journal_entry_full);
-       }
+       ret = journal_entry_open(j) ?: JOURNAL_ERR_retry;
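+       /* a successful open means the fast path at the top can be retried: */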
 unlock:
        can_discard = j->can_discard;
        spin_unlock(&j->lock);
-
-       if (!ret)
+out:
+       if (ret == JOURNAL_ERR_retry)
                goto retry;
+       if (!ret)
+               return 0;
+
        if (journal_error_check_stuck(j, ret, flags))
                ret = -BCH_ERR_journal_res_get_blocked;
 
+       if (ret == JOURNAL_ERR_max_in_flight &&
+           track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true)) {
+
+               struct printbuf buf = PRINTBUF;
+               prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));
+               bch2_journal_bufs_to_text(&buf, j);
+               trace_journal_entry_full(c, buf.buf);
+               printbuf_exit(&buf);
+               count_event(c, journal_entry_full);
+       }
+
        /*
         * Journal is full - can't rely on reclaim from work item due to
         * freezing:
@@ -674,7 +711,7 @@ recheck_need_open:
                        return ret;
 
                seq = res.seq;
-               buf = j->buf + (seq & JOURNAL_BUF_MASK);
+               buf = journal_seq_to_buf(j, seq);
                buf->must_flush = true;
 
                if (!buf->flush_time) {
@@ -692,8 +729,8 @@ recheck_need_open:
        }
 
        /*
-        * if write was kicked off without a flush, flush the next sequence
-        * number instead
+        * if the write was kicked off without a flush, or if we promised it
+        * wouldn't be a flush, flush the next sequence number instead
         */
        buf = journal_seq_to_buf(j, seq);
        if (buf->noflush) {
@@ -771,8 +808,8 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 seq)
             unwritten_seq++) {
                struct journal_buf *buf = journal_seq_to_buf(j, unwritten_seq);
 
-               /* journal write is already in flight, and was a flush write: */
-               if (unwritten_seq == journal_last_unwritten_seq(j) && !buf->noflush)
+               /* journal flush already in flight, or flush requested */
+               if (buf->must_flush)
                        goto out;
 
                buf->noflush = true;
@@ -1157,13 +1194,12 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
        struct journal_replay *i, **_i;
        struct genradix_iter iter;
        bool had_entries = false;
-       unsigned ptr;
        u64 last_seq = cur_seq, nr, seq;
 
        genradix_for_each_reverse(&c->journal_entries, iter, _i) {
                i = *_i;
 
-               if (!i || i->ignore)
+               if (journal_replay_ignore(i))
                        continue;
 
                last_seq = le64_to_cpu(i->j.last_seq);
@@ -1196,7 +1232,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
        genradix_for_each(&c->journal_entries, iter, _i) {
                i = *_i;
 
-               if (!i || i->ignore)
+               if (journal_replay_ignore(i))
                        continue;
 
                seq = le64_to_cpu(i->j.seq);
@@ -1211,8 +1247,8 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
                p = journal_seq_pin(j, seq);
 
                p->devs.nr = 0;
-               for (ptr = 0; ptr < i->nr_ptrs; ptr++)
-                       bch2_dev_list_add_dev(&p->devs, i->ptrs[ptr].dev);
+               darray_for_each(i->ptrs, ptr)
+                       bch2_dev_list_add_dev(&p->devs, ptr->dev);
 
                had_entries = true;
        }
@@ -1240,13 +1276,17 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
 
 void bch2_dev_journal_exit(struct bch_dev *ca)
 {
-       kfree(ca->journal.bio);
-       kfree(ca->journal.buckets);
-       kfree(ca->journal.bucket_seq);
+       struct journal_device *ja = &ca->journal;
+
+       for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) {
+               kfree(ja->bio[i]);
+               ja->bio[i] = NULL;
+       }
 
-       ca->journal.bio         = NULL;
-       ca->journal.buckets     = NULL;
-       ca->journal.bucket_seq  = NULL;
+       kfree(ja->buckets);
+       kfree(ja->bucket_seq);
+       ja->buckets     = NULL;
+       ja->bucket_seq  = NULL;
 }
 
 int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
@@ -1256,14 +1296,13 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
                bch2_sb_field_get(sb, journal);
        struct bch_sb_field_journal_v2 *journal_buckets_v2 =
                bch2_sb_field_get(sb, journal_v2);
-       unsigned i, nr_bvecs;
 
        ja->nr = 0;
 
        if (journal_buckets_v2) {
                unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
 
-               for (i = 0; i < nr; i++)
+               for (unsigned i = 0; i < nr; i++)
                        ja->nr += le64_to_cpu(journal_buckets_v2->d[i].nr);
        } else if (journal_buckets) {
                ja->nr = bch2_nr_journal_buckets(journal_buckets);
@@ -1273,13 +1312,18 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
        if (!ja->bucket_seq)
                return -BCH_ERR_ENOMEM_dev_journal_init;
 
-       nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
+       unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
 
-       ca->journal.bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
-       if (!ca->journal.bio)
-               return -BCH_ERR_ENOMEM_dev_journal_init;
+       for (unsigned i = 0; i < ARRAY_SIZE(ja->bio); i++) {
+               ja->bio[i] = kmalloc(struct_size(ja->bio[i], bio.bi_inline_vecs,
+                                    nr_bvecs), GFP_KERNEL);
+               if (!ja->bio[i])
+                       return -BCH_ERR_ENOMEM_dev_journal_init;
 
-       bio_init(ca->journal.bio, NULL, ca->journal.bio->bi_inline_vecs, nr_bvecs, 0);
+               ja->bio[i]->ca = ca;
+               ja->bio[i]->buf_idx = i;
+               bio_init(&ja->bio[i]->bio, NULL, ja->bio[i]->bio.bi_inline_vecs, nr_bvecs, 0);
+       }
 
        ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
        if (!ja->buckets)
@@ -1287,14 +1331,14 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 
        if (journal_buckets_v2) {
                unsigned nr = bch2_sb_field_journal_v2_nr_entries(journal_buckets_v2);
-               unsigned j, dst = 0;
+               unsigned dst = 0;
 
-               for (i = 0; i < nr; i++)
-                       for (j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++)
+               for (unsigned i = 0; i < nr; i++)
+                       for (unsigned j = 0; j < le64_to_cpu(journal_buckets_v2->d[i].nr); j++)
                                ja->buckets[dst++] =
                                        le64_to_cpu(journal_buckets_v2->d[i].start) + j;
        } else if (journal_buckets) {
-               for (i = 0; i < ja->nr; i++)
+               for (unsigned i = 0; i < ja->nr; i++)
                        ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]);
        }
 
@@ -1303,19 +1347,19 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 
 void bch2_fs_journal_exit(struct journal *j)
 {
-       unsigned i;
+       if (j->wq)
+               destroy_workqueue(j->wq);
 
        darray_exit(&j->early_journal_entries);
 
-       for (i = 0; i < ARRAY_SIZE(j->buf); i++)
-               kvpfree(j->buf[i].data, j->buf[i].buf_size);
+       for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++)
+               kvfree(j->buf[i].data);
        free_fifo(&j->pin);
 }
 
 int bch2_fs_journal_init(struct journal *j)
 {
        static struct lock_class_key res_key;
-       unsigned i;
 
        mutex_init(&j->buf_lock);
        spin_lock_init(&j->lock);
@@ -1336,14 +1380,20 @@ int bch2_fs_journal_init(struct journal *j)
        if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)))
                return -BCH_ERR_ENOMEM_journal_pin_fifo;
 
-       for (i = 0; i < ARRAY_SIZE(j->buf); i++) {
+       for (unsigned i = 0; i < ARRAY_SIZE(j->buf); i++) {
                j->buf[i].buf_size = JOURNAL_ENTRY_SIZE_MIN;
-               j->buf[i].data = kvpmalloc(j->buf[i].buf_size, GFP_KERNEL);
+               j->buf[i].data = kvmalloc(j->buf[i].buf_size, GFP_KERNEL);
                if (!j->buf[i].data)
                        return -BCH_ERR_ENOMEM_journal_buf;
+               j->buf[i].idx = i;
        }
 
        j->pin.front = j->pin.back = 1;
+
+       j->wq = alloc_workqueue("bcachefs_journal",
+                               WQ_HIGHPRI|WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512);
+       if (!j->wq)
+               return -BCH_ERR_ENOMEM_fs_other_alloc;
        return 0;
 }
 
@@ -1381,6 +1431,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        prt_printf(out, "reclaim kicked:\t\t%u\n",              j->reclaim_kicked);
        prt_printf(out, "reclaim runs in:\t%u ms\n",            time_after(j->next_reclaim, now)
               ? jiffies_to_msecs(j->next_reclaim - jiffies) : 0);
+       prt_printf(out, "blocked:\t\t%u\n",                     j->blocked);
        prt_printf(out, "current entry sectors:\t%u\n",         j->cur_entry_sectors);
        prt_printf(out, "current entry error:\t%s\n",           bch2_journal_errors[j->cur_entry_error]);
        prt_printf(out, "current entry:\t\t");
@@ -1455,7 +1506,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
 {
        struct journal_entry_pin_list *pin_list;
        struct journal_entry_pin *pin;
-       unsigned i;
 
        spin_lock(&j->lock);
        *seq = max(*seq, j->pin.front);
@@ -1473,7 +1523,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
        prt_newline(out);
        printbuf_indent_add(out, 2);
 
-       for (i = 0; i < ARRAY_SIZE(pin_list->list); i++)
+       for (unsigned i = 0; i < ARRAY_SIZE(pin_list->list); i++)
                list_for_each_entry(pin, &pin_list->list[i], list) {
                        prt_printf(out, "\t%px %ps", pin, pin->flush);
                        prt_newline(out);
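
bch2_dev_journal_init() above now allocates one journal_bio per journal
buffer, sized with struct_size() so the flexible bi_inline_vecs array at the
tail of the embedded bio is included with overflow checking. A minimal sketch
of that allocation pattern, reusing the names from the hunk:

    unsigned nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
    struct journal_bio *jbio;

    /* header fields plus nr_bvecs inline bio_vecs, in one allocation: */
    jbio = kmalloc(struct_size(jbio, bio.bi_inline_vecs, nr_bvecs), GFP_KERNEL);
    bio_init(&jbio->bio, NULL, jbio->bio.bi_inline_vecs, nr_bvecs, 0);
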
index 4544ce24bb8a654e62be91c5d7e0242e51893c1c..7c7528f839c567f5d1398cbdf890a5433818253d 100644 (file)
@@ -264,7 +264,8 @@ static inline union journal_res_state journal_state_buf_put(struct journal *j, u
 }
 
 bool bch2_journal_entry_close(struct journal *);
-void bch2_journal_buf_put_final(struct journal *, u64, bool);
+void bch2_journal_do_writes(struct journal *);
+void bch2_journal_buf_put_final(struct journal *, u64);
 
 static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
 {
@@ -272,7 +273,7 @@ static inline void __bch2_journal_buf_put(struct journal *j, unsigned idx, u64 s
 
        s = journal_state_buf_put(j, idx);
        if (!journal_state_count(s, idx))
-               bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
+               bch2_journal_buf_put_final(j, seq);
 }
 
 static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq)
@@ -282,7 +283,7 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, u64 seq
        s = journal_state_buf_put(j, idx);
        if (!journal_state_count(s, idx)) {
                spin_lock(&j->lock);
-               bch2_journal_buf_put_final(j, seq, idx == s.unwritten_idx);
+               bch2_journal_buf_put_final(j, seq);
                spin_unlock(&j->lock);
        }
 }
index 47805193f18cc72c941f72f5b82cfb461eb8982c..d76c3c0c203f9eb8e39d391b28bf09430f919f6b 100644 (file)
 #include "sb-clean.h"
 #include "trace.h"
 
+void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
+                              struct journal_replay *j)
+{
+       darray_for_each(j->ptrs, i) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, i->dev);
+               u64 offset;
+
+               div64_u64_rem(i->sector, ca->mi.bucket_size, &offset);
+
+               if (i != j->ptrs.data)
+                       prt_printf(out, " ");
+               prt_printf(out, "%u:%u:%u (sector %llu)",
+                          i->dev, i->bucket, i->bucket_offset, i->sector);
+       }
+}
+
+static void bch2_journal_replay_to_text(struct printbuf *out, struct bch_fs *c,
+                                       struct journal_replay *j)
+{
+       prt_printf(out, "seq %llu ", le64_to_cpu(j->j.seq));
+
+       bch2_journal_ptrs_to_text(out, c, j);
+
+       for_each_jset_entry_type(entry, &j->j, BCH_JSET_ENTRY_datetime) {
+               struct jset_entry_datetime *datetime =
+                       container_of(entry, struct jset_entry_datetime, entry);
+               bch2_prt_datetime(out, le64_to_cpu(datetime->seconds));
+               break;
+       }
+}
+
 static struct nonce journal_nonce(const struct jset *jset)
 {
        return (struct nonce) {{
@@ -52,13 +83,15 @@ static void __journal_replay_free(struct bch_fs *c,
 
        BUG_ON(*p != i);
        *p = NULL;
-       kvpfree(i, offsetof(struct journal_replay, j) +
-               vstruct_bytes(&i->j));
+       kvfree(i);
 }
 
-static void journal_replay_free(struct bch_fs *c, struct journal_replay *i)
+static void journal_replay_free(struct bch_fs *c, struct journal_replay *i, bool blacklisted)
 {
-       i->ignore = true;
+       if (blacklisted)
+               i->ignore_blacklisted = true;
+       else
+               i->ignore_not_dirty = true;
 
        if (!c->opts.read_entire_journal)
                __journal_replay_free(c, i);
@@ -84,9 +117,9 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
 {
        struct genradix_iter iter;
        struct journal_replay **_i, *i, *dup;
-       struct journal_ptr *ptr;
        size_t bytes = vstruct_bytes(j);
        u64 last_seq = !JSET_NO_FLUSH(j) ? le64_to_cpu(j->last_seq) : 0;
+       struct printbuf buf = PRINTBUF;
        int ret = JOURNAL_ENTRY_ADD_OK;
 
        /* Is this entry older than the range we need? */
@@ -108,12 +141,13 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
                                       journal_entry_radix_idx(c, jlist->last_seq)) {
                        i = *_i;
 
-                       if (!i || i->ignore)
+                       if (journal_replay_ignore(i))
                                continue;
 
                        if (le64_to_cpu(i->j.seq) >= last_seq)
                                break;
-                       journal_replay_free(c, i);
+
+                       journal_replay_free(c, i, false);
                }
        }
 
@@ -131,72 +165,62 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
         */
        dup = *_i;
        if (dup) {
-               if (bytes == vstruct_bytes(&dup->j) &&
-                   !memcmp(j, &dup->j, bytes)) {
-                       i = dup;
-                       goto found;
-               }
+               bool identical = bytes == vstruct_bytes(&dup->j) &&
+                       !memcmp(j, &dup->j, bytes);
+               bool not_identical = !identical &&
+                       entry_ptr.csum_good &&
+                       dup->csum_good;
+
+               bool same_device = false;
+               darray_for_each(dup->ptrs, ptr)
+                       if (ptr->dev == ca->dev_idx)
+                               same_device = true;
+
+               ret = darray_push(&dup->ptrs, entry_ptr);
+               if (ret)
+                       goto out;
 
-               if (!entry_ptr.csum_good) {
-                       i = dup;
-                       goto found;
-               }
+               bch2_journal_replay_to_text(&buf, c, dup);
 
-               if (!dup->csum_good)
+               fsck_err_on(same_device,
+                           c, journal_entry_dup_same_device,
+                           "duplicate journal entry on same device\n  %s",
+                           buf.buf);
+
+               fsck_err_on(not_identical,
+                           c, journal_entry_replicas_data_mismatch,
+                           "found duplicate but non identical journal entries\n  %s",
+                           buf.buf);
+
+               if (entry_ptr.csum_good && !identical)
                        goto replace;
 
-               fsck_err(c, journal_entry_replicas_data_mismatch,
-                        "found duplicate but non identical journal entries (seq %llu)",
-                        le64_to_cpu(j->seq));
-               i = dup;
-               goto found;
+               goto out;
        }
 replace:
-       i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
+       i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
        if (!i)
                return -BCH_ERR_ENOMEM_journal_entry_add;
 
-       i->nr_ptrs      = 0;
-       i->csum_good    = entry_ptr.csum_good;
-       i->ignore       = false;
+       darray_init(&i->ptrs);
+       i->csum_good            = entry_ptr.csum_good;
+       i->ignore_blacklisted   = false;
+       i->ignore_not_dirty     = false;
        unsafe_memcpy(&i->j, j, bytes, "embedded variable length struct");
-       i->ptrs[i->nr_ptrs++] = entry_ptr;
 
        if (dup) {
-               if (dup->nr_ptrs >= ARRAY_SIZE(dup->ptrs)) {
-                       bch_err(c, "found too many copies of journal entry %llu",
-                               le64_to_cpu(i->j.seq));
-                       dup->nr_ptrs = ARRAY_SIZE(dup->ptrs) - 1;
-               }
-
                /* The first ptr should represent the jset we kept: */
-               memcpy(i->ptrs + i->nr_ptrs,
-                      dup->ptrs,
-                      sizeof(dup->ptrs[0]) * dup->nr_ptrs);
-               i->nr_ptrs += dup->nr_ptrs;
+               darray_for_each(dup->ptrs, ptr)
+                       darray_push(&i->ptrs, *ptr);
                __journal_replay_free(c, dup);
+       } else {
+               darray_push(&i->ptrs, entry_ptr);
        }
 
        *_i = i;
-       return 0;
-found:
-       for (ptr = i->ptrs; ptr < i->ptrs + i->nr_ptrs; ptr++) {
-               if (ptr->dev == ca->dev_idx) {
-                       bch_err(c, "duplicate journal entry %llu on same device",
-                               le64_to_cpu(i->j.seq));
-                       goto out;
-               }
-       }
-
-       if (i->nr_ptrs >= ARRAY_SIZE(i->ptrs)) {
-               bch_err(c, "found too many copies of journal entry %llu",
-                       le64_to_cpu(i->j.seq));
-               goto out;
-       }
-
-       i->ptrs[i->nr_ptrs++] = entry_ptr;
 out:
 fsck_err:
+       printbuf_exit(&buf);
        return ret;
 }
 
@@ -374,7 +398,6 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
 static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs *c,
                                             struct jset_entry *entry)
 {
-       struct bkey_i *k;
        bool first = true;
 
        jset_entry_for_each_key(entry, k) {
@@ -741,6 +764,37 @@ static void journal_entry_write_buffer_keys_to_text(struct printbuf *out, struct
        journal_entry_btree_keys_to_text(out, c, entry);
 }
 
+static int journal_entry_datetime_validate(struct bch_fs *c,
+                               struct jset *jset,
+                               struct jset_entry *entry,
+                               unsigned version, int big_endian,
+                               enum bkey_invalid_flags flags)
+{
+       unsigned bytes = vstruct_bytes(entry);
+       unsigned expected = 16;
+       int ret = 0;
+
+       if (journal_entry_err_on(bytes < expected,
+                                c, version, jset, entry,
+                                journal_entry_dev_usage_bad_size,
+                                "bad size (%u < %u)",
+                                bytes, expected)) {
+               journal_entry_null_range(entry, vstruct_next(entry));
+               return ret;
+       }
+fsck_err:
+       return ret;
+}
+
+static void journal_entry_datetime_to_text(struct printbuf *out, struct bch_fs *c,
+                                           struct jset_entry *entry)
+{
+       struct jset_entry_datetime *datetime =
+               container_of(entry, struct jset_entry_datetime, entry);
+
+       bch2_prt_datetime(out, le64_to_cpu(datetime->seconds));
+}
+
 struct jset_entry_ops {
        int (*validate)(struct bch_fs *, struct jset *,
                        struct jset_entry *, unsigned, int,
@@ -913,11 +967,11 @@ static int journal_read_buf_realloc(struct journal_read_buf *b,
                return -BCH_ERR_ENOMEM_journal_read_buf_realloc;
 
        new_size = roundup_pow_of_two(new_size);
-       n = kvpmalloc(new_size, GFP_KERNEL);
+       n = kvmalloc(new_size, GFP_KERNEL);
        if (!n)
                return -BCH_ERR_ENOMEM_journal_read_buf_realloc;
 
-       kvpfree(b->data, b->size);
+       kvfree(b->data);
        b->data = n;
        b->size = new_size;
        return 0;
@@ -1102,16 +1156,15 @@ static CLOSURE_CALLBACK(bch2_journal_read_device)
                if (!r)
                        continue;
 
-               for (i = 0; i < r->nr_ptrs; i++) {
-                       if (r->ptrs[i].dev == ca->dev_idx) {
-                               unsigned wrote = bucket_remainder(ca, r->ptrs[i].sector) +
+               darray_for_each(r->ptrs, i)
+                       if (i->dev == ca->dev_idx) {
+                               unsigned wrote = bucket_remainder(ca, i->sector) +
                                        vstruct_sectors(&r->j, c->block_bits);
 
-                               ja->cur_idx = r->ptrs[i].bucket;
+                               ja->cur_idx = i->bucket;
                                ja->sectors_free = ca->mi.bucket_size - wrote;
                                goto found;
                        }
-               }
        }
 found:
        mutex_unlock(&jlist->lock);
@@ -1144,7 +1197,7 @@ found:
                ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
 out:
        bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret);
-       kvpfree(buf.data, buf.size);
+       kvfree(buf.data);
        percpu_ref_put(&ca->io_ref);
        closure_return(cl);
        return;
@@ -1155,27 +1208,6 @@ err:
        goto out;
 }
 
-void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
-                              struct journal_replay *j)
-{
-       unsigned i;
-
-       for (i = 0; i < j->nr_ptrs; i++) {
-               struct bch_dev *ca = bch_dev_bkey_exists(c, j->ptrs[i].dev);
-               u64 offset;
-
-               div64_u64_rem(j->ptrs[i].sector, ca->mi.bucket_size, &offset);
-
-               if (i)
-                       prt_printf(out, " ");
-               prt_printf(out, "%u:%u:%u (sector %llu)",
-                      j->ptrs[i].dev,
-                      j->ptrs[i].bucket,
-                      j->ptrs[i].bucket_offset,
-                      j->ptrs[i].sector);
-       }
-}
-
 int bch2_journal_read(struct bch_fs *c,
                      u64 *last_seq,
                      u64 *blacklist_seq,
@@ -1228,20 +1260,20 @@ int bch2_journal_read(struct bch_fs *c,
 
                i = *_i;
 
-               if (!i || i->ignore)
+               if (journal_replay_ignore(i))
                        continue;
 
                if (!*start_seq)
                        *blacklist_seq = *start_seq = le64_to_cpu(i->j.seq) + 1;
 
                if (JSET_NO_FLUSH(&i->j)) {
-                       i->ignore = true;
+                       i->ignore_blacklisted = true;
                        continue;
                }
 
                if (!last_write_torn && !i->csum_good) {
                        last_write_torn = true;
-                       i->ignore = true;
+                       i->ignore_blacklisted = true;
                        continue;
                }
 
@@ -1280,12 +1312,12 @@ int bch2_journal_read(struct bch_fs *c,
        genradix_for_each(&c->journal_entries, radix_iter, _i) {
                i = *_i;
 
-               if (!i || i->ignore)
+               if (journal_replay_ignore(i))
                        continue;
 
                seq = le64_to_cpu(i->j.seq);
                if (seq < *last_seq) {
-                       journal_replay_free(c, i);
+                       journal_replay_free(c, i, false);
                        continue;
                }
 
@@ -1293,7 +1325,7 @@ int bch2_journal_read(struct bch_fs *c,
                        fsck_err_on(!JSET_NO_FLUSH(&i->j), c,
                                    jset_seq_blacklisted,
                                    "found blacklisted journal entry %llu", seq);
-                       i->ignore = true;
+                       i->ignore_blacklisted = true;
                }
        }
 
@@ -1302,7 +1334,7 @@ int bch2_journal_read(struct bch_fs *c,
        genradix_for_each(&c->journal_entries, radix_iter, _i) {
                i = *_i;
 
-               if (!i || i->ignore)
+               if (journal_replay_ignore(i))
                        continue;
 
                BUG_ON(seq > le64_to_cpu(i->j.seq));
@@ -1353,32 +1385,31 @@ int bch2_journal_read(struct bch_fs *c,
                        .e.data_type = BCH_DATA_journal,
                        .e.nr_required = 1,
                };
-               unsigned ptr;
 
                i = *_i;
-               if (!i || i->ignore)
+               if (journal_replay_ignore(i))
                        continue;
 
-               for (ptr = 0; ptr < i->nr_ptrs; ptr++) {
-                       struct bch_dev *ca = bch_dev_bkey_exists(c, i->ptrs[ptr].dev);
+               darray_for_each(i->ptrs, ptr) {
+                       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
 
-                       if (!i->ptrs[ptr].csum_good)
-                               bch_err_dev_offset(ca, i->ptrs[ptr].sector,
+                       if (!ptr->csum_good)
+                               bch_err_dev_offset(ca, ptr->sector,
                                                   "invalid journal checksum, seq %llu%s",
                                                   le64_to_cpu(i->j.seq),
                                                   i->csum_good ? " (had good copy on another device)" : "");
                }
 
                ret = jset_validate(c,
-                                   bch_dev_bkey_exists(c, i->ptrs[0].dev),
+                                   bch_dev_bkey_exists(c, i->ptrs.data[0].dev),
                                    &i->j,
-                                   i->ptrs[0].sector,
+                                   i->ptrs.data[0].sector,
                                    READ);
                if (ret)
                        goto err;
 
-               for (ptr = 0; ptr < i->nr_ptrs; ptr++)
-                       replicas.e.devs[replicas.e.nr_devs++] = i->ptrs[ptr].dev;
+               darray_for_each(i->ptrs, ptr)
+                       replicas.e.devs[replicas.e.nr_devs++] = ptr->dev;
 
                bch2_replicas_entry_sort(&replicas.e);
 
@@ -1547,7 +1578,7 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
        if (bch2_btree_write_buffer_resize(c, btree_write_buffer_size))
                return;
 
-       new_buf = kvpmalloc(new_size, GFP_NOFS|__GFP_NOWARN);
+       new_buf = kvmalloc(new_size, GFP_NOFS|__GFP_NOWARN);
        if (!new_buf)
                return;
 
@@ -1558,7 +1589,7 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
        swap(buf->buf_size,     new_size);
        spin_unlock(&j->lock);
 
-       kvpfree(new_buf, new_size);
+       kvfree(new_buf);
 }
 
 static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
@@ -1568,12 +1599,12 @@ static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
 
 static CLOSURE_CALLBACK(journal_write_done)
 {
-       closure_type(j, struct journal, io);
+       closure_type(w, struct journal_buf, io);
+       struct journal *j = container_of(w, struct journal, buf[w->idx]);
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       struct journal_buf *w = journal_last_unwritten_buf(j);
        struct bch_replicas_padded replicas;
        union journal_res_state old, new;
-       u64 v, seq;
+       u64 v, seq = le64_to_cpu(w->data->seq);
        int err = 0;
 
        bch2_time_stats_update(!JSET_NO_FLUSH(w->data)
@@ -1593,63 +1624,68 @@ static CLOSURE_CALLBACK(journal_write_done)
        if (err)
                bch2_fatal_error(c);
 
-       spin_lock(&j->lock);
-       seq = le64_to_cpu(w->data->seq);
+       closure_debug_destroy(cl);
 
+       spin_lock(&j->lock);
        if (seq >= j->pin.front)
                journal_seq_pin(j, seq)->devs = w->devs_written;
+       if (err && (!j->err_seq || seq < j->err_seq))
+               j->err_seq      = seq;
+       w->write_done = true;
 
-       if (!err) {
-               if (!JSET_NO_FLUSH(w->data)) {
+       bool completed = false;
+
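+       /*
+        * Writes complete in sequence order: walk forward from the last
+        * unwritten entry, stopping at the first buffer still in flight:
+        */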
+       for (seq = journal_last_unwritten_seq(j);
+            seq <= journal_cur_seq(j);
+            seq++) {
+               w = j->buf + (seq & JOURNAL_BUF_MASK);
+               if (!w->write_done)
+                       break;
+
+               if (!j->err_seq && !JSET_NO_FLUSH(w->data)) {
                        j->flushed_seq_ondisk = seq;
                        j->last_seq_ondisk = w->last_seq;
 
                        bch2_do_discards(c);
                        closure_wake_up(&c->freelist_wait);
-
                        bch2_reset_alloc_cursors(c);
                }
-       } else if (!j->err_seq || seq < j->err_seq)
-               j->err_seq      = seq;
 
-       j->seq_ondisk           = seq;
+               j->seq_ondisk = seq;
 
-       /*
-        * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
-        * more buckets:
-        *
-        * Must come before signaling write completion, for
-        * bch2_fs_journal_stop():
-        */
-       if (j->watermark != BCH_WATERMARK_stripe)
-               journal_reclaim_kick(&c->journal);
+               /*
+                * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
+                * more buckets:
+                *
+                * Must come before signaling write completion, for
+                * bch2_fs_journal_stop():
+                */
+               if (j->watermark != BCH_WATERMARK_stripe)
+                       journal_reclaim_kick(&c->journal);
 
-       /* also must come before signalling write completion: */
-       closure_debug_destroy(cl);
+               v = atomic64_read(&j->reservations.counter);
+               do {
+                       old.v = new.v = v;
+                       BUG_ON(journal_state_count(new, new.unwritten_idx));
+                       BUG_ON(new.unwritten_idx != (seq & JOURNAL_BUF_MASK));
 
-       v = atomic64_read(&j->reservations.counter);
-       do {
-               old.v = new.v = v;
-               BUG_ON(journal_state_count(new, new.unwritten_idx));
+                       new.unwritten_idx++;
+               } while ((v = atomic64_cmpxchg(&j->reservations.counter, old.v, new.v)) != old.v);
 
-               new.unwritten_idx++;
-       } while ((v = atomic64_cmpxchg(&j->reservations.counter,
-                                      old.v, new.v)) != old.v);
+               closure_wake_up(&w->wait);
+               completed = true;
+       }
 
-       bch2_journal_reclaim_fast(j);
-       bch2_journal_space_available(j);
+       if (completed) {
+               bch2_journal_reclaim_fast(j);
+               bch2_journal_space_available(j);
 
-       track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
-                          &j->max_in_flight_start, false);
+               track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], false);
 
-       closure_wake_up(&w->wait);
-       journal_wake(j);
+               journal_wake(j);
+       }
 
-       if (!journal_state_count(new, new.unwritten_idx) &&
-           journal_last_unwritten_seq(j) <= journal_cur_seq(j)) {
-               spin_unlock(&j->lock);
-               closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
-       } else if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
+       if (journal_last_unwritten_seq(j) == journal_cur_seq(j) &&
                   new.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL) {
                struct journal_buf *buf = journal_cur_buf(j);
                long delta = buf->expires - jiffies;
@@ -1659,46 +1695,46 @@ static CLOSURE_CALLBACK(journal_write_done)
                 * previous entries still in flight - the current journal entry
                 * might want to be written now:
                 */
-
-               spin_unlock(&j->lock);
-               mod_delayed_work(c->io_complete_wq, &j->write_work, max(0L, delta));
-       } else {
-               spin_unlock(&j->lock);
+               mod_delayed_work(j->wq, &j->write_work, max(0L, delta));
        }
+
+       spin_unlock(&j->lock);
 }
 
 static void journal_write_endio(struct bio *bio)
 {
-       struct bch_dev *ca = bio->bi_private;
+       struct journal_bio *jbio = container_of(bio, struct journal_bio, bio);
+       struct bch_dev *ca = jbio->ca;
        struct journal *j = &ca->fs->journal;
-       struct journal_buf *w = journal_last_unwritten_buf(j);
-       unsigned long flags;
+       struct journal_buf *w = j->buf + jbio->buf_idx;
 
        if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write,
                               "error writing journal entry %llu: %s",
                               le64_to_cpu(w->data->seq),
                               bch2_blk_status_to_str(bio->bi_status)) ||
            bch2_meta_write_fault("journal")) {
+               unsigned long flags;
+
                spin_lock_irqsave(&j->err_lock, flags);
                bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx);
                spin_unlock_irqrestore(&j->err_lock, flags);
        }
 
-       closure_put(&j->io);
+       closure_put(&w->io);
        percpu_ref_put(&ca->io_ref);
 }
 
 static CLOSURE_CALLBACK(do_journal_write)
 {
-       closure_type(j, struct journal, io);
+       closure_type(w, struct journal_buf, io);
+       struct journal *j = container_of(w, struct journal, buf[w->idx]);
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       struct bch_dev *ca;
-       struct journal_buf *w = journal_last_unwritten_buf(j);
-       struct bio *bio;
        unsigned sectors = vstruct_sectors(w->data, c->block_bits);
 
        extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
-               ca = bch_dev_bkey_exists(c, ptr->dev);
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+               struct journal_device *ja = &ca->journal;
+
                if (!percpu_ref_tryget(&ca->io_ref)) {
                        /* XXX: fix this */
                        bch_err(c, "missing device for journal write\n");
@@ -1708,7 +1744,7 @@ static CLOSURE_CALLBACK(do_journal_write)
                this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
                             sectors);
 
-               bio = ca->journal.bio;
+               struct bio *bio = &ja->bio[w->idx]->bio;
                bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
                bio->bi_iter.bi_sector  = ptr->offset;
                bio->bi_end_io          = journal_write_endio;
@@ -1727,11 +1763,10 @@ static CLOSURE_CALLBACK(do_journal_write)
                trace_and_count(c, journal_write, bio);
                closure_bio_submit(bio, cl);
 
-               ca->journal.bucket_seq[ca->journal.cur_idx] =
-                       le64_to_cpu(w->data->seq);
+               ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq);
        }
 
-       continue_at(cl, journal_write_done, c->io_complete_wq);
+       continue_at(cl, journal_write_done, j->wq);
 }
 
 static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
@@ -1782,7 +1817,6 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
                        if (!wb.wb)
                                bch2_journal_keys_to_write_buffer_start(c, &wb, seq);
 
-                       struct bkey_i *k;
                        jset_entry_for_each_key(i, k) {
                                ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k);
                                if (ret) {
@@ -1798,12 +1832,20 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
 
        if (wb.wb)
                bch2_journal_keys_to_write_buffer_end(c, &wb);
+
+       spin_lock(&c->journal.lock);
        w->need_flush_to_write_buffer = false;
+       spin_unlock(&c->journal.lock);
 
        start = end = vstruct_last(jset);
 
        end     = bch2_btree_roots_to_journal_entries(c, end, btree_roots_have);
 
+       struct jset_entry_datetime *d =
+               container_of(jset_entry_init(&end, sizeof(*d)), struct jset_entry_datetime, entry);
+       d->entry.type   = BCH_JSET_ENTRY_datetime;
+       d->seconds      = cpu_to_le64(ktime_get_real_seconds());
+
        bch2_journal_super_entries_add_common(c, &end, seq);
        u64s    = (u64 *) end - (u64 *) start;
        BUG_ON(u64s > j->entry_u64s_reserved);
@@ -1893,6 +1935,7 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *
 
                j->nr_noflush_writes++;
        } else {
+               w->must_flush = true;
                j->last_flush_write = jiffies;
                j->nr_flush_writes++;
                clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags);
@@ -1903,20 +1946,28 @@ static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *
 
 CLOSURE_CALLBACK(bch2_journal_write)
 {
-       closure_type(j, struct journal, io);
+       closure_type(w, struct journal_buf, io);
+       struct journal *j = container_of(w, struct journal, buf[w->idx]);
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       struct journal_buf *w = journal_last_unwritten_buf(j);
        struct bch_replicas_padded replicas;
-       struct bio *bio;
        struct printbuf journal_debug_buf = PRINTBUF;
        unsigned nr_rw_members = 0;
        int ret;
 
+       for_each_rw_member(c, ca)
+               nr_rw_members++;
+
        BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
+       BUG_ON(!w->write_started);
+       BUG_ON(w->write_allocated);
+       BUG_ON(w->write_done);
 
        j->write_start_time = local_clock();
 
        spin_lock(&j->lock);
+       if (nr_rw_members > 1)
+               w->separate_flush = true;
+
        ret = bch2_journal_write_pick_flush(j, w);
        spin_unlock(&j->lock);
        if (ret)
@@ -1956,12 +2007,14 @@ CLOSURE_CALLBACK(bch2_journal_write)
         * bch2_journal_space_available():
         */
        w->sectors = 0;
+       w->write_allocated = true;
 
        /*
         * journal entry has been compacted and allocated, recalculate space
         * available:
         */
        bch2_journal_space_available(j);
+       bch2_journal_do_writes(j);
        spin_unlock(&j->lock);
 
        w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
@@ -1969,12 +2022,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
        if (c->opts.nochanges)
                goto no_io;
 
-       for_each_rw_member(c, ca)
-               nr_rw_members++;
-
-       if (nr_rw_members > 1)
-               w->separate_flush = true;
-
        /*
         * Mark journal replicas before we submit the write to guarantee
         * recovery will find the journal entries after a crash.
@@ -1985,25 +2032,29 @@ CLOSURE_CALLBACK(bch2_journal_write)
        if (ret)
                goto err;
 
+       if (!JSET_NO_FLUSH(w->data))
+               closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq));
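+       /*
+        * The wait above keeps flush writes ordered: a flush entry is only
+        * issued once every earlier entry is on disk.
+        */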
+
        if (!JSET_NO_FLUSH(w->data) && w->separate_flush) {
                for_each_rw_member(c, ca) {
                        percpu_ref_get(&ca->io_ref);
 
-                       bio = ca->journal.bio;
+                       struct journal_device *ja = &ca->journal;
+                       struct bio *bio = &ja->bio[w->idx]->bio;
                        bio_reset(bio, ca->disk_sb.bdev,
-                                 REQ_OP_WRITE|REQ_PREFLUSH);
+                                 REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
                        bio->bi_end_io          = journal_write_endio;
                        bio->bi_private         = ca;
                        closure_bio_submit(bio, cl);
                }
        }
 
-       continue_at(cl, do_journal_write, c->io_complete_wq);
+       continue_at(cl, do_journal_write, j->wq);
        return;
 no_io:
-       continue_at(cl, journal_write_done, c->io_complete_wq);
+       continue_at(cl, journal_write_done, j->wq);
        return;
 err:
        bch2_fatal_error(c);
-       continue_at(cl, journal_write_done, c->io_complete_wq);
+       continue_at(cl, journal_write_done, j->wq);
 }
index c035e7c108e19012e6e4e1f708136dec27b5387c..4f1e763ab506007c4488b81285b2a6464e5a2f2c 100644 (file)
@@ -2,26 +2,35 @@
 #ifndef _BCACHEFS_JOURNAL_IO_H
 #define _BCACHEFS_JOURNAL_IO_H
 
+#include "darray.h"
+
+struct journal_ptr {
+       bool            csum_good;
+       u8              dev;
+       u32             bucket;
+       u32             bucket_offset;
+       u64             sector;
+};
+
 /*
  * Only used for holding the journal entries we read in btree_journal_read()
  * during cache_registration
  */
 struct journal_replay {
-       struct journal_ptr {
-               bool            csum_good;
-               u8              dev;
-               u32             bucket;
-               u32             bucket_offset;
-               u64             sector;
-       }                       ptrs[BCH_REPLICAS_MAX];
-       unsigned                nr_ptrs;
+       DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs;
 
        bool                    csum_good;
-       bool                    ignore;
+       bool                    ignore_blacklisted;
+       bool                    ignore_not_dirty;
        /* must be last: */
        struct jset             j;
 };
 
+static inline bool journal_replay_ignore(struct journal_replay *i)
+{
+       return !i || i->ignore_blacklisted || i->ignore_not_dirty;
+}
+
 static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
                                        struct jset_entry *entry, unsigned type)
 {
@@ -36,12 +45,12 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
 }
 
 #define for_each_jset_entry_type(entry, jset, type)                    \
-       for (entry = (jset)->start;                                     \
+       for (struct jset_entry *entry = (jset)->start;                  \
             (entry = __jset_entry_type_next(jset, entry, type));       \
             entry = vstruct_next(entry))
 
 #define jset_entry_for_each_key(_e, _k)                                        \
-       for (_k = (_e)->start;                                          \
+       for (struct bkey_i *_k = (_e)->start;                           \
             _k < vstruct_last(_e);                                     \
             _k = bkey_next(_k))
 
@@ -62,4 +71,20 @@ int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
 
 CLOSURE_CALLBACK(bch2_journal_write);
 
+static inline struct jset_entry *jset_entry_init(struct jset_entry **end, size_t size)
+{
+       struct jset_entry *entry = *end;
+       unsigned u64s = DIV_ROUND_UP(size, sizeof(u64));
+
+       memset(entry, 0, u64s * sizeof(u64));
+       /*
+        * The u64s field counts from the start of data, ignoring the shared
+        * fields.
+        */
+       entry->u64s = cpu_to_le16(u64s - 1);
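+       /* e.g. a 16 byte jset_entry_datetime: u64s = 2, entry->u64s = 1 */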
+
+       *end = vstruct_next(*end);
+       return entry;
+}
+
 #endif /* _BCACHEFS_JOURNAL_IO_H */
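
journal_replay now tracks its pointers in a darray with eight preallocated
slots instead of the fixed ptrs[BCH_REPLICAS_MAX] array plus nr_ptrs count,
which is why the "too many copies" clamping could be deleted from
journal_entry_add(). A rough usage sketch of the darray calls appearing in
these hunks, assuming the usual bcachefs darray semantics (darray_push() can
fail with -ENOMEM; error handling elided):

    DARRAY_PREALLOCATED(struct journal_ptr, 8) ptrs;

    darray_init(&ptrs);
    darray_push(&ptrs, entry_ptr);   /* heap-allocates only past 8 entries */
    darray_for_each(ptrs, ptr)       /* ptr iterates as struct journal_ptr * */
            prt_printf(out, "%u ", ptr->dev);
    darray_exit(&ptrs);
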
index c33dca641575dffc58b6db8354e71c879ed5cf26..ab811c0dad26accfb4924eaef4cccb3ab957087c 100644 (file)
@@ -62,12 +62,9 @@ void bch2_journal_set_watermark(struct journal *j)
                ? BCH_WATERMARK_reclaim
                : BCH_WATERMARK_stripe;
 
-       if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space],
-                              &j->low_on_space_start, low_on_space) ||
-           track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin],
-                              &j->low_on_pin_start, low_on_pin) ||
-           track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full],
-                              &j->write_buffer_full_start, low_on_wb))
+       if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space], low_on_space) ||
+           track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin], low_on_pin) ||
+           track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
                trace_and_count(c, journal_full, c);
 
        swap(watermark, j->watermark);
@@ -394,8 +391,6 @@ void bch2_journal_pin_copy(struct journal *j,
                           struct journal_entry_pin *src,
                           journal_pin_flush_fn flush_fn)
 {
-       bool reclaim;
-
        spin_lock(&j->lock);
 
        u64 seq = READ_ONCE(src->seq);
@@ -411,44 +406,44 @@ void bch2_journal_pin_copy(struct journal *j,
                return;
        }
 
-       reclaim = __journal_pin_drop(j, dst);
+       bool reclaim = __journal_pin_drop(j, dst);
 
        bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));
 
        if (reclaim)
                bch2_journal_reclaim_fast(j);
-       spin_unlock(&j->lock);
 
        /*
         * If the journal is currently full, we might want to call flush_fn
         * immediately:
         */
-       journal_wake(j);
+       if (seq == journal_last_seq(j))
+               journal_wake(j);
+       spin_unlock(&j->lock);
 }
 
 void bch2_journal_pin_set(struct journal *j, u64 seq,
                          struct journal_entry_pin *pin,
                          journal_pin_flush_fn flush_fn)
 {
-       bool reclaim;
-
        spin_lock(&j->lock);
 
        BUG_ON(seq < journal_last_seq(j));
 
-       reclaim = __journal_pin_drop(j, pin);
+       bool reclaim = __journal_pin_drop(j, pin);
 
        bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));
 
        if (reclaim)
                bch2_journal_reclaim_fast(j);
-       spin_unlock(&j->lock);
-
        /*
         * If the journal is currently full, we might want to call flush_fn
         * immediately:
         */
-       journal_wake(j);
+       if (seq == journal_last_seq(j))
+               journal_wake(j);
+
+       spin_unlock(&j->lock);
 }
 
 /**
index 0200e299cfbb9c210d144bb056f1e85a910fe70f..b5303874fc35b33e5e6ac3878a03af8ab1a882be 100644 (file)
@@ -43,61 +43,36 @@ static unsigned sb_blacklist_u64s(unsigned nr)
        return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
 }
 
-static struct bch_sb_field_journal_seq_blacklist *
-blacklist_entry_try_merge(struct bch_fs *c,
-                         struct bch_sb_field_journal_seq_blacklist *bl,
-                         unsigned i)
-{
-       unsigned nr = blacklist_nr_entries(bl);
-
-       if (le64_to_cpu(bl->start[i].end) >=
-           le64_to_cpu(bl->start[i + 1].start)) {
-               bl->start[i].end = bl->start[i + 1].end;
-               --nr;
-               memmove(&bl->start[i],
-                       &bl->start[i + 1],
-                       sizeof(bl->start[0]) * (nr - i));
-
-               bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
-                                         sb_blacklist_u64s(nr));
-               BUG_ON(!bl);
-       }
-
-       return bl;
-}
-
-static bool bl_entry_contig_or_overlaps(struct journal_seq_blacklist_entry *e,
-                                       u64 start, u64 end)
-{
-       return !(end < le64_to_cpu(e->start) || le64_to_cpu(e->end) < start);
-}
-
 int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
 {
        struct bch_sb_field_journal_seq_blacklist *bl;
-       unsigned i, nr;
+       unsigned i = 0, nr;
        int ret = 0;
 
        mutex_lock(&c->sb_lock);
        bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist);
        nr = blacklist_nr_entries(bl);
 
-       for (i = 0; i < nr; i++) {
+       while (i < nr) {
                struct journal_seq_blacklist_entry *e =
                        bl->start + i;
 
-               if (bl_entry_contig_or_overlaps(e, start, end)) {
-                       e->start = cpu_to_le64(min(start, le64_to_cpu(e->start)));
-                       e->end  = cpu_to_le64(max(end, le64_to_cpu(e->end)));
-
-                       if (i + 1 < nr)
-                               bl = blacklist_entry_try_merge(c,
-                                                       bl, i);
-                       if (i)
-                               bl = blacklist_entry_try_merge(c,
-                                                       bl, i - 1);
-                       goto out_write_sb;
+               if (end < le64_to_cpu(e->start))
+                       break;
+
+               if (start > le64_to_cpu(e->end)) {
+                       i++;
+                       continue;
                }
+
+               /*
+                * Entry is contiguous with or overlaps the new entry:
+                * merge it into the new entry, then delete it:
+                */
+
+               start   = min(start,    le64_to_cpu(e->start));
+               end     = max(end,      le64_to_cpu(e->end));
+               array_remove_item(bl->start, nr, i);
        }
 
        bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist,
@@ -107,9 +82,10 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
                goto out;
        }
 
-       bl->start[nr].start     = cpu_to_le64(start);
-       bl->start[nr].end       = cpu_to_le64(end);
-out_write_sb:
+       array_insert_item(bl->start, nr, i, ((struct journal_seq_blacklist_entry) {
+               .start  = cpu_to_le64(start),
+               .end    = cpu_to_le64(end),
+       }));
        c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << BCH_FEATURE_journal_seq_blacklist_v3);
 
        ret = bch2_write_super(c);
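
array_remove_item() and array_insert_item() are not shown in this diff; in
bcachefs they come from util.h. A minimal sketch of their semantics,
assuming that source:

        #define array_insert_item(_array, _nr, _pos, _new_item)         \
        do {                                                            \
                memmove(&(_array)[(_pos) + 1], &(_array)[(_pos)],       \
                        sizeof((_array)[0]) * ((_nr) - (_pos)));        \
                (_nr)++;                                                \
                (_array)[(_pos)] = (_new_item);                         \
        } while (0)

        #define array_remove_item(_array, _nr, _pos)                    \
        do {                                                            \
                (_nr)--;                                                \
                memmove(&(_array)[(_pos)], &(_array)[(_pos) + 1],       \
                        sizeof((_array)[0]) * ((_nr) - (_pos)));        \
        } while (0)

So the rewritten loop coalesces every existing entry that touches
[start, end] into the new range and deletes it in place (note there is no
i++ after a removal, since removal shifts the tail down); the merged entry
is then inserted at i, where the scan stopped.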
@@ -165,8 +141,7 @@ int bch2_blacklist_table_initialize(struct bch_fs *c)
        if (!bl)
                return 0;
 
-       t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
-                   GFP_KERNEL);
+       t = kzalloc(struct_size(t, entries, nr), GFP_KERNEL);
        if (!t)
                return -BCH_ERR_ENOMEM_blacklist_table_init;
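
struct_size() here is the standard flexible-array helper from
<linux/overflow.h>: struct_size(t, entries, nr) computes
sizeof(*t) + nr * sizeof(t->entries[0]), saturating to SIZE_MAX on overflow
so an overflowing count makes the allocation fail rather than silently
undersizing it.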
 
index 38817c7a0851592c67c591f9a7a425d58152a004..8c053cb64ca5ee25b9a5b2613f2fcd9e03d517d3 100644
@@ -18,6 +18,7 @@
  * the journal that are being staged or in flight.
  */
 struct journal_buf {
+       struct closure          io;
        struct jset             *data;
 
        __BKEY_PADDED(key, BCH_REPLICAS_MAX);
@@ -33,10 +34,14 @@ struct journal_buf {
        unsigned                disk_sectors;   /* maximum size entry could have been, if
                                                   buf_size was bigger */
        unsigned                u64s_reserved;
-       bool                    noflush;        /* write has already been kicked off, and was noflush */
-       bool                    must_flush;     /* something wants a flush */
-       bool                    separate_flush;
-       bool                    need_flush_to_write_buffer;
+       bool                    noflush:1;      /* write has already been kicked off, and was noflush */
+       bool                    must_flush:1;   /* something wants a flush */
+       bool                    separate_flush:1;
+       bool                    need_flush_to_write_buffer:1;
+       bool                    write_started:1;
+       bool                    write_allocated:1;
+       bool                    write_done:1;
+       u8                      idx;
 };
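
Turning the bools into single-bit bitfields packs all the flags into one
byte and makes room for the new state bits, write_started, write_allocated
and write_done, which appear to track a journal_buf through the stages of
the refactored write path; idx lets a buf locate itself within j->buf[].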
 
 /*
@@ -134,6 +139,7 @@ enum journal_flags {
 /* Reasons we may fail to get a journal reservation: */
 #define JOURNAL_ERRORS()               \
        x(ok)                           \
+       x(retry)                        \
        x(blocked)                      \
        x(max_in_flight)                \
        x(journal_full)                 \
@@ -149,6 +155,13 @@ enum journal_errors {
 
 typedef DARRAY(u64)            darray_u64;
 
+struct journal_bio {
+       struct bch_dev          *ca;
+       unsigned                buf_idx;
+
+       struct bio              bio;
+};
+
 /* Embedded in struct bch_fs */
 struct journal {
        /* Fastpath stuff up front: */
@@ -203,8 +216,8 @@ struct journal {
        wait_queue_head_t       wait;
        struct closure_waitlist async_wait;
 
-       struct closure          io;
        struct delayed_work     write_work;
+       struct workqueue_struct *wq;
 
        /* Sequence number of most recent journal entry (last entry in @pin) */
        atomic64_t              seq;
@@ -274,11 +287,6 @@ struct journal {
        u64                     nr_noflush_writes;
        u64                     entry_bytes_written;
 
-       u64                     low_on_space_start;
-       u64                     low_on_pin_start;
-       u64                     max_in_flight_start;
-       u64                     write_buffer_full_start;
-
        struct bch2_time_stats  *flush_write_time;
        struct bch2_time_stats  *noflush_write_time;
        struct bch2_time_stats  *flush_seq_time;
@@ -313,7 +321,7 @@ struct journal_device {
        u64                     *buckets;
 
        /* Bio for journal reads/writes to this device */
-       struct bio              *bio;
+       struct journal_bio      *bio[JOURNAL_BUF_NR];
 
        /* for bch_journal_read_device */
        struct closure          read;
index 7a4ca5a28b3eac83ead3d5e585e8886db5c456c9..26569043e36803ca290342063f6435f7c71d7b87 100644
@@ -44,8 +44,8 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
                          u64 dev_bucket, u64 time, bool set)
 {
        return time
-               ? bch2_btree_bit_mod(trans, BTREE_ID_lru,
-                                    lru_pos(lru_id, dev_bucket, time), set)
+               ? bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru,
+                                             lru_pos(lru_id, dev_bucket, time), set)
                : 0;
 }
 
@@ -125,8 +125,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
                        goto out;
                }
 
-               if (c->opts.reconstruct_alloc ||
-                   fsck_err(c, lru_entry_bad,
+               if (fsck_err(c, lru_entry_bad,
                             "incorrect lru entry: lru %s time %llu\n"
                             "  %s\n"
                             "  for %s",
index bf0ef668fd38324132b737e648e3ffcb143bbe92..0ea9f30803a2b3da1372fd1d282dbb56f25d3920 100644
@@ -103,14 +103,17 @@ EXPORT_SYMBOL_GPL(mean_and_variance_get_stddev);
  * mean_and_variance_weighted_update() - exponentially weighted variant of mean_and_variance_update()
  * @s: mean and variance number of samples and their sums
  * @x: new value to include in the &mean_and_variance_weighted
+ * @initted: caller must track whether this is the first use or not
+ * @weight: ewma weight
  *
  * see linked pdf: function derived from equations 140-143 where alpha = 2^w.
  * values are stored bitshifted for performance and added precision.
  */
-void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 x)
+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s,
+               s64 x, bool initted, u8 weight)
 {
        // previous weighted variance.
-       u8 w            = s->weight;
+       u8 w            = weight;
        u64 var_w0      = s->variance;
        // new value weighted.
        s64 x_w         = x << w;
@@ -119,45 +122,50 @@ void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64
        // new mean weighted.
        s64 u_w1        = s->mean + diff;
 
-       if (!s->init) {
+       if (!initted) {
                s->mean = x_w;
                s->variance = 0;
        } else {
                s->mean = u_w1;
                s->variance = ((var_w0 << w) - var_w0 + ((diff_w * (x_w - u_w1)) >> w)) >> w;
        }
-       s->init = true;
 }
 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_update);
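
Working the bit-shifts out of the code above: with weight w the stored
fields are mean = \mu \cdot 2^w and variance = \sigma^2 \cdot 2^w, and the
update implements the usual exponentially weighted recurrences with
\alpha = 2^{-w} (a sketch derived from the code, not from the linked pdf):

        \mu_{n+1}      = \mu_n + \alpha (x - \mu_n)
        \sigma^2_{n+1} = (1 - \alpha)\,(\sigma^2_n + \alpha (x - \mu_n)^2)

which is why the getters below simply shift the stored values back down by
the weight.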
 
 /**
  * mean_and_variance_weighted_get_mean() - get mean from @s
  * @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
  */
-s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s)
+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
+               u8 weight)
 {
-       return fast_divpow2(s.mean, s.weight);
+       return fast_divpow2(s.mean, weight);
 }
 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_mean);
 
 /**
  * mean_and_variance_weighted_get_variance() - get variance from @s
  * @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
  */
-u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s)
+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
+               u8 weight)
 {
        // always positive, don't need fast_divpow2
-       return s.variance >> s.weight;
+       return s.variance >> weight;
 }
 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_variance);
 
 /**
  * mean_and_variance_weighted_get_stddev() - get standard deviation from @s
  * @s: mean and variance number of samples and their sums
+ * @weight: ewma weight
  */
-u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s)
+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
+               u8 weight)
 {
-       return int_sqrt64(mean_and_variance_weighted_get_variance(s));
+       return int_sqrt64(mean_and_variance_weighted_get_variance(s, weight));
 }
 EXPORT_SYMBOL_GPL(mean_and_variance_weighted_get_stddev);
 
index 64df11ab422bf455560bad095973cc6e5a296697..4fcf062dd22c71efd338a9ab37a8e1af5ca22ce5 100644
@@ -154,8 +154,6 @@ struct mean_and_variance {
 
 /* exponentially weighted variant */
 struct mean_and_variance_weighted {
-       bool    init;
-       u8      weight; /* base 2 logarithim */
        s64     mean;
        u64     variance;
 };
@@ -192,10 +190,14 @@ s64 mean_and_variance_get_mean(struct mean_and_variance s);
 u64 mean_and_variance_get_variance(struct mean_and_variance s1);
 u32 mean_and_variance_get_stddev(struct mean_and_variance s);
 
-void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s, s64 v);
+void mean_and_variance_weighted_update(struct mean_and_variance_weighted *s,
+               s64 v, bool initted, u8 weight);
 
-s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s);
-u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s);
-u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s);
+s64 mean_and_variance_weighted_get_mean(struct mean_and_variance_weighted s,
+               u8 weight);
+u64 mean_and_variance_weighted_get_variance(struct mean_and_variance_weighted s,
+               u8 weight);
+u32 mean_and_variance_weighted_get_stddev(struct mean_and_variance_weighted s,
+               u8 weight);
 
 #endif // MEAN_AND_VAIRANCE_H_
index 019583c3ca0eaba79f932223eedfe5a386ffdfa9..db63b3f3b338ad6405ceb34c4526a52765cca7af 100644
@@ -31,53 +31,59 @@ static void mean_and_variance_basic_test(struct kunit *test)
 
 static void mean_and_variance_weighted_test(struct kunit *test)
 {
-       struct mean_and_variance_weighted s = { .weight = 2 };
+       struct mean_and_variance_weighted s = { };
 
-       mean_and_variance_weighted_update(&s, 10);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 10);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 0);
+       mean_and_variance_weighted_update(&s, 10, false, 2);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s, 2), 10);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s, 2), 0);
 
-       mean_and_variance_weighted_update(&s, 20);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 12);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 18);
+       mean_and_variance_weighted_update(&s, 20, true, 2);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s, 2), 12);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s, 2), 18);
 
-       mean_and_variance_weighted_update(&s, 30);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 16);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 72);
+       mean_and_variance_weighted_update(&s, 30, true, 2);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s, 2), 16);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s, 2), 72);
 
-       s = (struct mean_and_variance_weighted) { .weight = 2 };
+       s = (struct mean_and_variance_weighted) { };
 
-       mean_and_variance_weighted_update(&s, -10);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -10);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 0);
+       mean_and_variance_weighted_update(&s, -10, false, 2);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s, 2), -10);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s, 2), 0);
 
-       mean_and_variance_weighted_update(&s, -20);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -12);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 18);
+       mean_and_variance_weighted_update(&s, -20, true, 2);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s, 2), -12);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s, 2), 18);
 
-       mean_and_variance_weighted_update(&s, -30);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -16);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 72);
+       mean_and_variance_weighted_update(&s, -30, true, 2);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s, 2), -16);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s, 2), 72);
 }
 
 static void mean_and_variance_weighted_advanced_test(struct kunit *test)
 {
-       struct mean_and_variance_weighted s = { .weight = 8 };
+       struct mean_and_variance_weighted s = { };
+       bool initted = false;
        s64 i;
 
-       for (i = 10; i <= 100; i += 10)
-               mean_and_variance_weighted_update(&s, i);
+       for (i = 10; i <= 100; i += 10) {
+               mean_and_variance_weighted_update(&s, i, initted, 8);
+               initted = true;
+       }
 
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), 11);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 107);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s, 8), 11);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s, 8), 107);
 
-       s = (struct mean_and_variance_weighted) { .weight = 8 };
+       s = (struct mean_and_variance_weighted) { };
+       initted = false;
 
-       for (i = -10; i >= -100; i -= 10)
-               mean_and_variance_weighted_update(&s, i);
+       for (i = -10; i >= -100; i -= 10) {
+               mean_and_variance_weighted_update(&s, i, initted, 8);
+               initted = true;
+       }
 
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s), -11);
-       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s), 107);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(s, 8), -11);
+       KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_variance(s, 8), 107);
 }
 
 static void do_mean_and_variance_test(struct kunit *test,
@@ -92,26 +98,26 @@ static void do_mean_and_variance_test(struct kunit *test,
                                      s64 *weighted_stddev)
 {
        struct mean_and_variance mv = {};
-       struct mean_and_variance_weighted vw = { .weight = weight };
+       struct mean_and_variance_weighted vw = { };
 
        for (unsigned i = 0; i < initial_n; i++) {
                mean_and_variance_update(&mv, initial_value);
-               mean_and_variance_weighted_update(&vw, initial_value);
+               mean_and_variance_weighted_update(&vw, initial_value, false, weight);
 
                KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(mv),           initial_value);
                KUNIT_EXPECT_EQ(test, mean_and_variance_get_stddev(mv),         0);
-               KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(vw),  initial_value);
-               KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_stddev(vw),0);
+               KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(vw, weight),  initial_value);
+               KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_stddev(vw, weight), 0);
        }
 
        for (unsigned i = 0; i < n; i++) {
                mean_and_variance_update(&mv, data[i]);
-               mean_and_variance_weighted_update(&vw, data[i]);
+               mean_and_variance_weighted_update(&vw, data[i], true, weight);
 
                KUNIT_EXPECT_EQ(test, mean_and_variance_get_mean(mv),           mean[i]);
                KUNIT_EXPECT_EQ(test, mean_and_variance_get_stddev(mv),         stddev[i]);
-               KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(vw),  weighted_mean[i]);
-               KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_stddev(vw),weighted_stddev[i]);
+               KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_mean(vw, weight),  weighted_mean[i]);
+               KUNIT_EXPECT_EQ(test, mean_and_variance_weighted_get_stddev(vw, weight), weighted_stddev[i]);
        }
 
        KUNIT_EXPECT_EQ(test, mv.n, initial_n + n);
index 5623cee3ef8693413ee51d7dd521c496e90f206c..69098eeb5d48e3a06236bb7ad8aead21fcc19679 100644
@@ -31,7 +31,7 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
        nr_good = bch2_bkey_durability(c, k.s_c);
        if ((!nr_good && !(flags & lost)) ||
            (nr_good < replicas && !(flags & degraded)))
-               return -EINVAL;
+               return -BCH_ERR_remove_would_lose_data;
 
        return 0;
 }
@@ -111,7 +111,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 
        /* don't handle this yet: */
        if (flags & BCH_FORCE_IF_METADATA_LOST)
-               return -EINVAL;
+               return -BCH_ERR_remove_with_metadata_missing_unimplemented;
 
        trans = bch2_trans_get(c);
        bch2_bkey_buf_init(&k);
@@ -132,10 +132,8 @@ retry:
 
                        ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
                                            dev_idx, flags, true);
-                       if (ret) {
-                               bch_err(c, "Cannot drop device without losing data");
+                       if (ret)
                                break;
-                       }
 
                        ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
                        if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
index b1ed0b9a20d35d61491ce0cff28b4bb2c7be42c3..08ea0cfc4aef08acfd4d0fe33e0d8227f212cb02 100644
@@ -314,7 +314,7 @@ int bch2_opt_parse(struct bch_fs *c,
                if (ret < 0 || (*res != 0 && *res != 1)) {
                        if (err)
                                prt_printf(err, "%s: must be bool", opt->attr.name);
-                       return ret;
+                       return ret < 0 ? ret : -BCH_ERR_option_not_bool;
                }
                break;
        case BCH_OPT_UINT:
@@ -456,7 +456,7 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
 
        copied_opts = kstrdup(options, GFP_KERNEL);
        if (!copied_opts)
-               return -1;
+               return -ENOMEM;
        copied_opts_start = copied_opts;
 
        while ((opt = strsep(&copied_opts, ",")) != NULL) {
@@ -501,11 +501,11 @@ int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts,
 
 bad_opt:
        pr_err("Bad mount option %s", name);
-       ret = -1;
+       ret = -BCH_ERR_option_name;
        goto out;
 bad_val:
        pr_err("Invalid mount option %s", err.buf);
-       ret = -1;
+       ret = -BCH_ERR_option_value;
        goto out;
 out:
        kfree(copied_opts_start);
index 9a4b7faa376503993f1c2da8f8d1e5963ef6ca5a..136083c11f3a3aecc575501c33c0b3868f38113f 100644
@@ -290,6 +290,11 @@ enum fsck_err_opts {
          OPT_BOOL(),                                                   \
          BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Allow mounting in when data will be missing")  \
+       x(no_splitbrain_check,          u8,                             \
+         OPT_FS|OPT_MOUNT,                                             \
+         OPT_BOOL(),                                                   \
+         BCH2_NO_SB_OPT,               false,                          \
+         NULL,         "Don't kick drives out when splitbrain detected")\
        x(discard,                      u8,                             \
          OPT_FS|OPT_MOUNT|OPT_DEVICE,                                  \
          OPT_BOOL(),                                                   \
@@ -332,6 +337,11 @@ enum fsck_err_opts {
          OPT_BOOL(),                                                   \
          BCH2_NO_SB_OPT,               false,                          \
          NULL,         "Run fsck on mount")                            \
+       x(fsck_memory_usage_percent,    u8,                             \
+         OPT_FS|OPT_MOUNT,                                             \
+         OPT_UINT(20, 70),                                             \
+         BCH2_NO_SB_OPT,               50,                             \
+         NULL,         "Maximum percentage of system RAM fsck is allowed to pin")\
        x(fix_errors,                   u8,                             \
          OPT_FS|OPT_MOUNT,                                             \
          OPT_FN(bch2_opt_fix_errors),                                  \
index 22d1017aa49b975756905a9a69ce8bcd82416ca3..56336f3dd1d0771b94c30aaa78d2c6b0d00617e7 100644
@@ -412,11 +412,11 @@ void bch2_rebalance_status_to_text(struct printbuf *out, struct bch_fs *c)
                u64 now = atomic64_read(&c->io_clock[WRITE].now);
 
                prt_str(out, "io wait duration:  ");
-               bch2_prt_human_readable_s64(out, r->wait_iotime_end - r->wait_iotime_start);
+               bch2_prt_human_readable_s64(out, (r->wait_iotime_end - r->wait_iotime_start) << 9);
                prt_newline(out);
 
                prt_str(out, "io wait remaining: ");
-               bch2_prt_human_readable_s64(out, r->wait_iotime_end - now);
+               bch2_prt_human_readable_s64(out, (r->wait_iotime_end - now) << 9);
                prt_newline(out);
 
                prt_str(out, "duration waited:   ");
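
The rebalance io_clock counts 512-byte sectors, so the added << 9 converts
the sector deltas to bytes before handing them to
bch2_prt_human_readable_s64(), giving the sysfs output sensible units.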
index 21e13bb4335be3b6d48005282000c2f0a7c4e2bd..2af219aedfdbefb36d37fe19423d8e26cabf0cdd 100644
@@ -52,14 +52,47 @@ static bool btree_id_is_alloc(enum btree_id id)
 }
 
 /* for -o reconstruct_alloc: */
-static void drop_alloc_keys(struct journal_keys *keys)
+static void do_reconstruct_alloc(struct bch_fs *c)
 {
+       bch2_journal_log_msg(c, "dropping alloc info");
+       bch_info(c, "dropping and reconstructing all alloc info");
+
+       mutex_lock(&c->sb_lock);
+       struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+       __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required);
+       __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_alloc_info, ext->recovery_passes_required);
+       __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_lrus, ext->recovery_passes_required);
+       __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_extents_to_backpointers, ext->recovery_passes_required);
+       __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_alloc_to_lru_refs, ext->recovery_passes_required);
+
+       __set_bit_le64(BCH_FSCK_ERR_ptr_to_missing_alloc_key, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_stale_dirty_ptr, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_need_discard_key_wrong, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_freespace_key_wrong, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_bucket_gens_key_wrong, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_freespace_hole_missing, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_ptr_to_missing_backpointer, ext->errors_silent);
+       __set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent);
+       c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+
+       c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+
+       struct journal_keys *keys = &c->journal_keys;
        size_t src, dst;
 
        for (src = 0, dst = 0; src < keys->nr; src++)
-               if (!btree_id_is_alloc(keys->d[src].btree_id))
-                       keys->d[dst++] = keys->d[src];
-
+               if (!btree_id_is_alloc(keys->data[src].btree_id))
+                       keys->data[dst++] = keys->data[src];
        keys->nr = dst;
 }
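
do_reconstruct_alloc() gathers into one place what used to be scattered
through recovery: it persistently marks the alloc-related recovery passes
as required in the superblock, pre-silences the fsck errors that are
expected once alloc info is discarded, clears the alloc_info compat bit,
and finally filters every alloc-btree key out of the journal keys with a
compacting src/dst pass.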
 
@@ -70,9 +103,7 @@ static void drop_alloc_keys(struct journal_keys *keys)
  */
 static void zero_out_btree_mem_ptr(struct journal_keys *keys)
 {
-       struct journal_key *i;
-
-       for (i = keys->d; i < keys->d + keys->nr; i++)
+       darray_for_each(*keys, i)
                if (i->k->k.type == KEY_TYPE_btree_ptr_v2)
                        bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0;
 }
@@ -124,6 +155,17 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
        if (ret)
                goto out;
 
+       struct btree_path *path = btree_iter_path(trans, &iter);
+       if (unlikely(!btree_path_node(path, k->level))) {
+               bch2_trans_iter_exit(trans, &iter);
+               bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
+                                         BTREE_MAX_DEPTH, 0, iter_flags);
+               ret =   bch2_btree_iter_traverse(&iter) ?:
+                       bch2_btree_increase_depth(trans, iter.path, 0) ?:
+                       -BCH_ERR_transaction_restart_nested;
+               goto out;
+       }
+
        /* Must be checked with btree locked: */
        if (k->overwritten)
                goto out;
@@ -166,11 +208,9 @@ static int bch2_journal_replay(struct bch_fs *c)
         * efficient - better locality of btree access -  but some might fail if
         * that would cause a journal deadlock.
         */
-       for (size_t i = 0; i < keys->nr; i++) {
+       darray_for_each(*keys, k) {
                cond_resched();
 
-               struct journal_key *k = keys->d + i;
-
                /* Skip fastpath if we're low on space in the journal */
                ret = c->journal.watermark ? -1 :
                        commit_do(trans, NULL, NULL,
@@ -264,7 +304,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
                        bkey_copy(&r->key, (struct bkey_i *) entry->start);
                        r->error = 0;
                } else {
-                       r->error = -EIO;
+                       r->error = -BCH_ERR_btree_node_read_error;
                }
                r->alive = true;
                break;
@@ -359,7 +399,7 @@ static int journal_replay_early(struct bch_fs *c,
                genradix_for_each(&c->journal_entries, iter, _i) {
                        i = *_i;
 
-                       if (!i || i->ignore)
+                       if (journal_replay_ignore(i))
                                continue;
 
                        vstruct_for_each(&i->j, entry) {
@@ -388,11 +428,8 @@ static int read_btree_roots(struct bch_fs *c)
                if (!r->alive)
                        continue;
 
-               if (btree_id_is_alloc(i) &&
-                   c->opts.reconstruct_alloc) {
-                       c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
+               if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc)
                        continue;
-               }
 
                if (r->error) {
                        __fsck_err(c,
@@ -524,8 +561,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
         * setting journal_key->overwritten: it will be accessed by multiple
         * threads
         */
-       move_gap(keys->d, keys->nr, keys->size, keys->gap, keys->nr);
-       keys->gap = keys->nr;
+       move_gap(keys, keys->nr);
 
        set_bit(BCH_FS_may_go_rw, &c->flags);
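
journal_keys is kept as a gap buffer so replay can insert keys cheaply;
move_gap(keys, keys->nr) pushes the gap to the end, leaving the array
contiguous before it is read by multiple threads. The new two-argument form
simply folds in the bookkeeping the old five-argument call spelled out by
hand.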
 
@@ -862,7 +898,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                        goto out;
 
                genradix_for_each_reverse(&c->journal_entries, iter, i)
-                       if (*i && !(*i)->ignore) {
+                       if (!journal_replay_ignore(*i)) {
                                last_journal_entry = &(*i)->j;
                                break;
                        }
@@ -887,7 +923,8 @@ int bch2_fs_recovery(struct bch_fs *c)
                        genradix_for_each_reverse(&c->journal_entries, iter, i)
                                if (*i) {
                                        last_journal_entry = &(*i)->j;
-                                       (*i)->ignore = false;
+                                       (*i)->ignore_blacklisted = false;
+                                       (*i)->ignore_not_dirty = false;
                                        /*
                                         * This was probably a NO_FLUSH entry,
                                         * so last_seq was garbage - but we know
@@ -923,10 +960,8 @@ use_clean:
        c->journal_replay_seq_start     = last_seq;
        c->journal_replay_seq_end       = blacklist_seq - 1;
 
-       if (c->opts.reconstruct_alloc) {
-               c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
-               drop_alloc_keys(&c->journal_keys);
-       }
+       if (c->opts.reconstruct_alloc)
+               do_reconstruct_alloc(c);
 
        zero_out_btree_mem_ptr(&c->journal_keys);
 
@@ -950,7 +985,7 @@ use_clean:
                        bch2_journal_seq_blacklist_add(c,
                                        blacklist_seq, journal_seq);
                if (ret) {
-                       bch_err(c, "error creating new journal seq blacklist entry");
+                       bch_err_msg(c, ret, "error creating new journal seq blacklist entry");
                        goto err;
                }
        }
@@ -961,9 +996,6 @@ use_clean:
        if (ret)
                goto err;
 
-       if (c->opts.reconstruct_alloc)
-               bch2_journal_log_msg(c, "dropping alloc info");
-
        /*
         * Skip past versions that might have possibly been used (as nonces),
         * but hadn't had their pointers written:
index fa0c8efd2a1b42450535474079b791aa2e6e9938..1361e34d4e64c2939fc0b7af8c9df9e5d9dfc7cf 100644
@@ -34,6 +34,7 @@
        x(check_snapshot_trees,                 18, PASS_ONLINE|PASS_FSCK)      \
        x(check_snapshots,                      19, PASS_ONLINE|PASS_FSCK)      \
        x(check_subvols,                        20, PASS_ONLINE|PASS_FSCK)      \
+       x(check_subvol_children,                35, PASS_ONLINE|PASS_FSCK)      \
        x(delete_dead_snapshots,                21, PASS_ONLINE|PASS_FSCK)      \
        x(fs_upgrade_for_subvolumes,            22, 0)                          \
        x(resume_logged_ops,                    23, PASS_ALWAYS)                \
@@ -43,6 +44,7 @@
        x(check_dirents,                        27, PASS_FSCK)                  \
        x(check_xattrs,                         28, PASS_FSCK)                  \
        x(check_root,                           29, PASS_ONLINE|PASS_FSCK)      \
+       x(check_subvolume_structure,            36, PASS_ONLINE|PASS_FSCK)      \
        x(check_directory_structure,            30, PASS_ONLINE|PASS_FSCK)      \
        x(check_nlinks,                         31, PASS_FSCK)                  \
        x(delete_dead_inodes,                   32, PASS_FSCK|PASS_UNCLEAN)     \
index b6bf0ebe7e84046a5d08ade7d34bae9ae0bff3a5..5980ba2563fe9fa159ba9d87fe08ab2dc53a78fb 100644
@@ -171,22 +171,6 @@ fsck_err:
        return ERR_PTR(ret);
 }
 
-static struct jset_entry *jset_entry_init(struct jset_entry **end, size_t size)
-{
-       struct jset_entry *entry = *end;
-       unsigned u64s = DIV_ROUND_UP(size, sizeof(u64));
-
-       memset(entry, 0, u64s * sizeof(u64));
-       /*
-        * The u64s field counts from the start of data, ignoring the shared
-        * fields.
-        */
-       entry->u64s = cpu_to_le16(u64s - 1);
-
-       *end = vstruct_next(*end);
-       return entry;
-}
-
 void bch2_journal_super_entries_add_common(struct bch_fs *c,
                                           struct jset_entry **end,
                                           u64 journal_seq)
index 441dcb1bf160e917d531d1a5ea955cf0238f0844..e4396cb0bacb037bac965e1beccd261d4a960789 100644
          BIT_ULL(BCH_RECOVERY_PASS_check_inodes),              \
          BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list)      \
        x(rebalance_work,                                       \
-         BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
+         BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))    \
+       x(subvolume_fs_parent,                                  \
+         BIT_ULL(BCH_RECOVERY_PASS_check_dirents),             \
+         BCH_FSCK_ERR_subvol_fs_path_parent_wrong)             \
+       x(btree_subvolume_children,                             \
+         BIT_ULL(BCH_RECOVERY_PASS_check_subvols),             \
+         BCH_FSCK_ERR_subvol_children_not_set)
 
 #define DOWNGRADE_TABLE()
 
@@ -253,7 +259,7 @@ void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_mi
                                if (e < BCH_SB_ERR_MAX)
                                        __set_bit(e, c->sb.errors_silent);
                                if (e < sizeof(ext->errors_silent) * 8)
-                                       ext->errors_silent[e / 64] |= cpu_to_le64(BIT_ULL(e % 64));
+                                       __set_bit_le64(e, ext->errors_silent);
                        }
                }
        }
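
__set_bit_le64() is new in this series, and the line it replaces here shows
exactly what it abstracts; a sketch matching that open-coded form:

        static inline void __set_bit_le64(size_t bit, __le64 *addr)
        {
                addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
        }

i.e. a set-bit over an array of little-endian 64-bit words, suitable for
on-disk bitmaps such as errors_silent and recovery_passes_required.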
index c08aacdfd073c203e44a072363c94e89dd93eec8..5178bf579f7c538b6f1132fc687aa281177c7db9 100644
        x(dirent_name_dot_or_dotdot,                            223)    \
        x(dirent_name_has_slash,                                224)    \
        x(dirent_d_type_wrong,                                  225)    \
-       x(dirent_d_parent_subvol_wrong,                         226)    \
+       x(inode_bi_parent_wrong,                                226)    \
        x(dirent_in_missing_dir_inode,                          227)    \
        x(dirent_in_non_dir_inode,                              228)    \
        x(dirent_to_missing_inode,                              229)    \
        x(hash_table_key_duplicate,                             242)    \
        x(hash_table_key_wrong_offset,                          243)    \
        x(unlinked_inode_not_on_deleted_list,                   244)    \
-       x(reflink_p_front_pad_bad,                              245)
+       x(reflink_p_front_pad_bad,                              245)    \
+       x(journal_entry_dup_same_device,                        246)    \
+       x(inode_bi_subvol_missing,                              247)    \
+       x(inode_bi_subvol_wrong,                                248)    \
+       x(inode_points_to_missing_dirent,                       249)    \
+       x(inode_points_to_wrong_dirent,                         250)    \
+       x(inode_bi_parent_nonzero,                              251)    \
+       x(dirent_to_missing_parent_subvol,                      252)    \
+       x(dirent_not_visible_in_parent_subvol,                  253)    \
+       x(subvol_fs_path_parent_wrong,                          254)    \
+       x(subvol_root_fs_path_parent_nonzero,                   255)    \
+       x(subvol_children_not_set,                              256)    \
+       x(subvol_children_bad,                                  257)    \
+       x(subvol_loop,                                          258)    \
+       x(subvol_unreachable,                                   259)    \
+       x(btree_node_bkey_bad_u64s,                             260)
 
 enum bch_sb_error_id {
 #define x(t, n) BCH_FSCK_ERR_##t = n,
index fcaa5a888744881a4f6c37dd77fbd8cf73b2f4d0..3976f80721bf1b40736a3d882c9f841fa7ab961b 100644
@@ -259,7 +259,7 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
 }
 
 static __always_inline
-int bch2_hash_set_snapshot(struct btree_trans *trans,
+int bch2_hash_set_in_snapshot(struct btree_trans *trans,
                           const struct bch_hash_desc desc,
                           const struct bch_hash_info *info,
                           subvol_inum inum, u32 snapshot,
@@ -328,17 +328,12 @@ int bch2_hash_set(struct btree_trans *trans,
                  struct bkey_i *insert,
                  bch_str_hash_flags_t str_hash_flags)
 {
-       u32 snapshot;
-       int ret;
-
-       ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
-       if (ret)
-               return ret;
-
        insert->k.p.inode = inum.inum;
 
-       return bch2_hash_set_snapshot(trans, desc, info, inum,
-                                     snapshot, insert, str_hash_flags, 0);
+       u32 snapshot;
+       return  bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot) ?:
+               bch2_hash_set_in_snapshot(trans, desc, info, inum,
+                                         snapshot, insert, str_hash_flags, 0);
 }
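
The rewrite uses the GNU C a ?: b extension, which evaluates a and returns
it if nonzero, else evaluates b, a common bcachefs idiom for chaining
operations that return 0 on success and a negative error otherwise.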
 
 static __always_inline
index 7c67c28d3ef88ff32d1805257faf37ebc79f0d2d..ce7aed12194238071f8fbf37aa111160ced286c9 100644
 
 static int bch2_subvolume_delete(struct btree_trans *, u32);
 
+static struct bpos subvolume_children_pos(struct bkey_s_c k)
+{
+       if (k.k->type != KEY_TYPE_subvolume)
+               return POS_MIN;
+
+       struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
+       if (!s.v->fs_path_parent)
+               return POS_MIN;
+       return POS(le32_to_cpu(s.v->fs_path_parent), s.k->p.offset);
+}
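+
+/*
+ * subvolume_children_pos() maps a subvolume key to its entry in the new
+ * subvolume_children btree: a subvolume S whose fs_path_parent is P is
+ * indexed at POS(P, S), so all children of P sort together. With purely
+ * illustrative numbers, a subvolume 7 under parent 2 is represented by a
+ * KEY_TYPE_set key at 2:7; POS_MIN signals "nothing to index" for the root
+ * subvolume and non-subvolume keys.
+ */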
+
 static int check_subvol(struct btree_trans *trans,
                        struct btree_iter *iter,
                        struct bkey_s_c k)
 {
        struct bch_fs *c = trans->c;
        struct bkey_s_c_subvolume subvol;
+       struct btree_iter subvol_children_iter = {};
        struct bch_snapshot snapshot;
+       struct printbuf buf = PRINTBUF;
        unsigned snapid;
        int ret = 0;
 
@@ -42,6 +55,72 @@ static int check_subvol(struct btree_trans *trans,
                return ret ?: -BCH_ERR_transaction_restart_nested;
        }
 
+       if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL &&
+                       subvol.v->fs_path_parent,
+                       c, subvol_root_fs_path_parent_nonzero,
+                       "root subvolume has nonzero fs_path_parent\n%s",
+                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+               struct bkey_i_subvolume *n =
+                       bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
+               ret = PTR_ERR_OR_ZERO(n);
+               if (ret)
+                       goto err;
+
+               n->v.fs_path_parent = 0;
+       }
+
+       if (subvol.v->fs_path_parent) {
+               struct bpos pos = subvolume_children_pos(k);
+
+               struct bkey_s_c subvol_children_k =
+                       bch2_bkey_get_iter(trans, &subvol_children_iter,
+                                          BTREE_ID_subvolume_children, pos, 0);
+               ret = bkey_err(subvol_children_k);
+               if (ret)
+                       goto err;
+
+               if (fsck_err_on(subvol_children_k.k->type != KEY_TYPE_set,
+                               c, subvol_children_not_set,
+                               "subvolume not set in subvolume_children btree at %llu:%llu\n%s",
+                               pos.inode, pos.offset,
+                               (printbuf_reset(&buf),
+                                bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
+                       ret = bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, true);
+                       if (ret)
+                               goto err;
+               }
+       }
+
+       struct bch_inode_unpacked inode;
+       struct btree_iter inode_iter = {};
+       ret = bch2_inode_peek_nowarn(trans, &inode_iter, &inode,
+                                   (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) },
+                                   0);
+       bch2_trans_iter_exit(trans, &inode_iter);
+
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               return ret;
+
+       if (fsck_err_on(ret, c, subvol_to_missing_root,
+                       "subvolume %llu points to missing subvolume root %llu:%u",
+                       k.k->p.offset, le64_to_cpu(subvol.v->inode),
+                       le32_to_cpu(subvol.v->snapshot))) {
+               ret = bch2_subvolume_delete(trans, iter->pos.offset);
+               bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
+               return ret ?: -BCH_ERR_transaction_restart_nested;
+       }
+
+       if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset,
+                       c, subvol_root_wrong_bi_subvol,
+                       "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu",
+                       inode.bi_inum, inode_iter.k.p.snapshot,
+                       inode.bi_subvol, subvol.k->p.offset)) {
+               inode.bi_subvol = subvol.k->p.offset;
+               ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot));
+               if (ret)
+                       goto err;
+       }
+
        if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
                u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
                u32 snapshot_tree;
@@ -72,8 +151,10 @@ static int check_subvol(struct btree_trans *trans,
                        SET_BCH_SUBVOLUME_SNAP(&s->v, true);
                }
        }
-
+err:
 fsck_err:
+       bch2_trans_iter_exit(trans, &subvol_children_iter);
+       printbuf_exit(&buf);
        return ret;
 }
 
@@ -88,6 +169,42 @@ int bch2_check_subvols(struct bch_fs *c)
        return ret;
 }
 
+static int check_subvol_child(struct btree_trans *trans,
+                             struct btree_iter *child_iter,
+                             struct bkey_s_c child_k)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_subvolume s;
+       int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, child_k.k->p.offset),
+                                         0, subvolume, &s);
+       if (ret && !bch2_err_matches(ret, ENOENT))
+               return ret;
+
+       if (fsck_err_on(ret ||
+                       le32_to_cpu(s.fs_path_parent) != child_k.k->p.inode,
+                       c, subvol_children_bad,
+                       "incorrect entry in subvolume_children btree %llu:%llu",
+                       child_k.k->p.inode, child_k.k->p.offset)) {
+               ret = bch2_btree_delete_at(trans, child_iter, 0);
+               if (ret)
+                       goto err;
+       }
+err:
+fsck_err:
+       return ret;
+}
+
+int bch2_check_subvol_children(struct bch_fs *c)
+{
+       int ret = bch2_trans_run(c,
+               for_each_btree_key_commit(trans, iter,
+                               BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_PREFETCH, k,
+                               NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+                       check_subvol_child(trans, &iter, k)));
+       bch_err_fn(c, ret);
+       return ret;
+}
+
 /* Subvolumes: */
 
 int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k,
@@ -112,8 +229,50 @@ void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
                   le64_to_cpu(s.v->inode),
                   le32_to_cpu(s.v->snapshot));
 
-       if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent))
-               prt_printf(out, " parent %u", le32_to_cpu(s.v->parent));
+       if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, creation_parent)) {
+               prt_printf(out, " creation_parent %u", le32_to_cpu(s.v->creation_parent));
+               prt_printf(out, " fs_parent %u", le32_to_cpu(s.v->fs_path_parent));
+       }
+}
+
+static int subvolume_children_mod(struct btree_trans *trans, struct bpos pos, bool set)
+{
+       return !bpos_eq(pos, POS_MIN)
+               ? bch2_btree_bit_mod(trans, BTREE_ID_subvolume_children, pos, set)
+               : 0;
+}
+
+int bch2_subvolume_trigger(struct btree_trans *trans,
+                          enum btree_id btree_id, unsigned level,
+                          struct bkey_s_c old, struct bkey_s new,
+                          unsigned flags)
+{
+       if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+               struct bpos children_pos_old = subvolume_children_pos(old);
+               struct bpos children_pos_new = subvolume_children_pos(new.s_c);
+
+               if (!bpos_eq(children_pos_old, children_pos_new)) {
+                       int ret = subvolume_children_mod(trans, children_pos_old, false) ?:
+                                 subvolume_children_mod(trans, children_pos_new, true);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
+int bch2_subvol_has_children(struct btree_trans *trans, u32 subvol)
+{
+       struct btree_iter iter;
+
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_subvolume_children, POS(subvol, 0), 0);
+       struct bkey_s_c k = bch2_btree_iter_peek(&iter);
+       bch2_trans_iter_exit(trans, &iter);
+
+       return bkey_err(k) ?: k.k && k.k->p.inode == subvol
+               ? -BCH_ERR_ENOTEMPTY_subvol_not_empty
+               : 0;
 }
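
Together these implement the standard secondary-index pattern: the trigger
fires on every transactional update of a subvolume key, clearing the old
subvolume_children entry and setting the new one in the same transaction so
the index cannot go stale, and bch2_subvol_has_children() then only needs
to peek for any key with inode == subvol, returning
-BCH_ERR_ENOTEMPTY_subvol_not_empty when one exists.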
 
 static __always_inline int
@@ -197,8 +356,8 @@ static int bch2_subvolume_reparent(struct btree_trans *trans,
        if (k.k->type != KEY_TYPE_subvolume)
                return 0;
 
-       if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
-           le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
+       if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, creation_parent) &&
+           le32_to_cpu(bkey_s_c_to_subvolume(k).v->creation_parent) != old_parent)
                return 0;
 
        s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
@@ -206,7 +365,7 @@ static int bch2_subvolume_reparent(struct btree_trans *trans,
        if (ret)
                return ret;
 
-       s->v.parent = cpu_to_le32(new_parent);
+       s->v.creation_parent = cpu_to_le32(new_parent);
        return 0;
 }
 
@@ -229,7 +388,7 @@ static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_d
                                BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
                                NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
                        bch2_subvolume_reparent(trans, &iter, k,
-                                       subvolid_to_delete, le32_to_cpu(s.parent)));
+                                       subvolid_to_delete, le32_to_cpu(s.creation_parent)));
 }
 
 /*
@@ -360,6 +519,7 @@ int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
 }
 
 int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
+                         u32 parent_subvolid,
                          u32 src_subvolid,
                          u32 *new_subvolid,
                          u32 *new_snapshotid,
@@ -416,12 +576,13 @@ int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
        if (ret)
                goto err;
 
-       new_subvol->v.flags     = 0;
-       new_subvol->v.snapshot  = cpu_to_le32(new_nodes[0]);
-       new_subvol->v.inode     = cpu_to_le64(inode);
-       new_subvol->v.parent    = cpu_to_le32(src_subvolid);
-       new_subvol->v.otime.lo  = cpu_to_le64(bch2_current_time(c));
-       new_subvol->v.otime.hi  = 0;
+       new_subvol->v.flags             = 0;
+       new_subvol->v.snapshot          = cpu_to_le32(new_nodes[0]);
+       new_subvol->v.inode             = cpu_to_le64(inode);
+       new_subvol->v.creation_parent   = cpu_to_le32(src_subvolid);
+       new_subvol->v.fs_path_parent    = cpu_to_le32(parent_subvolid);
+       new_subvol->v.otime.lo          = cpu_to_le64(bch2_current_time(c));
+       new_subvol->v.otime.hi          = 0;
 
        SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
        SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
index a6f56f66e27cb7699402f089ef89a2f1355077c4..903c05162c0688ae902321aace955ca27fa5e2f9 100644
@@ -8,17 +8,22 @@
 enum bkey_invalid_flags;
 
 int bch2_check_subvols(struct bch_fs *);
+int bch2_check_subvol_children(struct bch_fs *);
 
 int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c,
                           enum bkey_invalid_flags, struct printbuf *);
 void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
+int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned,
+                          struct bkey_s_c, struct bkey_s, unsigned);
 
 #define bch2_bkey_ops_subvolume ((struct bkey_ops) {           \
        .key_invalid    = bch2_subvolume_invalid,               \
        .val_to_text    = bch2_subvolume_to_text,               \
+       .trigger        = bch2_subvolume_trigger,               \
        .min_val_size   = 16,                                   \
 })
 
+int bch2_subvol_has_children(struct btree_trans *, u32);
 int bch2_subvolume_get(struct btree_trans *, unsigned,
                       bool, int, struct bch_subvolume *);
 int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
@@ -30,8 +35,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *);
 void bch2_delete_dead_snapshots_async(struct bch_fs *);
 
 int bch2_subvolume_unlink(struct btree_trans *, u32);
-int bch2_subvolume_create(struct btree_trans *, u64, u32,
-                         u32 *, u32 *, bool);
+int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);
 
 int bch2_fs_subvolumes_init(struct bch_fs *);
 
index af79134b07d6ad304e7af22b838d9709e777a41b..e029df7ba89f5244b65c99d021252d753207d3bd 100644
@@ -19,8 +19,8 @@ struct bch_subvolume {
         * This is _not_ necessarily the subvolume of the directory containing
         * this subvolume:
         */
-       __le32                  parent;
-       __le32                  pad;
+       __le32                  creation_parent;
+       __le32                  fs_path_parent;
        bch_le128               otime;
 };
 
index bd64eb68e84af4c6b7afed028a6bd7d9ae2cc5d6..bceac29f3d86272d884c8fa59ad8e7f7c8163318 100644
@@ -470,6 +470,14 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out,
                        return ret;
        }
 
+       if (rw == WRITE &&
+           bch2_sb_member_get(sb, sb->dev_idx).seq != sb->seq) {
+               prt_printf(out, "Invalid superblock: member seq %llu != sb seq %llu",
+                          le64_to_cpu(bch2_sb_member_get(sb, sb->dev_idx).seq),
+                          le64_to_cpu(sb->seq));
+               return -BCH_ERR_invalid_sb_members_missing;
+       }
+
        return 0;
 }
 
@@ -717,6 +725,7 @@ retry:
 
        if (IS_ERR(sb->s_bdev_file)) {
                ret = PTR_ERR(sb->s_bdev_file);
+               prt_printf(&err, "error opening %s: %s", path, bch2_err_str(ret));
                goto err;
        }
        sb->bdev = file_bdev(sb->s_bdev_file);
@@ -743,9 +752,9 @@ retry:
        prt_printf(&err2, "bcachefs (%s): error reading default superblock: %s\n",
               path, err.buf);
        if (ret == -BCH_ERR_invalid_sb_magic && ignore_notbchfs_msg)
-               printk(KERN_INFO "%s", err2.buf);
+               bch2_print_opts(opts, KERN_INFO "%s", err2.buf);
        else
-               printk(KERN_ERR "%s", err2.buf);
+               bch2_print_opts(opts, KERN_ERR "%s", err2.buf);
 
        printbuf_exit(&err2);
        printbuf_reset(&err);
@@ -803,21 +812,20 @@ got_super:
                goto err;
        }
 
-       ret = 0;
        sb->have_layout = true;
 
        ret = bch2_sb_validate(sb, &err, READ);
        if (ret) {
-               printk(KERN_ERR "bcachefs (%s): error validating superblock: %s\n",
-                      path, err.buf);
+               bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error validating superblock: %s\n",
+                               path, err.buf);
                goto err_no_print;
        }
 out:
        printbuf_exit(&err);
        return ret;
 err:
-       printk(KERN_ERR "bcachefs (%s): error reading superblock: %s\n",
-              path, err.buf);
+       bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error reading superblock: %s\n",
+                       path, err.buf);
 err_no_print:
        bch2_free_super(sb);
        goto out;
index 6b23e11825e6d47ef46c7f294add46fa455e6a8f..233f864ed8b07ff321b64993a821d08137b70ecd 100644
@@ -56,6 +56,7 @@
 #include "super.h"
 #include "super-io.h"
 #include "sysfs.h"
+#include "thread_with_file.h"
 #include "trace.h"
 
 #include <linux/backing-dev.h>
@@ -86,6 +87,23 @@ const char * const bch2_fs_flag_strs[] = {
        NULL
 };
 
+void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...)
+{
+       struct stdio_redirect *stdio = (void *)(unsigned long)opts->stdio;
+
+       va_list args;
+       va_start(args, fmt);
+       if (likely(!stdio)) {
+               vprintk(fmt, args);
+       } else {
+               if (fmt[0] == KERN_SOH[0])
+                       fmt += 2;
+
+               bch2_stdio_redirect_vprintf(stdio, true, fmt, args);
+       }
+       va_end(args);
+}
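+
+/*
+ * KERN_SOH is the "\001" byte that begins printk level prefixes such as
+ * KERN_INFO; when output is being redirected to a userspace buffer via
+ * thread_with_file rather than the kernel log, the two-byte
+ * "\001<level>" prefix is meaningless there and is skipped.
+ */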
+
 void __bch2_print(struct bch_fs *c, const char *fmt, ...)
 {
        struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c);
@@ -95,16 +113,10 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...)
        if (likely(!stdio)) {
                vprintk(fmt, args);
        } else {
-               unsigned long flags;
-
                if (fmt[0] == KERN_SOH[0])
                        fmt += 2;
 
-               spin_lock_irqsave(&stdio->output_lock, flags);
-               prt_vprintf(&stdio->output_buf, fmt, args);
-               spin_unlock_irqrestore(&stdio->output_lock, flags);
-
-               wake_up(&stdio->output_wait);
+               bch2_stdio_redirect_vprintf(stdio, true, fmt, args);
        }
        va_end(args);
 }
@@ -576,7 +588,7 @@ static void __bch2_fs_free(struct bch_fs *c)
                destroy_workqueue(c->btree_update_wq);
 
        bch2_free_super(&c->disk_sb);
-       kvpfree(c, sizeof(*c));
+       kvfree(c);
        module_put(THIS_MODULE);
 }
 
@@ -715,7 +727,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
        unsigned i, iter_size;
        int ret = 0;
 
-       c = kvpmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
+       c = kvmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO);
        if (!c) {
                c = ERR_PTR(-BCH_ERR_ENOMEM_fs_alloc);
                goto out;
@@ -818,13 +830,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                goto err;
 
        pr_uuid(&name, c->sb.user_uuid.b);
-       strscpy(c->name, name.buf, sizeof(c->name));
-       printbuf_exit(&name);
-
        ret = name.allocation_failure ? -BCH_ERR_ENOMEM_fs_name_alloc : 0;
        if (ret)
                goto err;
 
+       strscpy(c->name, name.buf, sizeof(c->name));
+       printbuf_exit(&name);
+
        /* Compat: */
        if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_inode_v2 &&
            !BCH_SB_JOURNAL_FLUSH_DELAY(sb))
@@ -862,13 +874,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
        c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus()));
 
        if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
-                               WQ_FREEZABLE|WQ_UNBOUND|WQ_MEM_RECLAIM, 512)) ||
+                               WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
            !(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
-                               WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
+                               WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
            !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
-                               WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
+                               WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
            !(c->io_complete_wq = alloc_workqueue("bcachefs_io",
-                               WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 512)) ||
+                               WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM, 512)) ||
            !(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
                                WQ_FREEZABLE, 0)) ||
 #ifndef BCH_WRITE_REF_DEBUG
@@ -882,8 +894,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
                        BIOSET_NEED_BVECS) ||
            !(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
            !(c->online_reserved = alloc_percpu(u64)) ||
-           mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
-                                       c->opts.btree_node_size) ||
+           mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1,
+                                      c->opts.btree_node_size) ||
            mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
            !(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
                                              sizeof(u64), GFP_KERNEL))) {
@@ -1061,7 +1073,8 @@ static int bch2_dev_may_add(struct bch_sb *sb, struct bch_fs *c)
 }
 
 static int bch2_dev_in_fs(struct bch_sb_handle *fs,
-                         struct bch_sb_handle *sb)
+                         struct bch_sb_handle *sb,
+                         struct bch_opts *opts)
 {
        if (fs == sb)
                return 0;
@@ -1102,11 +1115,14 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
                bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));
                prt_newline(&buf);
 
-               prt_printf(&buf, "Not using older sb");
+               if (!opts->no_splitbrain_check)
+                       prt_printf(&buf, "Not using older sb");
 
                pr_err("%s", buf.buf);
                printbuf_exit(&buf);
-               return -BCH_ERR_device_splitbrain;
+
+               if (!opts->no_splitbrain_check)
+                       return -BCH_ERR_device_splitbrain;
        }
 
        struct bch_member m = bch2_sb_member_get(fs->sb, sb->sb->dev_idx);
@@ -1124,17 +1140,22 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
                prt_newline(&buf);
 
                prt_bdevname(&buf, fs->bdev);
-               prt_str(&buf, "believes seq of ");
+               prt_str(&buf, " believes seq of ");
                prt_bdevname(&buf, sb->bdev);
                prt_printf(&buf, " to be %llu, but ", seq_from_fs);
                prt_bdevname(&buf, sb->bdev);
                prt_printf(&buf, " has %llu\n", seq_from_member);
-               prt_str(&buf, "Not using ");
-               prt_bdevname(&buf, sb->bdev);
+
+               if (!opts->no_splitbrain_check) {
+                       prt_str(&buf, "Not using ");
+                       prt_bdevname(&buf, sb->bdev);
+               }
 
                pr_err("%s", buf.buf);
                printbuf_exit(&buf);
-               return -BCH_ERR_device_splitbrain;
+
+               if (!opts->no_splitbrain_check)
+                       return -BCH_ERR_device_splitbrain;
        }
 
        return 0;
@@ -1168,8 +1189,8 @@ static void bch2_dev_free(struct bch_dev *ca)
        bch2_dev_buckets_free(ca);
        free_page((unsigned long) ca->sb_read_scratch);
 
-       bch2_time_stats_exit(&ca->io_latency[WRITE]);
-       bch2_time_stats_exit(&ca->io_latency[READ]);
+       bch2_time_stats_quantiles_exit(&ca->io_latency[WRITE]);
+       bch2_time_stats_quantiles_exit(&ca->io_latency[READ]);
 
        percpu_ref_exit(&ca->io_ref);
        percpu_ref_exit(&ca->ref);
@@ -1260,8 +1281,8 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
 
        INIT_WORK(&ca->io_error_work, bch2_io_error_work);
 
-       bch2_time_stats_init(&ca->io_latency[READ]);
-       bch2_time_stats_init(&ca->io_latency[WRITE]);
+       bch2_time_stats_quantiles_init(&ca->io_latency[READ]);
+       bch2_time_stats_quantiles_init(&ca->io_latency[WRITE]);
 
        ca->mi = bch2_mi_to_cpu(member);
 
@@ -1597,27 +1618,27 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
        __bch2_dev_read_only(c, ca);
 
        ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
-       bch_err_msg(ca, ret, "dropping data");
+       bch_err_msg(ca, ret, "bch2_dev_data_drop()");
        if (ret)
                goto err;
 
        ret = bch2_dev_remove_alloc(c, ca);
-       bch_err_msg(ca, ret, "deleting alloc info");
+       bch_err_msg(ca, ret, "bch2_dev_remove_alloc()");
        if (ret)
                goto err;
 
        ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
-       bch_err_msg(ca, ret, "flushing journal");
+       bch_err_msg(ca, ret, "bch2_journal_flush_device_pins()");
        if (ret)
                goto err;
 
        ret = bch2_journal_flush(&c->journal);
-       bch_err(ca, "journal error");
+       bch_err_msg(ca, ret, "bch2_journal_flush()");
        if (ret)
                goto err;
 
        ret = bch2_replicas_gc2(c);
-       bch_err_msg(ca, ret, "in replicas_gc2()");
+       bch_err_msg(ca, ret, "bch2_replicas_gc2()");
        if (ret)
                goto err;
 
@@ -1835,7 +1856,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
 
        dev_idx = sb.sb->dev_idx;
 
-       ret = bch2_dev_in_fs(&c->disk_sb, &sb);
+       ret = bch2_dev_in_fs(&c->disk_sb, &sb, &c->opts);
        bch_err_msg(c, ret, "bringing %s online", path);
        if (ret)
                goto err;
@@ -2023,7 +2044,7 @@ struct bch_fs *bch2_fs_open(char * const *devices, unsigned nr_devices,
                        best = sb;
 
        darray_for_each_reverse(sbs, sb) {
-               ret = bch2_dev_in_fs(best, sb);
+               ret = bch2_dev_in_fs(best, sb, &opts);
 
                if (ret == -BCH_ERR_device_has_been_removed ||
                    ret == -BCH_ERR_device_splitbrain) {
index cee80c47feea2b27fa7d18fc55a39228db7f0b96..c86a93a8d8fc81bbe373efcbec74f3e2563e6da5 100644
@@ -930,10 +930,10 @@ SHOW(bch2_dev)
        sysfs_print(io_latency_write,           atomic64_read(&ca->cur_latency[WRITE]));
 
        if (attr == &sysfs_io_latency_stats_read)
-               bch2_time_stats_to_text(out, &ca->io_latency[READ]);
+               bch2_time_stats_to_text(out, &ca->io_latency[READ].stats);
 
        if (attr == &sysfs_io_latency_stats_write)
-               bch2_time_stats_to_text(out, &ca->io_latency[WRITE]);
+               bch2_time_stats_to_text(out, &ca->io_latency[WRITE].stats);
 
        sysfs_printf(congested,                 "%u%%",
                     clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
index 9220d7de10db67f6cd4a36040af7fe557756230b..940db15d6a939bf93281627e9759904e4a6531f3 100644
@@ -2,7 +2,6 @@
 #ifndef NO_BCACHEFS_FS
 
 #include "bcachefs.h"
-#include "printbuf.h"
 #include "thread_with_file.h"
 
 #include <linux/anon_inodes.h>
@@ -10,6 +9,7 @@
 #include <linux/kthread.h>
 #include <linux/pagemap.h>
 #include <linux/poll.h>
+#include <linux/sched/sysctl.h>
 
 void bch2_thread_with_file_exit(struct thread_with_file *thr)
 {
@@ -65,68 +65,82 @@ err:
        return ret;
 }
 
-static inline bool thread_with_stdio_has_output(struct thread_with_stdio *thr)
+/* stdio_redirect */
+
+static bool stdio_redirect_has_input(struct stdio_redirect *stdio)
 {
-       return thr->stdio.output_buf.pos ||
-               thr->output2.nr ||
-               thr->thr.done;
+       return stdio->input.buf.nr || stdio->done;
 }
 
-static ssize_t thread_with_stdio_read(struct file *file, char __user *buf,
-                                     size_t len, loff_t *ppos)
+static bool stdio_redirect_has_output(struct stdio_redirect *stdio)
 {
-       struct thread_with_stdio *thr =
-               container_of(file->private_data, struct thread_with_stdio, thr);
-       size_t copied = 0, b;
-       int ret = 0;
+       return stdio->output.buf.nr || stdio->done;
+}
 
-       if ((file->f_flags & O_NONBLOCK) &&
-           !thread_with_stdio_has_output(thr))
-               return -EAGAIN;
+#define STDIO_REDIRECT_BUFSIZE         4096
 
-       ret = wait_event_interruptible(thr->stdio.output_wait,
-               thread_with_stdio_has_output(thr));
-       if (ret)
-               return ret;
+static bool stdio_redirect_has_input_space(struct stdio_redirect *stdio)
+{
+       return stdio->input.buf.nr < STDIO_REDIRECT_BUFSIZE || stdio->done;
+}
 
-       if (thr->thr.done)
-               return 0;
+static bool stdio_redirect_has_output_space(struct stdio_redirect *stdio)
+{
+       return stdio->output.buf.nr < STDIO_REDIRECT_BUFSIZE || stdio->done;
+}
 
-       while (len) {
-               ret = darray_make_room(&thr->output2, thr->stdio.output_buf.pos);
-               if (ret)
-                       break;
+static void stdio_buf_init(struct stdio_buf *buf)
+{
+       spin_lock_init(&buf->lock);
+       init_waitqueue_head(&buf->wait);
+       darray_init(&buf->buf);
+}
 
-               spin_lock_irq(&thr->stdio.output_lock);
-               b = min_t(size_t, darray_room(thr->output2), thr->stdio.output_buf.pos);
+/* thread_with_stdio */
 
-               memcpy(&darray_top(thr->output2), thr->stdio.output_buf.buf, b);
-               memmove(thr->stdio.output_buf.buf,
-                       thr->stdio.output_buf.buf + b,
-                       thr->stdio.output_buf.pos - b);
+static void thread_with_stdio_done(struct thread_with_stdio *thr)
+{
+       thr->thr.done = true;
+       thr->stdio.done = true;
+       wake_up(&thr->stdio.input.wait);
+       wake_up(&thr->stdio.output.wait);
+}
 
-               thr->output2.nr += b;
-               thr->stdio.output_buf.pos -= b;
-               spin_unlock_irq(&thr->stdio.output_lock);
+static ssize_t thread_with_stdio_read(struct file *file, char __user *ubuf,
+                                     size_t len, loff_t *ppos)
+{
+       struct thread_with_stdio *thr =
+               container_of(file->private_data, struct thread_with_stdio, thr);
+       struct stdio_buf *buf = &thr->stdio.output;
+       size_t copied = 0, b;
+       int ret = 0;
 
-               b = min(len, thr->output2.nr);
-               if (!b)
-                       break;
+       if (!(file->f_flags & O_NONBLOCK)) {
+               ret = wait_event_interruptible(buf->wait, stdio_redirect_has_output(&thr->stdio));
+               if (ret)
+                       return ret;
+       } else if (!stdio_redirect_has_output(&thr->stdio))
+               return -EAGAIN;
 
-               b -= copy_to_user(buf, thr->output2.data, b);
-               if (!b) {
+       while (len && buf->buf.nr) {
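+               /*
+                * prefault the userspace buffer outside the spinlock, so that
+                * copy_to_user_nofault() below can run with the lock held:
+                */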
+               if (fault_in_writeable(ubuf, len) == len) {
                        ret = -EFAULT;
                        break;
                }
 
-               copied  += b;
-               buf     += b;
-               len     -= b;
-
-               memmove(thr->output2.data,
-                       thr->output2.data + b,
-                       thr->output2.nr - b);
-               thr->output2.nr -= b;
+               spin_lock_irq(&buf->lock);
+               b = min_t(size_t, len, buf->buf.nr);
+
+               if (b && !copy_to_user_nofault(ubuf, buf->buf.data, b)) {
+                       ubuf    += b;
+                       len     -= b;
+                       copied  += b;
+                       buf->buf.nr -= b;
+                       memmove(buf->buf.data,
+                               buf->buf.data + b,
+                               buf->buf.nr);
+               }
+               spin_unlock_irq(&buf->lock);
        }
 
        return copied ?: ret;
@@ -137,27 +151,20 @@ static int thread_with_stdio_release(struct inode *inode, struct file *file)
        struct thread_with_stdio *thr =
                container_of(file->private_data, struct thread_with_stdio, thr);
 
+       thread_with_stdio_done(thr);
        bch2_thread_with_file_exit(&thr->thr);
-       printbuf_exit(&thr->stdio.input_buf);
-       printbuf_exit(&thr->stdio.output_buf);
-       darray_exit(&thr->output2);
-       thr->exit(thr);
+       darray_exit(&thr->stdio.input.buf);
+       darray_exit(&thr->stdio.output.buf);
+       thr->ops->exit(thr);
        return 0;
 }
 
-#define WRITE_BUFFER           4096
-
-static inline bool thread_with_stdio_has_input_space(struct thread_with_stdio *thr)
-{
-       return thr->stdio.input_buf.pos < WRITE_BUFFER || thr->thr.done;
-}
-
 static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubuf,
                                       size_t len, loff_t *ppos)
 {
        struct thread_with_stdio *thr =
                container_of(file->private_data, struct thread_with_stdio, thr);
-       struct printbuf *buf = &thr->stdio.input_buf;
+       struct stdio_buf *buf = &thr->stdio.input;
        size_t copied = 0;
        ssize_t ret = 0;
 
@@ -173,29 +180,30 @@ static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubu
                        break;
                }
 
-               spin_lock(&thr->stdio.input_lock);
-               if (buf->pos < WRITE_BUFFER)
-                       bch2_printbuf_make_room(buf, min(b, WRITE_BUFFER - buf->pos));
-               b = min(len, printbuf_remaining_size(buf));
-
-               if (b && !copy_from_user_nofault(&buf->buf[buf->pos], ubuf, b)) {
-                       ubuf += b;
-                       len -= b;
-                       copied += b;
-                       buf->pos += b;
+               spin_lock(&buf->lock);
+               if (buf->buf.nr < STDIO_REDIRECT_BUFSIZE)
+                       darray_make_room_gfp(&buf->buf,
+                               min(b, STDIO_REDIRECT_BUFSIZE - buf->buf.nr), GFP_NOWAIT);
+               b = min(len, darray_room(buf->buf));
+
+               if (b && !copy_from_user_nofault(&darray_top(buf->buf), ubuf, b)) {
+                       buf->buf.nr += b;
+                       ubuf    += b;
+                       len     -= b;
+                       copied  += b;
                }
-               spin_unlock(&thr->stdio.input_lock);
+               spin_unlock(&buf->lock);
 
                if (b) {
-                       wake_up(&thr->stdio.input_wait);
+                       wake_up(&buf->wait);
                } else {
                        if ((file->f_flags & O_NONBLOCK)) {
                                ret = -EAGAIN;
                                break;
                        }
 
-                       ret = wait_event_interruptible(thr->stdio.input_wait,
-                                       thread_with_stdio_has_input_space(thr));
+                       ret = wait_event_interruptible(buf->wait,
+                                       stdio_redirect_has_input_space(&thr->stdio));
                        if (ret)
                                break;
                }
@@ -209,90 +217,233 @@ static __poll_t thread_with_stdio_poll(struct file *file, struct poll_table_stru
        struct thread_with_stdio *thr =
                container_of(file->private_data, struct thread_with_stdio, thr);
 
-       poll_wait(file, &thr->stdio.output_wait, wait);
-       poll_wait(file, &thr->stdio.input_wait, wait);
+       poll_wait(file, &thr->stdio.output.wait, wait);
+       poll_wait(file, &thr->stdio.input.wait, wait);
 
        __poll_t mask = 0;
 
-       if (thread_with_stdio_has_output(thr))
+       if (stdio_redirect_has_output(&thr->stdio))
                mask |= EPOLLIN;
-       if (thread_with_stdio_has_input_space(thr))
+       if (stdio_redirect_has_input_space(&thr->stdio))
                mask |= EPOLLOUT;
        if (thr->thr.done)
                mask |= EPOLLHUP|EPOLLERR;
        return mask;
 }
 
+static __poll_t thread_with_stdout_poll(struct file *file, struct poll_table_struct *wait)
+{
+       struct thread_with_stdio *thr =
+               container_of(file->private_data, struct thread_with_stdio, thr);
+
+       poll_wait(file, &thr->stdio.output.wait, wait);
+
+       __poll_t mask = 0;
+
+       if (stdio_redirect_has_output(&thr->stdio))
+               mask |= EPOLLIN;
+       if (thr->thr.done)
+               mask |= EPOLLHUP|EPOLLERR;
+       return mask;
+}
+
+static int thread_with_stdio_flush(struct file *file, fl_owner_t id)
+{
+       struct thread_with_stdio *thr =
+               container_of(file->private_data, struct thread_with_stdio, thr);
+
+       return thr->thr.ret;
+}
+
+static long thread_with_stdio_ioctl(struct file *file, unsigned int cmd, unsigned long p)
+{
+       struct thread_with_stdio *thr =
+               container_of(file->private_data, struct thread_with_stdio, thr);
+
+       if (thr->ops->unlocked_ioctl)
+               return thr->ops->unlocked_ioctl(thr, cmd, p);
+       return -ENOTTY;
+}
+
 static const struct file_operations thread_with_stdio_fops = {
-       .release        = thread_with_stdio_release,
+       .llseek         = no_llseek,
        .read           = thread_with_stdio_read,
        .write          = thread_with_stdio_write,
        .poll           = thread_with_stdio_poll,
+       .flush          = thread_with_stdio_flush,
+       .release        = thread_with_stdio_release,
+       .unlocked_ioctl = thread_with_stdio_ioctl,
+};
+
+static const struct file_operations thread_with_stdout_fops = {
        .llseek         = no_llseek,
+       .read           = thread_with_stdio_read,
+       .poll           = thread_with_stdout_poll,
+       .flush          = thread_with_stdio_flush,
+       .release        = thread_with_stdio_release,
+       .unlocked_ioctl = thread_with_stdio_ioctl,
 };
 
+static int thread_with_stdio_fn(void *arg)
+{
+       struct thread_with_stdio *thr = arg;
+
+       thr->thr.ret = thr->ops->fn(thr);
+
+       thread_with_stdio_done(thr);
+       return 0;
+}
+
 int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
-                              void (*exit)(struct thread_with_stdio *),
-                              int (*fn)(void *))
+                              const struct thread_with_stdio_ops *ops)
 {
-       thr->stdio.input_buf = PRINTBUF;
-       thr->stdio.input_buf.atomic++;
-       spin_lock_init(&thr->stdio.input_lock);
-       init_waitqueue_head(&thr->stdio.input_wait);
+       stdio_buf_init(&thr->stdio.input);
+       stdio_buf_init(&thr->stdio.output);
+       thr->ops = ops;
 
-       thr->stdio.output_buf = PRINTBUF;
-       thr->stdio.output_buf.atomic++;
-       spin_lock_init(&thr->stdio.output_lock);
-       init_waitqueue_head(&thr->stdio.output_wait);
+       return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, thread_with_stdio_fn);
+}
 
-       darray_init(&thr->output2);
-       thr->exit = exit;
+int bch2_run_thread_with_stdout(struct thread_with_stdio *thr,
+                               const struct thread_with_stdio_ops *ops)
+{
+       stdio_buf_init(&thr->stdio.input);
+       stdio_buf_init(&thr->stdio.output);
+       thr->ops = ops;
 
-       return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, fn);
+       return bch2_run_thread_with_file(&thr->thr, &thread_with_stdout_fops, thread_with_stdio_fn);
 }
+EXPORT_SYMBOL_GPL(bch2_run_thread_with_stdout);
 
-int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *buf, size_t len)
+int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *ubuf, size_t len)
 {
-       wait_event(stdio->input_wait,
-                  stdio->input_buf.pos || stdio->done);
+       struct stdio_buf *buf = &stdio->input;
+
+       /*
+        * we're waiting on user input (or for the file descriptor to be
+        * closed), don't want a hung task warning:
+        */
+       do {
+               wait_event_timeout(buf->wait, stdio_redirect_has_input(stdio),
+                                  sysctl_hung_task_timeout_secs * HZ / 2);
+       } while (!stdio_redirect_has_input(stdio));
 
        if (stdio->done)
                return -1;
 
-       spin_lock(&stdio->input_lock);
-       int ret = min(len, stdio->input_buf.pos);
-       stdio->input_buf.pos -= ret;
-       memcpy(buf, stdio->input_buf.buf, ret);
-       memmove(stdio->input_buf.buf,
-               stdio->input_buf.buf + ret,
-               stdio->input_buf.pos);
-       spin_unlock(&stdio->input_lock);
+       spin_lock(&buf->lock);
+       int ret = min(len, buf->buf.nr);
+       buf->buf.nr -= ret;
+       memcpy(ubuf, buf->buf.data, ret);
+       memmove(buf->buf.data,
+               buf->buf.data + ret,
+               buf->buf.nr);
+       spin_unlock(&buf->lock);
 
-       wake_up(&stdio->input_wait);
+       wake_up(&buf->wait);
        return ret;
 }
 
-int bch2_stdio_redirect_readline(struct stdio_redirect *stdio, char *buf, size_t len)
+int bch2_stdio_redirect_readline(struct stdio_redirect *stdio, char *ubuf, size_t len)
 {
-       wait_event(stdio->input_wait,
-                  stdio->input_buf.pos || stdio->done);
-
-       if (stdio->done)
-               return -1;
+       struct stdio_buf *buf = &stdio->input;
+       size_t copied = 0;
+       ssize_t ret = 0;
+again:
+       do {
+               wait_event_timeout(buf->wait, stdio_redirect_has_input(stdio),
+                                  sysctl_hung_task_timeout_secs * HZ / 2);
+       } while (!stdio_redirect_has_input(stdio));
+
+       if (stdio->done) {
+               ret = -1;
+               goto out;
+       }
 
-       spin_lock(&stdio->input_lock);
-       int ret = min(len, stdio->input_buf.pos);
-       char *n = memchr(stdio->input_buf.buf, '\n', ret);
+       spin_lock(&buf->lock);
+       size_t b = min(len, buf->buf.nr);
+       char *n = memchr(buf->buf.data, '\n', b);
        if (n)
-               ret = min(ret, n + 1 - stdio->input_buf.buf);
-       stdio->input_buf.pos -= ret;
-       memcpy(buf, stdio->input_buf.buf, ret);
-       memmove(stdio->input_buf.buf,
-               stdio->input_buf.buf + ret,
-               stdio->input_buf.pos);
-       spin_unlock(&stdio->input_lock);
-
-       wake_up(&stdio->input_wait);
+               b = min_t(size_t, b, n + 1 - buf->buf.data);
+       buf->buf.nr -= b;
+       memcpy(ubuf, buf->buf.data, b);
+       memmove(buf->buf.data,
+               buf->buf.data + b,
+               buf->buf.nr);
+       ubuf += b;
+       len -= b;
+       copied += b;
+       spin_unlock(&buf->lock);
+
+       wake_up(&buf->wait);
+
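+       /* no newline yet, and the caller still has room: wait for more input */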
+       if (!n && len)
+               goto again;
+out:
+       return copied ?: ret;
+}
+
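+/*
+ * vsnprintf() into a darray, growing it until the formatted output (plus
+ * terminating nul) fits:
+ */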
+__printf(3, 0)
+static ssize_t bch2_darray_vprintf(darray_char *out, gfp_t gfp, const char *fmt, va_list args)
+{
+       ssize_t ret;
+
+       do {
+               va_list args2;
+               size_t len;
+
+               va_copy(args2, args);
+               len = vsnprintf(out->data + out->nr, darray_room(*out), fmt, args2);
+               va_end(args2);
+
+               if (len + 1 <= darray_room(*out)) {
+                       out->nr += len;
+                       return len;
+               }
+
+               ret = darray_make_room_gfp(out, len + 1, gfp);
+       } while (ret == 0);
+
+       return ret;
+}
+
+ssize_t bch2_stdio_redirect_vprintf(struct stdio_redirect *stdio, bool nonblocking,
+                                   const char *fmt, va_list args)
+{
+       struct stdio_buf *buf = &stdio->output;
+       unsigned long flags;
+       ssize_t ret;
+
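+       /*
+        * the buffer is grown with GFP_NOWAIT under a spinlock; on allocation
+        * failure, either bail out (nonblocking) or wait for the reader to
+        * drain the buffer and retry:
+        */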
+again:
+       spin_lock_irqsave(&buf->lock, flags);
+       ret = bch2_darray_vprintf(&buf->buf, GFP_NOWAIT, fmt, args);
+       spin_unlock_irqrestore(&buf->lock, flags);
+
+       if (ret < 0) {
+               if (nonblocking)
+                       return -EAGAIN;
+
+               ret = wait_event_interruptible(buf->wait,
+                               stdio_redirect_has_output_space(stdio));
+               if (ret)
+                       return ret;
+               goto again;
+       }
+
+       wake_up(&buf->wait);
+       return ret;
+}
+
+ssize_t bch2_stdio_redirect_printf(struct stdio_redirect *stdio, bool nonblocking,
+                               const char *fmt, ...)
+{
+       va_list args;
+       ssize_t ret;
+
+       va_start(args, fmt);
+       ret = bch2_stdio_redirect_vprintf(stdio, nonblocking, fmt, args);
+       va_end(args);
+
        return ret;
 }
 
index 05879c5048c875b9df186a4cfb6a5866ddb36428..af54ea8f5b0ff85871c915e275187c29b7b0c6f1 100644
@@ -4,6 +4,38 @@
 
 #include "thread_with_file_types.h"
 
+/*
+ * Thread with file: Run a kthread and connect it to a file descriptor, so that
+ * it can be interacted with via fd read/write methods and closing the file
+ * descriptor stops the kthread.
+ *
+ * We have two different APIs:
+ *
+ * thread_with_file, the low level version.
+ * You get to define the full file_operations, including your release function,
+ * which means that you must call bch2_thread_with_file_exit() from your
+ * .release method
+ *
+ * thread_with_stdio, the higher level version
+ * This implements full piping of input and output, including .poll.
+ *
+ * Notes on behaviour:
+ *  - kthread shutdown behaves like writing to or reading from a pipe that has
+ *    been closed
+ *  - Input and output buffers are 4096 bytes, although buffers may in some
+ *    situations slightly exceed that limit so as to avoid chopping off a
+ *    message in the middle in nonblocking mode.
+ *  - Input/output buffers are lazily allocated, with GFP_NOWAIT allocations -
+ *    should be fine but might change in future revisions.
+ *  - Output buffer may grow past 4096 bytes to deal with messages that are
+ *    bigger than 4096 bytes
+ *  - Writing may be done blocking or nonblocking; in nonblocking mode, we only
+ *    drop entire messages.
+ *
+ * To write, use bch2_stdio_redirect_printf()
+ * To read, use bch2_stdio_redirect_read() or bch2_stdio_redirect_readline()
+ */
+
 struct task_struct;
 
 struct thread_with_file {
@@ -17,25 +49,28 @@ int bch2_run_thread_with_file(struct thread_with_file *,
                              const struct file_operations *,
                              int (*fn)(void *));
 
+struct thread_with_stdio;
+
+struct thread_with_stdio_ops {
+       void (*exit)(struct thread_with_stdio *);
+       int (*fn)(struct thread_with_stdio *);
+       long (*unlocked_ioctl)(struct thread_with_stdio *, unsigned int, unsigned long);
+};
+
 struct thread_with_stdio {
        struct thread_with_file thr;
        struct stdio_redirect   stdio;
-       DARRAY(char)            output2;
-       void                    (*exit)(struct thread_with_stdio *);
+       const struct thread_with_stdio_ops      *ops;
 };
 
-static inline void thread_with_stdio_done(struct thread_with_stdio *thr)
-{
-       thr->thr.done = true;
-       thr->stdio.done = true;
-       wake_up(&thr->stdio.input_wait);
-       wake_up(&thr->stdio.output_wait);
-}
-
 int bch2_run_thread_with_stdio(struct thread_with_stdio *,
-                              void (*exit)(struct thread_with_stdio *),
-                              int (*fn)(void *));
+                              const struct thread_with_stdio_ops *);
+int bch2_run_thread_with_stdout(struct thread_with_stdio *,
+                               const struct thread_with_stdio_ops *);
 int bch2_stdio_redirect_read(struct stdio_redirect *, char *, size_t);
 int bch2_stdio_redirect_readline(struct stdio_redirect *, char *, size_t);
 
+__printf(3, 0) ssize_t bch2_stdio_redirect_vprintf(struct stdio_redirect *, bool, const char *, va_list);
+__printf(3, 4) ssize_t bch2_stdio_redirect_printf(struct stdio_redirect *, bool, const char *, ...);
+
 #endif /* _BCACHEFS_THREAD_WITH_FILE_H */
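
For orientation, a minimal sketch of how a caller drives the new ops-based
interface (my_echo, my_echo_fn and my_echo_exit are hypothetical names, not
part of this patch; only the bch2_* calls and struct layouts come from the
header above):

struct my_echo {
	struct thread_with_stdio	thr;
};

static int my_echo_fn(struct thread_with_stdio *thr)
{
	char line[80];
	int n;

	/* readline returns -1 once the fd is closed and input is drained: */
	while ((n = bch2_stdio_redirect_readline(&thr->stdio, line,
						 sizeof(line) - 1)) >= 0) {
		line[n] = '\0';
		bch2_stdio_redirect_printf(&thr->stdio, false, "echo: %s", line);
	}
	return 0;
}

static void my_echo_exit(struct thread_with_stdio *thr)
{
	kfree(container_of(thr, struct my_echo, thr));
}

static const struct thread_with_stdio_ops my_echo_ops = {
	.exit	= my_echo_exit,
	.fn	= my_echo_fn,
};

Passing &echo->thr and &my_echo_ops to bch2_run_thread_with_stdio() then spawns
the kthread and returns a file descriptor wired to the stdio buffers, which is
how the fsck ioctls use it.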
index 90b5e645e98ce5352acf8fa36f21d526fd5bc180..e0daf4eec341e04a0924343fb58b9186eb845026 100644 (file)
@@ -2,14 +2,21 @@
 #ifndef _BCACHEFS_THREAD_WITH_FILE_TYPES_H
 #define _BCACHEFS_THREAD_WITH_FILE_TYPES_H
 
+#include "darray.h"
+
+struct stdio_buf {
+       spinlock_t              lock;
+       wait_queue_head_t       wait;
+       darray_char             buf;
+};
+
 struct stdio_redirect {
-       spinlock_t              output_lock;
-       wait_queue_head_t       output_wait;
-       struct printbuf         output_buf;
+       struct stdio_buf        input;
+       struct stdio_buf        output;
 
        spinlock_t              input_lock;
        wait_queue_head_t       input_wait;
-       struct printbuf         input_buf;
+       darray_char             input_buf;
        bool                    done;
 };
 
diff --git a/fs/bcachefs/time_stats.c b/fs/bcachefs/time_stats.c
new file mode 100644
index 0000000..4508e9d
--- /dev/null
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/time.h>
+#include <linux/spinlock.h>
+
+#include "eytzinger.h"
+#include "time_stats.h"
+
+static const struct time_unit time_units[] = {
+       { "ns",         1                },
+       { "us",         NSEC_PER_USEC    },
+       { "ms",         NSEC_PER_MSEC    },
+       { "s",          NSEC_PER_SEC     },
+       { "m",          (u64) NSEC_PER_SEC * 60},
+       { "h",          (u64) NSEC_PER_SEC * 3600},
+       { "d",          (u64) NSEC_PER_SEC * 3600 * 24},
+       { "w",          (u64) NSEC_PER_SEC * 3600 * 24 * 7},
+       { "y",          (u64) NSEC_PER_SEC * ((3600 * 24 * 7 * 365) + (3600 * (24 / 4) * 7))}, /* 365.25d */
+       { "eon",        U64_MAX          },
+};
+
+const struct time_unit *bch2_pick_time_units(u64 ns)
+{
+       const struct time_unit *u;
+
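+       /* pick the largest unit the value is at least two of: */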
+       for (u = time_units;
+            u + 1 < time_units + ARRAY_SIZE(time_units) &&
+            ns >= u[1].nsecs << 1;
+            u++)
+               ;
+
+       return u;
+}
+
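+/*
+ * online quantile estimation: walk the eytzinger-ordered array of quantile
+ * entries, nudging each visited estimate towards the new sample by a step
+ * that shrinks as the estimate converges:
+ */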
+static void quantiles_update(struct quantiles *q, u64 v)
+{
+       unsigned i = 0;
+
+       while (i < ARRAY_SIZE(q->entries)) {
+               struct quantile_entry *e = q->entries + i;
+
+               if (unlikely(!e->step)) {
+                       e->m = v;
+                       e->step = max_t(unsigned, v / 2, 1024);
+               } else if (e->m > v) {
+                       e->m = e->m >= e->step
+                               ? e->m - e->step
+                               : 0;
+               } else if (e->m < v) {
+                       e->m = e->m + e->step > e->m
+                               ? e->m + e->step
+                               : U32_MAX;
+               }
+
+               if ((e->m > v ? e->m - v : v - e->m) < e->step)
+                       e->step = max_t(unsigned, e->step / 2, 1);
+
+               if (v >= e->m)
+                       break;
+
+               i = eytzinger0_child(i, v > e->m);
+       }
+}
+
+static inline void time_stats_update_one(struct bch2_time_stats *stats,
+                                             u64 start, u64 end)
+{
+       u64 duration, freq;
+       bool initted = stats->last_event != 0;
+
+       if (time_after64(end, start)) {
+               struct quantiles *quantiles = time_stats_to_quantiles(stats);
+
+               duration = end - start;
+               mean_and_variance_update(&stats->duration_stats, duration);
+               mean_and_variance_weighted_update(&stats->duration_stats_weighted,
+                               duration, initted, TIME_STATS_MV_WEIGHT);
+               stats->max_duration = max(stats->max_duration, duration);
+               stats->min_duration = min(stats->min_duration, duration);
+               stats->total_duration += duration;
+
+               if (quantiles)
+                       quantiles_update(quantiles, duration);
+       }
+
+       if (stats->last_event && time_after64(end, stats->last_event)) {
+               freq = end - stats->last_event;
+               mean_and_variance_update(&stats->freq_stats, freq);
+               mean_and_variance_weighted_update(&stats->freq_stats_weighted,
+                               freq, initted, TIME_STATS_MV_WEIGHT);
+               stats->max_freq = max(stats->max_freq, freq);
+               stats->min_freq = min(stats->min_freq, freq);
+       }
+
+       stats->last_event = end;
+}
+
+void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
+                                   struct time_stat_buffer *b)
+{
+       for (struct time_stat_buffer_entry *i = b->entries;
+            i < b->entries + ARRAY_SIZE(b->entries);
+            i++)
+               time_stats_update_one(stats, i->start, i->end);
+       b->nr = 0;
+}
+
+static noinline void time_stats_clear_buffer(struct bch2_time_stats *stats,
+                                            struct time_stat_buffer *b)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&stats->lock, flags);
+       __bch2_time_stats_clear_buffer(stats, b);
+       spin_unlock_irqrestore(&stats->lock, flags);
+}
+
+void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
+{
+       unsigned long flags;
+
+       if (!stats->buffer) {
+               spin_lock_irqsave(&stats->lock, flags);
+               time_stats_update_one(stats, start, end);
+
+               if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT) < 32 &&
+                   stats->duration_stats.n > 1024)
+                       stats->buffer =
+                               alloc_percpu_gfp(struct time_stat_buffer,
+                                                GFP_ATOMIC);
+               spin_unlock_irqrestore(&stats->lock, flags);
+       } else {
+               struct time_stat_buffer *b;
+
+               preempt_disable();
+               b = this_cpu_ptr(stats->buffer);
+
+               BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
+               b->entries[b->nr++] = (struct time_stat_buffer_entry) {
+                       .start = start,
+                       .end = end
+               };
+
+               if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
+                       time_stats_clear_buffer(stats, b);
+               preempt_enable();
+       }
+}
+
+void bch2_time_stats_exit(struct bch2_time_stats *stats)
+{
+       free_percpu(stats->buffer);
+}
+
+void bch2_time_stats_init(struct bch2_time_stats *stats)
+{
+       memset(stats, 0, sizeof(*stats));
+       stats->min_duration = U64_MAX;
+       stats->min_freq = U64_MAX;
+       spin_lock_init(&stats->lock);
+}
diff --git a/fs/bcachefs/time_stats.h b/fs/bcachefs/time_stats.h
new file mode 100644
index 0000000..5df6140
--- /dev/null
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * bch2_time_stats - collect statistics on events that have a duration, with nicely
+ * formatted textual output on demand
+ *
+ * - percpu buffering of event collection: cheap enough to shotgun
+ *   everywhere without worrying about overhead
+ *
+ * tracks:
+ *  - number of events
+ *  - maximum event duration ever seen
+ *  - sum of all event durations
+ *  - average event duration, standard and weighted
+ *  - standard deviation of event durations, standard and weighted
+ * and analogous statistics for the frequency of events
+ *
+ * We provide both mean and weighted mean (exponentially weighted), and standard
+ * deviation and weighted standard deviation, to give an efficient-to-compute
+ * view of current behaviour versus average behaviour - "did this event source
+ * just become wonky, or is this typical?".
+ *
+ * Particularly useful for tracking down latency issues.
+ */
+#ifndef _BCACHEFS_TIME_STATS_H
+#define _BCACHEFS_TIME_STATS_H
+
+#include <linux/sched/clock.h>
+#include <linux/spinlock_types.h>
+#include <linux/string.h>
+
+#include "mean_and_variance.h"
+
+struct time_unit {
+       const char      *name;
+       u64             nsecs;
+};
+
+/*
+ * given a nanosecond value, pick the preferred time units for printing:
+ */
+const struct time_unit *bch2_pick_time_units(u64 ns);
+
+/*
+ * quantiles - do not use:
+ *
+ * Only enabled if bch2_time_stats->have_quantiles has been set (via
+ * bch2_time_stats_quantiles_init()) - don't use in new code.
+ */
+
+#define NR_QUANTILES   15
+#define QUANTILE_IDX(i)        inorder_to_eytzinger0(i, NR_QUANTILES)
+#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
+#define QUANTILE_LAST  eytzinger0_last(NR_QUANTILES)
+
+struct quantiles {
+       struct quantile_entry {
+               u64     m;
+               u64     step;
+       }               entries[NR_QUANTILES];
+};
+
+struct time_stat_buffer {
+       unsigned        nr;
+       struct time_stat_buffer_entry {
+               u64     start;
+               u64     end;
+       }               entries[31];
+};
+
+struct bch2_time_stats {
+       spinlock_t      lock;
+       bool            have_quantiles;
+       /* all fields are in nanoseconds */
+       u64             min_duration;
+       u64             max_duration;
+       u64             total_duration;
+       u64             max_freq;
+       u64             min_freq;
+       u64             last_event;
+       u64             last_event_start;
+
+       struct mean_and_variance          duration_stats;
+       struct mean_and_variance          freq_stats;
+
+/* default weight for weighted mean and variance calculations */
+#define TIME_STATS_MV_WEIGHT   8
+
+       struct mean_and_variance_weighted duration_stats_weighted;
+       struct mean_and_variance_weighted freq_stats_weighted;
+       struct time_stat_buffer __percpu *buffer;
+};
+
+struct bch2_time_stats_quantiles {
+       struct bch2_time_stats  stats;
+       struct quantiles        quantiles;
+};
+
+static inline struct quantiles *time_stats_to_quantiles(struct bch2_time_stats *stats)
+{
+       return stats->have_quantiles
+               ? &container_of(stats, struct bch2_time_stats_quantiles, stats)->quantiles
+               : NULL;
+}
+
+void __bch2_time_stats_clear_buffer(struct bch2_time_stats *, struct time_stat_buffer *);
+void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64);
+
+/**
+ * bch2_time_stats_update - collect a new event being tracked
+ *
+ * @stats      - bch2_time_stats to update
+ * @start      - start time of event, recorded with local_clock()
+ *
+ * The end time of the event is taken to be the current local_clock()
+ */
+static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start)
+{
+       __bch2_time_stats_update(stats, start, local_clock());
+}
+
+/**
+ * track_event_change - track state change events
+ *
+ * @stats      - bch2_time_stats to update
+ * @v          - new state, true or false
+ *
+ * Use this when tracking time stats for state changes, i.e. resource X becoming
+ * blocked/unblocked.
+ */
+static inline bool track_event_change(struct bch2_time_stats *stats, bool v)
+{
+       if (v != !!stats->last_event_start) {
+               if (!v) {
+                       bch2_time_stats_update(stats, stats->last_event_start);
+                       stats->last_event_start = 0;
+               } else {
+                       stats->last_event_start = local_clock() ?: 1;
+                       return true;
+               }
+       }
+
+       return false;
+}
+
+void bch2_time_stats_exit(struct bch2_time_stats *);
+void bch2_time_stats_init(struct bch2_time_stats *);
+
+static inline void bch2_time_stats_quantiles_exit(struct bch2_time_stats_quantiles *statq)
+{
+       bch2_time_stats_exit(&statq->stats);
+}
+static inline void bch2_time_stats_quantiles_init(struct bch2_time_stats_quantiles *statq)
+{
+       bch2_time_stats_init(&statq->stats);
+       statq->stats.have_quantiles = true;
+       memset(&statq->quantiles, 0, sizeof(statq->quantiles));
+}
+
+#endif /* _BCACHEFS_TIME_STATS_H */
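
As a usage sketch (do_work() is a stand-in for the event being measured; the
rest is the API declared above):

	struct bch2_time_stats stats;

	bch2_time_stats_init(&stats);

	u64 start = local_clock();
	do_work();
	bch2_time_stats_update(&stats, start);	/* end time: local_clock() */

	/* format with bch2_time_stats_to_text(), then tear down: */
	bch2_time_stats_exit(&stats);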
index 293b90d704fb5b48ed39038e793c4d3cbf77b5a8..6aa81d1e6d36a42aa9fbace040ec1fb2c92aedb6 100644
@@ -1431,6 +1431,25 @@ DEFINE_EVENT(fs_str, data_update,
        TP_ARGS(c, str)
 );
 
+TRACE_EVENT(error_downcast,
+       TP_PROTO(int bch_err, int std_err, unsigned long ip),
+       TP_ARGS(bch_err, std_err, ip),
+
+       TP_STRUCT__entry(
+               __array(char,           bch_err, 32             )
+               __array(char,           std_err, 32             )
+               __array(char,           ip, 32                  )
+       ),
+
+       TP_fast_assign(
+               strscpy(__entry->bch_err, bch2_err_str(bch_err), sizeof(__entry->bch_err));
+               strscpy(__entry->std_err, bch2_err_str(std_err), sizeof(__entry->std_err));
+               snprintf(__entry->ip, sizeof(__entry->ip), "%ps", (void *) ip);
+       ),
+
+       TP_printk("%s -> %s %s", __entry->bch_err, __entry->std_err, __entry->ip)
+);
+
 #endif /* _TRACE_BCACHEFS_H */
 
 /* This part must be outside protection */
index 3a32faa86b5c4a2eee98de32951c18dc73052041..216fadf16928b9a73eb47da96a8a7b409657e8fe 100644
@@ -337,157 +337,16 @@ void bch2_prt_datetime(struct printbuf *out, time64_t sec)
 }
 #endif
 
-static const struct time_unit {
-       const char      *name;
-       u64             nsecs;
-} time_units[] = {
-       { "ns",         1                },
-       { "us",         NSEC_PER_USEC    },
-       { "ms",         NSEC_PER_MSEC    },
-       { "s",          NSEC_PER_SEC     },
-       { "m",          (u64) NSEC_PER_SEC * 60},
-       { "h",          (u64) NSEC_PER_SEC * 3600},
-       { "eon",        U64_MAX          },
-};
-
-static const struct time_unit *pick_time_units(u64 ns)
-{
-       const struct time_unit *u;
-
-       for (u = time_units;
-            u + 1 < time_units + ARRAY_SIZE(time_units) &&
-            ns >= u[1].nsecs << 1;
-            u++)
-               ;
-
-       return u;
-}
-
 void bch2_pr_time_units(struct printbuf *out, u64 ns)
 {
-       const struct time_unit *u = pick_time_units(ns);
+       const struct time_unit *u = bch2_pick_time_units(ns);
 
        prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
 }
 
-/* time stats: */
-
-#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v)
-{
-       unsigned i = 0;
-
-       while (i < ARRAY_SIZE(q->entries)) {
-               struct bch2_quantile_entry *e = q->entries + i;
-
-               if (unlikely(!e->step)) {
-                       e->m = v;
-                       e->step = max_t(unsigned, v / 2, 1024);
-               } else if (e->m > v) {
-                       e->m = e->m >= e->step
-                               ? e->m - e->step
-                               : 0;
-               } else if (e->m < v) {
-                       e->m = e->m + e->step > e->m
-                               ? e->m + e->step
-                               : U32_MAX;
-               }
-
-               if ((e->m > v ? e->m - v : v - e->m) < e->step)
-                       e->step = max_t(unsigned, e->step / 2, 1);
-
-               if (v >= e->m)
-                       break;
-
-               i = eytzinger0_child(i, v > e->m);
-       }
-}
-
-static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
-                                             u64 start, u64 end)
-{
-       u64 duration, freq;
-
-       if (time_after64(end, start)) {
-               duration = end - start;
-               mean_and_variance_update(&stats->duration_stats, duration);
-               mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration);
-               stats->max_duration = max(stats->max_duration, duration);
-               stats->min_duration = min(stats->min_duration, duration);
-               stats->total_duration += duration;
-               bch2_quantiles_update(&stats->quantiles, duration);
-       }
-
-       if (stats->last_event && time_after64(end, stats->last_event)) {
-               freq = end - stats->last_event;
-               mean_and_variance_update(&stats->freq_stats, freq);
-               mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
-               stats->max_freq = max(stats->max_freq, freq);
-               stats->min_freq = min(stats->min_freq, freq);
-       }
-
-       stats->last_event = end;
-}
-
-static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
-                                          struct bch2_time_stat_buffer *b)
-{
-       for (struct bch2_time_stat_buffer_entry *i = b->entries;
-            i < b->entries + ARRAY_SIZE(b->entries);
-            i++)
-               bch2_time_stats_update_one(stats, i->start, i->end);
-       b->nr = 0;
-}
-
-static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
-                                                 struct bch2_time_stat_buffer *b)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&stats->lock, flags);
-       __bch2_time_stats_clear_buffer(stats, b);
-       spin_unlock_irqrestore(&stats->lock, flags);
-}
-
-void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
-{
-       unsigned long flags;
-
-       WARN_ONCE(!stats->duration_stats_weighted.weight ||
-                 !stats->freq_stats_weighted.weight,
-                 "uninitialized time_stats");
-
-       if (!stats->buffer) {
-               spin_lock_irqsave(&stats->lock, flags);
-               bch2_time_stats_update_one(stats, start, end);
-
-               if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 &&
-                   stats->duration_stats.n > 1024)
-                       stats->buffer =
-                               alloc_percpu_gfp(struct bch2_time_stat_buffer,
-                                                GFP_ATOMIC);
-               spin_unlock_irqrestore(&stats->lock, flags);
-       } else {
-               struct bch2_time_stat_buffer *b;
-
-               preempt_disable();
-               b = this_cpu_ptr(stats->buffer);
-
-               BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
-               b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) {
-                       .start = start,
-                       .end = end
-               };
-
-               if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
-                       bch2_time_stats_clear_buffer(stats, b);
-               preempt_enable();
-       }
-}
-
 static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
 {
-       const struct time_unit *u = pick_time_units(ns);
+       const struct time_unit *u = bch2_pick_time_units(ns);
 
        prt_printf(out, "%llu ", div64_u64(ns, u->nsecs));
        prt_tab_rjust(out);
@@ -506,10 +365,9 @@ static inline void pr_name_and_units(struct printbuf *out, const char *name, u64
 
 void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats)
 {
-       const struct time_unit *u;
+       struct quantiles *quantiles = time_stats_to_quantiles(stats);
        s64 f_mean = 0, d_mean = 0;
-       u64 q, last_q = 0, f_stddev = 0, d_stddev = 0;
-       int i;
+       u64 f_stddev = 0, d_stddev = 0;
 
        if (stats->buffer) {
                int cpu;
@@ -571,14 +429,14 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
        prt_tab(out);
        bch2_pr_time_units_aligned(out, d_mean);
        prt_tab(out);
-       bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted));
+       bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
        prt_newline(out);
 
        prt_printf(out, "stddev:");
        prt_tab(out);
        bch2_pr_time_units_aligned(out, d_stddev);
        prt_tab(out);
-       bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted));
+       bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT));
 
        printbuf_indent_sub(out, 2);
        prt_newline(out);
@@ -594,53 +452,38 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
        prt_tab(out);
        bch2_pr_time_units_aligned(out, f_mean);
        prt_tab(out);
-       bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted));
+       bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
        prt_newline(out);
 
        prt_printf(out, "stddev:");
        prt_tab(out);
        bch2_pr_time_units_aligned(out, f_stddev);
        prt_tab(out);
-       bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted));
+       bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT));
 
        printbuf_indent_sub(out, 2);
        prt_newline(out);
 
        printbuf_tabstops_reset(out);
 
-       i = eytzinger0_first(NR_QUANTILES);
-       u = pick_time_units(stats->quantiles.entries[i].m);
-
-       prt_printf(out, "quantiles (%s):\t", u->name);
-       eytzinger0_for_each(i, NR_QUANTILES) {
-               bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
-
-               q = max(stats->quantiles.entries[i].m, last_q);
-               prt_printf(out, "%llu ",
-                      div_u64(q, u->nsecs));
-               if (is_last)
-                       prt_newline(out);
-               last_q = q;
+       if (quantiles) {
+               int i = eytzinger0_first(NR_QUANTILES);
+               const struct time_unit *u =
+                       bch2_pick_time_units(quantiles->entries[i].m);
+               u64 last_q = 0;
+
+               prt_printf(out, "quantiles (%s):\t", u->name);
+               eytzinger0_for_each(i, NR_QUANTILES) {
+                       bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
+
+                       u64 q = max(quantiles->entries[i].m, last_q);
+                       prt_printf(out, "%llu ", div_u64(q, u->nsecs));
+                       if (is_last)
+                               prt_newline(out);
+                       last_q = q;
+               }
        }
 }
-#else
-void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) {}
-#endif
-
-void bch2_time_stats_exit(struct bch2_time_stats *stats)
-{
-       free_percpu(stats->buffer);
-}
-
-void bch2_time_stats_init(struct bch2_time_stats *stats)
-{
-       memset(stats, 0, sizeof(*stats));
-       stats->duration_stats_weighted.weight = 8;
-       stats->freq_stats_weighted.weight = 8;
-       stats->min_duration = U64_MAX;
-       stats->min_freq = U64_MAX;
-       spin_lock_init(&stats->lock);
-}
 
 /* ratelimit: */
 
@@ -1007,28 +850,6 @@ void sort_cmp_size(void *base, size_t num, size_t size,
        }
 }
 
-static void mempool_free_vp(void *element, void *pool_data)
-{
-       size_t size = (size_t) pool_data;
-
-       vpfree(element, size);
-}
-
-static void *mempool_alloc_vp(gfp_t gfp_mask, void *pool_data)
-{
-       size_t size = (size_t) pool_data;
-
-       return vpmalloc(size, gfp_mask);
-}
-
-int mempool_init_kvpmalloc_pool(mempool_t *pool, int min_nr, size_t size)
-{
-       return size < PAGE_SIZE
-               ? mempool_init_kmalloc_pool(pool, min_nr, size)
-               : mempool_init(pool, min_nr, mempool_alloc_vp,
-                              mempool_free_vp, (void *) size);
-}
-
 #if 0
 void eytzinger1_test(void)
 {
index b414736d59a5b36d1344657eaeb6de6113ec5a09..7ffbddb80400d7aed4bc1479c79cae32427e2595 100644
@@ -21,6 +21,7 @@
 #include "mean_and_variance.h"
 
 #include "darray.h"
+#include "time_stats.h"
 
 struct closure;
 
@@ -53,38 +54,6 @@ static inline size_t buf_pages(void *p, size_t len)
                            PAGE_SIZE);
 }
 
-static inline void vpfree(void *p, size_t size)
-{
-       if (is_vmalloc_addr(p))
-               vfree(p);
-       else
-               free_pages((unsigned long) p, get_order(size));
-}
-
-static inline void *vpmalloc(size_t size, gfp_t gfp_mask)
-{
-       return (void *) __get_free_pages(gfp_mask|__GFP_NOWARN,
-                                        get_order(size)) ?:
-               __vmalloc(size, gfp_mask);
-}
-
-static inline void kvpfree(void *p, size_t size)
-{
-       if (size < PAGE_SIZE)
-               kfree(p);
-       else
-               vpfree(p, size);
-}
-
-static inline void *kvpmalloc(size_t size, gfp_t gfp_mask)
-{
-       return size < PAGE_SIZE
-               ? kmalloc(size, gfp_mask)
-               : vpmalloc(size, gfp_mask);
-}
-
-int mempool_init_kvpmalloc_pool(mempool_t *, int, size_t);
-
 #define HEAP(type)                                                     \
 struct {                                                               \
        size_t size, used;                                              \
@@ -97,13 +66,13 @@ struct {                                                            \
 ({                                                                     \
        (heap)->used = 0;                                               \
        (heap)->size = (_size);                                         \
-       (heap)->data = kvpmalloc((heap)->size * sizeof((heap)->data[0]),\
+       (heap)->data = kvmalloc((heap)->size * sizeof((heap)->data[0]),\
                                 (gfp));                                \
 })
 
 #define free_heap(heap)                                                        \
 do {                                                                   \
-       kvpfree((heap)->data, (heap)->size * sizeof((heap)->data[0]));  \
+       kvfree((heap)->data);                                           \
        (heap)->data = NULL;                                            \
 } while (0)
 
@@ -361,84 +330,8 @@ static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev)
 #endif
 }
 
-#define NR_QUANTILES   15
-#define QUANTILE_IDX(i)        inorder_to_eytzinger0(i, NR_QUANTILES)
-#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
-#define QUANTILE_LAST  eytzinger0_last(NR_QUANTILES)
-
-struct bch2_quantiles {
-       struct bch2_quantile_entry {
-               u64     m;
-               u64     step;
-       }               entries[NR_QUANTILES];
-};
-
-struct bch2_time_stat_buffer {
-       unsigned        nr;
-       struct bch2_time_stat_buffer_entry {
-               u64     start;
-               u64     end;
-       }               entries[32];
-};
-
-struct bch2_time_stats {
-       spinlock_t      lock;
-       /* all fields are in nanoseconds */
-       u64             min_duration;
-       u64             max_duration;
-       u64             total_duration;
-       u64             max_freq;
-       u64             min_freq;
-       u64             last_event;
-       struct bch2_quantiles quantiles;
-
-       struct mean_and_variance          duration_stats;
-       struct mean_and_variance_weighted duration_stats_weighted;
-       struct mean_and_variance          freq_stats;
-       struct mean_and_variance_weighted freq_stats_weighted;
-       struct bch2_time_stat_buffer __percpu *buffer;
-};
-
-#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
-void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64);
-
-static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start)
-{
-       __bch2_time_stats_update(stats, start, local_clock());
-}
-
-static inline bool track_event_change(struct bch2_time_stats *stats,
-                                     u64 *start, bool v)
-{
-       if (v != !!*start) {
-               if (!v) {
-                       bch2_time_stats_update(stats, *start);
-                       *start = 0;
-               } else {
-                       *start = local_clock() ?: 1;
-                       return true;
-               }
-       }
-
-       return false;
-}
-#else
-static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {}
-static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start) {}
-static inline bool track_event_change(struct bch2_time_stats *stats,
-                                     u64 *start, bool v)
-{
-       bool ret = v && !*start;
-       *start = v;
-       return ret;
-}
-#endif
-
 void bch2_time_stats_to_text(struct printbuf *, struct bch2_time_stats *);
 
-void bch2_time_stats_exit(struct bch2_time_stats *);
-void bch2_time_stats_init(struct bch2_time_stats *);
-
 #define ewma_add(ewma, val, weight)                                    \
 ({                                                                     \
        typeof(ewma) _ewma = (ewma);                                    \
@@ -788,8 +681,12 @@ static inline void __move_gap(void *array, size_t element_size,
 }
 
 /* Move the gap in a gap buffer: */
-#define move_gap(_array, _nr, _size, _old_gap, _new_gap)       \
-       __move_gap(_array, sizeof(_array[0]), _nr, _size, _old_gap, _new_gap)
+#define move_gap(_d, _new_gap)                                         \
+do {                                                                   \
+       __move_gap((_d)->data, sizeof((_d)->data[0]),                   \
+                  (_d)->nr, (_d)->size, (_d)->gap, _new_gap);          \
+       (_d)->gap = _new_gap;                                           \
+} while (0)
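A minimal usage sketch of the reworked macro's calling convention, assuming a darray-style descriptor with data/nr/size/gap members as the macro now expands against (the struct and wrapper here are illustrative, not taken from bcachefs):

	struct gap_buf {
		u32	*data;	/* backing array of "size" elements */
		size_t	nr;	/* elements in use */
		size_t	size;	/* capacity */
		size_t	gap;	/* current gap offset */
	};

	static void gap_buf_seek(struct gap_buf *buf, size_t new_gap)
	{
		/* one call now both moves the gap and records its position */
		move_gap(buf, new_gap);
	}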
 
 #define bubble_sort(_base, _nr, _cmp)                                  \
 do {                                                                   \
@@ -876,4 +773,25 @@ static inline bool qstr_eq(const struct qstr l, const struct qstr r)
 void bch2_darray_str_exit(darray_str *);
 int bch2_split_devs(const char *, darray_str *);
 
+#ifdef __KERNEL__
+
+__must_check
+static inline int copy_to_user_errcode(void __user *to, const void *from, unsigned long n)
+{
+       return copy_to_user(to, from, n) ? -EFAULT : 0;
+}
+
+__must_check
+static inline int copy_from_user_errcode(void *to, const void __user *from, unsigned long n)
+{
+       return copy_from_user(to, from, n) ? -EFAULT : 0;
+}
+
+#endif
+
+static inline void __set_bit_le64(size_t bit, __le64 *addr)
+{
+       addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
+}
+
 #endif /* _BCACHEFS_UTIL_H */
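As a usage sketch for the helpers added above (the ioctl payload and bitmap are hypothetical; only copy_to_user_errcode() and __set_bit_le64() come from this header):

	static long example_get_state(void __user *arg)
	{
		struct { __u64 a, b; } out = { 1, 2 };	/* hypothetical payload */

		/* folds the usual copy_to_user() ? -EFAULT : 0 pattern */
		return copy_to_user_errcode(arg, &out, sizeof(out));
	}

	static void example_mark(__le64 *bitmap, size_t bit)
	{
		/* set a bit in a little-endian on-disk bitmap regardless of
		 * host endianness */
		__set_bit_le64(bit, bitmap);
	}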
index 9c0d2316031b1beceda4e1b68dcda4e34184a89e..754f17bba68edb600c0731bee21edff4647c522a 100644 (file)
@@ -544,11 +544,11 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
                kfree(buf);
 
                if (ret < 0)
-                       return ret;
+                       goto err_class_exit;
 
                ret = bch2_opt_check_may_set(c, opt_id, v);
                if (ret < 0)
-                       return ret;
+                       goto err_class_exit;
 
                s.v = v + 1;
                s.defined = true;
@@ -595,6 +595,7 @@ err:
             (opt_id == Opt_compression && !inode_opt_get(c, &inode->ei_inode, background_compression))))
                bch2_set_rebalance_needs_scan(c, inode->ei_inode.bi_inum);
 
+err_class_exit:
        return bch2_err_class(ret);
 }
 
index 7669d154c05e0c1c86c725c2bab490753317d632..e57054bdc5fd81e796298fa90a7d47d9ddd2b248 100644 (file)
@@ -4111,10 +4111,10 @@ insert_hole:
  *
  * Need to be called with
  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
- * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
+ * (ie, flags is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
  *
  * return > 0, number of blocks already mapped/allocated
- *          if create == 0 and these are pre-allocated blocks
+ *          if flags doesn't contain EXT4_GET_BLOCKS_CREATE and these are pre-allocated blocks
  *             buffer head is unmapped
  *          otherwise blocks are mapped
  *
@@ -4218,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
        /*
         * requested block isn't allocated yet;
-        * we couldn't try to create block if create flag is zero
+        * we can't create blocks if flags doesn't contain EXT4_GET_BLOCKS_CREATE
         */
        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
                ext4_lblk_t len;
index 2ccf3b5e3a7c4dcb1b0c6a9d27a3c8a77a145730..537803250ca9a7fa7ce4cde8a61935ee24cf188e 100644 (file)
@@ -465,9 +465,10 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
  * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
  * based files
  *
- * On success, it returns the number of blocks being mapped or allocated.  if
- * create==0 and the blocks are pre-allocated and unwritten, the resulting @map
- * is marked as unwritten. If the create == 1, it will mark @map as mapped.
+ * On success, it returns the number of blocks being mapped or allocated.
+ * If flags doesn't contain EXT4_GET_BLOCKS_CREATE and the blocks are
+ * pre-allocated and unwritten, the resulting @map is marked as unwritten.
+ * If the flags contain EXT4_GET_BLOCKS_CREATE, it will mark @map as mapped.
  *
  * It returns 0 if plain look up failed (blocks have not been allocated), in
  * that case, @map is returned as unmapped but we still do fill map->m_len to
@@ -589,8 +590,7 @@ found:
         * Returns if the blocks have already allocated
         *
         * Note that if blocks have been preallocated
-        * ext4_ext_get_block() returns the create = 0
-        * with buffer head unmapped.
+        * ext4_ext_map_blocks() returns with buffer head unmapped
         */
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
                /*
index f94901fd38357ea330d39011fcc504cae0a690db..044ca5238f41915a709ac3c9d391cf8bd4b7447e 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <kunit/test.h>
 #include <kunit/static_stub.h>
+#include <linux/random.h>
 
 #include "ext4.h"
 
@@ -20,41 +21,135 @@ struct mbt_ctx {
 };
 
 struct mbt_ext4_super_block {
-       struct super_block sb;
+       struct ext4_super_block es;
+       struct ext4_sb_info sbi;
        struct mbt_ctx mbt_ctx;
 };
 
-#define MBT_CTX(_sb) (&(container_of((_sb), struct mbt_ext4_super_block, sb)->mbt_ctx))
+#define MBT_SB(_sb) (container_of((_sb)->s_fs_info, struct mbt_ext4_super_block, sbi))
+#define MBT_CTX(_sb) (&MBT_SB(_sb)->mbt_ctx)
 #define MBT_GRP_CTX(_sb, _group) (&MBT_CTX(_sb)->grp_ctx[_group])
 
+static const struct super_operations mbt_sops = {
+};
+
+static void mbt_kill_sb(struct super_block *sb)
+{
+       generic_shutdown_super(sb);
+}
+
+static struct file_system_type mbt_fs_type = {
+       .name                   = "mballoc test",
+       .kill_sb                = mbt_kill_sb,
+};
+
+static int mbt_mb_init(struct super_block *sb)
+{
+       ext4_fsblk_t block;
+       int ret;
+
+       /* needed by ext4_mb_init->bdev_nonrot(sb->s_bdev) */
+       sb->s_bdev = kzalloc(sizeof(*sb->s_bdev), GFP_KERNEL);
+       if (sb->s_bdev == NULL)
+               return -ENOMEM;
+
+       sb->s_bdev->bd_queue = kzalloc(sizeof(struct request_queue), GFP_KERNEL);
+       if (sb->s_bdev->bd_queue == NULL) {
+               kfree(sb->s_bdev);
+               return -ENOMEM;
+       }
+
+       /*
+        * needed by ext4_mb_init->ext4_mb_init_backend->sbi->s_buddy_cache =
+        * new_inode(sb);
+        */
+       INIT_LIST_HEAD(&sb->s_inodes);
+       sb->s_op = &mbt_sops;
+
+       ret = ext4_mb_init(sb);
+       if (ret != 0)
+               goto err_out;
+
+       block = ext4_count_free_clusters(sb);
+       ret = percpu_counter_init(&EXT4_SB(sb)->s_freeclusters_counter, block,
+                                 GFP_KERNEL);
+       if (ret != 0)
+               goto err_mb_release;
+
+       ret = percpu_counter_init(&EXT4_SB(sb)->s_dirtyclusters_counter, 0,
+                                 GFP_KERNEL);
+       if (ret != 0)
+               goto err_freeclusters;
+
+       return 0;
+
+err_freeclusters:
+       percpu_counter_destroy(&EXT4_SB(sb)->s_freeclusters_counter);
+err_mb_release:
+       ext4_mb_release(sb);
+err_out:
+       kfree(sb->s_bdev->bd_queue);
+       kfree(sb->s_bdev);
+       return ret;
+}
+
+static void mbt_mb_release(struct super_block *sb)
+{
+       percpu_counter_destroy(&EXT4_SB(sb)->s_dirtyclusters_counter);
+       percpu_counter_destroy(&EXT4_SB(sb)->s_freeclusters_counter);
+       ext4_mb_release(sb);
+       kfree(sb->s_bdev->bd_queue);
+       kfree(sb->s_bdev);
+}
+
+static int mbt_set(struct super_block *sb, void *data)
+{
+       return 0;
+}
+
 static struct super_block *mbt_ext4_alloc_super_block(void)
 {
-       struct ext4_super_block *es = kzalloc(sizeof(*es), GFP_KERNEL);
-       struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
-       struct mbt_ext4_super_block *fsb = kzalloc(sizeof(*fsb), GFP_KERNEL);
+       struct mbt_ext4_super_block *fsb;
+       struct super_block *sb;
+       struct ext4_sb_info *sbi;
+
+       fsb = kzalloc(sizeof(*fsb), GFP_KERNEL);
+       if (fsb == NULL)
+               return NULL;
 
-       if (fsb == NULL || sbi == NULL || es == NULL)
+       sb = sget(&mbt_fs_type, NULL, mbt_set, 0, NULL);
+       if (IS_ERR(sb))
                goto out;
 
-       sbi->s_es = es;
-       fsb->sb.s_fs_info = sbi;
-       return &fsb->sb;
+       sbi = &fsb->sbi;
+
+       sbi->s_blockgroup_lock =
+               kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
+       if (!sbi->s_blockgroup_lock)
+               goto out_deactivate;
+
+       bgl_lock_init(sbi->s_blockgroup_lock);
+
+       sbi->s_es = &fsb->es;
+       sb->s_fs_info = sbi;
+
+       up_write(&sb->s_umount);
+       return sb;
 
+out_deactivate:
+       deactivate_locked_super(sb);
 out:
        kfree(fsb);
-       kfree(sbi);
-       kfree(es);
        return NULL;
 }
 
 static void mbt_ext4_free_super_block(struct super_block *sb)
 {
-       struct mbt_ext4_super_block *fsb =
-               container_of(sb, struct mbt_ext4_super_block, sb);
+       struct mbt_ext4_super_block *fsb = MBT_SB(sb);
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-       kfree(sbi->s_es);
-       kfree(sbi);
+       kfree(sbi->s_blockgroup_lock);
+       deactivate_super(sb);
        kfree(fsb);
 }
 
@@ -82,6 +177,9 @@ static void mbt_init_sb_layout(struct super_block *sb,
        sbi->s_clusters_per_group = layout->blocks_per_group >>
                                    layout->cluster_bits;
        sbi->s_desc_size = layout->desc_size;
+       sbi->s_desc_per_block_bits =
+               sb->s_blocksize_bits - (fls(layout->desc_size) - 1);
+       sbi->s_desc_per_block = 1 << sbi->s_desc_per_block_bits;
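+       /*
+        * Worked example: with 1024-byte blocks (s_blocksize_bits = 10) and
+        * 64-byte descriptors, fls(64) - 1 = 6, so s_desc_per_block_bits =
+        * 10 - 6 = 4 and s_desc_per_block = 16, i.e. blocksize / desc_size
+        * (assuming desc_size is a power of two; values illustrative).
+        */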
 
        es->s_first_data_block = cpu_to_le32(0);
        es->s_blocks_count_lo = cpu_to_le32(layout->blocks_per_group *
@@ -91,9 +189,13 @@ static void mbt_init_sb_layout(struct super_block *sb,
 static int mbt_grp_ctx_init(struct super_block *sb,
                            struct mbt_grp_ctx *grp_ctx)
 {
+       ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
+
        grp_ctx->bitmap_bh.b_data = kzalloc(EXT4_BLOCK_SIZE(sb), GFP_KERNEL);
        if (grp_ctx->bitmap_bh.b_data == NULL)
                return -ENOMEM;
+       mb_set_bits(grp_ctx->bitmap_bh.b_data, max, sb->s_blocksize * 8 - max);
+       ext4_free_group_clusters_set(sb, &grp_ctx->desc, max);
 
        return 0;
 }
@@ -112,6 +214,13 @@ static void mbt_ctx_mark_used(struct super_block *sb, ext4_group_t group,
        mb_set_bits(grp_ctx->bitmap_bh.b_data, start, len);
 }
 
+static void *mbt_ctx_bitmap(struct super_block *sb, ext4_group_t group)
+{
+       struct mbt_grp_ctx *grp_ctx = MBT_GRP_CTX(sb, group);
+
+       return grp_ctx->bitmap_bh.b_data;
+}
+
 /* called after mbt_init_sb_layout */
 static int mbt_ctx_init(struct super_block *sb)
 {
@@ -133,6 +242,8 @@ static int mbt_ctx_init(struct super_block *sb)
         * block which will fail ext4_sb_block_valid check.
         */
        mb_set_bits(ctx->grp_ctx[0].bitmap_bh.b_data, 0, 1);
+       ext4_free_group_clusters_set(sb, &ctx->grp_ctx[0].desc,
+                                    EXT4_CLUSTERS_PER_GROUP(sb) - 1);
 
        return 0;
 out:
@@ -167,6 +278,13 @@ static int ext4_wait_block_bitmap_stub(struct super_block *sb,
                                       ext4_group_t block_group,
                                       struct buffer_head *bh)
 {
+       /*
+        * The real ext4_wait_block_bitmap() sets these flags and
+        * functions like ext4_mb_init_cache() verify them.
+        */
+       set_buffer_uptodate(bh);
+       set_bitmap_uptodate(bh);
+       set_buffer_verified(bh);
        return 0;
 }
 
@@ -232,6 +350,14 @@ static int mbt_kunit_init(struct kunit *test)
        kunit_activate_static_stub(test,
                                   ext4_mb_mark_context,
                                   ext4_mb_mark_context_stub);
+
+       /* stub function will be called in mbt_mb_init->ext4_mb_init */
+       if (mbt_mb_init(sb) != 0) {
+               mbt_ctx_release(sb);
+               mbt_ext4_free_super_block(sb);
+               return -ENOMEM;
+       }
+
        return 0;
 }
 
@@ -239,6 +365,7 @@ static void mbt_kunit_exit(struct kunit *test)
 {
        struct super_block *sb = (struct super_block *)test->priv;
 
+       mbt_mb_release(sb);
        mbt_ctx_release(sb);
        mbt_ext4_free_super_block(sb);
 }
@@ -246,14 +373,19 @@ static void mbt_kunit_exit(struct kunit *test)
 static void test_new_blocks_simple(struct kunit *test)
 {
        struct super_block *sb = (struct super_block *)test->priv;
-       struct inode inode = { .i_sb = sb, };
+       struct inode *inode;
        struct ext4_allocation_request ar;
        ext4_group_t i, goal_group = TEST_GOAL_GROUP;
        int err = 0;
        ext4_fsblk_t found;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-       ar.inode = &inode;
+       inode = kunit_kzalloc(test, sizeof(*inode), GFP_KERNEL);
+       if (!inode)
+               return;
+
+       inode->i_sb = sb;
+       ar.inode = inode;
 
        /* get block at goal */
        ar.goal = ext4_group_first_block_no(sb, goal_group);
@@ -297,6 +429,436 @@ static void test_new_blocks_simple(struct kunit *test)
                "unexpectedly get block when no block is available");
 }
 
+#define TEST_RANGE_COUNT 8
+
+struct test_range {
+       ext4_grpblk_t start;
+       ext4_grpblk_t len;
+};
+
+static void
+mbt_generate_test_ranges(struct super_block *sb, struct test_range *ranges,
+                        int count)
+{
+       ext4_grpblk_t start, len, max;
+       int i;
+
+       max = EXT4_CLUSTERS_PER_GROUP(sb) / count;
+       for (i = 0; i < count; i++) {
+               start = get_random_u32() % max;
+               len = get_random_u32() % max;
+               len = min(len, max - start);
+
+               ranges[i].start = start + i * max;
+               ranges[i].len = len;
+       }
+}
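Each generated range is confined to its own max-sized slice of the group, so the ranges are pairwise disjoint; a sketch of that invariant as an illustrative check (not part of the patch):

	static void check_ranges(struct test_range *ranges, int count,
				 ext4_grpblk_t max)
	{
		int i;

		/* slice i covers [i * max, (i + 1) * max) */
		for (i = 0; i < count; i++) {
			WARN_ON(ranges[i].start < i * max);
			WARN_ON(ranges[i].start + ranges[i].len > (i + 1) * max);
		}
	}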
+
+static void
+validate_free_blocks_simple(struct kunit *test, struct super_block *sb,
+                           ext4_group_t goal_group, ext4_grpblk_t start,
+                           ext4_grpblk_t len)
+{
+       void *bitmap;
+       ext4_grpblk_t bit, max = EXT4_CLUSTERS_PER_GROUP(sb);
+       ext4_group_t i;
+
+       for (i = 0; i < ext4_get_groups_count(sb); i++) {
+               if (i == goal_group)
+                       continue;
+
+               bitmap = mbt_ctx_bitmap(sb, i);
+               bit = mb_find_next_zero_bit(bitmap, max, 0);
+               KUNIT_ASSERT_EQ_MSG(test, bit, max,
+                                   "free block on unexpected group %d", i);
+       }
+
+       bitmap = mbt_ctx_bitmap(sb, goal_group);
+       bit = mb_find_next_zero_bit(bitmap, max, 0);
+       KUNIT_ASSERT_EQ(test, bit, start);
+
+       bit = mb_find_next_bit(bitmap, max, bit + 1);
+       KUNIT_ASSERT_EQ(test, bit, start + len);
+}
+
+static void
+test_free_blocks_simple_range(struct kunit *test, ext4_group_t goal_group,
+                             ext4_grpblk_t start, ext4_grpblk_t len)
+{
+       struct super_block *sb = (struct super_block *)test->priv;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct inode *inode;
+       ext4_fsblk_t block;
+
+       inode = kunit_kzalloc(test, sizeof(*inode), GFP_KERNEL);
+       if (!inode)
+               return;
+       inode->i_sb = sb;
+
+       if (len == 0)
+               return;
+
+       block = ext4_group_first_block_no(sb, goal_group) +
+               EXT4_C2B(sbi, start);
+       ext4_free_blocks_simple(inode, block, len);
+       validate_free_blocks_simple(test, sb, goal_group, start, len);
+       mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb));
+}
+
+static void test_free_blocks_simple(struct kunit *test)
+{
+       struct super_block *sb = (struct super_block *)test->priv;
+       ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
+       ext4_group_t i;
+       struct test_range ranges[TEST_RANGE_COUNT];
+
+       for (i = 0; i < ext4_get_groups_count(sb); i++)
+               mbt_ctx_mark_used(sb, i, 0, max);
+
+       mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
+       for (i = 0; i < TEST_RANGE_COUNT; i++)
+               test_free_blocks_simple_range(test, TEST_GOAL_GROUP,
+                       ranges[i].start, ranges[i].len);
+}
+
+static void
+test_mark_diskspace_used_range(struct kunit *test,
+                              struct ext4_allocation_context *ac,
+                              ext4_grpblk_t start,
+                              ext4_grpblk_t len)
+{
+       struct super_block *sb = (struct super_block *)test->priv;
+       int ret;
+       void *bitmap;
+       ext4_grpblk_t i, max;
+
+       /* ext4_mb_mark_diskspace_used will BUG if len is 0 */
+       if (len == 0)
+               return;
+
+       ac->ac_b_ex.fe_group = TEST_GOAL_GROUP;
+       ac->ac_b_ex.fe_start = start;
+       ac->ac_b_ex.fe_len = len;
+
+       bitmap = mbt_ctx_bitmap(sb, TEST_GOAL_GROUP);
+       memset(bitmap, 0, sb->s_blocksize);
+       ret = ext4_mb_mark_diskspace_used(ac, NULL, 0);
+       KUNIT_ASSERT_EQ(test, ret, 0);
+
+       max = EXT4_CLUSTERS_PER_GROUP(sb);
+       i = mb_find_next_bit(bitmap, max, 0);
+       KUNIT_ASSERT_EQ(test, i, start);
+       i = mb_find_next_zero_bit(bitmap, max, i + 1);
+       KUNIT_ASSERT_EQ(test, i, start + len);
+       i = mb_find_next_bit(bitmap, max, i + 1);
+       KUNIT_ASSERT_EQ(test, max, i);
+}
+
+static void test_mark_diskspace_used(struct kunit *test)
+{
+       struct super_block *sb = (struct super_block *)test->priv;
+       struct inode *inode;
+       struct ext4_allocation_context ac;
+       struct test_range ranges[TEST_RANGE_COUNT];
+       int i;
+
+       mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
+
+       inode = kunit_kzalloc(test, sizeof(*inode), GFP_KERNEL);
+       if (!inode)
+               return;
+       inode->i_sb = sb;
+
+       ac.ac_status = AC_STATUS_FOUND;
+       ac.ac_sb = sb;
+       ac.ac_inode = inode;
+       for (i = 0; i < TEST_RANGE_COUNT; i++)
+               test_mark_diskspace_used_range(test, &ac, ranges[i].start,
+                                              ranges[i].len);
+}
+
+static void mbt_generate_buddy(struct super_block *sb, void *buddy,
+                              void *bitmap, struct ext4_group_info *grp)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       uint32_t order, off;
+       void *bb, *bb_h;
+       int max;
+
+       memset(buddy, 0xff, sb->s_blocksize);
+       memset(grp, 0, offsetof(struct ext4_group_info,
+                                bb_counters[MB_NUM_ORDERS(sb)]));
+
+       bb = bitmap;
+       max = EXT4_CLUSTERS_PER_GROUP(sb);
+       bb_h = buddy + sbi->s_mb_offsets[1];
+
+       off = mb_find_next_zero_bit(bb, max, 0);
+       grp->bb_first_free = off;
+       while (off < max) {
+               grp->bb_counters[0]++;
+               grp->bb_free++;
+
+               if (!(off & 1) && !mb_test_bit(off + 1, bb)) {
+                       grp->bb_free++;
+                       grp->bb_counters[0]--;
+                       mb_clear_bit(off >> 1, bb_h);
+                       grp->bb_counters[1]++;
+                       grp->bb_largest_free_order = 1;
+                       off++;
+               }
+
+               off = mb_find_next_zero_bit(bb, max, off + 1);
+       }
+
+       for (order = 1; order < MB_NUM_ORDERS(sb) - 1; order++) {
+               bb = buddy + sbi->s_mb_offsets[order];
+               bb_h = buddy + sbi->s_mb_offsets[order + 1];
+               max = max >> 1;
+               off = mb_find_next_zero_bit(bb, max, 0);
+
+               while (off < max) {
+                       if (!(off & 1) && !mb_test_bit(off + 1, bb)) {
+                               mb_set_bits(bb, off, 2);
+                               grp->bb_counters[order] -= 2;
+                               mb_clear_bit(off >> 1, bb_h);
+                               grp->bb_counters[order + 1]++;
+                               grp->bb_largest_free_order = order + 1;
+                               off++;
+                       }
+
+                       off = mb_find_next_zero_bit(bb, max, off + 1);
+               }
+       }
+
+       max = EXT4_CLUSTERS_PER_GROUP(sb);
+       off = mb_find_next_zero_bit(bitmap, max, 0);
+       while (off < max) {
+               grp->bb_fragments++;
+
+               off = mb_find_next_bit(bitmap, max, off + 1);
+               if (off + 1 >= max)
+                       break;
+
+               off = mb_find_next_zero_bit(bitmap, max, off + 1);
+       }
+}
+
+static void
+mbt_validate_group_info(struct kunit *test, struct ext4_group_info *grp1,
+                       struct ext4_group_info *grp2)
+{
+       struct super_block *sb = (struct super_block *)test->priv;
+       int i;
+
+       KUNIT_ASSERT_EQ(test, grp1->bb_first_free,
+                       grp2->bb_first_free);
+       KUNIT_ASSERT_EQ(test, grp1->bb_fragments,
+                       grp2->bb_fragments);
+       KUNIT_ASSERT_EQ(test, grp1->bb_free, grp2->bb_free);
+       KUNIT_ASSERT_EQ(test, grp1->bb_largest_free_order,
+                       grp2->bb_largest_free_order);
+
+       for (i = 1; i < MB_NUM_ORDERS(sb); i++) {
+               KUNIT_ASSERT_EQ_MSG(test, grp1->bb_counters[i],
+                                   grp2->bb_counters[i],
+                                   "bb_counters[%d] diffs, expected %d, generated %d",
+                                   i, grp1->bb_counters[i],
+                                   grp2->bb_counters[i]);
+       }
+}
+
+static void
+do_test_generate_buddy(struct kunit *test, struct super_block *sb, void *bitmap,
+                          void *mbt_buddy, struct ext4_group_info *mbt_grp,
+                          void *ext4_buddy, struct ext4_group_info *ext4_grp)
+{
+       int i;
+
+       mbt_generate_buddy(sb, mbt_buddy, bitmap, mbt_grp);
+
+       for (i = 0; i < MB_NUM_ORDERS(sb); i++)
+               ext4_grp->bb_counters[i] = 0;
+       /* needed by validation in ext4_mb_generate_buddy */
+       ext4_grp->bb_free = mbt_grp->bb_free;
+       memset(ext4_buddy, 0xff, sb->s_blocksize);
+       ext4_mb_generate_buddy(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP,
+                              ext4_grp);
+
+       KUNIT_ASSERT_EQ(test, memcmp(mbt_buddy, ext4_buddy, sb->s_blocksize),
+                       0);
+       mbt_validate_group_info(test, mbt_grp, ext4_grp);
+}
+
+static void test_mb_generate_buddy(struct kunit *test)
+{
+       struct super_block *sb = (struct super_block *)test->priv;
+       void *bitmap, *expected_bb, *generate_bb;
+       struct ext4_group_info *expected_grp, *generate_grp;
+       struct test_range ranges[TEST_RANGE_COUNT];
+       int i;
+
+       bitmap = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bitmap);
+       expected_bb = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, expected_bb);
+       generate_bb = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, generate_bb);
+       expected_grp = kunit_kzalloc(test, offsetof(struct ext4_group_info,
+                               bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, expected_grp);
+       generate_grp = ext4_get_group_info(sb, TEST_GOAL_GROUP);
+       KUNIT_ASSERT_NOT_NULL(test, generate_grp);
+
+       mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
+       for (i = 0; i < TEST_RANGE_COUNT; i++) {
+               mb_set_bits(bitmap, ranges[i].start, ranges[i].len);
+               do_test_generate_buddy(test, sb, bitmap, expected_bb,
+                                      expected_grp, generate_bb, generate_grp);
+       }
+}
+
+static void
+test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b,
+                       ext4_grpblk_t start, ext4_grpblk_t len, void *bitmap,
+                       void *buddy, struct ext4_group_info *grp)
+{
+       struct super_block *sb = (struct super_block *)test->priv;
+       struct ext4_free_extent ex;
+       int i;
+
+       /* mb_mark_used only accepts non-zero len */
+       if (len == 0)
+               return;
+
+       ex.fe_start = start;
+       ex.fe_len = len;
+       ex.fe_group = TEST_GOAL_GROUP;
+
+       ext4_lock_group(sb, TEST_GOAL_GROUP);
+       mb_mark_used(e4b, &ex);
+       ext4_unlock_group(sb, TEST_GOAL_GROUP);
+
+       mb_set_bits(bitmap, start, len);
+       /* bypass bb_free validation in ext4_mb_generate_buddy */
+       grp->bb_free -= len;
+       memset(buddy, 0xff, sb->s_blocksize);
+       for (i = 0; i < MB_NUM_ORDERS(sb); i++)
+               grp->bb_counters[i] = 0;
+       ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp);
+
+       KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize),
+                       0);
+       mbt_validate_group_info(test, grp, e4b->bd_info);
+}
+
+static void test_mb_mark_used(struct kunit *test)
+{
+       struct ext4_buddy e4b;
+       struct super_block *sb = (struct super_block *)test->priv;
+       void *bitmap, *buddy;
+       struct ext4_group_info *grp;
+       int ret;
+       struct test_range ranges[TEST_RANGE_COUNT];
+       int i;
+
+       /* buddy cache assumes that each page contains at least one block */
+       if (sb->s_blocksize > PAGE_SIZE)
+               kunit_skip(test, "blocksize exceeds pagesize");
+
+       bitmap = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bitmap);
+       buddy = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buddy);
+       grp = kunit_kzalloc(test, offsetof(struct ext4_group_info,
+                               bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
+
+       ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+       KUNIT_ASSERT_EQ(test, ret, 0);
+
+       grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb);
+       mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
+       for (i = 0; i < TEST_RANGE_COUNT; i++)
+               test_mb_mark_used_range(test, &e4b, ranges[i].start,
+                                       ranges[i].len, bitmap, buddy, grp);
+
+       ext4_mb_unload_buddy(&e4b);
+}
+
+static void
+test_mb_free_blocks_range(struct kunit *test, struct ext4_buddy *e4b,
+                         ext4_grpblk_t start, ext4_grpblk_t len, void *bitmap,
+                         void *buddy, struct ext4_group_info *grp)
+{
+       struct super_block *sb = (struct super_block *)test->priv;
+       int i;
+
+       /* mb_free_blocks will WARN if len is 0 */
+       if (len == 0)
+               return;
+
+       ext4_lock_group(sb, e4b->bd_group);
+       mb_free_blocks(NULL, e4b, start, len);
+       ext4_unlock_group(sb, e4b->bd_group);
+
+       mb_clear_bits(bitmap, start, len);
+       /* bypass bb_free validation in ext4_mb_generate_buddy */
+       grp->bb_free += len;
+       memset(buddy, 0xff, sb->s_blocksize);
+       for (i = 0; i < MB_NUM_ORDERS(sb); i++)
+               grp->bb_counters[i] = 0;
+       ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp);
+
+       KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize),
+                       0);
+       mbt_validate_group_info(test, grp, e4b->bd_info);
+}
+
+static void test_mb_free_blocks(struct kunit *test)
+{
+       struct ext4_buddy e4b;
+       struct super_block *sb = (struct super_block *)test->priv;
+       void *bitmap, *buddy;
+       struct ext4_group_info *grp;
+       struct ext4_free_extent ex;
+       int ret;
+       int i;
+       struct test_range ranges[TEST_RANGE_COUNT];
+
+       /* buddy cache assumes that each page contains at least one block */
+       if (sb->s_blocksize > PAGE_SIZE)
+               kunit_skip(test, "blocksize exceeds pagesize");
+
+       bitmap = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bitmap);
+       buddy = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buddy);
+       grp = kunit_kzalloc(test, offsetof(struct ext4_group_info,
+                               bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
+
+       ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+       KUNIT_ASSERT_EQ(test, ret, 0);
+
+       ex.fe_start = 0;
+       ex.fe_len = EXT4_CLUSTERS_PER_GROUP(sb);
+       ex.fe_group = TEST_GOAL_GROUP;
+
+       ext4_lock_group(sb, TEST_GOAL_GROUP);
+       mb_mark_used(&e4b, &ex);
+       ext4_unlock_group(sb, TEST_GOAL_GROUP);
+
+       grp->bb_free = 0;
+       memset(bitmap, 0xff, sb->s_blocksize);
+
+       mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
+       for (i = 0; i < TEST_RANGE_COUNT; i++)
+               test_mb_free_blocks_range(test, &e4b, ranges[i].start,
+                                         ranges[i].len, bitmap, buddy, grp);
+
+       ext4_mb_unload_buddy(&e4b);
+}
+
 static const struct mbt_ext4_block_layout mbt_test_layouts[] = {
        {
                .blocksize_bits = 10,
@@ -334,6 +896,11 @@ KUNIT_ARRAY_PARAM(mbt_layouts, mbt_test_layouts, mbt_show_layout);
 
 static struct kunit_case mbt_test_cases[] = {
        KUNIT_CASE_PARAM(test_new_blocks_simple, mbt_layouts_gen_params),
+       KUNIT_CASE_PARAM(test_free_blocks_simple, mbt_layouts_gen_params),
+       KUNIT_CASE_PARAM(test_mb_generate_buddy, mbt_layouts_gen_params),
+       KUNIT_CASE_PARAM(test_mb_mark_used, mbt_layouts_gen_params),
+       KUNIT_CASE_PARAM(test_mb_free_blocks, mbt_layouts_gen_params),
+       KUNIT_CASE_PARAM(test_mark_diskspace_used, mbt_layouts_gen_params),
        {}
 };
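For reference, a sketch of how a case table like this is typically registered as a KUnit suite; the suite name here is a guess, while the init/exit hooks are the ones defined earlier in this file:

	static struct kunit_suite mbt_test_suite = {
		.name		= "ext4_mballoc_test",
		.init		= mbt_kunit_init,
		.exit		= mbt_kunit_exit,
		.test_cases	= mbt_test_cases,
	};

	kunit_test_suites(&mbt_test_suite);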
 
index e4f7cf9d89c45a881d6c403fd50fcc499db0b708..12b3f196010b8effbf1603eb8a2bb08f86ec01ac 100644 (file)
@@ -3015,8 +3015,8 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
 {
        struct super_block *sb = pde_data(file_inode(seq->file));
        ext4_group_t group = (ext4_group_t) ((unsigned long) v);
-       int i;
-       int err, buddy_loaded = 0;
+       int i, err;
+       char nbuf[16];
        struct ext4_buddy e4b;
        struct ext4_group_info *grinfo;
        unsigned char blocksize_bits = min_t(unsigned char,
@@ -3043,23 +3043,26 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
        if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
                err = ext4_mb_load_buddy(sb, group, &e4b);
                if (err) {
-                       seq_printf(seq, "#%-5u: I/O error\n", group);
+                       seq_printf(seq, "#%-5u: %s\n", group, ext4_decode_error(NULL, err, nbuf));
                        return 0;
                }
-               buddy_loaded = 1;
+               ext4_mb_unload_buddy(&e4b);
        }
 
+       /*
+        * We care only about free space counters in the group info and
+        * these are safe to access even after the buddy has been unloaded
+        */
        memcpy(&sg, grinfo, i);
-
-       if (buddy_loaded)
-               ext4_mb_unload_buddy(&e4b);
-
        seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
                        sg.info.bb_fragments, sg.info.bb_first_free);
        for (i = 0; i <= 13; i++)
                seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
                                sg.info.bb_counters[i] : 0);
-       seq_puts(seq, " ]\n");
+       seq_puts(seq, " ]");
+       if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info))
+               seq_puts(seq, " Block bitmap corrupted!");
+       seq_puts(seq, "\n");
 
        return 0;
 }
@@ -3829,8 +3832,7 @@ void ext4_mb_release(struct super_block *sb)
 }
 
 static inline int ext4_issue_discard(struct super_block *sb,
-               ext4_group_t block_group, ext4_grpblk_t cluster, int count,
-               struct bio **biop)
+               ext4_group_t block_group, ext4_grpblk_t cluster, int count)
 {
        ext4_fsblk_t discard_block;
 
@@ -3839,13 +3841,8 @@ static inline int ext4_issue_discard(struct super_block *sb,
        count = EXT4_C2B(EXT4_SB(sb), count);
        trace_ext4_discard_blocks(sb,
                        (unsigned long long) discard_block, count);
-       if (biop) {
-               return __blkdev_issue_discard(sb->s_bdev,
-                       (sector_t)discard_block << (sb->s_blocksize_bits - 9),
-                       (sector_t)count << (sb->s_blocksize_bits - 9),
-                       GFP_NOFS, biop);
-       } else
-               return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+
+       return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
 }
 
 static void ext4_free_data_in_buddy(struct super_block *sb,
@@ -5169,10 +5166,16 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
                        .fe_len = ac->ac_orig_goal_len,
                };
                loff_t orig_goal_end = extent_logical_end(sbi, &ex);
+               loff_t o_ex_end = extent_logical_end(sbi, &ac->ac_o_ex);
 
-               /* we can't allocate as much as normalizer wants.
-                * so, found space must get proper lstart
-                * to cover original request */
+               /*
+                * We can't allocate as much as normalizer wants, so we try
+                * to get proper lstart to cover the original request, except
+                * when the goal doesn't cover the original request as below:
+                *
+                * orig_ex:2045/2055(10), isize:8417280 -> normalized:0/2048
+                * best_ex:0/200(200) -> adjusted: 1848/2048(200)
+                */
                BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
                BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
 
@@ -5184,7 +5187,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
                 * 1. Check if best ex can be kept at end of goal (before
                 *    cr_best_avail trimmed it) and still cover original start
                 * 2. Else, check if best ex can be kept at start of goal and
-                *    still cover original start
+                *    still cover original end
                 * 3. Else, keep the best ex at start of original request.
                 */
                ex.fe_len = ac->ac_b_ex.fe_len;
@@ -5194,7 +5197,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
                        goto adjust_bex;
 
                ex.fe_logical = ac->ac_g_ex.fe_logical;
-               if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex))
+               if (o_ex_end <= extent_logical_end(sbi, &ex))
                        goto adjust_bex;
 
                ex.fe_logical = ac->ac_o_ex.fe_logical;
@@ -5202,7 +5205,6 @@ adjust_bex:
                ac->ac_b_ex.fe_logical = ex.fe_logical;
 
                BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
-               BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
                BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end);
        }
 
@@ -6487,8 +6489,14 @@ do_more:
        } else {
                if (test_opt(sb, DISCARD)) {
                        err = ext4_issue_discard(sb, block_group, bit,
-                                                count_clusters, NULL);
-                       if (err && err != -EOPNOTSUPP)
+                                                count_clusters);
+                       /*
+                        * Ignore EOPNOTSUPP error. This is consistent with
+                        * what happens when using the journal.
+                        */
+                       if (err == -EOPNOTSUPP)
+                               err = 0;
+                       if (err)
                                ext4_msg(sb, KERN_WARNING, "discard request in"
                                         " group:%u block:%d count:%lu failed"
                                         " with %d", block_group, bit, count,
@@ -6738,7 +6746,7 @@ __acquires(bitlock)
         */
        mb_mark_used(e4b, &ex);
        ext4_unlock_group(sb, group);
-       ret = ext4_issue_discard(sb, group, start, count, NULL);
+       ret = ext4_issue_discard(sb, group, start, count);
        ext4_lock_group(sb, group);
        mb_free_blocks(NULL, e4b, start, ex.fe_len);
        return ret;
index 4d4a5a32e310d43bb57f6d8bd2561debe882b724..0ba9837d65cac90d74e6b2ce23050c4cae165953 100644 (file)
@@ -1602,7 +1602,8 @@ exit_journal:
                int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
                int gdb_num_end = ((group + flex_gd->count - 1) /
                                   EXT4_DESC_PER_BLOCK(sb));
-               int meta_bg = ext4_has_feature_meta_bg(sb);
+               int meta_bg = ext4_has_feature_meta_bg(sb) &&
+                             gdb_num >= le32_to_cpu(es->s_first_meta_bg);
                sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr -
                                         ext4_group_first_block_no(sb, 0);
 
@@ -2084,7 +2085,7 @@ retry:
                }
        }
 
-       if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) {
+       if ((!resize_inode && !meta_bg && n_desc_blocks > o_desc_blocks) || n_blocks_count == o_blocks_count) {
                err = ext4_convert_meta_bg(sb, resize_inode);
                if (err)
                        goto out;
index f5e5a44778cf56fea813ab63a2ffc66e691811a0..cfb8449c731f9ac53fb3add808e13493175508c4 100644 (file)
@@ -4421,22 +4421,6 @@ static int ext4_handle_clustersize(struct super_block *sb)
                }
                sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
                        le32_to_cpu(es->s_log_block_size);
-               sbi->s_clusters_per_group =
-                       le32_to_cpu(es->s_clusters_per_group);
-               if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
-                       ext4_msg(sb, KERN_ERR,
-                                "#clusters per group too big: %lu",
-                                sbi->s_clusters_per_group);
-                       return -EINVAL;
-               }
-               if (sbi->s_blocks_per_group !=
-                   (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
-                       ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
-                                "clusters per group (%lu) inconsistent",
-                                sbi->s_blocks_per_group,
-                                sbi->s_clusters_per_group);
-                       return -EINVAL;
-               }
        } else {
                if (clustersize != sb->s_blocksize) {
                        ext4_msg(sb, KERN_ERR,
@@ -4450,9 +4434,21 @@ static int ext4_handle_clustersize(struct super_block *sb)
                                 sbi->s_blocks_per_group);
                        return -EINVAL;
                }
-               sbi->s_clusters_per_group = sbi->s_blocks_per_group;
                sbi->s_cluster_bits = 0;
        }
+       sbi->s_clusters_per_group = le32_to_cpu(es->s_clusters_per_group);
+       if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
+               ext4_msg(sb, KERN_ERR, "#clusters per group too big: %lu",
+                        sbi->s_clusters_per_group);
+               return -EINVAL;
+       }
+       if (sbi->s_blocks_per_group !=
+           (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
+               ext4_msg(sb, KERN_ERR,
+                        "blocks per group (%lu) and clusters per group (%lu) inconsistent",
+                        sbi->s_blocks_per_group, sbi->s_clusters_per_group);
+               return -EINVAL;
+       }
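+       /*
+        * Worked example: with 4096-byte blocks and a 65536-byte cluster,
+        * s_cluster_ratio below is 16, and the check above requires
+        * s_blocks_per_group == s_clusters_per_group * 16 (e.g. 32768
+        * blocks for 2048 clusters per group; values illustrative).
+        */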
        sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
 
        /* Do we have standard group size of clustersize * 8 blocks ? */
@@ -6864,6 +6860,10 @@ static int ext4_write_dquot(struct dquot *dquot)
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_commit(dquot);
+       if (ret < 0)
+               ext4_error_err(dquot->dq_sb, -ret,
+                              "Failed to commit dquot type %d",
+                              dquot->dq_id.type);
        err = ext4_journal_stop(handle);
        if (!ret)
                ret = err;
@@ -6880,6 +6880,10 @@ static int ext4_acquire_dquot(struct dquot *dquot)
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_acquire(dquot);
+       if (ret < 0)
+               ext4_error_err(dquot->dq_sb, -ret,
+                             "Failed to acquire dquot type %d",
+                             dquot->dq_id.type);
        err = ext4_journal_stop(handle);
        if (!ret)
                ret = err;
@@ -6899,6 +6903,10 @@ static int ext4_release_dquot(struct dquot *dquot)
                return PTR_ERR(handle);
        }
        ret = dquot_release(dquot);
+       if (ret < 0)
+               ext4_error_err(dquot->dq_sb, -ret,
+                              "Failed to release dquot type %d",
+                              dquot->dq_id.type);
        err = ext4_journal_stop(handle);
        if (!ret)
                ret = err;
index 82dc5e673d5c4d91bb9a0d0dd50ebf1238f58583..b67a176bfcf9f9548dc78d826c7bd854f76bb499 100644 (file)
@@ -1565,46 +1565,49 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
 /*
  * Add value of the EA in an inode.
  */
-static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
-                                         const void *value, size_t value_len,
-                                         struct inode **ret_inode)
+static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
+               struct inode *inode, const void *value, size_t value_len)
 {
        struct inode *ea_inode;
        u32 hash;
        int err;
 
+       /* Account inode & space to quota even if sharing... */
+       err = ext4_xattr_inode_alloc_quota(inode, value_len);
+       if (err)
+               return ERR_PTR(err);
+
        hash = ext4_xattr_inode_hash(EXT4_SB(inode->i_sb), value, value_len);
        ea_inode = ext4_xattr_inode_cache_find(inode, value, value_len, hash);
        if (ea_inode) {
                err = ext4_xattr_inode_inc_ref(handle, ea_inode);
-               if (err) {
-                       iput(ea_inode);
-                       return err;
-               }
-
-               *ret_inode = ea_inode;
-               return 0;
+               if (err)
+                       goto out_err;
+               return ea_inode;
        }
 
        /* Create an inode for the EA value */
        ea_inode = ext4_xattr_inode_create(handle, inode, hash);
-       if (IS_ERR(ea_inode))
-               return PTR_ERR(ea_inode);
+       if (IS_ERR(ea_inode)) {
+               ext4_xattr_inode_free_quota(inode, NULL, value_len);
+               return ea_inode;
+       }
 
        err = ext4_xattr_inode_write(handle, ea_inode, value, value_len);
        if (err) {
                if (ext4_xattr_inode_dec_ref(handle, ea_inode))
                        ext4_warning_inode(ea_inode, "cleanup dec ref error %d", err);
-               iput(ea_inode);
-               return err;
+               goto out_err;
        }
 
        if (EA_INODE_CACHE(inode))
                mb_cache_entry_create(EA_INODE_CACHE(inode), GFP_NOFS, hash,
                                      ea_inode->i_ino, true /* reusable */);
-
-       *ret_inode = ea_inode;
-       return 0;
+       return ea_inode;
+out_err:
+       iput(ea_inode);
+       ext4_xattr_inode_free_quota(inode, NULL, value_len);
+       return ERR_PTR(err);
 }
 
 /*
@@ -1712,16 +1715,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
        if (i->value && in_inode) {
                WARN_ON_ONCE(!i->value_len);
 
-               ret = ext4_xattr_inode_alloc_quota(inode, i->value_len);
-               if (ret)
-                       goto out;
-
-               ret = ext4_xattr_inode_lookup_create(handle, inode, i->value,
-                                                    i->value_len,
-                                                    &new_ea_inode);
-               if (ret) {
+               new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
+                                       i->value, i->value_len);
+               if (IS_ERR(new_ea_inode)) {
+                       ret = PTR_ERR(new_ea_inode);
                        new_ea_inode = NULL;
-                       ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
                        goto out;
                }
        }
@@ -2160,17 +2158,6 @@ getblk_failed:
                                                      ENTRY(header(s->base)+1));
                        if (error)
                                goto getblk_failed;
-                       if (ea_inode) {
-                               /* Drop the extra ref on ea_inode. */
-                               error = ext4_xattr_inode_dec_ref(handle,
-                                                                ea_inode);
-                               if (error)
-                                       ext4_warning_inode(ea_inode,
-                                                          "dec ref error=%d",
-                                                          error);
-                               iput(ea_inode);
-                               ea_inode = NULL;
-                       }
 
                        lock_buffer(new_bh);
                        error = ext4_journal_get_create_access(handle, sb,
index b0597a539fc54842922714c74f893452296164e2..eac698b8dd3877a1a00cedd9d6f17d11adb199ca 100644 (file)
@@ -154,49 +154,47 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
        if (unlikely(f2fs_cp_error(sbi)))
                return exist;
 
-       if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) {
-               f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
-                        blkaddr, exist);
-               set_sbi_flag(sbi, SBI_NEED_FSCK);
-               return exist;
-       }
+       if ((exist && type == DATA_GENERIC_ENHANCE_UPDATE) ||
+               (!exist && type == DATA_GENERIC_ENHANCE))
+               goto out_err;
+       if (!exist && type != DATA_GENERIC_ENHANCE_UPDATE)
+               goto out_handle;
+       return exist;
 
-       if (!exist && type == DATA_GENERIC_ENHANCE) {
-               f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
-                        blkaddr, exist);
-               set_sbi_flag(sbi, SBI_NEED_FSCK);
-               dump_stack();
-       }
+out_err:
+       f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
+                blkaddr, exist);
+       set_sbi_flag(sbi, SBI_NEED_FSCK);
+       dump_stack();
+out_handle:
+       f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
        return exist;
 }
 
-bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
                                        block_t blkaddr, int type)
 {
-       if (time_to_inject(sbi, FAULT_BLKADDR))
-               return false;
-
        switch (type) {
        case META_NAT:
                break;
        case META_SIT:
                if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
-                       return false;
+                       goto err;
                break;
        case META_SSA:
                if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
                        blkaddr < SM_I(sbi)->ssa_blkaddr))
-                       return false;
+                       goto err;
                break;
        case META_CP:
                if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
                        blkaddr < __start_cp_addr(sbi)))
-                       return false;
+                       goto err;
                break;
        case META_POR:
                if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
                        blkaddr < MAIN_BLKADDR(sbi)))
-                       return false;
+                       goto err;
                break;
        case DATA_GENERIC:
        case DATA_GENERIC_ENHANCE:
@@ -213,7 +211,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
                                  blkaddr);
                        set_sbi_flag(sbi, SBI_NEED_FSCK);
                        dump_stack();
-                       return false;
+                       goto err;
                } else {
                        return __is_bitmap_valid(sbi, blkaddr, type);
                }
@@ -221,13 +219,30 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
        case META_GENERIC:
                if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
                        blkaddr >= MAIN_BLKADDR(sbi)))
-                       return false;
+                       goto err;
                break;
        default:
                BUG();
        }
 
        return true;
+err:
+       f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+       return false;
+}
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+                                       block_t blkaddr, int type)
+{
+       if (time_to_inject(sbi, FAULT_BLKADDR_VALIDITY))
+               return false;
+       return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
+}
+
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+                                       block_t blkaddr, int type)
+{
+       return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
 }
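The split leaves f2fs_is_valid_blkaddr() as the fault-injection point, while the _raw variant bypasses injection for callers that must reflect the real on-disk state. A hypothetical caller might look like:

	static int example_check(struct f2fs_sb_info *sbi, block_t blkaddr)
	{
		/* sketch: a check that must not see injected
		 * FAULT_BLKADDR_VALIDITY failures */
		if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, META_POR))
			return -EFSCORRUPTED;
		return 0;
	}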
 
 /*
@@ -889,7 +904,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
 
        cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count);
 
-       if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) {
+       if (cp_blocks > BLKS_PER_SEG(sbi) || cp_blocks <= F2FS_CP_PACKS) {
                f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
                          le32_to_cpu(cp_block->cp_pack_total_block_count));
                goto invalid_cp;
@@ -1324,7 +1339,7 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
        if (cpc->reason & CP_UMOUNT) {
                if (le32_to_cpu(ckpt->cp_pack_total_block_count) +
-                       NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) {
+                       NM_I(sbi)->nat_bits_blocks > BLKS_PER_SEG(sbi)) {
                        clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
                        f2fs_notice(sbi, "Disable nat_bits due to no space");
                } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
@@ -1527,7 +1542,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
                cp_ver |= ((__u64)crc32 << 32);
                *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
 
-               blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
+               blk = start_blk + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks;
                for (i = 0; i < nm_i->nat_bits_blocks; i++)
                        f2fs_update_meta_page(sbi, nm_i->nat_bits +
                                        (i << F2FS_BLKSIZE_BITS), blk + i);
@@ -1587,8 +1602,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
         */
        if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
                f2fs_sb_has_compression(sbi))
-               invalidate_mapping_pages(META_MAPPING(sbi),
-                               MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
+               f2fs_bug_on(sbi,
+                       invalidate_inode_pages2_range(META_MAPPING(sbi),
+                               MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1));
 
        f2fs_release_ino_entry(sbi, false);
 
@@ -1730,9 +1746,9 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
                im->ino_num = 0;
        }
 
-       sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
+       sbi->max_orphans = (BLKS_PER_SEG(sbi) - F2FS_CP_PACKS -
                        NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
-                               F2FS_ORPHANS_PER_BLOCK;
+                       F2FS_ORPHANS_PER_BLOCK;
 }
 
 int __init f2fs_create_checkpoint_caches(void)
index 531517dac07967c72daf289cb5e7bd9fc70b3942..8892c82621414602e50b070d2187d9f66c0e23a5 100644 (file)
@@ -512,8 +512,8 @@ static int lzorle_compress_pages(struct compress_ctx *cc)
        ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata,
                                        &cc->clen, cc->private);
        if (ret != LZO_E_OK) {
-               printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n",
-                               KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret);
+               f2fs_err_ratelimited(F2FS_I_SB(cc->inode),
+                               "lzo-rle compress failed, ret:%d", ret);
                return -EIO;
        }
        return 0;
@@ -780,9 +780,9 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
                if (provided != calculated) {
                        if (!is_inode_flag_set(dic->inode, FI_COMPRESS_CORRUPT)) {
                                set_inode_flag(dic->inode, FI_COMPRESS_CORRUPT);
-                               printk_ratelimited(
-                                       "%sF2FS-fs (%s): checksum invalid, nid = %lu, %x vs %x",
-                                       KERN_INFO, sbi->sb->s_id, dic->inode->i_ino,
+                               f2fs_info_ratelimited(sbi,
+                                       "checksum invalid, nid = %lu, %x vs %x",
+                                       dic->inode->i_ino,
                                        provided, calculated);
                        }
                        set_sbi_flag(sbi, SBI_NEED_FSCK);
@@ -1418,6 +1418,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
        struct f2fs_sb_info *sbi = bio->bi_private;
        struct compress_io_ctx *cic =
                        (struct compress_io_ctx *)page_private(page);
+       enum count_type type = WB_DATA_TYPE(page,
+                               f2fs_is_compressed_page(page));
        int i;
 
        if (unlikely(bio->bi_status))
@@ -1425,7 +1427,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
 
        f2fs_compress_free_page(page);
 
-       dec_page_count(sbi, F2FS_WB_DATA);
+       dec_page_count(sbi, type);
 
        if (atomic_dec_return(&cic->pending_pages))
                return;
@@ -1441,12 +1443,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
 }
 
 static int f2fs_write_raw_pages(struct compress_ctx *cc,
-                                       int *submitted,
+                                       int *submitted_p,
                                        struct writeback_control *wbc,
                                        enum iostat_type io_type)
 {
        struct address_space *mapping = cc->inode->i_mapping;
-       int _submitted, compr_blocks, ret, i;
+       struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+       int submitted, compr_blocks, i;
+       int ret = 0;
 
        compr_blocks = f2fs_compressed_blocks(cc);
 
@@ -1461,6 +1465,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
        if (compr_blocks < 0)
                return compr_blocks;
 
+       /* overwrite compressed cluster w/ normal cluster */
+       if (compr_blocks > 0)
+               f2fs_lock_op(sbi);
+
        for (i = 0; i < cc->cluster_size; i++) {
                if (!cc->rpages[i])
                        continue;
@@ -1485,7 +1493,7 @@ continue_unlock:
                if (!clear_page_dirty_for_io(cc->rpages[i]))
                        goto continue_unlock;
 
-               ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
+               ret = f2fs_write_single_data_page(cc->rpages[i], &submitted,
                                                NULL, NULL, wbc, io_type,
                                                compr_blocks, false);
                if (ret) {
@@ -1493,26 +1501,29 @@ continue_unlock:
                                unlock_page(cc->rpages[i]);
                                ret = 0;
                        } else if (ret == -EAGAIN) {
+                               ret = 0;
                                /*
                                 * for quota file, just redirty left pages to
                                 * avoid deadlock caused by cluster update race
                                 * from foreground operation.
                                 */
                                if (IS_NOQUOTA(cc->inode))
-                                       return 0;
-                               ret = 0;
+                                       goto out;
                                f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
                                goto retry_write;
                        }
-                       return ret;
+                       goto out;
                }
 
-               *submitted += _submitted;
+               *submitted_p += submitted;
        }
 
-       f2fs_balance_fs(F2FS_M_SB(mapping), true);
+out:
+       if (compr_blocks > 0)
+               f2fs_unlock_op(sbi);
 
-       return 0;
+       f2fs_balance_fs(sbi, true);
+       return ret;
 }
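
The hunk above also converts the early returns in f2fs_write_raw_pages() into goto out, so that whenever f2fs_lock_op() was taken for a compressed-cluster overwrite, the matching f2fs_unlock_op() runs on every exit path. A minimal standalone sketch of that pattern, with stub helpers standing in for the f2fs locking API:

#include <stdbool.h>

static void lock_op(void)   { }        /* stand-in for f2fs_lock_op() */
static void unlock_op(void) { }        /* stand-in for f2fs_unlock_op() */
static int write_one(int i) { return i == 3 ? -1 : 0; }  /* fake failure */

/* Take the lock only when needed; release it on every exit path. */
static int write_pages(int npages, bool overwriting_compressed)
{
        int i, ret = 0;

        if (overwriting_compressed)
                lock_op();

        for (i = 0; i < npages; i++) {
                ret = write_one(i);
                if (ret)
                        goto out;       /* no early return past the lock */
        }
out:
        if (overwriting_compressed)
                unlock_op();
        return ret;
}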
 
 int f2fs_write_multi_pages(struct compress_ctx *cc,
@@ -1806,16 +1817,18 @@ void f2fs_put_page_dic(struct page *page, bool in_task)
  * check whether cluster blocks are contiguous, and add extent cache entry
  * only if cluster blocks are logically and physically contiguous.
  */
-unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn)
+unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn,
+                                               unsigned int ofs_in_node)
 {
-       bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR;
+       bool compressed = data_blkaddr(dn->inode, dn->node_page,
+                                       ofs_in_node) == COMPRESS_ADDR;
        int i = compressed ? 1 : 0;
        block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
-                                               dn->ofs_in_node + i);
+                                                       ofs_in_node + i);
 
        for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) {
                block_t blkaddr = data_blkaddr(dn->inode, dn->node_page,
-                                               dn->ofs_in_node + i);
+                                                       ofs_in_node + i);
 
                if (!__is_valid_data_blkaddr(blkaddr))
                        break;
@@ -1878,12 +1891,8 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
 
        set_page_private_data(cpage, ino);
 
-       if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ))
-               goto out;
-
        memcpy(page_address(cpage), page_address(page), PAGE_SIZE);
        SetPageUptodate(cpage);
-out:
        f2fs_put_page(cpage, 1);
 }
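
With the dummy-page accounting gone, a compressed writeback page can no longer be recognized from the page alone, which is why WB_DATA_TYPE() grows a second argument throughout these hunks. A simplified model of the new macro (stand-in names, not the kernel definitions):

#include <stdbool.h>

enum count_type { F2FS_WB_CP_DATA, F2FS_WB_DATA };

/* stand-in for f2fs_is_cp_guaranteed(page) */
static bool is_cp_guaranteed(const void *page) { (void)page; return false; }

/* compressed pages are always accounted as checkpoint-guaranteed */
#define WB_DATA_TYPE(p, is_compressed) \
        ((is_compressed) || is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)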
 
index 26e317696b3389d9291f84756b82a126137ce215..d9494b5fc7c185e5088dd12885a5e9758cfd4011 100644
@@ -48,7 +48,7 @@ void f2fs_destroy_bioset(void)
        bioset_exit(&f2fs_bioset);
 }
 
-static bool __is_cp_guaranteed(struct page *page)
+bool f2fs_is_cp_guaranteed(struct page *page)
 {
        struct address_space *mapping = page->mapping;
        struct inode *inode;
@@ -65,8 +65,6 @@ static bool __is_cp_guaranteed(struct page *page)
                        S_ISDIR(inode->i_mode))
                return true;
 
-       if (f2fs_is_compressed_page(page))
-               return false;
        if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
                        page_private_gcing(page))
                return true;
@@ -338,18 +336,7 @@ static void f2fs_write_end_io(struct bio *bio)
 
        bio_for_each_segment_all(bvec, bio, iter_all) {
                struct page *page = bvec->bv_page;
-               enum count_type type = WB_DATA_TYPE(page);
-
-               if (page_private_dummy(page)) {
-                       clear_page_private_dummy(page);
-                       unlock_page(page);
-                       mempool_free(page, sbi->write_io_dummy);
-
-                       if (unlikely(bio->bi_status))
-                               f2fs_stop_checkpoint(sbi, true,
-                                               STOP_CP_REASON_WRITE_FAIL);
-                       continue;
-               }
+               enum count_type type = WB_DATA_TYPE(page, false);
 
                fscrypt_finalize_bounce_page(&page);
 
@@ -524,50 +511,13 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
        submit_bio(bio);
 }
 
-static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio)
-{
-       unsigned int start =
-               (bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi);
-
-       if (start == 0)
-               return;
-
-       /* fill dummy pages */
-       for (; start < F2FS_IO_SIZE(sbi); start++) {
-               struct page *page =
-                       mempool_alloc(sbi->write_io_dummy,
-                                     GFP_NOIO | __GFP_NOFAIL);
-               f2fs_bug_on(sbi, !page);
-
-               lock_page(page);
-
-               zero_user_segment(page, 0, PAGE_SIZE);
-               set_page_private_dummy(page);
-
-               if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
-                       f2fs_bug_on(sbi, 1);
-       }
-}
-
 static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
                                  enum page_type type)
 {
        WARN_ON_ONCE(is_read_io(bio_op(bio)));
 
-       if (type == DATA || type == NODE) {
-               if (f2fs_lfs_mode(sbi) && current->plug)
-                       blk_finish_plug(current->plug);
-
-               if (F2FS_IO_ALIGNED(sbi)) {
-                       f2fs_align_write_bio(sbi, bio);
-                       /*
-                        * In the NODE case, we lose next block address chain.
-                        * So, we need to do checkpoint in f2fs_sync_file.
-                        */
-                       if (type == NODE)
-                               set_sbi_flag(sbi, SBI_NEED_CP);
-               }
-       }
+       if (f2fs_lfs_mode(sbi) && current->plug && PAGE_TYPE_ON_MAIN(type))
+               blk_finish_plug(current->plug);
 
        trace_f2fs_submit_write_bio(sbi->sb, type, bio);
        iostat_update_submit_ctx(bio, type);
@@ -740,10 +690,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 
        if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
                        fio->is_por ? META_POR : (__is_meta_io(fio) ?
-                       META_GENERIC : DATA_GENERIC_ENHANCE))) {
-               f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
+                       META_GENERIC : DATA_GENERIC_ENHANCE)))
                return -EFSCORRUPTED;
-       }
 
        trace_f2fs_submit_page_bio(page, fio);
 
@@ -762,7 +710,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
                wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
 
        inc_page_count(fio->sbi, is_read_io(fio->op) ?
-                       __read_io_type(page) : WB_DATA_TYPE(fio->page));
+                       __read_io_type(page) : WB_DATA_TYPE(fio->page, false));
 
        if (is_read_io(bio_op(bio)))
                f2fs_submit_read_bio(fio->sbi, bio, fio->type);
@@ -796,16 +744,6 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
                                        block_t last_blkaddr,
                                        block_t cur_blkaddr)
 {
-       if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
-               unsigned int filled_blocks =
-                               F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
-               unsigned int io_size = F2FS_IO_SIZE(sbi);
-               unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
-
-               /* IOs in bio is aligned and left space of vectors is not enough */
-               if (!(filled_blocks % io_size) && left_vecs < io_size)
-                       return false;
-       }
        if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
                return false;
        return io_type_is_mergeable(io, fio);
@@ -948,10 +886,8 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
                        fio->encrypted_page : fio->page;
 
        if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
-                       __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) {
-               f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
+                       __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
                return -EFSCORRUPTED;
-       }
 
        trace_f2fs_submit_page_bio(page, fio);
 
@@ -973,7 +909,7 @@ alloc_new:
        if (fio->io_wbc)
                wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
 
-       inc_page_count(fio->sbi, WB_DATA_TYPE(page));
+       inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
 
        *fio->last_block = fio->new_blkaddr;
        *fio->bio = bio;
@@ -1007,11 +943,12 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
        enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
        struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
        struct page *bio_page;
+       enum count_type type;
 
        f2fs_bug_on(sbi, is_read_io(fio->op));
 
        f2fs_down_write(&io->io_rwsem);
-
+next:
 #ifdef CONFIG_BLK_DEV_ZONED
        if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
                wait_for_completion_io(&io->zone_wait);
@@ -1021,7 +958,6 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
        }
 #endif
 
-next:
        if (fio->in_list) {
                spin_lock(&io->io_lock);
                if (list_empty(&io->io_list)) {
@@ -1046,7 +982,8 @@ next:
        /* set submitted = true as a return value */
        fio->submitted = 1;
 
-       inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+       type = WB_DATA_TYPE(bio_page, fio->compressed_page);
+       inc_page_count(sbi, type);
 
        if (io->bio &&
            (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
@@ -1056,13 +993,6 @@ next:
                __submit_merged_bio(io);
 alloc_new:
        if (io->bio == NULL) {
-               if (F2FS_IO_ALIGNED(sbi) &&
-                               (fio->type == DATA || fio->type == NODE) &&
-                               fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
-                       dec_page_count(sbi, WB_DATA_TYPE(bio_page));
-                       fio->retry = 1;
-                       goto skip;
-               }
                io->bio = __bio_alloc(fio, BIO_MAX_VECS);
                f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
                                       bio_page->index, fio, GFP_NOIO);
@@ -1080,10 +1010,6 @@ alloc_new:
        io->last_block_in_bio = fio->new_blkaddr;
 
        trace_f2fs_submit_page_write(fio->page, fio);
-skip:
-       if (fio->in_list)
-               goto next;
-out:
 #ifdef CONFIG_BLK_DEV_ZONED
        if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
                        is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
@@ -1096,6 +1022,9 @@ out:
                __submit_merged_bio(io);
        }
 #endif
+       if (fio->in_list)
+               goto next;
+out:
        if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
                                !f2fs_is_checkpoint_ready(sbi))
                __submit_merged_bio(io);
@@ -1218,7 +1147,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
 
        if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
                return -EPERM;
-       if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
+       err = inc_valid_block_count(sbi, dn->inode, &count, true);
+       if (unlikely(err))
                return err;
 
        trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
@@ -1285,8 +1215,6 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
                if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
                                                DATA_GENERIC_ENHANCE_READ)) {
                        err = -EFSCORRUPTED;
-                       f2fs_handle_error(F2FS_I_SB(inode),
-                                               ERROR_INVALID_BLKADDR);
                        goto put_err;
                }
                goto got_it;
@@ -1312,8 +1240,6 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
                                                dn.data_blkaddr,
                                                DATA_GENERIC_ENHANCE)) {
                err = -EFSCORRUPTED;
-               f2fs_handle_error(F2FS_I_SB(inode),
-                                       ERROR_INVALID_BLKADDR);
                goto put_err;
        }
 got_it:
@@ -1475,15 +1401,18 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
 
        dn->data_blkaddr = f2fs_data_blkaddr(dn);
        if (dn->data_blkaddr == NULL_ADDR) {
-               err = inc_valid_block_count(sbi, dn->inode, &count);
+               err = inc_valid_block_count(sbi, dn->inode, &count, true);
                if (unlikely(err))
                        return err;
        }
 
        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
        old_blkaddr = dn->data_blkaddr;
-       f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
-                               &sum, seg_type, NULL);
+       err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr,
+                               &dn->data_blkaddr, &sum, seg_type, NULL);
+       if (err)
+               return err;
+
        if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
                f2fs_invalidate_internal_cache(sbi, old_blkaddr);
 
@@ -1641,7 +1570,6 @@ next_block:
        if (!is_hole &&
            !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
                err = -EFSCORRUPTED;
-               f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
                goto sync_out;
        }
 
@@ -2165,8 +2093,6 @@ got_it:
                if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
                                                DATA_GENERIC_ENHANCE_READ)) {
                        ret = -EFSCORRUPTED;
-                       f2fs_handle_error(F2FS_I_SB(inode),
-                                               ERROR_INVALID_BLKADDR);
                        goto out;
                }
        } else {
@@ -2668,8 +2594,6 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
        if (fio) {
                if (page_private_gcing(fio->page))
                        return true;
-               if (page_private_dummy(fio->page))
-                       return true;
                if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
                        f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
                        return true;
@@ -2706,11 +2630,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
            f2fs_lookup_read_extent_cache_block(inode, page->index,
                                                &fio->old_blkaddr)) {
                if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
-                                               DATA_GENERIC_ENHANCE)) {
-                       f2fs_handle_error(fio->sbi,
-                                               ERROR_INVALID_BLKADDR);
+                                               DATA_GENERIC_ENHANCE))
                        return -EFSCORRUPTED;
-               }
 
                ipu_force = true;
                fio->need_lock = LOCK_DONE;
@@ -2738,7 +2659,6 @@ got_it:
                !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
                                                DATA_GENERIC_ENHANCE)) {
                err = -EFSCORRUPTED;
-               f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
                goto out_writepage;
        }
 
@@ -2838,7 +2758,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
                .encrypted_page = NULL,
                .submitted = 0,
                .compr_blocks = compr_blocks,
-               .need_lock = LOCK_RETRY,
+               .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
                .post_read = f2fs_post_read_required(inode) ? 1 : 0,
                .io_type = io_type,
                .io_wbc = wbc,
@@ -2919,6 +2839,7 @@ write:
        if (err == -EAGAIN) {
                err = f2fs_do_write_data_page(&fio);
                if (err == -EAGAIN) {
+                       f2fs_bug_on(sbi, compr_blocks);
                        fio.need_lock = LOCK_REQ;
                        err = f2fs_do_write_data_page(&fio);
                }
@@ -3704,7 +3625,6 @@ repeat:
                if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
                                DATA_GENERIC_ENHANCE_READ)) {
                        err = -EFSCORRUPTED;
-                       f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
                        goto fail;
                }
                err = f2fs_submit_page_read(use_cow ?
@@ -3905,26 +3825,36 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        unsigned int blkofs;
        unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
+       unsigned int end_blk = start_blk + blkcnt - 1;
        unsigned int secidx = start_blk / blk_per_sec;
-       unsigned int end_sec = secidx + blkcnt / blk_per_sec;
+       unsigned int end_sec;
        int ret = 0;
 
+       if (!blkcnt)
+               return 0;
+       end_sec = end_blk / blk_per_sec;
+
        f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
        filemap_invalidate_lock(inode->i_mapping);
 
        set_inode_flag(inode, FI_ALIGNED_WRITE);
        set_inode_flag(inode, FI_OPU_WRITE);
 
-       for (; secidx < end_sec; secidx++) {
+       for (; secidx <= end_sec; secidx++) {
+               unsigned int blkofs_end = secidx == end_sec ?
+                               end_blk % blk_per_sec : blk_per_sec - 1;
+
                f2fs_down_write(&sbi->pin_sem);
 
-               f2fs_lock_op(sbi);
-               f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
-               f2fs_unlock_op(sbi);
+               ret = f2fs_allocate_pinning_section(sbi);
+               if (ret) {
+                       f2fs_up_write(&sbi->pin_sem);
+                       break;
+               }
 
                set_inode_flag(inode, FI_SKIP_WRITES);
 
-               for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
+               for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
                        struct page *page;
                        unsigned int blkidx = secidx * blk_per_sec + blkofs;
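
The rewritten f2fs_migrate_blocks() loop above switches from an exclusive section count to an inclusive end_blk/end_sec pair, so a trailing partial section is no longer skipped when blkcnt is not a multiple of blk_per_sec. The index arithmetic, reduced to a standalone sketch (start_blk assumed section-aligned, as in the swap path):

/* Sketch of the section walk above; plain C, no f2fs types. */
static void walk_sections(unsigned int start_blk, unsigned int blkcnt,
                          unsigned int blk_per_sec)
{
        unsigned int end_blk, end_sec, secidx, blkofs;

        if (!blkcnt)
                return;                 /* end_blk would underflow */

        end_blk = start_blk + blkcnt - 1;
        end_sec = end_blk / blk_per_sec;

        for (secidx = start_blk / blk_per_sec; secidx <= end_sec; secidx++) {
                /* the last section may be partial; earlier ones are full */
                unsigned int blkofs_end = (secidx == end_sec) ?
                                end_blk % blk_per_sec : blk_per_sec - 1;

                for (blkofs = 0; blkofs <= blkofs_end; blkofs++)
                        ;               /* visit secidx * blk_per_sec + blkofs */
        }
}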
 
@@ -4013,27 +3943,34 @@ retry:
                nr_pblocks = map.m_len;
 
                if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
-                               nr_pblocks & sec_blks_mask) {
+                               nr_pblocks & sec_blks_mask ||
+                               !f2fs_valid_pinned_area(sbi, pblock)) {
+                       bool last_extent = false;
+
                        not_aligned++;
 
                        nr_pblocks = roundup(nr_pblocks, blks_per_sec);
                        if (cur_lblock + nr_pblocks > sis->max)
                                nr_pblocks -= blks_per_sec;
 
+                       /* this extent is last one */
                        if (!nr_pblocks) {
-                               /* this extent is last one */
-                               nr_pblocks = map.m_len;
-                               f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
-                               goto next;
+                               nr_pblocks = last_lblock - cur_lblock;
+                               last_extent = true;
                        }
 
                        ret = f2fs_migrate_blocks(inode, cur_lblock,
                                                        nr_pblocks);
-                       if (ret)
+                       if (ret) {
+                               if (ret == -ENOENT)
+                                       ret = -EINVAL;
                                goto out;
-                       goto retry;
+                       }
+
+                       if (!last_extent)
+                               goto retry;
                }
-next:
+
                if (cur_lblock + nr_pblocks >= sis->max)
                        nr_pblocks = sis->max - cur_lblock;
 
@@ -4071,17 +4008,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
                                sector_t *span)
 {
        struct inode *inode = file_inode(file);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        int ret;
 
        if (!S_ISREG(inode->i_mode))
                return -EINVAL;
 
-       if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+       if (f2fs_readonly(sbi->sb))
                return -EROFS;
 
-       if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
-               f2fs_err(F2FS_I_SB(inode),
-                       "Swapfile not supported in LFS mode");
+       if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
+               f2fs_err(sbi, "Swapfile not supported in LFS mode");
                return -EINVAL;
        }
 
@@ -4092,6 +4029,10 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
        if (!f2fs_disable_compressed_file(inode))
                return -EINVAL;
 
+       ret = filemap_fdatawrite(inode->i_mapping);
+       if (ret < 0)
+               return ret;
+
        f2fs_precache_extents(inode);
 
        ret = check_swap_activate(sis, file, span);
@@ -4100,7 +4041,7 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
 
        stat_inc_swapfile_inode(inode);
        set_inode_flag(inode, FI_PIN_FILE);
-       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+       f2fs_update_time(sbi, REQ_TIME);
        return ret;
 }
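
In the swapfile hunks above, an extent qualifies as section-aligned only when both its start block (relative to the main area) and its length are multiples of the section size, and on zoned setups it must also sit in a pinnable conventional area; everything else is migrated after rounding the length up to whole sections. The alignment test and roundup in isolation (hypothetical helper names; blks_per_sec is a power of two, as in f2fs):

/* 1 if [pblock, pblock + nr_pblocks) is aligned to whole sections */
static int extent_section_aligned(unsigned long long pblock,
                                  unsigned long long main_blkaddr,
                                  unsigned int nr_pblocks,
                                  unsigned int blks_per_sec)
{
        unsigned long long sec_blks_mask = blks_per_sec - 1;

        return !((pblock - main_blkaddr) & sec_blks_mask) &&
               !(nr_pblocks & sec_blks_mask);
}

/* round a misaligned extent length up to whole sections */
static unsigned int round_to_sections(unsigned int nr_pblocks,
                                      unsigned int blks_per_sec)
{
        return (nr_pblocks + blks_per_sec - 1) / blks_per_sec * blks_per_sec;
}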
 
index fdbf994f12718c566ef03d22fae9a7050a2ba5ee..8b0e1e71b667448afce3cb7b094fc2bc4644a892 100644
@@ -41,7 +41,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi)
        total_vblocks = 0;
        blks_per_sec = CAP_BLKS_PER_SEC(sbi);
        hblks_per_sec = blks_per_sec / 2;
-       for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+       for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
                vblocks = get_valid_blocks(sbi, segno, true);
                dist = abs(vblocks - hblks_per_sec);
                bimodal += dist * dist;
@@ -135,7 +135,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
        si->cur_ckpt_time = sbi->cprc_info.cur_time;
        si->peak_ckpt_time = sbi->cprc_info.peak_time;
        spin_unlock(&sbi->cprc_info.stat_lock);
-       si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
+       si->total_count = BLKS_TO_SEGS(sbi, (int)sbi->user_block_count);
        si->rsvd_segs = reserved_segments(sbi);
        si->overp_segs = overprovision_segments(sbi);
        si->valid_count = valid_user_blocks(sbi);
@@ -176,11 +176,10 @@ static void update_general_status(struct f2fs_sb_info *sbi)
        si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
        si->io_skip_bggc = sbi->io_skip_bggc;
        si->other_skip_bggc = sbi->other_skip_bggc;
-       si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
+       si->util_free = (int)(BLKS_TO_SEGS(sbi, free_user_blocks(sbi)))
                * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
                / 2;
-       si->util_valid = (int)(written_block_count(sbi) >>
-                                               sbi->log_blocks_per_seg)
+       si->util_valid = (int)(BLKS_TO_SEGS(sbi, written_block_count(sbi)))
                * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
                / 2;
        si->util_invalid = 50 - si->util_free - si->util_valid;
@@ -208,7 +207,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
                if (!blks)
                        continue;
 
-               if (blks == sbi->blocks_per_seg)
+               if (blks == BLKS_PER_SEG(sbi))
                        si->full_seg[type]++;
                else
                        si->dirty_seg[type]++;
index 042593aed1ec0a79cdae8846d0e79c06674d97ca..02c9355176d3b55f467d0bd6948c7e5db600b519 100644
@@ -830,13 +830,14 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
        return err;
 }
 
-int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir,
+                                       struct f2fs_filename *fname)
 {
        struct page *page;
        int err = 0;
 
        f2fs_down_write(&F2FS_I(inode)->i_sem);
-       page = f2fs_init_inode_metadata(inode, dir, NULL, NULL);
+       page = f2fs_init_inode_metadata(inode, dir, fname, NULL);
        if (IS_ERR(page)) {
                err = PTR_ERR(page);
                goto fail;
@@ -995,9 +996,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
                de = &d->dentry[bit_pos];
                if (de->name_len == 0) {
                        if (found_valid_dirent || !bit_pos) {
-                               printk_ratelimited(
-                                       "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.",
-                                       KERN_WARNING, sbi->sb->s_id,
+                               f2fs_warn_ratelimited(sbi,
+                                       "invalid namelen(0), ino:%u, run fsck to fix.",
                                        le32_to_cpu(de->ino));
                                set_sbi_flag(sbi, SBI_NEED_FSCK);
                        }
index ad8dfac73bd4461d95d92e965a079d3968d1a229..48048fa3642766d5176e175ee445f5a26aea8a6b 100644
@@ -43,7 +43,6 @@ bool sanity_check_extent_cache(struct inode *inode)
        if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) ||
            !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
                                        DATA_GENERIC_ENHANCE)) {
-               set_sbi_flag(sbi, SBI_NEED_FSCK);
                f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
                          __func__, inode->i_ino,
                          ei->blk, ei->fofs, ei->len);
@@ -856,10 +855,8 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
                goto out;
 
        if (__is_valid_data_blkaddr(blkaddr) &&
-           !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
-               f2fs_bug_on(sbi, 1);
+           !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
                return -EINVAL;
-       }
 out:
        /*
         * init block age with zero, this can happen when the block age extent
index 3ff428bee958cbd25f16e0e74e8ebc9e43be1a3a..fced2b7652f40a1acf202043dc3c3fdd82c46177 100644
@@ -61,7 +61,9 @@ enum {
        FAULT_SLAB_ALLOC,
        FAULT_DQUOT_INIT,
        FAULT_LOCK_OP,
-       FAULT_BLKADDR,
+       FAULT_BLKADDR_VALIDITY,
+       FAULT_BLKADDR_CONSISTENCE,
+       FAULT_NO_SEGMENT,
        FAULT_MAX,
 };
 
@@ -76,6 +78,11 @@ struct f2fs_fault_info {
 
 extern const char *f2fs_fault_name[FAULT_MAX];
 #define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type))
+
+/* maximum retry count for injected failure */
+#define DEFAULT_FAILURE_RETRY_COUNT            8
+#else
+#define DEFAULT_FAILURE_RETRY_COUNT            1
 #endif
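
DEFAULT_FAILURE_RETRY_COUNT collapses to 1 when fault injection is compiled out, so call sites can keep a single retry loop instead of wrapping it in #ifdef. A sketch of the intended usage (stub operation, assumed pattern):

#ifdef CONFIG_F2FS_FAULT_INJECTION
#define DEFAULT_FAILURE_RETRY_COUNT     8
#else
#define DEFAULT_FAILURE_RETRY_COUNT     1
#endif

static int try_op(void) { return 0; }   /* stand-in; 0 on success */

static int op_with_retry(void)
{
        int i, err = -1;

        /* one pass normally; up to 8 when failures may be injected */
        for (i = 0; i < DEFAULT_FAILURE_RETRY_COUNT && err; i++)
                err = try_op();
        return err;
}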
 
 /*
@@ -143,7 +150,6 @@ struct f2fs_rwsem {
 
 struct f2fs_mount_info {
        unsigned int opt;
-       int write_io_size_bits;         /* Write IO size bits */
        block_t root_reserved_blocks;   /* root reserved blocks */
        kuid_t s_resuid;                /* reserved blocks for uid */
        kgid_t s_resgid;                /* reserved blocks for gid */
@@ -1081,7 +1087,8 @@ struct f2fs_sm_info {
  * f2fs monitors the number of several block types such as on-writeback,
  * dirty dentry blocks, dirty node blocks, and dirty meta blocks.
  */
-#define WB_DATA_TYPE(p)        (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
+#define WB_DATA_TYPE(p, f)                     \
+       (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA)
 enum count_type {
        F2FS_DIRTY_DENTS,
        F2FS_DIRTY_DATA,
@@ -1111,6 +1118,7 @@ enum count_type {
  * ...                 Only can be used with META.
  */
 #define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type))
+#define PAGE_TYPE_ON_MAIN(type)        ((type) == DATA || (type) == NODE)
 enum page_type {
        DATA = 0,
        NODE = 1,       /* should not change this */
@@ -1205,7 +1213,6 @@ struct f2fs_io_info {
        unsigned int submitted:1;       /* indicate IO submission */
        unsigned int in_list:1;         /* indicate fio is in io_list */
        unsigned int is_por:1;          /* indicate IO is from recovery or not */
-       unsigned int retry:1;           /* need to reallocate block address */
        unsigned int encrypted:1;       /* indicate file is encrypted */
        unsigned int post_read:1;       /* require post read */
        enum iostat_type io_type;       /* io type */
@@ -1407,18 +1414,16 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr);
  * Layout A: lowest bit should be 1
  * | bit0 = 1 | bit1 | bit2 | ... | bit MAX | private data .... |
  * bit 0       PAGE_PRIVATE_NOT_POINTER
- * bit 1       PAGE_PRIVATE_DUMMY_WRITE
- * bit 2       PAGE_PRIVATE_ONGOING_MIGRATION
- * bit 3       PAGE_PRIVATE_INLINE_INODE
- * bit 4       PAGE_PRIVATE_REF_RESOURCE
- * bit 5-      f2fs private data
+ * bit 1       PAGE_PRIVATE_ONGOING_MIGRATION
+ * bit 2       PAGE_PRIVATE_INLINE_INODE
+ * bit 3       PAGE_PRIVATE_REF_RESOURCE
+ * bit 4-      f2fs private data
  *
  * Layout B: lowest bit should be 0
  * page.private is a wrapped pointer.
  */
 enum {
        PAGE_PRIVATE_NOT_POINTER,               /* private contains non-pointer data */
-       PAGE_PRIVATE_DUMMY_WRITE,               /* data page for padding aligned IO */
        PAGE_PRIVATE_ONGOING_MIGRATION,         /* data page which is on-going migrating */
        PAGE_PRIVATE_INLINE_INODE,              /* inode page contains inline data */
        PAGE_PRIVATE_REF_RESOURCE,              /* dirty page has referenced resources */
@@ -1565,7 +1570,6 @@ struct f2fs_sb_info {
        struct f2fs_bio_info *write_io[NR_PAGE_TYPE];   /* for write bios */
        /* keep migration IO order for LFS mode */
        struct f2fs_rwsem io_order_lock;
-       mempool_t *write_io_dummy;              /* Dummy pages */
        pgoff_t page_eio_ofs[NR_PAGE_TYPE];     /* EIO page offset */
        int page_eio_cnt[NR_PAGE_TYPE];         /* EIO count */
 
@@ -1811,6 +1815,37 @@ struct f2fs_sb_info {
 #endif
 };
 
+/* Definitions to access f2fs_sb_info */
+#define SEGS_TO_BLKS(sbi, segs)                                        \
+               ((segs) << (sbi)->log_blocks_per_seg)
+#define BLKS_TO_SEGS(sbi, blks)                                        \
+               ((blks) >> (sbi)->log_blocks_per_seg)
+
+#define BLKS_PER_SEG(sbi)      ((sbi)->blocks_per_seg)
+#define BLKS_PER_SEC(sbi)      (SEGS_TO_BLKS(sbi, (sbi)->segs_per_sec))
+#define SEGS_PER_SEC(sbi)      ((sbi)->segs_per_sec)
+
+__printf(3, 4)
+void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...);
+
+#define f2fs_err(sbi, fmt, ...)                                                \
+       f2fs_printk(sbi, false, KERN_ERR fmt, ##__VA_ARGS__)
+#define f2fs_warn(sbi, fmt, ...)                                       \
+       f2fs_printk(sbi, false, KERN_WARNING fmt, ##__VA_ARGS__)
+#define f2fs_notice(sbi, fmt, ...)                                     \
+       f2fs_printk(sbi, false, KERN_NOTICE fmt, ##__VA_ARGS__)
+#define f2fs_info(sbi, fmt, ...)                                       \
+       f2fs_printk(sbi, false, KERN_INFO fmt, ##__VA_ARGS__)
+#define f2fs_debug(sbi, fmt, ...)                                      \
+       f2fs_printk(sbi, false, KERN_DEBUG fmt, ##__VA_ARGS__)
+
+#define f2fs_err_ratelimited(sbi, fmt, ...)                            \
+       f2fs_printk(sbi, true, KERN_ERR fmt, ##__VA_ARGS__)
+#define f2fs_warn_ratelimited(sbi, fmt, ...)                           \
+       f2fs_printk(sbi, true, KERN_WARNING fmt, ##__VA_ARGS__)
+#define f2fs_info_ratelimited(sbi, fmt, ...)                           \
+       f2fs_printk(sbi, true, KERN_INFO fmt, ##__VA_ARGS__)
+
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 #define time_to_inject(sbi, type) __time_to_inject(sbi, type, __func__,        \
                                                                        __builtin_return_address(0))
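
The new SEGS_TO_BLKS()/BLKS_TO_SEGS() conversions above are plain shifts by log_blocks_per_seg. As a worked example with the default 2 MiB segment of 512 four-KiB blocks (log_blocks_per_seg = 9):

#include <assert.h>

struct sbi_stub { unsigned int log_blocks_per_seg; };

#define SEGS_TO_BLKS(sbi, segs) ((segs) << (sbi)->log_blocks_per_seg)
#define BLKS_TO_SEGS(sbi, blks) ((blks) >> (sbi)->log_blocks_per_seg)

static void conversion_example(void)
{
        struct sbi_stub s = { .log_blocks_per_seg = 9 }; /* 512 blks/seg */

        assert(SEGS_TO_BLKS(&s, 4) == 2048);
        assert(BLKS_TO_SEGS(&s, 1536) == 3);    /* truncating shift */
}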
@@ -1828,9 +1863,8 @@ static inline bool __time_to_inject(struct f2fs_sb_info *sbi, int type,
        atomic_inc(&ffi->inject_ops);
        if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) {
                atomic_set(&ffi->inject_ops, 0);
-               printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n",
-                       KERN_INFO, sbi->sb->s_id, f2fs_fault_name[type],
-                       func, parent_func);
+               f2fs_info_ratelimited(sbi, "inject %s in %s of %pS",
+                               f2fs_fault_name[type], func, parent_func);
                return true;
        }
        return false;
@@ -2250,9 +2284,30 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi,
        return false;
 }
 
+static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi,
+                                               struct inode *inode, bool cap)
+{
+       block_t avail_user_block_count;
+
+       avail_user_block_count = sbi->user_block_count -
+                                       sbi->current_reserved_blocks;
+
+       if (!__allow_reserved_blocks(sbi, inode, cap))
+               avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+
+       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+               if (avail_user_block_count > sbi->unusable_block_count)
+                       avail_user_block_count -= sbi->unusable_block_count;
+               else
+                       avail_user_block_count = 0;
+       }
+
+       return avail_user_block_count;
+}
+
 static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
 static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
-                                struct inode *inode, blkcnt_t *count)
+                                struct inode *inode, blkcnt_t *count, bool partial)
 {
        blkcnt_t diff = 0, release = 0;
        block_t avail_user_block_count;
@@ -2275,23 +2330,14 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 
        spin_lock(&sbi->stat_lock);
        sbi->total_valid_block_count += (block_t)(*count);
-       avail_user_block_count = sbi->user_block_count -
-                                       sbi->current_reserved_blocks;
-
-       if (!__allow_reserved_blocks(sbi, inode, true))
-               avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
-       if (F2FS_IO_ALIGNED(sbi))
-               avail_user_block_count -= sbi->blocks_per_seg *
-                               SM_I(sbi)->additional_reserved_segments;
+       avail_user_block_count = get_available_block_count(sbi, inode, true);
 
-       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
-               if (avail_user_block_count > sbi->unusable_block_count)
-                       avail_user_block_count -= sbi->unusable_block_count;
-               else
-                       avail_user_block_count = 0;
-       }
        if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
+               if (!partial) {
+                       spin_unlock(&sbi->stat_lock);
+                       goto enospc;
+               }
+
                diff = sbi->total_valid_block_count - avail_user_block_count;
                if (diff > *count)
                        diff = *count;
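
inc_valid_block_count() now takes a partial flag: with it clear, a request that would cross the available-block limit fails outright with ENOSPC; with it set, the request is trimmed to what fits, as before. The trimming arithmetic on its own (simplified sketch; no quota or statistics handling):

#include <errno.h>

typedef unsigned long long blkcnt_t;

/* 0 on success (possibly shrinking *count), -ENOSPC on refusal */
static int reserve_blocks(blkcnt_t total_valid, blkcnt_t avail,
                          blkcnt_t *count, int partial)
{
        blkcnt_t want = total_valid + *count;

        if (want > avail) {
                blkcnt_t diff = want - avail;

                if (!partial)
                        return -ENOSPC; /* whole request refused */
                if (diff > *count)
                        diff = *count;
                *count -= diff;         /* grant only what fits */
                if (!*count)
                        return -ENOSPC;
        }
        return 0;
}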
@@ -2319,20 +2365,6 @@ release_quota:
        return -ENOSPC;
 }
 
-__printf(2, 3)
-void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...);
-
-#define f2fs_err(sbi, fmt, ...)                                                \
-       f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__)
-#define f2fs_warn(sbi, fmt, ...)                                       \
-       f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__)
-#define f2fs_notice(sbi, fmt, ...)                                     \
-       f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__)
-#define f2fs_info(sbi, fmt, ...)                                       \
-       f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__)
-#define f2fs_debug(sbi, fmt, ...)                                      \
-       f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__)
-
 #define PAGE_PRIVATE_GET_FUNC(name, flagname) \
 static inline bool page_private_##name(struct page *page) \
 { \
@@ -2361,17 +2393,14 @@ static inline void clear_page_private_##name(struct page *page) \
 PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER);
 PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE);
 PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE);
 
 PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE);
 PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE);
 PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE);
 
 PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE);
 PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE);
 PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION);
-PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE);
 
 static inline unsigned long get_page_private_data(struct page *page)
 {
@@ -2505,11 +2534,8 @@ static inline int get_dirty_pages(struct inode *inode)
 
 static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
 {
-       unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
-       unsigned int segs = (get_pages(sbi, block_type) + pages_per_sec - 1) >>
-                                               sbi->log_blocks_per_seg;
-
-       return segs / sbi->segs_per_sec;
+       return div_u64(get_pages(sbi, block_type) + BLKS_PER_SEC(sbi) - 1,
+                                                       BLKS_PER_SEC(sbi));
 }
 
 static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi)
@@ -2573,7 +2599,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
        block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
 
        if (sbi->cur_cp_pack == 2)
-               start_addr += sbi->blocks_per_seg;
+               start_addr += BLKS_PER_SEG(sbi);
        return start_addr;
 }
 
@@ -2582,7 +2608,7 @@ static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi)
        block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
 
        if (sbi->cur_cp_pack == 1)
-               start_addr += sbi->blocks_per_seg;
+               start_addr += BLKS_PER_SEG(sbi);
        return start_addr;
 }
 
@@ -2601,7 +2627,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
                                        struct inode *inode, bool is_inode)
 {
        block_t valid_block_count;
-       unsigned int valid_node_count, user_block_count;
+       unsigned int valid_node_count;
+       unsigned int avail_user_block_count;
        int err;
 
        if (is_inode) {
@@ -2621,21 +2648,10 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
        spin_lock(&sbi->stat_lock);
 
-       valid_block_count = sbi->total_valid_block_count +
-                                       sbi->current_reserved_blocks + 1;
-
-       if (!__allow_reserved_blocks(sbi, inode, false))
-               valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+       valid_block_count = sbi->total_valid_block_count + 1;
+       avail_user_block_count = get_available_block_count(sbi, inode, false);
 
-       if (F2FS_IO_ALIGNED(sbi))
-               valid_block_count += sbi->blocks_per_seg *
-                               SM_I(sbi)->additional_reserved_segments;
-
-       user_block_count = sbi->user_block_count;
-       if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
-               user_block_count -= sbi->unusable_block_count;
-
-       if (unlikely(valid_block_count > user_block_count)) {
+       if (unlikely(valid_block_count > avail_user_block_count)) {
                spin_unlock(&sbi->stat_lock);
                goto enospc;
        }
@@ -3022,6 +3038,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
        case FI_INLINE_DOTS:
        case FI_PIN_FILE:
        case FI_COMPRESS_RELEASED:
+       case FI_ATOMIC_COMMITTED:
                f2fs_mark_inode_dirty_sync(inode, true);
        }
 }
@@ -3445,7 +3462,7 @@ static inline __le32 *get_dnode_addr(struct inode *inode,
                sizeof((f2fs_inode)->field))                    \
                <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize)))   \
 
-#define __is_large_section(sbi)                ((sbi)->segs_per_sec > 1)
+#define __is_large_section(sbi)                (SEGS_PER_SEC(sbi) > 1)
 
 #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
 
@@ -3454,11 +3471,9 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
 static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
                                        block_t blkaddr, int type)
 {
-       if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+       if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type))
                f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
                         blkaddr, type);
-               f2fs_bug_on(sbi, 1);
-       }
 }
 
 static inline bool __is_valid_data_blkaddr(block_t blkaddr)
@@ -3560,7 +3575,8 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
                        struct inode *inode, nid_t ino, umode_t mode);
 void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
                        struct inode *dir, struct inode *inode);
-int f2fs_do_tmpfile(struct inode *inode, struct inode *dir);
+int f2fs_do_tmpfile(struct inode *inode, struct inode *dir,
+                                       struct f2fs_filename *fname);
 bool f2fs_empty_dir(struct inode *dir);
 
 static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
@@ -3675,15 +3691,14 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno);
-void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
+int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
-void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
-                       unsigned int *newseg, bool new_sec, int dir);
-void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
                                        unsigned int start, unsigned int end);
-void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
-void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
+int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
+int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi);
+int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
                                        struct cp_control *cpc);
@@ -3704,7 +3719,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
                        block_t old_addr, block_t new_addr,
                        unsigned char version, bool recover_curseg,
                        bool recover_newaddr);
-void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                        block_t old_blkaddr, block_t *new_blkaddr,
                        struct f2fs_summary *sum, int type,
                        struct f2fs_io_info *fio);
@@ -3754,6 +3769,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
 struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
 bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
                                        block_t blkaddr, int type);
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+                                       block_t blkaddr, int type);
 int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
                        int type, bool sync);
 void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
@@ -3794,6 +3811,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi);
  */
 int __init f2fs_init_bioset(void);
 void f2fs_destroy_bioset(void);
+bool f2fs_is_cp_guaranteed(struct page *page);
 int f2fs_init_bio_entry_cache(void);
 void f2fs_destroy_bio_entry_cache(void);
 void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
@@ -3857,6 +3875,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
 block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
 int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
 void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
+int f2fs_gc_range(struct f2fs_sb_info *sbi,
+               unsigned int start_seg, unsigned int end_seg,
+               bool dry_run, unsigned int dry_run_sections);
 int f2fs_resize_fs(struct file *filp, __u64 block_count);
 int __init f2fs_create_garbage_collection_cache(void);
 void f2fs_destroy_garbage_collection_cache(void);
@@ -4277,7 +4298,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
 void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed,
                                bool in_task);
 void f2fs_put_page_dic(struct page *page, bool in_task);
-unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn);
+unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn,
+                                               unsigned int ofs_in_node);
 int f2fs_init_compress_ctx(struct compress_ctx *cc);
 void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
 void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
@@ -4334,7 +4356,8 @@ static inline void f2fs_put_page_dic(struct page *page, bool in_task)
 {
        WARN_ON_ONCE(1);
 }
-static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; }
+static inline unsigned int f2fs_cluster_blocks_are_contiguous(
+                       struct dnode_of_data *dn, unsigned int ofs_in_node) { return 0; }
 static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; }
 static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; }
 static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { }
@@ -4391,15 +4414,24 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode)
 {
        struct f2fs_inode_info *fi = F2FS_I(inode);
 
-       if (!f2fs_compressed_file(inode))
+       f2fs_down_write(&F2FS_I(inode)->i_sem);
+
+       if (!f2fs_compressed_file(inode)) {
+               f2fs_up_write(&F2FS_I(inode)->i_sem);
                return true;
-       if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))
+       }
+       if (f2fs_is_mmap_file(inode) ||
+               (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) {
+               f2fs_up_write(&F2FS_I(inode)->i_sem);
                return false;
+       }
 
        fi->i_flags &= ~F2FS_COMPR_FL;
        stat_dec_compr_inode(inode);
        clear_inode_flag(inode, FI_COMPRESSED_FILE);
        f2fs_mark_inode_dirty_sync(inode, true);
+
+       f2fs_up_write(&F2FS_I(inode)->i_sem);
        return true;
 }
 
@@ -4502,6 +4534,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
        return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
 }
 
+static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi,
+                                         block_t blkaddr)
+{
+       if (f2fs_sb_has_blkzoned(sbi)) {
+               int devi = f2fs_target_device_index(sbi, blkaddr);
+
+               return !bdev_is_zoned(FDEV(devi).bdev);
+       }
+       return true;
+}
+
 static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
 {
        return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW;
@@ -4603,10 +4646,36 @@ static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi)
        return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb);
 }
 
+static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi,
+                                       block_t blkaddr, unsigned int cnt)
+{
+       bool need_submit = false;
+       int i = 0;
+
+       do {
+               struct page *page;
+
+               page = find_get_page(META_MAPPING(sbi), blkaddr + i);
+               if (page) {
+                       if (PageWriteback(page))
+                               need_submit = true;
+                       f2fs_put_page(page, 0);
+               }
+       } while (++i < cnt && !need_submit);
+
+       if (need_submit)
+               f2fs_submit_merged_write_cond(sbi, sbi->meta_inode,
+                                                       NULL, 0, DATA);
+
+       truncate_inode_pages_range(META_MAPPING(sbi),
+                       F2FS_BLK_TO_BYTES((loff_t)blkaddr),
+                       F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1)));
+}
+
 static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi,
                                                                block_t blkaddr)
 {
-       invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr);
+       f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1);
        f2fs_invalidate_compress_page(sbi, blkaddr);
 }
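
f2fs_truncate_meta_inode_pages() above replaces the old invalidate_mapping_pages() call: it flushes any block still under writeback before truncating the byte range that covers the blocks. The block-to-byte range computation, assuming the usual 4 KiB F2FS block:

/* mirrors the F2FS_BLK_TO_BYTES()/F2FS_BLK_END_BYTES() idea for 4 KiB blocks */
#define BLKSIZE_BITS            12
#define BLK_TO_BYTES(blk)       ((long long)(blk) << BLKSIZE_BITS)
#define BLK_END_BYTES(blk)      (BLK_TO_BYTES((blk) + 1) - 1)

/* blocks [blkaddr, blkaddr + cnt) => inclusive byte range [*start, *end] */
static void blk_range_to_bytes(unsigned int blkaddr, unsigned int cnt,
                               long long *start, long long *end)
{
        *start = BLK_TO_BYTES(blkaddr);
        *end   = BLK_END_BYTES(blkaddr + cnt - 1);
}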
 
index b58ab1157b7ef27724ed46be6527439c76975e9a..1761ad125f97a37ebb7bb9ce5c35ff9decf8a130 100644
@@ -39,6 +39,7 @@
 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
 {
        struct inode *inode = file_inode(vmf->vma->vm_file);
+       vm_flags_t flags = vmf->vma->vm_flags;
        vm_fault_t ret;
 
        ret = filemap_fault(vmf);
@@ -46,7 +47,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
                f2fs_update_iostat(F2FS_I_SB(inode), inode,
                                        APP_MAPPED_READ_IO, F2FS_BLKSIZE);
 
-       trace_f2fs_filemap_fault(inode, vmf->pgoff, vmf->vma->vm_flags, ret);
+       trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);
 
        return ret;
 }
@@ -394,9 +395,20 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        return f2fs_do_sync_file(file, start, end, datasync, false);
 }
 
-static bool __found_offset(struct address_space *mapping, block_t blkaddr,
-                               pgoff_t index, int whence)
+static bool __found_offset(struct address_space *mapping,
+               struct dnode_of_data *dn, pgoff_t index, int whence)
 {
+       block_t blkaddr = f2fs_data_blkaddr(dn);
+       struct inode *inode = mapping->host;
+       bool compressed_cluster = false;
+
+       if (f2fs_compressed_file(inode)) {
+               block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
+                   ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));
+
+               compressed_cluster = first_blkaddr == COMPRESS_ADDR;
+       }
+
        switch (whence) {
        case SEEK_DATA:
                if (__is_valid_data_blkaddr(blkaddr))
@@ -404,8 +416,12 @@ static bool __found_offset(struct address_space *mapping, block_t blkaddr,
                if (blkaddr == NEW_ADDR &&
                    xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
                        return true;
+               if (compressed_cluster)
+                       return true;
                break;
        case SEEK_HOLE:
+               if (compressed_cluster)
+                       return false;
                if (blkaddr == NULL_ADDR)
                        return true;
                break;
@@ -474,7 +490,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
                                goto fail;
                        }
 
-                       if (__found_offset(file->f_mapping, blkaddr,
+                       if (__found_offset(file->f_mapping, &dn,
                                                        pgofs, whence)) {
                                f2fs_put_dnode(&dn);
                                goto found;
@@ -590,8 +606,10 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
                f2fs_set_data_blkaddr(dn, NULL_ADDR);
 
                if (__is_valid_data_blkaddr(blkaddr)) {
-                       if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
-                                       DATA_GENERIC_ENHANCE))
+                       if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
+                               continue;
+                       if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
+                                               DATA_GENERIC_ENHANCE))
                                continue;
                        if (compressed_cluster)
                                valid_blocks++;
@@ -818,8 +836,6 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw)
         */
        if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE))
                return true;
-       if (f2fs_lfs_mode(sbi) && rw == WRITE && F2FS_IO_ALIGNED(sbi))
-               return true;
        if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
                return true;
 
@@ -1192,7 +1208,6 @@ next_dnode:
                        !f2fs_is_valid_blkaddr(sbi, *blkaddr,
                                        DATA_GENERIC_ENHANCE)) {
                        f2fs_put_dnode(&dn);
-                       f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
                        return -EFSCORRUPTED;
                }
 
@@ -1478,7 +1493,6 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
                if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
                                        DATA_GENERIC_ENHANCE)) {
                        ret = -EFSCORRUPTED;
-                       f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
                        break;
                }
 
@@ -1662,10 +1676,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
        }
        filemap_invalidate_unlock(mapping);
        f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+       if (ret)
+               return ret;
 
        /* write out all moved pages, if possible */
        filemap_invalidate_lock(mapping);
-       filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
+       ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
        truncate_pagecache(inode, offset);
        filemap_invalidate_unlock(mapping);
 
@@ -1731,9 +1747,11 @@ next_alloc:
 
                f2fs_down_write(&sbi->pin_sem);
 
-               f2fs_lock_op(sbi);
-               f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
-               f2fs_unlock_op(sbi);
+               err = f2fs_allocate_pinning_section(sbi);
+               if (err) {
+                       f2fs_up_write(&sbi->pin_sem);
+                       goto out_err;
+               }
 
                map.m_seg_type = CURSEG_COLD_DATA_PINNED;
                err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO);
@@ -2066,7 +2084,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
 
        inode_lock(inode);
 
-       if (!f2fs_disable_compressed_file(inode)) {
+       if (!f2fs_disable_compressed_file(inode) ||
+                       f2fs_is_pinned_file(inode)) {
                ret = -EINVAL;
                goto out;
        }
@@ -2243,8 +2262,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
        case F2FS_GOING_DOWN_METASYNC:
                /* do checkpoint only */
                ret = f2fs_sync_fs(sb, 1);
-               if (ret)
+               if (ret) {
+                       if (ret == -EIO)
+                               ret = 0;
                        goto out;
+               }
                f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
                break;
        case F2FS_GOING_DOWN_NOSYNC:
@@ -2260,6 +2282,8 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
                set_sbi_flag(sbi, SBI_IS_DIRTY);
                /* do checkpoint only */
                ret = f2fs_sync_fs(sb, 1);
+               if (ret == -EIO)
+                       ret = 0;
                goto out;
        default:
                ret = -EINVAL;
@@ -2578,7 +2602,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
                                        .m_may_create = false };
        struct extent_info ei = {};
        pgoff_t pg_start, pg_end, next_pgofs;
-       unsigned int blk_per_seg = sbi->blocks_per_seg;
        unsigned int total = 0, sec_num;
        block_t blk_end = 0;
        bool fragmented = false;
@@ -2687,7 +2710,8 @@ do_map:
                set_inode_flag(inode, FI_SKIP_WRITES);
 
                idx = map.m_lblk;
-               while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
+               while (idx < map.m_lblk + map.m_len &&
+                                               cnt < BLKS_PER_SEG(sbi)) {
                        struct page *page;
 
                        page = f2fs_get_lock_data_page(inode, idx, true);
@@ -2707,7 +2731,7 @@ do_map:
 
                map.m_lblk = idx;
 check:
-               if (map.m_lblk < pg_end && cnt < blk_per_seg)
+               if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi))
                        goto do_map;
 
                clear_inode_flag(inode, FI_SKIP_WRITES);
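
This hunk and many below swap open-coded uses of sbi->blocks_per_seg, sbi->segs_per_sec, and shifts by log_blocks_per_seg for helper macros. A self-contained sketch of those helpers, as the replacements throughout this diff imply them; the lite struct and exact definitions are stand-ins, the real ones live in f2fs.h:

	#include <stdio.h>

	struct sbi_lite {
		unsigned int log_blocks_per_seg;	/* e.g. 9 => 512 blocks */
		unsigned int segs_per_sec;
	};

	/* paraphrased helpers; the real ones take a struct f2fs_sb_info * */
	#define BLKS_PER_SEG(sbi)	(1U << (sbi)->log_blocks_per_seg)
	#define SEGS_PER_SEC(sbi)	((sbi)->segs_per_sec)
	#define SEGS_TO_BLKS(sbi, segs)	((segs) << (sbi)->log_blocks_per_seg)
	#define BLKS_TO_SEGS(sbi, blks)	((blks) >> (sbi)->log_blocks_per_seg)
	#define BLKS_PER_SEC(sbi)	SEGS_TO_BLKS(sbi, SEGS_PER_SEC(sbi))

	int main(void)
	{
		struct sbi_lite sbi = { .log_blocks_per_seg = 9, .segs_per_sec = 4 };

		printf("%u blks/seg, %u blks/sec, %u segs for 1024 blks\n",
		       BLKS_PER_SEG(&sbi), BLKS_PER_SEC(&sbi),
		       BLKS_TO_SEGS(&sbi, 1024U));
		return 0;
	}
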
@@ -2976,8 +3000,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
 
        if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
                        __is_large_section(sbi)) {
-               f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1",
-                         range.dev_num, sbi->s_ndevs, sbi->segs_per_sec);
+               f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1",
+                         range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi));
                return -EINVAL;
        }
 
@@ -3183,6 +3207,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
 {
        struct inode *inode = file_inode(filp);
+       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        __u32 pin;
        int ret = 0;
 
@@ -3192,7 +3217,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
        if (!S_ISREG(inode->i_mode))
                return -EINVAL;
 
-       if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+       if (f2fs_readonly(sbi->sb))
                return -EROFS;
 
        ret = mnt_want_write_file(filp);
@@ -3205,9 +3230,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
                clear_inode_flag(inode, FI_PIN_FILE);
                f2fs_i_gc_failures_write(inode, 0);
                goto done;
+       } else if (f2fs_is_pinned_file(inode)) {
+               goto done;
        }
 
-       if (f2fs_should_update_outplace(inode, NULL)) {
+       if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
+               ret = -EFBIG;
+               goto out;
+       }
+
+       /* Let's allow file pinning on zoned devices. */
+       if (!f2fs_sb_has_blkzoned(sbi) &&
+           f2fs_should_update_outplace(inode, NULL)) {
                ret = -EINVAL;
                goto out;
        }
@@ -3229,7 +3263,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
        set_inode_flag(inode, FI_PIN_FILE);
        ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
 done:
-       f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+       f2fs_update_time(sbi, REQ_TIME);
 out:
        inode_unlock(inode);
        mnt_drop_write_file(filp);
@@ -3438,10 +3472,8 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
                if (!__is_valid_data_blkaddr(blkaddr))
                        continue;
                if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
-                                       DATA_GENERIC_ENHANCE))) {
-                       f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+                                       DATA_GENERIC_ENHANCE)))
                        return -EFSCORRUPTED;
-               }
        }
 
        while (count) {
@@ -3588,10 +3620,10 @@ out:
        return ret;
 }
 
-static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
+static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
+               unsigned int *reserved_blocks)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
-       unsigned int reserved_blocks = 0;
        int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
        block_t blkaddr;
        int i;
@@ -3603,10 +3635,8 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
                if (!__is_valid_data_blkaddr(blkaddr))
                        continue;
                if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
-                                       DATA_GENERIC_ENHANCE))) {
-                       f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+                                       DATA_GENERIC_ENHANCE)))
                        return -EFSCORRUPTED;
-               }
        }
 
        while (count) {
@@ -3614,40 +3644,53 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
                blkcnt_t reserved;
                int ret;
 
-               for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
-                       blkaddr = f2fs_data_blkaddr(dn);
+               for (i = 0; i < cluster_size; i++) {
+                       blkaddr = data_blkaddr(dn->inode, dn->node_page,
+                                               dn->ofs_in_node + i);
 
                        if (i == 0) {
-                               if (blkaddr == COMPRESS_ADDR)
-                                       continue;
-                               dn->ofs_in_node += cluster_size;
-                               goto next;
+                               if (blkaddr != COMPRESS_ADDR) {
+                                       dn->ofs_in_node += cluster_size;
+                                       goto next;
+                               }
+                               continue;
                        }
 
-                       if (__is_valid_data_blkaddr(blkaddr)) {
+                       /*
+                        * the compressed cluster may not have been released
+                        * because release_compress_blocks() failed, so
+                        * NEW_ADDR is a possible case here.
+                        */
+                       if (blkaddr == NEW_ADDR ||
+                               __is_valid_data_blkaddr(blkaddr)) {
                                compr_blocks++;
                                continue;
                        }
-
-                       f2fs_set_data_blkaddr(dn, NEW_ADDR);
                }
 
                reserved = cluster_size - compr_blocks;
-               ret = inc_valid_block_count(sbi, dn->inode, &reserved);
-               if (ret)
+
+               /* for the case that all blocks in the cluster were reserved */
+               if (reserved == 1)
+                       goto next;
+
+               ret = inc_valid_block_count(sbi, dn->inode, &reserved, false);
+               if (unlikely(ret))
                        return ret;
 
-               if (reserved != cluster_size - compr_blocks)
-                       return -ENOSPC;
+               for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
+                       if (f2fs_data_blkaddr(dn) == NULL_ADDR)
+                               f2fs_set_data_blkaddr(dn, NEW_ADDR);
+               }
 
                f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
 
-               reserved_blocks += reserved;
+               *reserved_blocks += reserved;
 next:
                count -= cluster_size;
        }
 
-       return reserved_blocks;
+       return 0;
 }
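
With reserve_compress_blocks() reworked as above, the reserved count now travels through the *reserved_blocks out-parameter and the return value is a plain 0 or -errno, which is why the caller below switches its checks from ret >= 0 to !ret. A minimal sketch of that convention, with illustrative names rather than f2fs API:

	#include <errno.h>
	#include <stdio.h>

	/* stand-in: accumulate into *reserved, return 0 or a negative errno */
	static int reserve_blocks(unsigned int want, unsigned int *reserved)
	{
		if (!want)
			return -EINVAL;
		*reserved += want;
		return 0;
	}

	int main(void)
	{
		unsigned int reserved = 0;
		int ret = reserve_blocks(8, &reserved);

		if (!ret)	/* success is exactly zero now */
			printf("reserved %u blocks\n", reserved);
		return ret ? 1 : 0;
	}
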
 
 static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
@@ -3671,9 +3714,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
        if (ret)
                return ret;
 
-       if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
-               goto out;
-
        f2fs_balance_fs(sbi, true);
 
        inode_lock(inode);
@@ -3683,6 +3723,9 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
                goto unlock_inode;
        }
 
+       if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
+               goto unlock_inode;
+
        f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
        filemap_invalidate_lock(inode->i_mapping);
 
@@ -3708,7 +3751,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
                count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
                count = round_up(count, F2FS_I(inode)->i_cluster_size);
 
-               ret = reserve_compress_blocks(&dn, count);
+               ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
 
                f2fs_put_dnode(&dn);
 
@@ -3716,23 +3759,21 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
                        break;
 
                page_idx += count;
-               reserved_blocks += ret;
        }
 
        filemap_invalidate_unlock(inode->i_mapping);
        f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 
-       if (ret >= 0) {
+       if (!ret) {
                clear_inode_flag(inode, FI_COMPRESS_RELEASED);
                inode_set_ctime_current(inode);
                f2fs_mark_inode_dirty_sync(inode, true);
        }
 unlock_inode:
        inode_unlock(inode);
-out:
        mnt_drop_write_file(filp);
 
-       if (ret >= 0) {
+       if (!ret) {
                ret = put_user(reserved_blocks, (u64 __user *)arg);
        } else if (reserved_blocks &&
                        atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
@@ -3877,8 +3918,6 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
                                                DATA_GENERIC_ENHANCE)) {
                                ret = -EFSCORRUPTED;
                                f2fs_put_dnode(&dn);
-                               f2fs_handle_error(sbi,
-                                               ERROR_INVALID_BLKADDR);
                                goto out;
                        }
 
@@ -3981,16 +4020,20 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
                                sizeof(option)))
                return -EFAULT;
 
-       if (!f2fs_compressed_file(inode) ||
-                       option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
-                       option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
-                       option.algorithm >= COMPRESS_MAX)
+       if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
+               option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
+               option.algorithm >= COMPRESS_MAX)
                return -EINVAL;
 
        file_start_write(filp);
        inode_lock(inode);
 
        f2fs_down_write(&F2FS_I(inode)->i_sem);
+       if (!f2fs_compressed_file(inode)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
        if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) {
                ret = -EBUSY;
                goto out;
@@ -4066,7 +4109,6 @@ static int f2fs_ioc_decompress_file(struct file *filp)
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        pgoff_t page_idx = 0, last_idx;
-       unsigned int blk_per_seg = sbi->blocks_per_seg;
        int cluster_size = fi->i_cluster_size;
        int count, ret;
 
@@ -4110,7 +4152,7 @@ static int f2fs_ioc_decompress_file(struct file *filp)
                if (ret < 0)
                        break;
 
-               if (get_dirty_pages(inode) >= blk_per_seg) {
+               if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
                        ret = filemap_fdatawrite(inode->i_mapping);
                        if (ret < 0)
                                break;
@@ -4145,7 +4187,6 @@ static int f2fs_ioc_compress_file(struct file *filp)
        struct inode *inode = file_inode(filp);
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        pgoff_t page_idx = 0, last_idx;
-       unsigned int blk_per_seg = sbi->blocks_per_seg;
        int cluster_size = F2FS_I(inode)->i_cluster_size;
        int count, ret;
 
@@ -4188,7 +4229,7 @@ static int f2fs_ioc_compress_file(struct file *filp)
                if (ret < 0)
                        break;
 
-               if (get_dirty_pages(inode) >= blk_per_seg) {
+               if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) {
                        ret = filemap_fdatawrite(inode->i_mapping);
                        if (ret < 0)
                                break;
index a079eebfb080beba723be99e0db95203d6815c8f..8852814dab7f625a9e01032892ed586ad86f26f5 100644 (file)
@@ -259,7 +259,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
                p->ofs_unit = 1;
        } else {
                p->gc_mode = select_gc_type(sbi, gc_type);
-               p->ofs_unit = sbi->segs_per_sec;
+               p->ofs_unit = SEGS_PER_SEC(sbi);
                if (__is_large_section(sbi)) {
                        p->dirty_bitmap = dirty_i->dirty_secmap;
                        p->max_search = count_bits(p->dirty_bitmap,
@@ -280,11 +280,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
                        p->max_search > sbi->max_victim_search)
                p->max_search = sbi->max_victim_search;
 
-       /* let's select beginning hot/small space first in no_heap mode*/
+       /* let's select beginning hot/small space first */
        if (f2fs_need_rand_seg(sbi))
-               p->offset = get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
-       else if (test_opt(sbi, NOHEAP) &&
-               (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
+               p->offset = get_random_u32_below(MAIN_SECS(sbi) *
+                                               SEGS_PER_SEC(sbi));
+       else if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
                p->offset = 0;
        else
                p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
@@ -295,13 +295,13 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
 {
        /* SSR allocates in a segment unit */
        if (p->alloc_mode == SSR)
-               return sbi->blocks_per_seg;
+               return BLKS_PER_SEG(sbi);
        else if (p->alloc_mode == AT_SSR)
                return UINT_MAX;
 
        /* LFS */
        if (p->gc_mode == GC_GREEDY)
-               return 2 * sbi->blocks_per_seg * p->ofs_unit;
+               return SEGS_TO_BLKS(sbi, 2 * p->ofs_unit);
        else if (p->gc_mode == GC_CB)
                return UINT_MAX;
        else if (p->gc_mode == GC_AT)
@@ -348,7 +348,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
        mtime = div_u64(mtime, usable_segs_per_sec);
        vblocks = div_u64(vblocks, usable_segs_per_sec);
 
-       u = (vblocks * 100) >> sbi->log_blocks_per_seg;
+       u = BLKS_TO_SEGS(sbi, vblocks * 100);
 
        /* Handle if the system time has changed by the user */
        if (mtime < sit_i->min_mtime)
@@ -496,9 +496,9 @@ static void add_victim_entry(struct f2fs_sb_info *sbi,
                        return;
        }
 
-       for (i = 0; i < sbi->segs_per_sec; i++)
+       for (i = 0; i < SEGS_PER_SEC(sbi); i++)
                mtime += get_seg_entry(sbi, start + i)->mtime;
-       mtime = div_u64(mtime, sbi->segs_per_sec);
+       mtime = div_u64(mtime, SEGS_PER_SEC(sbi));
 
        /* Handle if the system time has changed by the user */
        if (mtime < sit_i->min_mtime)
@@ -599,7 +599,6 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
        unsigned long long age;
        unsigned long long max_mtime = sit_i->dirty_max_mtime;
        unsigned long long min_mtime = sit_i->dirty_min_mtime;
-       unsigned int seg_blocks = sbi->blocks_per_seg;
        unsigned int vblocks;
        unsigned int dirty_threshold = max(am->max_candidate_count,
                                        am->candidate_ratio *
@@ -629,7 +628,7 @@ next_node:
        f2fs_bug_on(sbi, !vblocks);
 
        /* rare case */
-       if (vblocks == seg_blocks)
+       if (vblocks == BLKS_PER_SEG(sbi))
                goto skip_node;
 
        iter++;
@@ -755,7 +754,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
        int ret = 0;
 
        mutex_lock(&dirty_i->seglist_lock);
-       last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
+       last_segment = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi);
 
        p.alloc_mode = alloc_mode;
        p.age = age;
@@ -896,7 +895,7 @@ next:
                        else
                                sm->last_victim[p.gc_mode] = segno + p.ofs_unit;
                        sm->last_victim[p.gc_mode] %=
-                               (MAIN_SECS(sbi) * sbi->segs_per_sec);
+                               (MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
                        break;
                }
        }
@@ -1184,7 +1183,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
                .op_flags = 0,
                .encrypted_page = NULL,
                .in_list = 0,
-               .retry = 0,
        };
        int err;
 
@@ -1197,7 +1195,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
                if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
                                                DATA_GENERIC_ENHANCE_READ))) {
                        err = -EFSCORRUPTED;
-                       f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
                        goto put_page;
                }
                goto got_it;
@@ -1216,7 +1213,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
        if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
                                                DATA_GENERIC_ENHANCE))) {
                err = -EFSCORRUPTED;
-               f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
                goto put_page;
        }
 got_it:
@@ -1273,7 +1269,6 @@ static int move_data_block(struct inode *inode, block_t bidx,
                .op_flags = 0,
                .encrypted_page = NULL,
                .in_list = 0,
-               .retry = 0,
        };
        struct dnode_of_data dn;
        struct f2fs_summary sum;
@@ -1364,8 +1359,13 @@ static int move_data_block(struct inode *inode, block_t bidx,
        set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
 
        /* allocate block address */
-       f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
+       err = f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
                                &sum, type, NULL);
+       if (err) {
+               f2fs_put_page(mpage, 1);
+               /* filesystem should shut down; no need to recover the block */
+               goto up_out;
+       }
 
        fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
                                newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -1393,18 +1393,12 @@ static int move_data_block(struct inode *inode, block_t bidx,
        fio.op_flags = REQ_SYNC;
        fio.new_blkaddr = newaddr;
        f2fs_submit_page_write(&fio);
-       if (fio.retry) {
-               err = -EAGAIN;
-               if (PageWriteback(fio.encrypted_page))
-                       end_page_writeback(fio.encrypted_page);
-               goto put_page_out;
-       }
 
        f2fs_update_iostat(fio.sbi, NULL, FS_GC_DATA_IO, F2FS_BLKSIZE);
 
        f2fs_update_data_blkaddr(&dn, newaddr);
        set_inode_flag(inode, FI_APPEND_WRITE);
-put_page_out:
+
        f2fs_put_page(fio.encrypted_page, 1);
 recover_block:
        if (err)
@@ -1678,7 +1672,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
        struct f2fs_summary_block *sum;
        struct blk_plug plug;
        unsigned int segno = start_segno;
-       unsigned int end_segno = start_segno + sbi->segs_per_sec;
+       unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi);
        int seg_freed = 0, migrated = 0;
        unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
                                                SUM_TYPE_DATA : SUM_TYPE_NODE;
@@ -1686,7 +1680,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
        int submitted = 0;
 
        if (__is_large_section(sbi))
-               end_segno = rounddown(end_segno, sbi->segs_per_sec);
+               end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi));
 
        /*
         * zone-capacity can be less than zone-size in zoned devices,
@@ -1694,7 +1688,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
         * calculate the end segno in the zone which can be garbage collected
         */
        if (f2fs_sb_has_blkzoned(sbi))
-               end_segno -= sbi->segs_per_sec -
+               end_segno -= SEGS_PER_SEC(sbi) -
                                        f2fs_usable_segs_in_sec(sbi, segno);
 
        sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type);
@@ -1983,10 +1977,43 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
        init_atgc_management(sbi);
 }
 
+int f2fs_gc_range(struct f2fs_sb_info *sbi,
+               unsigned int start_seg, unsigned int end_seg,
+               bool dry_run, unsigned int dry_run_sections)
+{
+       unsigned int segno;
+       unsigned int gc_secs = dry_run_sections;
+
+       if (unlikely(f2fs_cp_error(sbi)))
+               return -EIO;
+
+       for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
+               struct gc_inode_list gc_list = {
+                       .ilist = LIST_HEAD_INIT(gc_list.ilist),
+                       .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
+               };
+
+               do_garbage_collect(sbi, segno, &gc_list, FG_GC,
+                                               dry_run_sections == 0);
+               put_gc_inode(&gc_list);
+
+               if (!dry_run && get_valid_blocks(sbi, segno, true))
+                       return -EAGAIN;
+               if (dry_run && dry_run_sections &&
+                   !get_valid_blocks(sbi, segno, true) && --gc_secs == 0)
+                       break;
+
+               if (fatal_signal_pending(current))
+                       return -ERESTARTSYS;
+       }
+
+       return 0;
+}
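
f2fs_gc_range() above extracts the resize GC loop into a reusable helper: in normal mode a section that still holds valid blocks after GC yields -EAGAIN, while a dry run counts down dry_run_sections fully freed sections before stopping. A compact sketch of just the dry-run bookkeeping, mirroring the short-circuit order of the condition above (illustrative only):

	#include <stdbool.h>
	#include <stdio.h>

	/* illustrative: stop a dry run once N sections came out clean */
	static bool dry_run_done(unsigned int *gc_secs, bool sec_is_free)
	{
		return sec_is_free && --(*gc_secs) == 0;
	}

	int main(void)
	{
		unsigned int gc_secs = 2;

		printf("%d\n", dry_run_done(&gc_secs, true));	/* 0: one left */
		printf("%d\n", dry_run_done(&gc_secs, true));	/* 1: done */
		return 0;
	}
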
+
 static int free_segment_range(struct f2fs_sb_info *sbi,
-                               unsigned int secs, bool gc_only)
+                               unsigned int secs, bool dry_run)
 {
-       unsigned int segno, next_inuse, start, end;
+       unsigned int next_inuse, start, end;
        struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
        int gc_mode, gc_type;
        int err = 0;
@@ -1994,7 +2021,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
 
        /* Force block allocation for GC */
        MAIN_SECS(sbi) -= secs;
-       start = MAIN_SECS(sbi) * sbi->segs_per_sec;
+       start = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi);
        end = MAIN_SEGS(sbi) - 1;
 
        mutex_lock(&DIRTY_I(sbi)->seglist_lock);
@@ -2008,29 +2035,15 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
        mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
 
        /* Move out cursegs from the target range */
-       for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
-               f2fs_allocate_segment_for_resize(sbi, type, start, end);
-
-       /* do GC to move out valid blocks in the range */
-       for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
-               struct gc_inode_list gc_list = {
-                       .ilist = LIST_HEAD_INIT(gc_list.ilist),
-                       .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
-               };
-
-               do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
-               put_gc_inode(&gc_list);
-
-               if (!gc_only && get_valid_blocks(sbi, segno, true)) {
-                       err = -EAGAIN;
-                       goto out;
-               }
-               if (fatal_signal_pending(current)) {
-                       err = -ERESTARTSYS;
+       for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) {
+               err = f2fs_allocate_segment_for_resize(sbi, type, start, end);
+               if (err)
                        goto out;
-               }
        }
-       if (gc_only)
+
+       /* do GC to move out valid blocks in the range */
+       err = f2fs_gc_range(sbi, start, end, dry_run, 0);
+       if (err || dry_run)
                goto out;
 
        stat_inc_cp_call_count(sbi, TOTAL_CALL);
@@ -2056,7 +2069,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
        int segment_count;
        int segment_count_main;
        long long block_count;
-       int segs = secs * sbi->segs_per_sec;
+       int segs = secs * SEGS_PER_SEC(sbi);
 
        f2fs_down_write(&sbi->sb_lock);
 
@@ -2069,7 +2082,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
        raw_sb->segment_count = cpu_to_le32(segment_count + segs);
        raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
        raw_sb->block_count = cpu_to_le64(block_count +
-                                       (long long)segs * sbi->blocks_per_seg);
+                       (long long)SEGS_TO_BLKS(sbi, segs));
        if (f2fs_is_multi_device(sbi)) {
                int last_dev = sbi->s_ndevs - 1;
                int dev_segs =
@@ -2084,8 +2097,8 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
 
 static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
 {
-       int segs = secs * sbi->segs_per_sec;
-       long long blks = (long long)segs * sbi->blocks_per_seg;
+       int segs = secs * SEGS_PER_SEC(sbi);
+       long long blks = SEGS_TO_BLKS(sbi, segs);
        long long user_block_count =
                                le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
 
@@ -2127,7 +2140,7 @@ int f2fs_resize_fs(struct file *filp, __u64 block_count)
                int last_dev = sbi->s_ndevs - 1;
                __u64 last_segs = FDEV(last_dev).total_segments;
 
-               if (block_count + last_segs * sbi->blocks_per_seg <=
+               if (block_count + SEGS_TO_BLKS(sbi, last_segs) <=
                                                                old_block_count)
                        return -EINVAL;
        }
index 28a00942802c216cc77ad3d5910f14329e3e2b60..9c0d06c4d19a91f61757c4d9a5babbeb4a07729d 100644 (file)
@@ -96,7 +96,7 @@ static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
        if (f2fs_sb_has_blkzoned(sbi))
                return free_segs_blk_count_zoned(sbi);
 
-       return free_segments(sbi) << sbi->log_blocks_per_seg;
+       return SEGS_TO_BLKS(sbi, free_segments(sbi));
 }
 
 static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
@@ -104,7 +104,7 @@ static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
        block_t free_blks, ovp_blks;
 
        free_blks = free_segs_blk_count(sbi);
-       ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+       ovp_blks = SEGS_TO_BLKS(sbi, overprovision_segments(sbi));
 
        if (free_blks < ovp_blks)
                return 0;
index f7f63a567d869d66e024b0334a06e1cc83c8b6f4..e54f8c08bda832b2b25c93495b796fa943a13d8e 100644 (file)
@@ -851,7 +851,7 @@ out:
 
 static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
                          struct file *file, umode_t mode, bool is_whiteout,
-                         struct inode **new_inode)
+                         struct inode **new_inode, struct f2fs_filename *fname)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
        struct inode *inode;
@@ -879,7 +879,7 @@ static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
        if (err)
                goto out;
 
-       err = f2fs_do_tmpfile(inode, dir);
+       err = f2fs_do_tmpfile(inode, dir, fname);
        if (err)
                goto release_out;
 
@@ -930,22 +930,24 @@ static int f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
        if (!f2fs_is_checkpoint_ready(sbi))
                return -ENOSPC;
 
-       err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL);
+       err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL, NULL);
 
        return finish_open_simple(file, err);
 }
 
 static int f2fs_create_whiteout(struct mnt_idmap *idmap,
-                               struct inode *dir, struct inode **whiteout)
+                               struct inode *dir, struct inode **whiteout,
+                               struct f2fs_filename *fname)
 {
-       return __f2fs_tmpfile(idmap, dir, NULL,
-                               S_IFCHR | WHITEOUT_MODE, true, whiteout);
+       return __f2fs_tmpfile(idmap, dir, NULL, S_IFCHR | WHITEOUT_MODE,
+                                               true, whiteout, fname);
 }
 
 int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
                     struct inode **new_inode)
 {
-       return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, false, new_inode);
+       return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG,
+                               false, new_inode, NULL);
 }
 
 static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
@@ -989,7 +991,14 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
        }
 
        if (flags & RENAME_WHITEOUT) {
-               err = f2fs_create_whiteout(idmap, old_dir, &whiteout);
+               struct f2fs_filename fname;
+
+               err = f2fs_setup_filename(old_dir, &old_dentry->d_name,
+                                                       0, &fname);
+               if (err)
+                       return err;
+
+               err = f2fs_create_whiteout(idmap, old_dir, &whiteout, &fname);
                if (err)
                        return err;
        }
@@ -1104,14 +1113,11 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                iput(whiteout);
        }
 
-       if (old_is_dir) {
-               if (old_dir_entry)
-                       f2fs_set_link(old_inode, old_dir_entry,
-                                               old_dir_page, new_dir);
-               else
-                       f2fs_put_page(old_dir_page, 0);
+       if (old_dir_entry)
+               f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
+       if (old_is_dir)
                f2fs_i_links_write(old_dir, false);
-       }
+
        if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) {
                f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
                if (S_ISDIR(old_inode->i_mode))
index 9b546fd2101004c306d83bb5a33a11e53a88b7c7..b3de6d6cdb02199c13808bfaabbc8e742b52a067 100644 (file)
@@ -852,21 +852,29 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
 
        if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) &&
                                        f2fs_sb_has_readonly(sbi)) {
-               unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn);
+               unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
+               unsigned int ofs_in_node = dn->ofs_in_node;
+               pgoff_t fofs = index;
+               unsigned int c_len;
                block_t blkaddr;
 
+               /* should align fofs and ofs_in_node to cluster_size */
+               if (fofs % cluster_size) {
+                       fofs = round_down(fofs, cluster_size);
+                       ofs_in_node = round_down(ofs_in_node, cluster_size);
+               }
+
+               c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node);
                if (!c_len)
                        goto out;
 
-               blkaddr = f2fs_data_blkaddr(dn);
+               blkaddr = data_blkaddr(dn->inode, dn->node_page, ofs_in_node);
                if (blkaddr == COMPRESS_ADDR)
                        blkaddr = data_blkaddr(dn->inode, dn->node_page,
-                                               dn->ofs_in_node + 1);
+                                               ofs_in_node + 1);
 
                f2fs_update_read_extent_tree_range_compressed(dn->inode,
-                                       index, blkaddr,
-                                       F2FS_I(dn->inode)->i_cluster_size,
-                                       c_len);
+                                       fofs, blkaddr, cluster_size, c_len);
        }
 out:
        return 0;
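
The hunk above snaps both fofs and ofs_in_node down to a cluster boundary before measuring the contiguous run, so the read extent cache is keyed on a cluster start. The alignment is the usual power-of-two round_down; a tiny standalone example:

	#include <stdio.h>

	/* kernel-style round_down for power-of-two y */
	#define round_down(x, y)	((x) & ~((__typeof__(x))(y) - 1))

	int main(void)
	{
		unsigned long cluster_size = 16;	/* 1 << log_cluster_size */
		unsigned long fofs = 21;

		/* 21 -> 16: the enclosing cluster's first block */
		printf("%lu\n", round_down(fofs, cluster_size));
		return 0;
	}
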
@@ -1919,7 +1927,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
                for (i = 0; i < nr_folios; i++) {
                        struct page *page = &fbatch.folios[i]->page;
 
-                       if (!IS_DNODE(page))
+                       if (!IS_INODE(page))
                                continue;
 
                        lock_page(page);
@@ -2841,7 +2849,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
        int i, idx, last_offset, nrpages;
 
        /* scan the node segment */
-       last_offset = sbi->blocks_per_seg;
+       last_offset = BLKS_PER_SEG(sbi);
        addr = START_BLOCK(sbi, segno);
        sum_entry = &sum->entries[0];
 
@@ -3158,7 +3166,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
        if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
                return 0;
 
-       nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
+       nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) -
                                                nm_i->nat_bits_blocks;
        for (i = 0; i < nm_i->nat_bits_blocks; i++) {
                struct page *page;
index 5bd16a95eef8f13bb5465faa166c2e74d7bc838c..6aea13024ac1659f83fe412849bbb64cb0ec2abd 100644 (file)
@@ -208,10 +208,10 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
 
        block_addr = (pgoff_t)(nm_i->nat_blkaddr +
                (block_off << 1) -
-               (block_off & (sbi->blocks_per_seg - 1)));
+               (block_off & (BLKS_PER_SEG(sbi) - 1)));
 
        if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
-               block_addr += sbi->blocks_per_seg;
+               block_addr += BLKS_PER_SEG(sbi);
 
        return block_addr;
 }
index d0f24ccbd1ac6e99d2b67ec2f6529c274ee583c4..e7bf15b8240ae613ec82f8b2be4049c77dccdce9 100644 (file)
@@ -354,7 +354,7 @@ static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi,
        if (blkaddr + 1 == next_blkaddr)
                ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS,
                                                        ra_blocks * 2);
-       else if (next_blkaddr % sbi->blocks_per_seg)
+       else if (next_blkaddr % BLKS_PER_SEG(sbi))
                ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS,
                                                        ra_blocks / 2);
        return ra_blocks;
@@ -611,6 +611,19 @@ truncate_out:
        return 0;
 }
 
+static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn)
+{
+       int i, err = 0;
+
+       for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) {
+               err = f2fs_reserve_new_block(dn);
+               if (!err)
+                       break;
+       }
+
+       return err;
+}
+
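
f2fs_reserve_new_block_retry() above replaces two open-coded fault-injection retry loops in do_recover_data(); DEFAULT_FAILURE_RETRY_COUNT is defined elsewhere in the tree and its value is not shown in this hunk. The shape is a plain bounded retry that keeps the last error; a generic sketch:

	/* bounded retry: stop on first success, otherwise keep the last error */
	static int retry_op(int (*op)(void *), void *arg, int retries)
	{
		int err = 0;

		while (retries-- > 0) {
			err = op(arg);
			if (!err)
				break;
		}
		return err;
	}
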
 static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
                                        struct page *page)
 {
@@ -680,14 +693,12 @@ retry_dn:
                if (__is_valid_data_blkaddr(src) &&
                        !f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
                        err = -EFSCORRUPTED;
-                       f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
                        goto err;
                }
 
                if (__is_valid_data_blkaddr(dest) &&
                        !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
                        err = -EFSCORRUPTED;
-                       f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
                        goto err;
                }
 
@@ -712,14 +723,8 @@ retry_dn:
                 */
                if (dest == NEW_ADDR) {
                        f2fs_truncate_data_blocks_range(&dn, 1);
-                       do {
-                               err = f2fs_reserve_new_block(&dn);
-                               if (err == -ENOSPC) {
-                                       f2fs_bug_on(sbi, 1);
-                                       break;
-                               }
-                       } while (err &&
-                               IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION));
+
+                       err = f2fs_reserve_new_block_retry(&dn);
                        if (err)
                                goto err;
                        continue;
@@ -727,16 +732,8 @@ retry_dn:
 
                /* dest is valid block, try to recover from src to dest */
                if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
-
                        if (src == NULL_ADDR) {
-                               do {
-                                       err = f2fs_reserve_new_block(&dn);
-                                       if (err == -ENOSPC) {
-                                               f2fs_bug_on(sbi, 1);
-                                               break;
-                                       }
-                               } while (err &&
-                                       IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION));
+                               err = f2fs_reserve_new_block_retry(&dn);
                                if (err)
                                        goto err;
                        }
@@ -756,8 +753,6 @@ retry_prev:
                                f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u",
                                        dest, inode->i_ino, dn.ofs_in_node);
                                err = -EFSCORRUPTED;
-                               f2fs_handle_error(sbi,
-                                               ERROR_INVALID_BLKADDR);
                                goto err;
                        }
 
@@ -852,7 +847,7 @@ next:
                f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
        }
        if (!err)
-               f2fs_allocate_new_segments(sbi);
+               err = f2fs_allocate_new_segments(sbi);
        return err;
 }
 
@@ -864,7 +859,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        int ret = 0;
        unsigned long s_flags = sbi->sb->s_flags;
        bool need_writecp = false;
-       bool fix_curseg_write_pointer = false;
 
        if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE))
                f2fs_info(sbi, "recover fsync data on readonly fs");
@@ -895,8 +889,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        else
                f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE);
 skip:
-       fix_curseg_write_pointer = !check_only || list_empty(&inode_list);
-
        destroy_fsync_dnodes(&inode_list, err);
        destroy_fsync_dnodes(&tmp_inode_list, err);
 
@@ -914,11 +906,13 @@ skip:
         * and f2fs is not read-only, check and fix zoned block devices'
         * write pointer consistency.
         */
-       if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) &&
-                       f2fs_sb_has_blkzoned(sbi)) {
-               err = f2fs_fix_curseg_write_pointer(sbi);
-               if (!err)
-                       err = f2fs_check_write_pointer(sbi);
+       if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sbi->sb)) {
+               int err2 = f2fs_fix_curseg_write_pointer(sbi);
+
+               if (!err2)
+                       err2 = f2fs_check_write_pointer(sbi);
+               if (err2)
+                       err = err2;
                ret = err;
        }
 
index e1065ba702076131067091fe173b19edab41b5d1..4fd76e867e0a2efa772fda10e9f01a870c2edcd2 100644 (file)
@@ -192,6 +192,9 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean)
        if (!f2fs_is_atomic_file(inode))
                return;
 
+       if (clean)
+               truncate_inode_pages_final(inode->i_mapping);
+
        release_atomic_write_cnt(inode);
        clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
        clear_inode_flag(inode, FI_ATOMIC_REPLACE);
@@ -201,7 +204,6 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean)
        F2FS_I(inode)->atomic_write_task = NULL;
 
        if (clean) {
-               truncate_inode_pages_final(inode->i_mapping);
                f2fs_i_size_write(inode, fi->original_i_size);
                fi->original_i_size = 0;
        }
@@ -248,7 +250,7 @@ retry:
        } else {
                blkcnt_t count = 1;
 
-               err = inc_valid_block_count(sbi, inode, &count);
+               err = inc_valid_block_count(sbi, inode, &count, true);
                if (err) {
                        f2fs_put_dnode(&dn);
                        return err;
@@ -334,8 +336,6 @@ static int __f2fs_commit_atomic_write(struct inode *inode)
                                        DATA_GENERIC_ENHANCE)) {
                                f2fs_put_dnode(&dn);
                                ret = -EFSCORRUPTED;
-                               f2fs_handle_error(sbi,
-                                               ERROR_INVALID_BLKADDR);
                                goto out;
                        }
 
@@ -400,6 +400,9 @@ int f2fs_commit_atomic_write(struct inode *inode)
  */
 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 {
+       if (f2fs_cp_error(sbi))
+               return;
+
        if (time_to_inject(sbi, FAULT_CHECKPOINT))
                f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
 
@@ -448,8 +451,8 @@ static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
        unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
        unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
        unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
-       unsigned int threshold = sbi->blocks_per_seg * factor *
-                                       DEFAULT_DIRTY_THRESHOLD;
+       unsigned int threshold =
+               SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD));
        unsigned int global_threshold = threshold * 3 / 2;
 
        if (dents >= threshold || qdata >= threshold ||
@@ -872,7 +875,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
 {
        int ovp_hole_segs =
                (overprovision_segments(sbi) - reserved_segments(sbi));
-       block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
+       block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs);
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        block_t holes[2] = {0, 0};      /* DATA and NODE */
        block_t unusable;
@@ -901,11 +904,16 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
 {
        int ovp_hole_segs =
                (overprovision_segments(sbi) - reserved_segments(sbi));
+
+       if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
+               return 0;
        if (unusable > F2FS_OPTION(sbi).unusable_cap)
                return -EAGAIN;
        if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
                dirty_segments(sbi) > ovp_hole_segs)
                return -EAGAIN;
+       if (has_not_enough_free_secs(sbi, 0, 0))
+               return -EAGAIN;
        return 0;
 }
 
@@ -1132,8 +1140,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
        struct seg_entry *sentry;
        unsigned int segno;
        block_t blk = start;
-       unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
-       unsigned long *map;
+       unsigned long offset, size, *map;
 
        while (blk < end) {
                segno = GET_SEGNO(sbi, blk);
@@ -1143,7 +1150,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
                if (end < START_BLOCK(sbi, segno + 1))
                        size = GET_BLKOFF_FROM_SEG0(sbi, end);
                else
-                       size = max_blocks;
+                       size = BLKS_PER_SEG(sbi);
                map = (unsigned long *)(sentry->cur_valid_map);
                offset = __find_rev_next_bit(map, size, offset);
                f2fs_bug_on(sbi, offset != size);
@@ -2048,7 +2055,6 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
                                                        bool check_only)
 {
        int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
-       int max_blocks = sbi->blocks_per_seg;
        struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
        unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
        unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
@@ -2060,8 +2066,9 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
        struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
        int i;
 
-       if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
-                       !f2fs_block_unit_discard(sbi))
+       if (se->valid_blocks == BLKS_PER_SEG(sbi) ||
+           !f2fs_hw_support_discard(sbi) ||
+           !f2fs_block_unit_discard(sbi))
                return false;
 
        if (!force) {
@@ -2078,13 +2085,14 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
 
        while (force || SM_I(sbi)->dcc_info->nr_discards <=
                                SM_I(sbi)->dcc_info->max_discards) {
-               start = __find_rev_next_bit(dmap, max_blocks, end + 1);
-               if (start >= max_blocks)
+               start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1);
+               if (start >= BLKS_PER_SEG(sbi))
                        break;
 
-               end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
-               if (force && start && end != max_blocks
-                                       && (end - start) < cpc->trim_minlen)
+               end = __find_rev_next_zero_bit(dmap,
+                                               BLKS_PER_SEG(sbi), start + 1);
+               if (force && start && end != BLKS_PER_SEG(sbi) &&
+                   (end - start) < cpc->trim_minlen)
                        continue;
 
                if (check_only)
@@ -2166,8 +2174,8 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
                                                                start + 1);
 
                if (section_alignment) {
-                       start = rounddown(start, sbi->segs_per_sec);
-                       end = roundup(end, sbi->segs_per_sec);
+                       start = rounddown(start, SEGS_PER_SEC(sbi));
+                       end = roundup(end, SEGS_PER_SEC(sbi));
                }
 
                for (i = start; i < end; i++) {
@@ -2186,7 +2194,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
                if (!f2fs_sb_has_blkzoned(sbi) &&
                    (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
                        f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
-                               (end - start) << sbi->log_blocks_per_seg);
+                               SEGS_TO_BLKS(sbi, end - start));
                        continue;
                }
 next:
@@ -2195,9 +2203,9 @@ next:
                if (!IS_CURSEC(sbi, secno) &&
                        !get_valid_blocks(sbi, start, true))
                        f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
-                               sbi->segs_per_sec << sbi->log_blocks_per_seg);
+                                               BLKS_PER_SEC(sbi));
 
-               start = start_segno + sbi->segs_per_sec;
+               start = start_segno + SEGS_PER_SEC(sbi);
                if (start < end)
                        goto next;
                else
@@ -2216,7 +2224,7 @@ next:
 find_next:
                if (is_valid) {
                        next_pos = find_next_zero_bit_le(entry->discard_map,
-                                       sbi->blocks_per_seg, cur_pos);
+                                               BLKS_PER_SEG(sbi), cur_pos);
                        len = next_pos - cur_pos;
 
                        if (f2fs_sb_has_blkzoned(sbi) ||
@@ -2228,13 +2236,13 @@ find_next:
                        total_len += len;
                } else {
                        next_pos = find_next_bit_le(entry->discard_map,
-                                       sbi->blocks_per_seg, cur_pos);
+                                               BLKS_PER_SEG(sbi), cur_pos);
                }
 skip:
                cur_pos = next_pos;
                is_valid = !is_valid;
 
-               if (cur_pos < sbi->blocks_per_seg)
+               if (cur_pos < BLKS_PER_SEG(sbi))
                        goto find_next;
 
                release_discard_addr(entry);
@@ -2251,6 +2259,12 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
        struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
        int err = 0;
 
+       if (f2fs_sb_has_readonly(sbi)) {
+               f2fs_info(sbi,
+                       "Skip to start discard thread for readonly image");
+               return 0;
+       }
+
        if (!f2fs_realtime_discard_enable(sbi))
                return 0;
 
@@ -2283,7 +2297,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
        dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
        dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE;
        if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
-               dcc->discard_granularity = sbi->blocks_per_seg;
+               dcc->discard_granularity = BLKS_PER_SEG(sbi);
        else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
                dcc->discard_granularity = BLKS_PER_SEC(sbi);
 
@@ -2297,7 +2311,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
        atomic_set(&dcc->queued_discard, 0);
        atomic_set(&dcc->discard_cmd_cnt, 0);
        dcc->nr_discards = 0;
-       dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
+       dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi));
        dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
        dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
        dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
@@ -2405,6 +2419,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 #endif
 
        segno = GET_SEGNO(sbi, blkaddr);
+       if (segno == NULL_SEGNO)
+               return;
 
        se = get_seg_entry(sbi, segno);
        new_vblocks = se->valid_blocks + del;
@@ -2546,7 +2562,7 @@ static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int typ
        struct curseg_info *curseg = CURSEG_I(sbi, type);
 
        if (sbi->ckpt->alloc_type[type] == SSR)
-               return sbi->blocks_per_seg;
+               return BLKS_PER_SEG(sbi);
        return curseg->next_blkoff;
 }
 
@@ -2634,7 +2650,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
        unsigned int segno = curseg->segno + 1;
        struct free_segmap_info *free_i = FREE_I(sbi);
 
-       if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
+       if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi))
                return !test_bit(segno, free_i->free_segmap);
        return 0;
 }
@@ -2643,54 +2659,51 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
  * Find a new segment from the free segment bitmap in the right order.
  * This function is expected to succeed; otherwise it is a BUG.
  */
-static void get_new_segment(struct f2fs_sb_info *sbi,
-                       unsigned int *newseg, bool new_sec, int dir)
+static int get_new_segment(struct f2fs_sb_info *sbi,
+                       unsigned int *newseg, bool new_sec, bool pinning)
 {
        struct free_segmap_info *free_i = FREE_I(sbi);
        unsigned int segno, secno, zoneno;
        unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
        unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
        unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
-       unsigned int left_start = hint;
        bool init = true;
-       int go_left = 0;
        int i;
+       int ret = 0;
 
        spin_lock(&free_i->segmap_lock);
 
-       if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
+       if (time_to_inject(sbi, FAULT_NO_SEGMENT)) {
+               ret = -ENOSPC;
+               goto out_unlock;
+       }
+
+       if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) {
                segno = find_next_zero_bit(free_i->free_segmap,
                        GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
                if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
                        goto got_it;
        }
+
+       /*
+        * If f2fs is formatted on zoned storage, try to get pinned sections
+        * from the beginning of the storage, which should be a conventional
+        * zone.
+        */
+       if (f2fs_sb_has_blkzoned(sbi)) {
+               segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg);
+               hint = GET_SEC_FROM_SEG(sbi, segno);
+       }
+
 find_other_zone:
        secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
        if (secno >= MAIN_SECS(sbi)) {
-               if (dir == ALLOC_RIGHT) {
-                       secno = find_first_zero_bit(free_i->free_secmap,
+               secno = find_first_zero_bit(free_i->free_secmap,
                                                        MAIN_SECS(sbi));
-                       f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
-               } else {
-                       go_left = 1;
-                       left_start = hint - 1;
+               if (secno >= MAIN_SECS(sbi)) {
+                       ret = -ENOSPC;
+                       goto out_unlock;
                }
        }
-       if (go_left == 0)
-               goto skip_left;
-
-       while (test_bit(left_start, free_i->free_secmap)) {
-               if (left_start > 0) {
-                       left_start--;
-                       continue;
-               }
-               left_start = find_first_zero_bit(free_i->free_secmap,
-                                                       MAIN_SECS(sbi));
-               f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
-               break;
-       }
-       secno = left_start;
-skip_left:
        segno = GET_SEG_FROM_SEC(sbi, secno);
        zoneno = GET_ZONE_FROM_SEC(sbi, secno);
 
@@ -2701,21 +2714,13 @@ skip_left:
                goto got_it;
        if (zoneno == old_zoneno)
                goto got_it;
-       if (dir == ALLOC_LEFT) {
-               if (!go_left && zoneno + 1 >= total_zones)
-                       goto got_it;
-               if (go_left && zoneno == 0)
-                       goto got_it;
-       }
        for (i = 0; i < NR_CURSEG_TYPE; i++)
                if (CURSEG_I(sbi, i)->zone == zoneno)
                        break;
 
        if (i < NR_CURSEG_TYPE) {
                /* zone is in user, try another */
-               if (go_left)
-                       hint = zoneno * sbi->secs_per_zone - 1;
-               else if (zoneno + 1 >= total_zones)
+               if (zoneno + 1 >= total_zones)
                        hint = 0;
                else
                        hint = (zoneno + 1) * sbi->secs_per_zone;
@@ -2725,9 +2730,23 @@ skip_left:
 got_it:
        /* set it as dirty segment in free segmap */
        f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
+
+       /* no free section left in the conventional zone */
+       if (new_sec && pinning &&
+               !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
+               ret = -EAGAIN;
+               goto out_unlock;
+       }
        __set_inuse(sbi, segno);
        *newseg = segno;
+out_unlock:
        spin_unlock(&free_i->segmap_lock);
+
+       if (ret == -ENOSPC) {
+               f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT);
+               f2fs_bug_on(sbi, 1);
+       }
+       return ret;
 }
 
 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
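
get_new_segment() now reports failure instead of assuming success: on -ENOSPC it stops the checkpoint itself and new_curseg() parks the current segment on NULL_SEGNO, which the reset_curseg() hunk below tolerates; -EAGAIN is returned when a pinned allocation finds no usable conventional-zone section. A sketch of that caller-side contract; the names and the sentinel value are assumptions:

	#include <errno.h>

	#define NULL_SEGNO	((unsigned int)~0)	/* assumed sentinel value */

	/* illustrative: mirror new_curseg()'s handling of the new return codes */
	static int handle_alloc(int ret, unsigned int *curseg_segno,
				unsigned int newseg)
	{
		if (ret == -ENOSPC)		/* checkpoint already stopped */
			*curseg_segno = NULL_SEGNO;
		else if (!ret)
			*curseg_segno = newseg;
		return ret;			/* -EAGAIN: pinning fallback */
	}
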
@@ -2736,6 +2755,10 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
        struct summary_footer *sum_footer;
        unsigned short seg_type = curseg->seg_type;
 
+       /* only happens when get_new_segment() fails */
+       if (curseg->next_segno == NULL_SEGNO)
+               return;
+
        curseg->inited = true;
        curseg->segno = curseg->next_segno;
        curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
@@ -2761,9 +2784,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 
        sanity_check_seg_type(sbi, seg_type);
        if (f2fs_need_rand_seg(sbi))
-               return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec);
+               return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
 
-       /* if segs_per_sec is large than 1, we need to keep original policy. */
        if (__is_large_section(sbi))
                return curseg->segno;
 
@@ -2774,8 +2796,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
        if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
                return 0;
 
-       if (test_opt(sbi, NOHEAP) &&
-               (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
+       if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))
                return 0;
 
        if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
@@ -2792,30 +2813,31 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
  * Allocate a current working segment.
  * This function always allocates a free segment in LFS manner.
  */
-static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
+static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, type);
-       unsigned short seg_type = curseg->seg_type;
        unsigned int segno = curseg->segno;
-       int dir = ALLOC_LEFT;
+       bool pinning = type == CURSEG_COLD_DATA_PINNED;
+       int ret;
 
        if (curseg->inited)
-               write_sum_page(sbi, curseg->sum_blk,
-                               GET_SUM_BLOCK(sbi, segno));
-       if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
-               dir = ALLOC_RIGHT;
-
-       if (test_opt(sbi, NOHEAP))
-               dir = ALLOC_RIGHT;
+               write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
 
        segno = __get_next_segno(sbi, type);
-       get_new_segment(sbi, &segno, new_sec, dir);
+       ret = get_new_segment(sbi, &segno, new_sec, pinning);
+       if (ret) {
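+               /* invalidate curseg so later allocation paths can detect it */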
+               if (ret == -ENOSPC)
+                       curseg->segno = NULL_SEGNO;
+               return ret;
+       }
+
        curseg->next_segno = segno;
        reset_curseg(sbi, type, 1);
        curseg->alloc_type = LFS;
        if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
                curseg->fragment_remained_chunk =
                                get_random_u32_inclusive(1, sbi->max_fragment_chunk);
+       return 0;
 }
 
 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -2831,7 +2853,7 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi,
        for (i = 0; i < entries; i++)
                target_map[i] = ckpt_map[i] | cur_map[i];
 
-       return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
+       return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start);
 }
 
 static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
@@ -2842,14 +2864,14 @@ static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
 
 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
 {
-       return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
+       return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi);
 }
 
 /*
  * This function always allocates a used segment (from the dirty seglist) in SSR
  * manner, so it should recover the existing segment information of valid blocks.
  */
-static void change_curseg(struct f2fs_sb_info *sbi, int type)
+static int change_curseg(struct f2fs_sb_info *sbi, int type)
 {
        struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2874,21 +2896,23 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
        if (IS_ERR(sum_page)) {
                /* GC won't be able to use stale summary pages by cp_error */
                memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
-               return;
+               return PTR_ERR(sum_page);
        }
        sum_node = (struct f2fs_summary_block *)page_address(sum_page);
        memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
        f2fs_put_page(sum_page, 1);
+       return 0;
 }
 
 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
                                int alloc_mode, unsigned long long age);
 
-static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
+static int get_atssr_segment(struct f2fs_sb_info *sbi, int type,
                                        int target_type, int alloc_mode,
                                        unsigned long long age)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, type);
+       int ret = 0;
 
        curseg->seg_type = target_type;
 
@@ -2896,38 +2920,41 @@ static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
                struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
 
                curseg->seg_type = se->type;
-               change_curseg(sbi, type);
+               ret = change_curseg(sbi, type);
        } else {
                /* allocate cold segment by default */
                curseg->seg_type = CURSEG_COLD_DATA;
-               new_curseg(sbi, type, true);
+               ret = new_curseg(sbi, type, true);
        }
        stat_inc_seg_type(sbi, curseg);
+       return ret;
 }
 
-static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
+static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
+       int ret = 0;
 
        if (!sbi->am.atgc_enabled)
-               return;
+               return 0;
 
        f2fs_down_read(&SM_I(sbi)->curseg_lock);
 
        mutex_lock(&curseg->curseg_mutex);
        down_write(&SIT_I(sbi)->sentry_lock);
 
-       get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
+       ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC,
+                                       CURSEG_COLD_DATA, SSR, 0);
 
        up_write(&SIT_I(sbi)->sentry_lock);
        mutex_unlock(&curseg->curseg_mutex);
 
        f2fs_up_read(&SM_I(sbi)->curseg_lock);
-
+       return ret;
 }
-void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
+int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
 {
-       __f2fs_init_atgc_curseg(sbi);
+       return __f2fs_init_atgc_curseg(sbi);
 }
 
 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
@@ -3055,11 +3082,12 @@ static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
        return false;
 }
 
-void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
                                        unsigned int start, unsigned int end)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        unsigned int segno;
+       int ret = 0;
 
        f2fs_down_read(&SM_I(sbi)->curseg_lock);
        mutex_lock(&curseg->curseg_mutex);
@@ -3070,9 +3098,9 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
                goto unlock;
 
        if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
-               change_curseg(sbi, type);
+               ret = change_curseg(sbi, type);
        else
-               new_curseg(sbi, type, true);
+               ret = new_curseg(sbi, type, true);
 
        stat_inc_seg_type(sbi, curseg);
 
@@ -3086,45 +3114,84 @@ unlock:
 
        mutex_unlock(&curseg->curseg_mutex);
        f2fs_up_read(&SM_I(sbi)->curseg_lock);
+       return ret;
 }
 
-static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
+static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
                                                bool new_sec, bool force)
 {
        struct curseg_info *curseg = CURSEG_I(sbi, type);
        unsigned int old_segno;
+       int err = 0;
+
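+       /* an uninitialized pinned curseg must allocate a section unconditionally */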
+       if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited)
+               goto allocate;
 
        if (!force && curseg->inited &&
            !curseg->next_blkoff &&
            !get_valid_blocks(sbi, curseg->segno, new_sec) &&
            !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
-               return;
+               return 0;
 
+allocate:
        old_segno = curseg->segno;
-       new_curseg(sbi, type, true);
+       err = new_curseg(sbi, type, true);
+       if (err)
+               return err;
        stat_inc_seg_type(sbi, curseg);
        locate_dirty_segment(sbi, old_segno);
+       return 0;
 }
 
-void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
+int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
 {
+       int ret;
+
        f2fs_down_read(&SM_I(sbi)->curseg_lock);
        down_write(&SIT_I(sbi)->sentry_lock);
-       __allocate_new_segment(sbi, type, true, force);
+       ret = __allocate_new_segment(sbi, type, true, force);
        up_write(&SIT_I(sbi)->sentry_lock);
        f2fs_up_read(&SM_I(sbi)->curseg_lock);
+
+       return ret;
 }
 
-void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
+int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
+{
+       int err;
+       bool gc_required = true;
+
+retry:
+       f2fs_lock_op(sbi);
+       err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
+       f2fs_unlock_op(sbi);
+
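+       /* no conventional section was available: reclaim one via GC, retry once */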
+       if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) {
+               f2fs_down_write(&sbi->gc_lock);
+               err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
+               f2fs_up_write(&sbi->gc_lock);
+
+               gc_required = false;
+               if (!err)
+                       goto retry;
+       }
+
+       return err;
+}
+
+int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
 {
        int i;
+       int err = 0;
 
        f2fs_down_read(&SM_I(sbi)->curseg_lock);
        down_write(&SIT_I(sbi)->sentry_lock);
        for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
-               __allocate_new_segment(sbi, i, false, false);
+               err += __allocate_new_segment(sbi, i, false, false);
        up_write(&SIT_I(sbi)->sentry_lock);
        f2fs_up_read(&SM_I(sbi)->curseg_lock);
+
+       return err;
 }
 
 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -3242,8 +3309,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
        end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
                                                GET_SEGNO(sbi, end);
        if (need_align) {
-               start_segno = rounddown(start_segno, sbi->segs_per_sec);
-               end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
+               start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi));
+               end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1;
        }
 
        cpc.reason = CP_DISCARD;
@@ -3416,7 +3483,14 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
                get_random_u32_inclusive(1, sbi->max_fragment_hole);
 }
 
-void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
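+/* return a curseg to its uninitialized state */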
+static void reset_curseg_fields(struct curseg_info *curseg)
+{
+       curseg->inited = false;
+       curseg->segno = NULL_SEGNO;
+       curseg->next_segno = 0;
+}
+
+int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                block_t old_blkaddr, block_t *new_blkaddr,
                struct f2fs_summary *sum, int type,
                struct f2fs_io_info *fio)
@@ -3427,12 +3501,18 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
        struct seg_entry *se = NULL;
        bool segment_full = false;
+       int ret = 0;
 
        f2fs_down_read(&SM_I(sbi)->curseg_lock);
 
        mutex_lock(&curseg->curseg_mutex);
        down_write(&sit_i->sentry_lock);
 
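+       /* a prior allocation failure left no valid current segment */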
+       if (curseg->segno == NULL_SEGNO) {
+               ret = -ENOSPC;
+               goto out_err;
+       }
+
        if (from_gc) {
                f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
                se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
@@ -3441,7 +3521,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        }
        *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 
-       f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
+       f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi));
 
        f2fs_wait_discard_bio(sbi, *new_blkaddr);
 
@@ -3470,25 +3550,35 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
         * since SSR needs latest valid block information.
         */
        update_sit_entry(sbi, *new_blkaddr, 1);
-       if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
-               update_sit_entry(sbi, old_blkaddr, -1);
+       update_sit_entry(sbi, old_blkaddr, -1);
 
        /*
         * If the current segment is full, flush it out and replace it with a
         * new segment.
         */
        if (segment_full) {
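+               /* a pinned log reaching a section boundary is dropped, not refilled */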
+               if (type == CURSEG_COLD_DATA_PINNED &&
+                   !((curseg->segno + 1) % sbi->segs_per_sec)) {
+                       reset_curseg_fields(curseg);
+                       goto skip_new_segment;
+               }
+
                if (from_gc) {
-                       get_atssr_segment(sbi, type, se->type,
+                       ret = get_atssr_segment(sbi, type, se->type,
                                                AT_SSR, se->mtime);
                } else {
                        if (need_new_seg(sbi, type))
-                               new_curseg(sbi, type, false);
+                               ret = new_curseg(sbi, type, false);
                        else
-                               change_curseg(sbi, type);
+                               ret = change_curseg(sbi, type);
                        stat_inc_seg_type(sbi, curseg);
                }
+
+               if (ret)
+                       goto out_err;
        }
+
+skip_new_segment:
        /*
         * segment dirty status should be updated after segment allocation,
         * so we just need to update status only one time after previous
@@ -3497,12 +3587,12 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
        locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
 
-       if (IS_DATASEG(type))
+       if (IS_DATASEG(curseg->seg_type))
                atomic64_inc(&sbi->allocated_data_blocks);
 
        up_write(&sit_i->sentry_lock);
 
-       if (page && IS_NODESEG(type)) {
+       if (page && IS_NODESEG(curseg->seg_type)) {
                fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
 
                f2fs_inode_chksum_set(sbi, page);
@@ -3511,9 +3601,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        if (fio) {
                struct f2fs_bio_info *io;
 
-               if (F2FS_IO_ALIGNED(sbi))
-                       fio->retry = 0;
-
                INIT_LIST_HEAD(&fio->list);
                fio->in_list = 1;
                io = sbi->write_io[fio->type] + fio->temp;
@@ -3523,8 +3610,15 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        }
 
        mutex_unlock(&curseg->curseg_mutex);
-
        f2fs_up_read(&SM_I(sbi)->curseg_lock);
+       return 0;
+out_err:
+       *new_blkaddr = NULL_ADDR;
+       up_write(&sit_i->sentry_lock);
+       mutex_unlock(&curseg->curseg_mutex);
+       f2fs_up_read(&SM_I(sbi)->curseg_lock);
+       return ret;
 }
 
 void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
@@ -3561,21 +3655,25 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 
        if (keep_order)
                f2fs_down_read(&fio->sbi->io_order_lock);
-reallocate:
-       f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
-                       &fio->new_blkaddr, sum, type, fio);
+
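+       /* on allocation failure, unwind encryption and writeback state, then bail */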
+       if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
+                       &fio->new_blkaddr, sum, type, fio)) {
+               if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host))
+                       fscrypt_finalize_bounce_page(&fio->encrypted_page);
+               if (PageWriteback(fio->page))
+                       end_page_writeback(fio->page);
+               if (f2fs_in_warm_node_list(fio->sbi, fio->page))
+                       f2fs_del_fsync_node_entry(fio->sbi, fio->page);
+               goto out;
+       }
        if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
                f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr);
 
        /* writeout dirty page into bdev */
        f2fs_submit_page_write(fio);
-       if (fio->retry) {
-               fio->old_blkaddr = fio->new_blkaddr;
-               goto reallocate;
-       }
 
        f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
-
+out:
        if (keep_order)
                f2fs_up_read(&fio->sbi->io_order_lock);
 }
@@ -3659,8 +3757,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
        }
 
        if (fio->post_read)
-               invalidate_mapping_pages(META_MAPPING(sbi),
-                               fio->new_blkaddr, fio->new_blkaddr);
+               f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
 
        stat_inc_inplace_blocks(fio->sbi);
 
@@ -3749,7 +3846,8 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
        /* change the current segment */
        if (segno != curseg->segno) {
                curseg->next_segno = segno;
-               change_curseg(sbi, type);
+               if (change_curseg(sbi, type))
+                       goto out_unlock;
        }
 
        curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
@@ -3775,12 +3873,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
        if (recover_curseg) {
                if (old_cursegno != curseg->segno) {
                        curseg->next_segno = old_cursegno;
-                       change_curseg(sbi, type);
+                       if (change_curseg(sbi, type))
+                               goto out_unlock;
                }
                curseg->next_blkoff = old_blkoff;
                curseg->alloc_type = old_alloc_type;
        }
 
+out_unlock:
        up_write(&sit_i->sentry_lock);
        mutex_unlock(&curseg->curseg_mutex);
        f2fs_up_write(&SM_I(sbi)->curseg_lock);
@@ -3850,7 +3950,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
        for (i = 0; i < len; i++)
                f2fs_wait_on_block_writeback(inode, blkaddr + i);
 
-       invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1);
+       f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
 }
 
 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
@@ -3892,7 +3992,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
                seg_i->next_blkoff = blk_off;
 
                if (seg_i->alloc_type == SSR)
-                       blk_off = sbi->blocks_per_seg;
+                       blk_off = BLKS_PER_SEG(sbi);
 
                for (j = 0; j < blk_off; j++) {
                        struct f2fs_summary *s;
@@ -3960,7 +4060,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
                        struct f2fs_summary *ns = &sum->entries[0];
                        int i;
 
-                       for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
+                       for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) {
                                ns->version = 0;
                                ns->ofs_in_node = 0;
                        }
@@ -4466,7 +4566,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
 #endif
 
        sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
-       sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
+       sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs);
        sit_i->written_valid_blocks = 0;
        sit_i->bitmap_size = sit_bitmap_size;
        sit_i->dirty_sentries = 0;
@@ -4539,9 +4639,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
                        array[i].seg_type = CURSEG_COLD_DATA;
                else if (i == CURSEG_ALL_DATA_ATGC)
                        array[i].seg_type = CURSEG_COLD_DATA;
-               array[i].segno = NULL_SEGNO;
-               array[i].next_blkoff = 0;
-               array[i].inited = false;
+               reset_curseg_fields(&array[i]);
        }
        return restore_curseg_summaries(sbi);
 }
@@ -4593,21 +4691,20 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
 
                        sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
 
-                       if (f2fs_block_unit_discard(sbi)) {
-                               /* build discard map only one time */
-                               if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
-                                       memset(se->discard_map, 0xff,
+                       if (!f2fs_block_unit_discard(sbi))
+                               goto init_discard_map_done;
+
+                       /* build discard map only one time */
+                       if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
+                               memset(se->discard_map, 0xff,
                                                SIT_VBLOCK_MAP_SIZE);
-                               } else {
-                                       memcpy(se->discard_map,
-                                               se->cur_valid_map,
+                               goto init_discard_map_done;
+                       }
+                       memcpy(se->discard_map, se->cur_valid_map,
                                                SIT_VBLOCK_MAP_SIZE);
-                                       sbi->discard_blks +=
-                                               sbi->blocks_per_seg -
+                       sbi->discard_blks += BLKS_PER_SEG(sbi) -
                                                se->valid_blocks;
-                               }
-                       }
-
+init_discard_map_done:
                        if (__is_large_section(sbi))
                                get_sec_entry(sbi, start)->valid_blocks +=
                                                        se->valid_blocks;
@@ -4747,7 +4844,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
                return;
 
        mutex_lock(&dirty_i->seglist_lock);
-       for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+       for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
                valid_blocks = get_valid_blocks(sbi, segno, true);
                secno = GET_SEC_FROM_SEG(sbi, segno);
 
@@ -4846,7 +4943,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
                if (curseg->alloc_type == SSR)
                        continue;
 
-               for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
+               for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) {
                        if (!f2fs_test_bit(blkofs, se->cur_valid_map))
                                continue;
 out:
@@ -4862,6 +4959,16 @@ out:
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
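+/* printable names for blk_zone_cond values, used in the notices below */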
+static const char *f2fs_zone_status[BLK_ZONE_COND_OFFLINE + 1] = {
+       [BLK_ZONE_COND_NOT_WP]          = "NOT_WP",
+       [BLK_ZONE_COND_EMPTY]           = "EMPTY",
+       [BLK_ZONE_COND_IMP_OPEN]        = "IMPLICIT_OPEN",
+       [BLK_ZONE_COND_EXP_OPEN]        = "EXPLICIT_OPEN",
+       [BLK_ZONE_COND_CLOSED]          = "CLOSED",
+       [BLK_ZONE_COND_READONLY]        = "READONLY",
+       [BLK_ZONE_COND_FULL]            = "FULL",
+       [BLK_ZONE_COND_OFFLINE]         = "OFFLINE",
+};
 
 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
                                    struct f2fs_dev_info *fdev,
@@ -4883,14 +4990,19 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
         * Skip check of zones cursegs point to, since
         * fix_curseg_write_pointer() checks them.
         */
-       if (zone_segno >= MAIN_SEGS(sbi) ||
-           IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno)))
+       if (zone_segno >= MAIN_SEGS(sbi))
                return 0;
 
        /*
         * Get # of valid block of the zone.
         */
        valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
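+       /* cursegs keep their zones open; just log the zone state and skip */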
+       if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
+               f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]",
+                               zone_segno, valid_block_cnt,
+                               f2fs_zone_status[zone->cond]);
+               return 0;
+       }
 
        if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) ||
            (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL))
@@ -4898,8 +5010,8 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
 
        if (!valid_block_cnt) {
                f2fs_notice(sbi, "Zone without valid block has non-zero write "
-                           "pointer. Reset the write pointer: cond[0x%x]",
-                           zone->cond);
+                           "pointer. Reset the write pointer: cond[%s]",
+                           f2fs_zone_status[zone->cond]);
                ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
                                        zone->len >> log_sectors_per_block);
                if (ret)
@@ -4916,8 +5028,8 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
         * selected for write operation until it gets discarded.
         */
        f2fs_notice(sbi, "Valid blocks are not aligned with write "
-                   "pointer: valid block[0x%x,0x%x] cond[0x%x]",
-                   zone_segno, valid_block_cnt, zone->cond);
+                   "pointer: valid block[0x%x,0x%x] cond[%s]",
+                   zone_segno, valid_block_cnt, f2fs_zone_status[zone->cond]);
 
        nofs_flags = memalloc_nofs_save();
        ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
@@ -5128,7 +5240,7 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg(
        unsigned int secno;
 
        if (!sbi->unusable_blocks_per_sec)
-               return sbi->blocks_per_seg;
+               return BLKS_PER_SEG(sbi);
 
        secno = GET_SEC_FROM_SEG(sbi, segno);
        seg_start = START_BLOCK(sbi, segno);
@@ -5143,10 +5255,10 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg(
         */
        if (seg_start >= sec_cap_blkaddr)
                return 0;
-       if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
+       if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr)
                return sec_cap_blkaddr - seg_start;
 
-       return sbi->blocks_per_seg;
+       return BLKS_PER_SEG(sbi);
 }
 #else
 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
@@ -5172,7 +5284,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
        if (f2fs_sb_has_blkzoned(sbi))
                return f2fs_usable_zone_blks_in_seg(sbi, segno);
 
-       return sbi->blocks_per_seg;
+       return BLKS_PER_SEG(sbi);
 }
 
 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
@@ -5181,7 +5293,7 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
        if (f2fs_sb_has_blkzoned(sbi))
                return CAP_SEGS_PER_SEC(sbi);
 
-       return sbi->segs_per_sec;
+       return SEGS_PER_SEC(sbi);
 }
 
 /*
@@ -5196,14 +5308,14 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
 
        sit_i->min_mtime = ULLONG_MAX;
 
-       for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
+       for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
                unsigned int i;
                unsigned long long mtime = 0;
 
-               for (i = 0; i < sbi->segs_per_sec; i++)
+               for (i = 0; i < SEGS_PER_SEC(sbi); i++)
                        mtime += get_seg_entry(sbi, segno + i)->mtime;
 
-               mtime = div_u64(mtime, sbi->segs_per_sec);
+               mtime = div_u64(mtime, SEGS_PER_SEC(sbi));
 
                if (sit_i->min_mtime > mtime)
                        sit_i->min_mtime = mtime;
@@ -5242,7 +5354,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
                sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
        sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
        sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
-       sm_info->min_seq_blocks = sbi->blocks_per_seg;
+       sm_info->min_seq_blocks = BLKS_PER_SEG(sbi);
        sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
        sm_info->min_ssr_sections = reserved_sections(sbi);
 
index 8129be788bd5615b99a666f6a240b16c315a0cc0..e1c0f418aa11f6b2495c5cd24d409f1434560ea4 100644 (file)
@@ -48,21 +48,21 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
 
 #define IS_CURSEC(sbi, secno)                                          \
        (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno /            \
-         (sbi)->segs_per_sec) ||       \
+         SEGS_PER_SEC(sbi)) || \
         ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno /           \
-         (sbi)->segs_per_sec) ||       \
+         SEGS_PER_SEC(sbi)) || \
         ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno /           \
-         (sbi)->segs_per_sec) ||       \
+         SEGS_PER_SEC(sbi)) || \
         ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno /            \
-         (sbi)->segs_per_sec) ||       \
+         SEGS_PER_SEC(sbi)) || \
         ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno /           \
-         (sbi)->segs_per_sec) ||       \
+         SEGS_PER_SEC(sbi)) || \
         ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno /           \
-         (sbi)->segs_per_sec) ||       \
+         SEGS_PER_SEC(sbi)) || \
         ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno /    \
-         (sbi)->segs_per_sec) ||       \
+         SEGS_PER_SEC(sbi)) || \
         ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno /       \
-         (sbi)->segs_per_sec))
+         SEGS_PER_SEC(sbi)))
 
 #define MAIN_BLKADDR(sbi)                                              \
        (SM_I(sbi) ? SM_I(sbi)->main_blkaddr :                          \
@@ -77,40 +77,37 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
 #define TOTAL_SEGS(sbi)                                                        \
        (SM_I(sbi) ? SM_I(sbi)->segment_count :                                 \
                le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count))
-#define TOTAL_BLKS(sbi)        (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg)
+#define TOTAL_BLKS(sbi)        (SEGS_TO_BLKS(sbi, TOTAL_SEGS(sbi)))
 
 #define MAX_BLKADDR(sbi)       (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
 #define SEGMENT_SIZE(sbi)      (1ULL << ((sbi)->log_blocksize +        \
                                        (sbi)->log_blocks_per_seg))
 
 #define START_BLOCK(sbi, segno)        (SEG0_BLKADDR(sbi) +                    \
-        (GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg))
+        (SEGS_TO_BLKS(sbi, GET_R2L_SEGNO(FREE_I(sbi), segno))))
 
 #define NEXT_FREE_BLKADDR(sbi, curseg)                                 \
        (START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff)
 
 #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr)    ((blk_addr) - SEG0_BLKADDR(sbi))
 #define GET_SEGNO_FROM_SEG0(sbi, blk_addr)                             \
-       (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg)
+       (BLKS_TO_SEGS(sbi, GET_SEGOFF_FROM_SEG0(sbi, blk_addr)))
 #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr)                            \
-       (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
+       (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (BLKS_PER_SEG(sbi) - 1))
 
 #define GET_SEGNO(sbi, blk_addr)                                       \
        ((!__is_valid_data_blkaddr(blk_addr)) ?                 \
        NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi),                 \
                GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
-#define BLKS_PER_SEC(sbi)                                      \
-       ((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
 #define CAP_BLKS_PER_SEC(sbi)                                  \
-       ((sbi)->segs_per_sec * (sbi)->blocks_per_seg -          \
-        (sbi)->unusable_blocks_per_sec)
+       (BLKS_PER_SEC(sbi) - (sbi)->unusable_blocks_per_sec)
 #define CAP_SEGS_PER_SEC(sbi)                                  \
-       ((sbi)->segs_per_sec - ((sbi)->unusable_blocks_per_sec >>\
-       (sbi)->log_blocks_per_seg))
+       (SEGS_PER_SEC(sbi) -                                    \
+       BLKS_TO_SEGS(sbi, (sbi)->unusable_blocks_per_sec))
 #define GET_SEC_FROM_SEG(sbi, segno)                           \
-       (((segno) == -1) ? -1 : (segno) / (sbi)->segs_per_sec)
+       (((segno) == -1) ? -1 : (segno) / SEGS_PER_SEC(sbi))
 #define GET_SEG_FROM_SEC(sbi, secno)                           \
-       ((secno) * (sbi)->segs_per_sec)
+       ((secno) * SEGS_PER_SEC(sbi))
 #define GET_ZONE_FROM_SEC(sbi, secno)                          \
        (((secno) == -1) ? -1 : (secno) / (sbi)->secs_per_zone)
 #define GET_ZONE_FROM_SEG(sbi, segno)                          \
@@ -138,16 +135,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
 #define SECTOR_TO_BLOCK(sectors)                                       \
        ((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK)
 
-/*
- * indicate a block allocation direction: RIGHT and LEFT.
- * RIGHT means allocating new sections towards the end of volume.
- * LEFT means the opposite direction.
- */
-enum {
-       ALLOC_RIGHT = 0,
-       ALLOC_LEFT
-};
-
 /*
  * In the victim_sel_policy->alloc_mode, there are three block allocation modes.
  * LFS writes data sequentially with cleaning operations.
@@ -364,7 +351,7 @@ static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
                unsigned int blocks = 0;
                int i;
 
-               for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) {
+               for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) {
                        struct seg_entry *se = get_seg_entry(sbi, start_segno);
 
                        blocks += se->ckpt_valid_blocks;
@@ -449,7 +436,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
        free_i->free_segments++;
 
        next = find_next_bit(free_i->free_segmap,
-                       start_segno + sbi->segs_per_sec, start_segno);
+                       start_segno + SEGS_PER_SEC(sbi), start_segno);
        if (next >= start_segno + usable_segs) {
                clear_bit(secno, free_i->free_secmap);
                free_i->free_sections++;
@@ -485,7 +472,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
                if (!inmem && IS_CURSEC(sbi, secno))
                        goto skip_free;
                next = find_next_bit(free_i->free_segmap,
-                               start_segno + sbi->segs_per_sec, start_segno);
+                               start_segno + SEGS_PER_SEC(sbi), start_segno);
                if (next >= start_segno + usable_segs) {
                        if (test_and_clear_bit(secno, free_i->free_secmap))
                                free_i->free_sections++;
@@ -573,23 +560,22 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
                        unsigned int node_blocks, unsigned int dent_blocks)
 {
 
        unsigned int segno, left_blocks;
        int i;
 
-       /* check current node segment */
+       /* check current node sections in the worst case. */
        for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
                segno = CURSEG_I(sbi, i)->segno;
-               left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
-                               get_seg_entry(sbi, segno)->ckpt_valid_blocks;
-
+               left_blocks = CAP_BLKS_PER_SEC(sbi) -
+                               get_ckpt_valid_blocks(sbi, segno, true);
                if (node_blocks > left_blocks)
                        return false;
        }
 
-       /* check current data segment */
+       /* check current data section for dentry blocks. */
        segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
-       left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
-                       get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+       left_blocks = CAP_BLKS_PER_SEC(sbi) -
+                       get_ckpt_valid_blocks(sbi, segno, true);
        if (dent_blocks > left_blocks)
                return false;
        return true;
@@ -638,7 +624,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
 
        if (free_secs > upper_secs)
                return false;
-       else if (free_secs <= lower_secs)
+       if (free_secs <= lower_secs)
                return true;
        return !curseg_space;
 }
@@ -793,10 +779,10 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
                return -EFSCORRUPTED;
        }
 
-       if (usable_blks_per_seg < sbi->blocks_per_seg)
+       if (usable_blks_per_seg < BLKS_PER_SEG(sbi))
                f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map,
-                               sbi->blocks_per_seg,
-                               usable_blks_per_seg) != sbi->blocks_per_seg);
+                               BLKS_PER_SEG(sbi),
+                               usable_blks_per_seg) != BLKS_PER_SEG(sbi));
 
        /* check segment usage, and check boundary of a given segment number */
        if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg
@@ -915,9 +901,9 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
                return 0;
 
        if (type == DATA)
-               return sbi->blocks_per_seg;
+               return BLKS_PER_SEG(sbi);
        else if (type == NODE)
-               return 8 * sbi->blocks_per_seg;
+               return SEGS_TO_BLKS(sbi, 8);
        else if (type == META)
                return 8 * BIO_MAX_VECS;
        else
@@ -969,3 +955,13 @@ wake_up:
        dcc->discard_wake = true;
        wake_up_interruptible_all(&dcc->discard_wait_queue);
 }
+
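+/* find the first segment served by a zoned block device, or 0 if none */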
+static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
+{
+       int devi;
+
+       for (devi = 0; devi < sbi->s_ndevs; devi++)
+               if (bdev_is_zoned(FDEV(devi).bdev))
+                       return GET_SEGNO(sbi, FDEV(devi).start_blk);
+       return 0;
+}
index f6ffbfe75653cf0de1741bf5094f4a6340faa659..a6867f26f141836dcd4a4f0136dd67a9de6c3c74 100644 (file)
@@ -44,24 +44,26 @@ static struct kmem_cache *f2fs_inode_cachep;
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 
 const char *f2fs_fault_name[FAULT_MAX] = {
-       [FAULT_KMALLOC]         = "kmalloc",
-       [FAULT_KVMALLOC]        = "kvmalloc",
-       [FAULT_PAGE_ALLOC]      = "page alloc",
-       [FAULT_PAGE_GET]        = "page get",
-       [FAULT_ALLOC_NID]       = "alloc nid",
-       [FAULT_ORPHAN]          = "orphan",
-       [FAULT_BLOCK]           = "no more block",
-       [FAULT_DIR_DEPTH]       = "too big dir depth",
-       [FAULT_EVICT_INODE]     = "evict_inode fail",
-       [FAULT_TRUNCATE]        = "truncate fail",
-       [FAULT_READ_IO]         = "read IO error",
-       [FAULT_CHECKPOINT]      = "checkpoint error",
-       [FAULT_DISCARD]         = "discard error",
-       [FAULT_WRITE_IO]        = "write IO error",
-       [FAULT_SLAB_ALLOC]      = "slab alloc",
-       [FAULT_DQUOT_INIT]      = "dquot initialize",
-       [FAULT_LOCK_OP]         = "lock_op",
-       [FAULT_BLKADDR]         = "invalid blkaddr",
+       [FAULT_KMALLOC]                 = "kmalloc",
+       [FAULT_KVMALLOC]                = "kvmalloc",
+       [FAULT_PAGE_ALLOC]              = "page alloc",
+       [FAULT_PAGE_GET]                = "page get",
+       [FAULT_ALLOC_NID]               = "alloc nid",
+       [FAULT_ORPHAN]                  = "orphan",
+       [FAULT_BLOCK]                   = "no more block",
+       [FAULT_DIR_DEPTH]               = "too big dir depth",
+       [FAULT_EVICT_INODE]             = "evict_inode fail",
+       [FAULT_TRUNCATE]                = "truncate fail",
+       [FAULT_READ_IO]                 = "read IO error",
+       [FAULT_CHECKPOINT]              = "checkpoint error",
+       [FAULT_DISCARD]                 = "discard error",
+       [FAULT_WRITE_IO]                = "write IO error",
+       [FAULT_SLAB_ALLOC]              = "slab alloc",
+       [FAULT_DQUOT_INIT]              = "dquot initialize",
+       [FAULT_LOCK_OP]                 = "lock_op",
+       [FAULT_BLKADDR_VALIDITY]        = "invalid blkaddr",
+       [FAULT_BLKADDR_CONSISTENCE]     = "inconsistent blkaddr",
+       [FAULT_NO_SEGMENT]              = "no free segment",
 };
 
 void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -137,7 +139,6 @@ enum {
        Opt_resgid,
        Opt_resuid,
        Opt_mode,
-       Opt_io_size_bits,
        Opt_fault_injection,
        Opt_fault_type,
        Opt_lazytime,
@@ -216,7 +217,6 @@ static match_table_t f2fs_tokens = {
        {Opt_resgid, "resgid=%u"},
        {Opt_resuid, "resuid=%u"},
        {Opt_mode, "mode=%s"},
-       {Opt_io_size_bits, "io_bits=%u"},
        {Opt_fault_injection, "fault_injection=%u"},
        {Opt_fault_type, "fault_type=%u"},
        {Opt_lazytime, "lazytime"},
@@ -263,7 +263,8 @@ static match_table_t f2fs_tokens = {
        {Opt_err, NULL},
 };
 
-void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
+void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate,
+                                               const char *fmt, ...)
 {
        struct va_format vaf;
        va_list args;
@@ -274,8 +275,12 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
        level = printk_get_level(fmt);
        vaf.fmt = printk_skip_level(fmt);
        vaf.va = &args;
-       printk("%c%cF2FS-fs (%s): %pV\n",
-              KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
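+       /* callers pass limit_rate to throttle messages that can repeat rapidly */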
+       if (limit_rate)
+               printk_ratelimited("%c%cF2FS-fs (%s): %pV\n",
+                       KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+       else
+               printk("%c%cF2FS-fs (%s): %pV\n",
+                       KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
 
        va_end(args);
 }
@@ -343,46 +348,6 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
                                           F2FS_OPTION(sbi).s_resgid));
 }
 
-static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
-{
-       unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
-       unsigned int avg_vblocks;
-       unsigned int wanted_reserved_segments;
-       block_t avail_user_block_count;
-
-       if (!F2FS_IO_ALIGNED(sbi))
-               return 0;
-
-       /* average valid block count in section in worst case */
-       avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
-
-       /*
-        * we need enough free space when migrating one section in worst case
-        */
-       wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
-                                               reserved_segments(sbi);
-       wanted_reserved_segments -= reserved_segments(sbi);
-
-       avail_user_block_count = sbi->user_block_count -
-                               sbi->current_reserved_blocks -
-                               F2FS_OPTION(sbi).root_reserved_blocks;
-
-       if (wanted_reserved_segments * sbi->blocks_per_seg >
-                                       avail_user_block_count) {
-               f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
-                       wanted_reserved_segments,
-                       avail_user_block_count >> sbi->log_blocks_per_seg);
-               return -ENOSPC;
-       }
-
-       SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
-
-       f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
-                        wanted_reserved_segments);
-
-       return 0;
-}
-
 static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
 {
        if (!F2FS_OPTION(sbi).unusable_cap_perc)
@@ -663,7 +628,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str)
 #ifdef CONFIG_F2FS_FS_ZSTD
 static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
 {
-       unsigned int level;
+       int level;
        int len = 4;
 
        if (strlen(str) == len) {
@@ -677,9 +642,15 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
                f2fs_info(sbi, "wrong format, e.g. <alg_name>:<compr_level>");
                return -EINVAL;
        }
-       if (kstrtouint(str + 1, 10, &level))
+       if (kstrtoint(str + 1, 10, &level))
                return -EINVAL;
 
+       /* f2fs does not support negative compress levels for now */
+       if (level < 0) {
+               f2fs_info(sbi, "do not support negative compress level: %d", level);
+               return -ERANGE;
+       }
+
        if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) {
                f2fs_info(sbi, "invalid zstd compress level: %d", level);
                return -EINVAL;
@@ -763,10 +734,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                        clear_opt(sbi, DISCARD);
                        break;
                case Opt_noheap:
-                       set_opt(sbi, NOHEAP);
-                       break;
                case Opt_heap:
-                       clear_opt(sbi, NOHEAP);
+                       f2fs_warn(sbi, "heap/no_heap options were deprecated");
                        break;
 #ifdef CONFIG_F2FS_FS_XATTR
                case Opt_user_xattr:
@@ -913,16 +882,6 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                        }
                        kfree(name);
                        break;
-               case Opt_io_size_bits:
-                       if (args->from && match_int(args, &arg))
-                               return -EINVAL;
-                       if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) {
-                               f2fs_warn(sbi, "Not support %ld, larger than %d",
-                                       BIT(arg), BIO_MAX_VECS);
-                               return -EINVAL;
-                       }
-                       F2FS_OPTION(sbi).write_io_size_bits = arg;
-                       break;
 #ifdef CONFIG_F2FS_FAULT_INJECTION
                case Opt_fault_injection:
                        if (args->from && match_int(args, &arg))
@@ -1392,12 +1351,6 @@ default_check:
        }
 #endif
 
-       if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) {
-               f2fs_err(sbi, "Should set mode=lfs with %luKB-sized IO",
-                        F2FS_IO_SIZE_KB(sbi));
-               return -EINVAL;
-       }
-
        if (test_opt(sbi, INLINE_XATTR_SIZE)) {
                int min_size, max_size;
 
@@ -1718,7 +1671,6 @@ static void f2fs_put_super(struct super_block *sb)
 
        f2fs_destroy_page_array_cache(sbi);
        f2fs_destroy_xattr_caches(sbi);
-       mempool_destroy(sbi->write_io_dummy);
 #ifdef CONFIG_QUOTA
        for (i = 0; i < MAXQUOTAS; i++)
                kfree(F2FS_OPTION(sbi).s_qf_names[i]);
@@ -2009,10 +1961,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
        } else {
                seq_puts(seq, ",nodiscard");
        }
-       if (test_opt(sbi, NOHEAP))
-               seq_puts(seq, ",no_heap");
-       else
-               seq_puts(seq, ",heap");
 #ifdef CONFIG_F2FS_FS_XATTR
        if (test_opt(sbi, XATTR_USER))
                seq_puts(seq, ",user_xattr");
@@ -2078,9 +2026,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
                                        F2FS_OPTION(sbi).s_resuid),
                                from_kgid_munged(&init_user_ns,
                                        F2FS_OPTION(sbi).s_resgid));
-       if (F2FS_IO_SIZE_BITS(sbi))
-               seq_printf(seq, ",io_bits=%u",
-                               F2FS_OPTION(sbi).write_io_size_bits);
 #ifdef CONFIG_F2FS_FAULT_INJECTION
        if (test_opt(sbi, FAULT_INJECTION)) {
                seq_printf(seq, ",fault_injection=%u",
@@ -2192,7 +2137,6 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount)
        set_opt(sbi, INLINE_XATTR);
        set_opt(sbi, INLINE_DATA);
        set_opt(sbi, INLINE_DENTRY);
-       set_opt(sbi, NOHEAP);
        set_opt(sbi, MERGE_CHECKPOINT);
        F2FS_OPTION(sbi).unusable_cap = 0;
        sbi->sb->s_flags |= SB_LAZYTIME;
@@ -2247,6 +2191,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
                        .init_gc_type = FG_GC,
                        .should_migrate_blocks = false,
                        .err_gc_skipped = true,
+                       .no_bg_gc = true,
                        .nr_free_secs = 1 };
 
                f2fs_down_write(&sbi->gc_lock);
@@ -2332,7 +2277,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
        bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
        bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE);
        bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
-       bool no_io_align = !F2FS_IO_ALIGNED(sbi);
        bool no_atgc = !test_opt(sbi, ATGC);
        bool no_discard = !test_opt(sbi, DISCARD);
        bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE);
@@ -2440,12 +2384,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
                goto restore_opts;
        }
 
-       if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) {
-               err = -EINVAL;
-               f2fs_warn(sbi, "switch io_bits option is not allowed");
-               goto restore_opts;
-       }
-
        if (no_compress_cache == !!test_opt(sbi, COMPRESS_CACHE)) {
                err = -EINVAL;
                f2fs_warn(sbi, "switch compress_cache option is not allowed");
@@ -3706,7 +3644,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
        }
 
        main_segs = le32_to_cpu(raw_super->segment_count_main);
-       blocks_per_seg = sbi->blocks_per_seg;
+       blocks_per_seg = BLKS_PER_SEG(sbi);
 
        for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
                if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
@@ -3818,9 +3756,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
        sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
        sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
        sbi->total_sections = le32_to_cpu(raw_super->section_count);
-       sbi->total_node_count =
-               (le32_to_cpu(raw_super->segment_count_nat) / 2)
-                       * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
+       sbi->total_node_count = SEGS_TO_BLKS(sbi,
+                       ((le32_to_cpu(raw_super->segment_count_nat) / 2) *
+                       NAT_ENTRY_PER_BLOCK));
        F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino);
        F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
        F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
@@ -3829,7 +3767,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
        sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
        sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
        sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
-       sbi->migration_granularity = sbi->segs_per_sec;
+       sbi->migration_granularity = SEGS_PER_SEC(sbi);
        sbi->seq_file_ra_mul = MIN_RA_MUL;
        sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
        sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
@@ -3930,11 +3868,6 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
                return 0;
 
        zone_sectors = bdev_zone_sectors(bdev);
-       if (!is_power_of_2(zone_sectors)) {
-               f2fs_err(sbi, "F2FS does not support non power of 2 zone sizes\n");
-               return -EINVAL;
-       }
-
        if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
                                SECTOR_TO_BLOCK(zone_sectors))
                return -EINVAL;
@@ -4090,7 +4023,9 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
 
        f2fs_up_write(&sbi->sb_lock);
        if (err)
-               f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
+               f2fs_err_ratelimited(sbi,
+                       "f2fs_commit_super fails to record stop_reason, err:%d",
+                       err);
 }
 
 void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
@@ -4133,8 +4068,9 @@ static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error)
 
        err = f2fs_commit_super(sbi, false);
        if (err)
-               f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d",
-                                                               error, err);
+               f2fs_err_ratelimited(sbi,
+                       "f2fs_commit_super fails to record errors:%u, err:%d",
+                       error, err);
 out_unlock:
        f2fs_up_write(&sbi->sb_lock);
 }
@@ -4259,14 +4195,14 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
                        if (i == 0) {
                                FDEV(i).start_blk = 0;
                                FDEV(i).end_blk = FDEV(i).start_blk +
-                                   (FDEV(i).total_segments <<
-                                   sbi->log_blocks_per_seg) - 1 +
-                                   le32_to_cpu(raw_super->segment0_blkaddr);
+                                       SEGS_TO_BLKS(sbi,
+                                       FDEV(i).total_segments) - 1 +
+                                       le32_to_cpu(raw_super->segment0_blkaddr);
                        } else {
                                FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
                                FDEV(i).end_blk = FDEV(i).start_blk +
-                                       (FDEV(i).total_segments <<
-                                       sbi->log_blocks_per_seg) - 1;
+                                               SEGS_TO_BLKS(sbi,
+                                               FDEV(i).total_segments) - 1;
                                FDEV(i).bdev_file = bdev_file_open_by_path(
                                        FDEV(i).path, mode, sbi->sb, NULL);
                        }
@@ -4305,8 +4241,6 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
                          FDEV(i).total_segments,
                          FDEV(i).start_blk, FDEV(i).end_blk);
        }
-       f2fs_info(sbi,
-                 "IO Block Size: %8ld KB", F2FS_IO_SIZE_KB(sbi));
        return 0;
 }
 
@@ -4519,19 +4453,10 @@ try_onemore:
        if (err)
                goto free_iostat;
 
-       if (F2FS_IO_ALIGNED(sbi)) {
-               sbi->write_io_dummy =
-                       mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
-               if (!sbi->write_io_dummy) {
-                       err = -ENOMEM;
-                       goto free_percpu;
-               }
-       }
-
        /* init per sbi slab cache */
        err = f2fs_init_xattr_caches(sbi);
        if (err)
-               goto free_io_dummy;
+               goto free_percpu;
        err = f2fs_init_page_array_cache(sbi);
        if (err)
                goto free_xattr_cache;
@@ -4619,10 +4544,6 @@ try_onemore:
                goto free_nm;
        }
 
-       err = adjust_reserved_segment(sbi);
-       if (err)
-               goto free_nm;
-
        /* For write statistics */
        sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
 
@@ -4749,13 +4670,20 @@ reset_checkpoint:
         * If the f2fs is not readonly and fsync data recovery succeeds,
         * check zoned block devices' write pointer consistency.
         */
-       if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) {
-               err = f2fs_check_write_pointer(sbi);
-               if (err)
-                       goto free_meta;
+       if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sb)) {
+               int err2;
+
+               f2fs_notice(sbi, "Checking entire write pointers");
+               err2 = f2fs_check_write_pointer(sbi);
+               if (err2)
+                       err = err2;
        }
+       if (err)
+               goto free_meta;
 
-       f2fs_init_inmem_curseg(sbi);
+       err = f2fs_init_inmem_curseg(sbi);
+       if (err)
+               goto sync_free_meta;
 
        /* f2fs_recover_fsync_data() cleared this already */
        clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -4854,8 +4782,6 @@ free_page_array_cache:
        f2fs_destroy_page_array_cache(sbi);
 free_xattr_cache:
        f2fs_destroy_xattr_caches(sbi);
-free_io_dummy:
-       mempool_destroy(sbi->write_io_dummy);
 free_percpu:
        destroy_percpu_info(sbi);
 free_iostat:
index a7ec55c7bb20ce136d41ac91cdea0c618062c9bb..a568ce96cf563130654333ccae5a69049050aae7 100644 (file)
@@ -493,8 +493,8 @@ out:
                spin_lock(&sbi->stat_lock);
                if (t > (unsigned long)(sbi->user_block_count -
                                F2FS_OPTION(sbi).root_reserved_blocks -
-                               sbi->blocks_per_seg *
-                               SM_I(sbi)->additional_reserved_segments)) {
+                               SEGS_TO_BLKS(sbi,
+                               SM_I(sbi)->additional_reserved_segments))) {
                        spin_unlock(&sbi->stat_lock);
                        return -EINVAL;
                }
@@ -551,7 +551,7 @@ out:
        }
 
        if (!strcmp(a->attr.name, "migration_granularity")) {
-               if (t == 0 || t > sbi->segs_per_sec)
+               if (t == 0 || t > SEGS_PER_SEC(sbi))
                        return -EINVAL;
        }
 
@@ -1492,6 +1492,50 @@ static int __maybe_unused discard_plist_seq_show(struct seq_file *seq,
        return 0;
 }
 
+static int __maybe_unused disk_map_seq_show(struct seq_file *seq,
+                                               void *offset)
+{
+       struct super_block *sb = seq->private;
+       struct f2fs_sb_info *sbi = F2FS_SB(sb);
+       int i;
+
+       seq_printf(seq, "Address Layout   : %5luB Block address (# of Segments)\n",
+                                       F2FS_BLKSIZE);
+       seq_printf(seq, " SB            : %12s\n", "0/1024B");
+       seq_printf(seq, " seg0_blkaddr  : 0x%010x\n", SEG0_BLKADDR(sbi));
+       seq_printf(seq, " Checkpoint    : 0x%010x (%10d)\n",
+                       le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr), 2);
+       seq_printf(seq, " SIT           : 0x%010x (%10d)\n",
+                       SIT_I(sbi)->sit_base_addr,
+                       le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_sit));
+       seq_printf(seq, " NAT           : 0x%010x (%10d)\n",
+                       NM_I(sbi)->nat_blkaddr,
+                       le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_nat));
+       seq_printf(seq, " SSA           : 0x%010x (%10d)\n",
+                       SM_I(sbi)->ssa_blkaddr,
+                       le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_ssa));
+       seq_printf(seq, " Main          : 0x%010x (%10d)\n",
+                       SM_I(sbi)->main_blkaddr,
+                       le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main));
+       seq_printf(seq, " # of Sections : %12d\n",
+                       le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count));
+       seq_printf(seq, " Segs/Sections : %12d\n",
+                       SEGS_PER_SEC(sbi));
+       seq_printf(seq, " Section size  : %12d MB\n",
+                       SEGS_PER_SEC(sbi) << 1);
+
+       if (!f2fs_is_multi_device(sbi))
+               return 0;
+
+       seq_puts(seq, "\nDisk Map for multi devices:\n");
+       for (i = 0; i < sbi->s_ndevs; i++)
+               seq_printf(seq, "Disk:%2d (zoned=%d): 0x%010x - 0x%010x on %s\n",
+                       i, bdev_is_zoned(FDEV(i).bdev),
+                       FDEV(i).start_blk, FDEV(i).end_blk,
+                       FDEV(i).path);
+       return 0;
+}
+
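The "Section size" line above relies on f2fs's fixed 2 MiB segment size
(512 blocks of 4 KiB), so shifting the per-section segment count left by
one is just a multiply by 2 MB per segment; as a quick sketch:

        /* Assuming 2 MiB segments: */
        unsigned int section_size_mb = SEGS_PER_SEC(sbi) * 2;
        /* same value as the SEGS_PER_SEC(sbi) << 1 in disk_map_seq_show() */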
 int __init f2fs_init_sysfs(void)
 {
        int ret;
@@ -1573,6 +1617,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
                                victim_bits_seq_show, sb);
        proc_create_single_data("discard_plist_info", 0444, sbi->s_proc,
                                discard_plist_seq_show, sb);
+       proc_create_single_data("disk_map", 0444, sbi->s_proc,
+                               disk_map_seq_show, sb);
        return 0;
 put_feature_list_kobj:
        kobject_put(&sbi->s_feature_list_kobj);
index 4fc95f353a7a96c6cc67c6ea5dfabea0cc591e9c..f7bb0c54502c8d57981ffe9f41850b5c196e9f2c 100644 (file)
@@ -258,21 +258,23 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
                                               pgoff_t index,
                                               unsigned long num_ra_pages)
 {
-       struct page *page;
+       struct folio *folio;
 
        index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
 
-       page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
-       if (!page || !PageUptodate(page)) {
+       folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0);
+       if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
                DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
 
-               if (page)
-                       put_page(page);
+               if (!IS_ERR(folio))
+                       folio_put(folio);
                else if (num_ra_pages > 1)
                        page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
-               page = read_mapping_page(inode->i_mapping, index, NULL);
+               folio = read_mapping_folio(inode->i_mapping, index, NULL);
+               if (IS_ERR(folio))
+                       return ERR_CAST(folio);
        }
-       return page;
+       return folio_file_page(folio, index);
 }
 
 static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
index c52e63e10d35cd3ccacee22341bed2679df1db80..509eea96a457d41f63f04480da32aceae75a8a4a 100644 (file)
@@ -130,6 +130,12 @@ fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp,
                fid->parent_i_gen = parent->i_generation;
                type = FILEID_FAT_WITH_PARENT;
                *lenp = FAT_FID_SIZE_WITH_PARENT;
+       } else {
+               /*
+                * We need to initialize this field because the fh is actually
+                * 12 bytes long
+                */
+               fid->parent_i_pos_hi = 0;
        }
 
        return type;
index 038ed0b9aaa5d619cf498c55ffd0069dce2abf14..8674dbfbe59dbf79c304c587b08ebba3cfe405be 100644 (file)
@@ -52,3 +52,14 @@ config FUSE_DAX
 
          If you want to allow mounting a Virtio Filesystem with the "dax"
          option, answer Y.
+
+config FUSE_PASSTHROUGH
+       bool "FUSE passthrough operations support"
+       default y
+       depends on FUSE_FS
+       select FS_STACK
+       help
+         This allows bypassing the FUSE server by mapping specific FUSE
+         operations to be performed directly on a backing file.
+
+         If you want to allow passthrough operations, answer Y.
index 0c48b35c058d78d20966443de6a56928020729b4..6e0228c6d0cba9541c8668efb86b83094751d469 100644 (file)
@@ -8,6 +8,8 @@ obj-$(CONFIG_CUSE) += cuse.o
 obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
 
 fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
+fuse-y += iomode.o
 fuse-$(CONFIG_FUSE_DAX) += dax.o
+fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
 
 virtiofs-y := virtio_fs.o
index 284a3500646296dab9a4dd8883159c8a24b9f82f..97ac994ff78f1daca5b8cc25ef2f5bb7210577db 100644 (file)
@@ -174,11 +174,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
        if (!fc)
                goto out;
 
-       down_read(&fc->killsb);
-       spin_lock(&fc->bg_lock);
-       fc->congestion_threshold = val;
-       spin_unlock(&fc->bg_lock);
-       up_read(&fc->killsb);
+       WRITE_ONCE(fc->congestion_threshold, val);
        fuse_conn_put(fc);
 out:
        return ret;
index 1a8f82f478cb7aa4a01f7286ff2f1e939ee5caca..3ec8bb5e68ff59497481a84e4038fee093f08be8 100644 (file)
@@ -1775,6 +1775,61 @@ copy_finish:
        return err;
 }
 
+/*
+ * Resend all processing queue requests.
+ *
+ * When a FUSE daemon panics and fails over, some inflight requests may be
+ * lost and never answered, leaving the applications that are awaiting the
+ * replies stuck forever. To address this, a notification can trigger a
+ * resend of these pending requests to the FUSE daemon, ensuring they are
+ * properly processed again.
+ *
+ * Please note that this strategy is applicable only to idempotent requests or
+ * if the FUSE daemon takes careful measures to avoid processing duplicated
+ * non-idempotent requests.
+ */
+static void fuse_resend(struct fuse_conn *fc)
+{
+       struct fuse_dev *fud;
+       struct fuse_req *req, *next;
+       struct fuse_iqueue *fiq = &fc->iq;
+       LIST_HEAD(to_queue);
+       unsigned int i;
+
+       spin_lock(&fc->lock);
+       if (!fc->connected) {
+               spin_unlock(&fc->lock);
+               return;
+       }
+
+       list_for_each_entry(fud, &fc->devices, entry) {
+               struct fuse_pqueue *fpq = &fud->pq;
+
+               spin_lock(&fpq->lock);
+               for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
+                       list_splice_tail_init(&fpq->processing[i], &to_queue);
+               spin_unlock(&fpq->lock);
+       }
+       spin_unlock(&fc->lock);
+
+       list_for_each_entry_safe(req, next, &to_queue, list) {
+               __set_bit(FR_PENDING, &req->flags);
+               /* mark the request as resend request */
+               req->in.h.unique |= FUSE_UNIQUE_RESEND;
+       }
+
+       spin_lock(&fiq->lock);
+       /* iq and pq requests are both oldest to newest */
+       list_splice(&to_queue, &fiq->pending);
+       fiq->ops->wake_pending_and_unlock(fiq);
+}
+
+static int fuse_notify_resend(struct fuse_conn *fc)
+{
+       fuse_resend(fc);
+       return 0;
+}
+
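Because resent requests are tagged with FUSE_UNIQUE_RESEND in their unique
id, a server that handles non-idempotent operations can deduplicate replays.
A minimal daemon-side sketch, assuming the uapi bit value; the helper name
is hypothetical:

        #include <stdbool.h>
        #include <stdint.h>

        #define FUSE_UNIQUE_RESEND (1ULL << 63)  /* assumed uapi value */

        /* True if the kernel re-queued this request after a failover;
         * 'unique' comes from the request's fuse_in_header. */
        static bool fuse_req_is_resend(uint64_t unique)
        {
                return (unique & FUSE_UNIQUE_RESEND) != 0;
        }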
 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
                       unsigned int size, struct fuse_copy_state *cs)
 {
@@ -1800,6 +1855,9 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
        case FUSE_NOTIFY_DELETE:
                return fuse_notify_delete(fc, size, cs);
 
+       case FUSE_NOTIFY_RESEND:
+               return fuse_notify_resend(fc);
+
        default:
                fuse_copy_finish(cs);
                return -EINVAL;
@@ -2251,43 +2309,91 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
        return 0;
 }
 
-static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
-                          unsigned long arg)
+static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
 {
        int res;
        int oldfd;
        struct fuse_dev *fud = NULL;
        struct fd f;
 
+       if (get_user(oldfd, argp))
+               return -EFAULT;
+
+       f = fdget(oldfd);
+       if (!f.file)
+               return -EINVAL;
+
+       /*
+        * Check against file->f_op because CUSE
+        * uses the same ioctl handler.
+        */
+       if (f.file->f_op == file->f_op)
+               fud = fuse_get_dev(f.file);
+
+       res = -EINVAL;
+       if (fud) {
+               mutex_lock(&fuse_mutex);
+               res = fuse_device_clone(fud->fc, file);
+               mutex_unlock(&fuse_mutex);
+       }
+
+       fdput(f);
+       return res;
+}
+
+static long fuse_dev_ioctl_backing_open(struct file *file,
+                                       struct fuse_backing_map __user *argp)
+{
+       struct fuse_dev *fud = fuse_get_dev(file);
+       struct fuse_backing_map map;
+
+       if (!fud)
+               return -EPERM;
+
+       if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(&map, argp, sizeof(map)))
+               return -EFAULT;
+
+       return fuse_backing_open(fud->fc, &map);
+}
+
+static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
+{
+       struct fuse_dev *fud = fuse_get_dev(file);
+       int backing_id;
+
+       if (!fud)
+               return -EPERM;
+
+       if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+               return -EOPNOTSUPP;
+
+       if (get_user(backing_id, argp))
+               return -EFAULT;
+
+       return fuse_backing_close(fud->fc, backing_id);
+}
+
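Taken together, a passthrough-capable daemon would pair these ioctls roughly
as follows; a sketch only, with error handling elided and the
fuse_backing_map layout assumed from the uapi header:

        #include <sys/ioctl.h>
        #include <linux/fuse.h>  /* FUSE_DEV_IOC_BACKING_OPEN/CLOSE */

        /* Hypothetical helpers around the new ioctls. */
        static int backing_register(int dev_fd, int backing_fd)
        {
                struct fuse_backing_map map = { .fd = backing_fd };

                /* On success the return value is the backing id that the
                 * server later places in its open reply. */
                return ioctl(dev_fd, FUSE_DEV_IOC_BACKING_OPEN, &map);
        }

        static int backing_unregister(int dev_fd, int backing_id)
        {
                return ioctl(dev_fd, FUSE_DEV_IOC_BACKING_CLOSE, &backing_id);
        }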
+static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
+                          unsigned long arg)
+{
+       void __user *argp = (void __user *)arg;
+
        switch (cmd) {
        case FUSE_DEV_IOC_CLONE:
-               if (get_user(oldfd, (__u32 __user *)arg))
-                       return -EFAULT;
+               return fuse_dev_ioctl_clone(file, argp);
 
-               f = fdget(oldfd);
-               if (!f.file)
-                       return -EINVAL;
+       case FUSE_DEV_IOC_BACKING_OPEN:
+               return fuse_dev_ioctl_backing_open(file, argp);
+
+       case FUSE_DEV_IOC_BACKING_CLOSE:
+               return fuse_dev_ioctl_backing_close(file, argp);
 
-               /*
-                * Check against file->f_op because CUSE
-                * uses the same ioctl handler.
-                */
-               if (f.file->f_op == file->f_op)
-                       fud = fuse_get_dev(f.file);
-
-               res = -EINVAL;
-               if (fud) {
-                       mutex_lock(&fuse_mutex);
-                       res = fuse_device_clone(fud->fc, file);
-                       mutex_unlock(&fuse_mutex);
-               }
-               fdput(f);
-               break;
        default:
-               res = -ENOTTY;
-               break;
+               return -ENOTTY;
        }
-       return res;
 }
 
 const struct file_operations fuse_dev_operations = {
index d19cbf34c634174f36a1769f61c691060f32aaa3..4a6df591add61cd8960caa213e2102643bd2c8db 100644 (file)
@@ -391,6 +391,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
        err = -EIO;
        if (fuse_invalid_attr(&outarg->attr))
                goto out_put_forget;
+       if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
+               pr_warn_once("root generation should be zero\n");
+               outarg->generation = 0;
+       }
 
        *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
                           &outarg->attr, ATTR_TIMEOUT(outarg),
@@ -615,7 +619,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
        FUSE_ARGS(args);
        struct fuse_forget_link *forget;
        struct fuse_create_in inarg;
-       struct fuse_open_out outopen;
+       struct fuse_open_out *outopenp;
        struct fuse_entry_out outentry;
        struct fuse_inode *fi;
        struct fuse_file *ff;
@@ -630,7 +634,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
                goto out_err;
 
        err = -ENOMEM;
-       ff = fuse_file_alloc(fm);
+       ff = fuse_file_alloc(fm, true);
        if (!ff)
                goto out_put_forget_req;
 
@@ -659,8 +663,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
        args.out_numargs = 2;
        args.out_args[0].size = sizeof(outentry);
        args.out_args[0].value = &outentry;
-       args.out_args[1].size = sizeof(outopen);
-       args.out_args[1].value = &outopen;
+       /* Store outarg for fuse_finish_open() */
+       outopenp = &ff->args->open_outarg;
+       args.out_args[1].size = sizeof(*outopenp);
+       args.out_args[1].value = outopenp;
 
        err = get_create_ext(&args, dir, entry, mode);
        if (err)
@@ -676,9 +682,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
            fuse_invalid_attr(&outentry.attr))
                goto out_free_ff;
 
-       ff->fh = outopen.fh;
+       ff->fh = outopenp->fh;
        ff->nodeid = outentry.nodeid;
-       ff->open_flags = outopen.open_flags;
+       ff->open_flags = outopenp->open_flags;
        inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
                          &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
        if (!inode) {
@@ -692,13 +698,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
        d_instantiate(entry, inode);
        fuse_change_entry_timeout(entry, &outentry);
        fuse_dir_changed(dir);
-       err = finish_open(file, entry, generic_file_open);
+       err = generic_file_open(inode, file);
+       if (!err) {
+               file->private_data = ff;
+               err = finish_open(file, entry, fuse_finish_open);
+       }
        if (err) {
                fi = get_fuse_inode(inode);
                fuse_sync_release(fi, ff, flags);
        } else {
-               file->private_data = ff;
-               fuse_finish_open(inode, file);
                if (fm->fc->atomic_o_trunc && trunc)
                        truncate_pagecache(inode, 0);
                else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
@@ -1210,7 +1218,7 @@ static int fuse_do_statx(struct inode *inode, struct file *file,
        if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
            ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
                                         inode_wrong_type(inode, sx->mode)))) {
-               make_bad_inode(inode);
+               fuse_make_bad(inode);
                return -EIO;
        }
 
@@ -1485,7 +1493,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
  *
  * 1) Local access checking ('default_permissions' mount option) based
  * on file mode.  This is the plain old disk filesystem permission
- * modell.
+ * model.
  *
  * 2) "Remote" access checking, where server is responsible for
  * checking permission in each inode operation.  An exception to this
@@ -1630,7 +1638,30 @@ out_err:
 
 static int fuse_dir_open(struct inode *inode, struct file *file)
 {
-       return fuse_open_common(inode, file, true);
+       struct fuse_mount *fm = get_fuse_mount(inode);
+       int err;
+
+       if (fuse_is_bad(inode))
+               return -EIO;
+
+       err = generic_file_open(inode, file);
+       if (err)
+               return err;
+
+       err = fuse_do_open(fm, get_node_id(inode), file, true);
+       if (!err) {
+               struct fuse_file *ff = file->private_data;
+
+               /*
+                * Keep handling FOPEN_STREAM and FOPEN_NONSEEKABLE for
+                * directories for backward compatibility, though it's unlikely
+                * to be useful.
+                */
+               if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
+                       nonseekable_open(inode, file);
+       }
+
+       return err;
 }
 
 static int fuse_dir_release(struct inode *inode, struct file *file)
index c007b0f0c3a7e73bf222eab2928fc86d544aa692..a56e7bffd0004e3755d648ad9b35f67d4eba863a 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/fs.h>
 #include <linux/filelock.h>
 #include <linux/splice.h>
+#include <linux/task_io_accounting_ops.h>
 
 static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
                          unsigned int open_flags, int opcode,
@@ -50,13 +51,7 @@ static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
        return fuse_simple_request(fm, &args);
 }
 
-struct fuse_release_args {
-       struct fuse_args args;
-       struct fuse_release_in inarg;
-       struct inode *inode;
-};
-
-struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
+struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release)
 {
        struct fuse_file *ff;
 
@@ -65,15 +60,15 @@ struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
                return NULL;
 
        ff->fm = fm;
-       ff->release_args = kzalloc(sizeof(*ff->release_args),
-                                  GFP_KERNEL_ACCOUNT);
-       if (!ff->release_args) {
-               kfree(ff);
-               return NULL;
+       if (release) {
+               ff->args = kzalloc(sizeof(*ff->args), GFP_KERNEL_ACCOUNT);
+               if (!ff->args) {
+                       kfree(ff);
+                       return NULL;
+               }
        }
 
        INIT_LIST_HEAD(&ff->write_entry);
-       mutex_init(&ff->readdir.lock);
        refcount_set(&ff->count, 1);
        RB_CLEAR_NODE(&ff->polled_node);
        init_waitqueue_head(&ff->poll_wait);
@@ -85,8 +80,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
 
 void fuse_file_free(struct fuse_file *ff)
 {
-       kfree(ff->release_args);
-       mutex_destroy(&ff->readdir.lock);
+       kfree(ff->args);
        kfree(ff);
 }
 
@@ -105,14 +99,17 @@ static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
        kfree(ra);
 }
 
-static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
 {
        if (refcount_dec_and_test(&ff->count)) {
-               struct fuse_args *args = &ff->release_args->args;
+               struct fuse_release_args *ra = &ff->args->release_args;
+               struct fuse_args *args = (ra ? &ra->args : NULL);
 
-               if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
-                       /* Do nothing when client does not implement 'open' */
-                       fuse_release_end(ff->fm, args, 0);
+               if (ra && ra->inode)
+                       fuse_file_io_release(ff, ra->inode);
+
+               if (!args) {
+                       /* Do nothing when server does not implement 'open' */
                } else if (sync) {
                        fuse_simple_request(ff->fm, args);
                        fuse_release_end(ff->fm, args, 0);
@@ -132,27 +129,31 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
        struct fuse_conn *fc = fm->fc;
        struct fuse_file *ff;
        int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+       bool open = isdir ? !fc->no_opendir : !fc->no_open;
 
-       ff = fuse_file_alloc(fm);
+       ff = fuse_file_alloc(fm, open);
        if (!ff)
                return ERR_PTR(-ENOMEM);
 
        ff->fh = 0;
        /* Default for no-open */
        ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
-       if (isdir ? !fc->no_opendir : !fc->no_open) {
-               struct fuse_open_out outarg;
+       if (open) {
+               /* Store outarg for fuse_finish_open() */
+               struct fuse_open_out *outargp = &ff->args->open_outarg;
                int err;
 
-               err = fuse_send_open(fm, nodeid, open_flags, opcode, &outarg);
+               err = fuse_send_open(fm, nodeid, open_flags, opcode, outargp);
                if (!err) {
-                       ff->fh = outarg.fh;
-                       ff->open_flags = outarg.open_flags;
-
+                       ff->fh = outargp->fh;
+                       ff->open_flags = outargp->open_flags;
                } else if (err != -ENOSYS) {
                        fuse_file_free(ff);
                        return ERR_PTR(err);
                } else {
+                       /* No release needed */
+                       kfree(ff->args);
+                       ff->args = NULL;
                        if (isdir)
                                fc->no_opendir = 1;
                        else
@@ -195,40 +196,50 @@ static void fuse_link_write_file(struct file *file)
        spin_unlock(&fi->lock);
 }
 
-void fuse_finish_open(struct inode *inode, struct file *file)
+int fuse_finish_open(struct inode *inode, struct file *file)
 {
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = get_fuse_conn(inode);
+       int err;
+
+       err = fuse_file_io_open(file, inode);
+       if (err)
+               return err;
 
        if (ff->open_flags & FOPEN_STREAM)
                stream_open(inode, file);
        else if (ff->open_flags & FOPEN_NONSEEKABLE)
                nonseekable_open(inode, file);
 
-       if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
-               struct fuse_inode *fi = get_fuse_inode(inode);
-
-               spin_lock(&fi->lock);
-               fi->attr_version = atomic64_inc_return(&fc->attr_version);
-               i_size_write(inode, 0);
-               spin_unlock(&fi->lock);
-               file_update_time(file);
-               fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
-       }
        if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
                fuse_link_write_file(file);
+
+       return 0;
 }
 
-int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
+static void fuse_truncate_update_attr(struct inode *inode, struct file *file)
+{
+       struct fuse_conn *fc = get_fuse_conn(inode);
+       struct fuse_inode *fi = get_fuse_inode(inode);
+
+       spin_lock(&fi->lock);
+       fi->attr_version = atomic64_inc_return(&fc->attr_version);
+       i_size_write(inode, 0);
+       spin_unlock(&fi->lock);
+       file_update_time(file);
+       fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
+}
+
+static int fuse_open(struct inode *inode, struct file *file)
 {
        struct fuse_mount *fm = get_fuse_mount(inode);
+       struct fuse_inode *fi = get_fuse_inode(inode);
        struct fuse_conn *fc = fm->fc;
+       struct fuse_file *ff;
        int err;
-       bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
-                         fc->atomic_o_trunc &&
-                         fc->writeback_cache;
-       bool dax_truncate = (file->f_flags & O_TRUNC) &&
-                         fc->atomic_o_trunc && FUSE_IS_DAX(inode);
+       bool is_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc;
+       bool is_wb_truncate = is_truncate && fc->writeback_cache;
+       bool dax_truncate = is_truncate && FUSE_IS_DAX(inode);
 
        if (fuse_is_bad(inode))
                return -EIO;
@@ -250,16 +261,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
        if (is_wb_truncate || dax_truncate)
                fuse_set_nowrite(inode);
 
-       err = fuse_do_open(fm, get_node_id(inode), file, isdir);
-       if (!err)
-               fuse_finish_open(inode, file);
+       err = fuse_do_open(fm, get_node_id(inode), file, false);
+       if (!err) {
+               ff = file->private_data;
+               err = fuse_finish_open(inode, file);
+               if (err)
+                       fuse_sync_release(fi, ff, file->f_flags);
+               else if (is_truncate)
+                       fuse_truncate_update_attr(inode, file);
+       }
 
        if (is_wb_truncate || dax_truncate)
                fuse_release_nowrite(inode);
        if (!err) {
-               struct fuse_file *ff = file->private_data;
-
-               if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
+               if (is_truncate)
                        truncate_pagecache(inode, 0);
                else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
                        invalidate_inode_pages2(inode->i_mapping);
@@ -274,10 +289,13 @@ out_inode_unlock:
 }
 
 static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
-                                unsigned int flags, int opcode)
+                                unsigned int flags, int opcode, bool sync)
 {
        struct fuse_conn *fc = ff->fm->fc;
-       struct fuse_release_args *ra = ff->release_args;
+       struct fuse_release_args *ra = &ff->args->release_args;
+
+       if (fuse_file_passthrough(ff))
+               fuse_passthrough_release(ff, fuse_inode_backing(fi));
 
        /* Inode is NULL on error path of fuse_create_open() */
        if (likely(fi)) {
@@ -292,6 +310,11 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
 
        wake_up_interruptible_all(&ff->poll_wait);
 
+       if (!ra)
+               return;
+
+       /* ff->args was used for open outarg */
+       memset(ff->args, 0, sizeof(*ff->args));
        ra->inarg.fh = ff->fh;
        ra->inarg.flags = flags;
        ra->args.in_numargs = 1;
@@ -301,23 +324,28 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
        ra->args.nodeid = ff->nodeid;
        ra->args.force = true;
        ra->args.nocreds = true;
+
+       /*
+        * Hold inode until release is finished.
+        * From fuse_sync_release() the refcount is 1 and everything's
+        * synchronous, so we are fine with not doing igrab() here.
+        */
+       ra->inode = sync ? NULL : igrab(&fi->inode);
 }
 
 void fuse_file_release(struct inode *inode, struct fuse_file *ff,
                       unsigned int open_flags, fl_owner_t id, bool isdir)
 {
        struct fuse_inode *fi = get_fuse_inode(inode);
-       struct fuse_release_args *ra = ff->release_args;
+       struct fuse_release_args *ra = &ff->args->release_args;
        int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
 
-       fuse_prepare_release(fi, ff, open_flags, opcode);
+       fuse_prepare_release(fi, ff, open_flags, opcode, false);
 
-       if (ff->flock) {
+       if (ra && ff->flock) {
                ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
                ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, id);
        }
-       /* Hold inode until release is finished */
-       ra->inode = igrab(inode);
 
        /*
         * Normally this will send the RELEASE request, however if
@@ -328,7 +356,7 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
         * synchronous RELEASE is allowed (and desirable) in this case
         * because the server can be trusted not to screw up.
         */
-       fuse_file_put(ff, ff->fm->fc->destroy, isdir);
+       fuse_file_put(ff, ff->fm->fc->destroy);
 }
 
 void fuse_release_common(struct file *file, bool isdir)
@@ -337,11 +365,6 @@ void fuse_release_common(struct file *file, bool isdir)
                          (fl_owner_t) file, isdir);
 }
 
-static int fuse_open(struct inode *inode, struct file *file)
-{
-       return fuse_open_common(inode, file, false);
-}
-
 static int fuse_release(struct inode *inode, struct file *file)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -363,12 +386,8 @@ void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
                       unsigned int flags)
 {
        WARN_ON(refcount_read(&ff->count) > 1);
-       fuse_prepare_release(fi, ff, flags, FUSE_RELEASE);
-       /*
-        * iput(NULL) is a no-op and since the refcount is 1 and everything's
-        * synchronous, we are fine with not doing igrab() here"
-        */
-       fuse_file_put(ff, true, false);
+       fuse_prepare_release(fi, ff, flags, FUSE_RELEASE, true);
+       fuse_file_put(ff, true);
 }
 EXPORT_SYMBOL_GPL(fuse_sync_release);
 
@@ -634,7 +653,8 @@ static void fuse_release_user_pages(struct fuse_args_pages *ap,
        for (i = 0; i < ap->num_pages; i++) {
                if (should_dirty)
                        set_page_dirty_lock(ap->pages[i]);
-               put_page(ap->pages[i]);
+               if (ap->args.is_pinned)
+                       unpin_user_page(ap->pages[i]);
        }
 }
 
@@ -925,7 +945,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
                put_page(page);
        }
        if (ia->ff)
-               fuse_file_put(ia->ff, false, false);
+               fuse_file_put(ia->ff, false);
 
        fuse_io_free(ia);
 }
@@ -1299,13 +1319,93 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
        return res;
 }
 
+static bool fuse_io_past_eof(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+
+       return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode);
+}
+
+/*
+ * @return true if an exclusive lock for direct IO writes is needed
+ */
+static bool fuse_dio_wr_exclusive_lock(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct file *file = iocb->ki_filp;
+       struct fuse_file *ff = file->private_data;
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct fuse_inode *fi = get_fuse_inode(inode);
+
+       /* Server side has to advise that it supports parallel dio writes. */
+       if (!(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES))
+               return true;
+
+       /*
+        * Append will need to know the eventual EOF - always needs an
+        * exclusive lock.
+        */
+       if (iocb->ki_flags & IOCB_APPEND)
+               return true;
+
+       /* shared locks are not allowed with parallel page cache IO */
+       if (test_bit(FUSE_I_CACHE_IO_MODE, &fi->state))
+               return false;
+
+       /* Parallel dio beyond EOF is not supported, at least for now. */
+       if (fuse_io_past_eof(iocb, from))
+               return true;
+
+       return false;
+}
+
+static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
+                         bool *exclusive)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct fuse_file *ff = iocb->ki_filp->private_data;
+
+       *exclusive = fuse_dio_wr_exclusive_lock(iocb, from);
+       if (*exclusive) {
+               inode_lock(inode);
+       } else {
+               inode_lock_shared(inode);
+               /*
+                * New parallal dio allowed only if inode is not in caching
+                * mode and denies new opens in caching mode. This check
+                * should be performed only after taking shared inode lock.
+                * Previous past eof check was without inode lock and might
+                * have raced, so check it again.
+                */
+               if (fuse_io_past_eof(iocb, from) ||
+                   fuse_file_uncached_io_start(inode, ff, NULL) != 0) {
+                       inode_unlock_shared(inode);
+                       inode_lock(inode);
+                       *exclusive = true;
+               }
+       }
+}
+
+static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct fuse_file *ff = iocb->ki_filp->private_data;
+
+       if (exclusive) {
+               inode_unlock(inode);
+       } else {
+               /* Allow opens in caching mode after last parallel dio end */
+               fuse_file_uncached_io_end(inode, ff);
+               inode_unlock_shared(inode);
+       }
+}
+
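With the lock-upgrade logic folded into these helpers, the caller-side
pattern (visible in fuse_direct_write_iter() further down) stays simple:

        bool exclusive;

        fuse_dio_lock(iocb, from, &exclusive);
        /* ... perform the direct IO write ... */
        fuse_dio_unlock(iocb, exclusive);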
 static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct address_space *mapping = file->f_mapping;
        ssize_t written = 0;
        struct inode *inode = mapping->host;
-       ssize_t err;
+       ssize_t err, count;
        struct fuse_conn *fc = get_fuse_conn(inode);
 
        if (fc->writeback_cache) {
@@ -1327,10 +1427,12 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 writethrough:
        inode_lock(inode);
 
-       err = generic_write_checks(iocb, from);
+       err = count = generic_write_checks(iocb, from);
        if (err <= 0)
                goto out;
 
+       task_io_account_write(count);
+
        err = file_remove_privs(file);
        if (err)
                goto out;
@@ -1392,10 +1494,13 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
        while (nbytes < *nbytesp && ap->num_pages < max_pages) {
                unsigned npages;
                size_t start;
-               ret = iov_iter_get_pages2(ii, &ap->pages[ap->num_pages],
-                                       *nbytesp - nbytes,
-                                       max_pages - ap->num_pages,
-                                       &start);
+               struct page **pt_pages;
+
+               pt_pages = &ap->pages[ap->num_pages];
+               ret = iov_iter_extract_pages(ii, &pt_pages,
+                                            *nbytesp - nbytes,
+                                            max_pages - ap->num_pages,
+                                            0, &start);
                if (ret < 0)
                        break;
 
@@ -1412,6 +1517,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
                        (PAGE_SIZE - ret) & (PAGE_SIZE - 1);
        }
 
+       ap->args.is_pinned = iov_iter_extract_will_pin(ii);
        ap->args.user_pages = true;
        if (write)
                ap->args.in_pages = true;
@@ -1558,51 +1664,17 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
        return res;
 }
 
-static bool fuse_direct_write_extending_i_size(struct kiocb *iocb,
-                                              struct iov_iter *iter)
-{
-       struct inode *inode = file_inode(iocb->ki_filp);
-
-       return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode);
-}
-
 static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
        struct inode *inode = file_inode(iocb->ki_filp);
-       struct file *file = iocb->ki_filp;
-       struct fuse_file *ff = file->private_data;
        struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
        ssize_t res;
-       bool exclusive_lock =
-               !(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) ||
-               get_fuse_conn(inode)->direct_io_allow_mmap ||
-               iocb->ki_flags & IOCB_APPEND ||
-               fuse_direct_write_extending_i_size(iocb, from);
-
-       /*
-        * Take exclusive lock if
-        * - Parallel direct writes are disabled - a user space decision
-        * - Parallel direct writes are enabled and i_size is being extended.
-        * - Shared mmap on direct_io file is supported (FUSE_DIRECT_IO_ALLOW_MMAP).
-        *   This might not be needed at all, but needs further investigation.
-        */
-       if (exclusive_lock)
-               inode_lock(inode);
-       else {
-               inode_lock_shared(inode);
-
-               /* A race with truncate might have come up as the decision for
-                * the lock type was done without holding the lock, check again.
-                */
-               if (fuse_direct_write_extending_i_size(iocb, from)) {
-                       inode_unlock_shared(inode);
-                       inode_lock(inode);
-                       exclusive_lock = true;
-               }
-       }
+       bool exclusive;
 
+       fuse_dio_lock(iocb, from, &exclusive);
        res = generic_write_checks(iocb, from);
        if (res > 0) {
+               task_io_account_write(res);
                if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
                        res = fuse_direct_IO(iocb, from);
                } else {
@@ -1611,10 +1683,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
                        fuse_write_update_attr(inode, iocb->ki_pos, res);
                }
        }
-       if (exclusive_lock)
-               inode_unlock(inode);
-       else
-               inode_unlock_shared(inode);
+       fuse_dio_unlock(iocb, exclusive);
 
        return res;
 }
@@ -1631,10 +1700,13 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        if (FUSE_IS_DAX(inode))
                return fuse_dax_read_iter(iocb, to);
 
-       if (!(ff->open_flags & FOPEN_DIRECT_IO))
-               return fuse_cache_read_iter(iocb, to);
-       else
+       /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+       if (ff->open_flags & FOPEN_DIRECT_IO)
                return fuse_direct_read_iter(iocb, to);
+       else if (fuse_file_passthrough(ff))
+               return fuse_passthrough_read_iter(iocb, to);
+       else
+               return fuse_cache_read_iter(iocb, to);
 }
 
 static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
@@ -1649,10 +1721,38 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
        if (FUSE_IS_DAX(inode))
                return fuse_dax_write_iter(iocb, from);
 
-       if (!(ff->open_flags & FOPEN_DIRECT_IO))
+       /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+       if (ff->open_flags & FOPEN_DIRECT_IO)
+               return fuse_direct_write_iter(iocb, from);
+       else if (fuse_file_passthrough(ff))
+               return fuse_passthrough_write_iter(iocb, from);
+       else
                return fuse_cache_write_iter(iocb, from);
+}
+
+static ssize_t fuse_splice_read(struct file *in, loff_t *ppos,
+                               struct pipe_inode_info *pipe, size_t len,
+                               unsigned int flags)
+{
+       struct fuse_file *ff = in->private_data;
+
+       /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+       if (fuse_file_passthrough(ff) && !(ff->open_flags & FOPEN_DIRECT_IO))
+               return fuse_passthrough_splice_read(in, ppos, pipe, len, flags);
        else
-               return fuse_direct_write_iter(iocb, from);
+               return filemap_splice_read(in, ppos, pipe, len, flags);
+}
+
+static ssize_t fuse_splice_write(struct pipe_inode_info *pipe, struct file *out,
+                                loff_t *ppos, size_t len, unsigned int flags)
+{
+       struct fuse_file *ff = out->private_data;
+
+       /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+       if (fuse_file_passthrough(ff) && !(ff->open_flags & FOPEN_DIRECT_IO))
+               return fuse_passthrough_splice_write(pipe, out, ppos, len, flags);
+       else
+               return iter_file_splice_write(pipe, out, ppos, len, flags);
 }
 
 static void fuse_writepage_free(struct fuse_writepage_args *wpa)
@@ -1667,7 +1767,7 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
                __free_page(ap->pages[i]);
 
        if (wpa->ia.ff)
-               fuse_file_put(wpa->ia.ff, false, false);
+               fuse_file_put(wpa->ia.ff, false);
 
        kfree(ap->pages);
        kfree(wpa);
@@ -1909,7 +2009,7 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
        ff = __fuse_write_file_get(fi);
        err = fuse_flush_times(inode, ff);
        if (ff)
-               fuse_file_put(ff, false, false);
+               fuse_file_put(ff, false);
 
        return err;
 }
@@ -1947,26 +2047,26 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
        rcu_read_unlock();
 }
 
-static int fuse_writepage_locked(struct page *page)
+static int fuse_writepage_locked(struct folio *folio)
 {
-       struct address_space *mapping = page->mapping;
+       struct address_space *mapping = folio->mapping;
        struct inode *inode = mapping->host;
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
        struct fuse_writepage_args *wpa;
        struct fuse_args_pages *ap;
-       struct page *tmp_page;
+       struct folio *tmp_folio;
        int error = -ENOMEM;
 
-       set_page_writeback(page);
+       folio_start_writeback(folio);
 
        wpa = fuse_writepage_args_alloc();
        if (!wpa)
                goto err;
        ap = &wpa->ia.ap;
 
-       tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
-       if (!tmp_page)
+       tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0);
+       if (!tmp_folio)
                goto err_free;
 
        error = -EIO;
@@ -1975,21 +2075,21 @@ static int fuse_writepage_locked(struct page *page)
                goto err_nofile;
 
        fuse_writepage_add_to_bucket(fc, wpa);
-       fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
+       fuse_write_args_fill(&wpa->ia, wpa->ia.ff, folio_pos(folio), 0);
 
-       copy_highpage(tmp_page, page);
+       folio_copy(tmp_folio, folio);
        wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
        wpa->next = NULL;
        ap->args.in_pages = true;
        ap->num_pages = 1;
-       ap->pages[0] = tmp_page;
+       ap->pages[0] = &tmp_folio->page;
        ap->descs[0].offset = 0;
        ap->descs[0].length = PAGE_SIZE;
        ap->args.end = fuse_writepage_end;
        wpa->inode = inode;
 
        inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
-       inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+       node_stat_add_folio(tmp_folio, NR_WRITEBACK_TEMP);
 
        spin_lock(&fi->lock);
        tree_insert(&fi->writepages, wpa);
@@ -1997,48 +2097,20 @@ static int fuse_writepage_locked(struct page *page)
        fuse_flush_writepages(inode);
        spin_unlock(&fi->lock);
 
-       end_page_writeback(page);
+       folio_end_writeback(folio);
 
        return 0;
 
 err_nofile:
-       __free_page(tmp_page);
+       folio_put(tmp_folio);
 err_free:
        kfree(wpa);
 err:
-       mapping_set_error(page->mapping, error);
-       end_page_writeback(page);
+       mapping_set_error(folio->mapping, error);
+       folio_end_writeback(folio);
        return error;
 }
 
-static int fuse_writepage(struct page *page, struct writeback_control *wbc)
-{
-       struct fuse_conn *fc = get_fuse_conn(page->mapping->host);
-       int err;
-
-       if (fuse_page_is_writeback(page->mapping->host, page->index)) {
-               /*
-                * ->writepages() should be called for sync() and friends.  We
-                * should only get here on direct reclaim and then we are
-                * allowed to skip a page which is already in flight
-                */
-               WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
-
-               redirty_page_for_writepage(wbc, page);
-               unlock_page(page);
-               return 0;
-       }
-
-       if (wbc->sync_mode == WB_SYNC_NONE &&
-           fc->num_background >= fc->congestion_threshold)
-               return AOP_WRITEPAGE_ACTIVATE;
-
-       err = fuse_writepage_locked(page);
-       unlock_page(page);
-
-       return err;
-}
-
 struct fuse_fill_wb_data {
        struct fuse_writepage_args *wpa;
        struct fuse_file *ff;
@@ -2307,7 +2379,7 @@ static int fuse_writepages(struct address_space *mapping,
                fuse_writepages_send(&data);
        }
        if (data.ff)
-               fuse_file_put(data.ff, false, false);
+               fuse_file_put(data.ff, false);
 
        kfree(data.orig_pages);
 out:
@@ -2401,7 +2473,7 @@ static int fuse_launder_folio(struct folio *folio)
 
                /* Serialize with pending writeback for the same page */
                fuse_wait_on_page_writeback(inode, folio->index);
-               err = fuse_writepage_locked(&folio->page);
+               err = fuse_writepage_locked(folio);
                if (!err)
                        fuse_wait_on_page_writeback(inode, folio->index);
        }
@@ -2462,13 +2534,30 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fm->fc;
+       struct inode *inode = file_inode(file);
+       int rc;
 
        /* DAX mmap is superior to direct_io mmap */
-       if (FUSE_IS_DAX(file_inode(file)))
+       if (FUSE_IS_DAX(inode))
                return fuse_dax_mmap(file, vma);
 
+       /*
+        * If the inode is in passthrough io mode (some of its files are open
+        * in passthrough mode), either mmap to the backing file or fail the
+        * mmap, since mixing cached mmap with passthrough io mode is not allowed.
+        */
+       if (fuse_file_passthrough(ff))
+               return fuse_passthrough_mmap(file, vma);
+       else if (fuse_inode_backing(get_fuse_inode(inode)))
+               return -ENODEV;
+
+       /*
+        * FOPEN_DIRECT_IO handling is special compared to O_DIRECT, as it
+        * does not allow MAP_SHARED mmap without FUSE_DIRECT_IO_ALLOW_MMAP.
+        */
        if (ff->open_flags & FOPEN_DIRECT_IO) {
-               /* Can't provide the coherency needed for MAP_SHARED
+               /*
+                * Can't provide the coherency needed for MAP_SHARED
                 * if FUSE_DIRECT_IO_ALLOW_MMAP isn't set.
                 */
                if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_allow_mmap)
@@ -2476,7 +2565,19 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 
                invalidate_inode_pages2(file->f_mapping);
 
-               return generic_file_mmap(file, vma);
+               if (!(vma->vm_flags & VM_MAYSHARE)) {
+                       /* MAP_PRIVATE */
+                       return generic_file_mmap(file, vma);
+               }
+
+               /*
+                * First mmap of direct_io file enters caching inode io mode.
+                * Also waits for parallel dio writers to go into serial mode
+                * (exclusive instead of shared lock).
+                */
+               rc = fuse_file_cached_io_start(inode, ff);
+               if (rc)
+                       return rc;
        }
 
        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
@@ -2580,10 +2681,6 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
                return -ENOLCK;
        }
 
-       /* Unlock on close is handled by the flush method */
-       if ((fl->c.flc_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
-               return 0;
-
        fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
        err = fuse_simple_request(fm, &args);
 
@@ -3213,8 +3310,8 @@ static const struct file_operations fuse_file_operations = {
        .lock           = fuse_file_lock,
        .get_unmapped_area = thp_get_unmapped_area,
        .flock          = fuse_file_flock,
-       .splice_read    = filemap_splice_read,
-       .splice_write   = iter_file_splice_write,
+       .splice_read    = fuse_splice_read,
+       .splice_write   = fuse_splice_write,
        .unlocked_ioctl = fuse_file_ioctl,
        .compat_ioctl   = fuse_file_compat_ioctl,
        .poll           = fuse_file_poll,
@@ -3225,10 +3322,10 @@ static const struct file_operations fuse_file_operations = {
 static const struct address_space_operations fuse_file_aops  = {
        .read_folio     = fuse_read_folio,
        .readahead      = fuse_readahead,
-       .writepage      = fuse_writepage,
        .writepages     = fuse_writepages,
        .launder_folio  = fuse_launder_folio,
        .dirty_folio    = filemap_dirty_folio,
+       .migrate_folio  = filemap_migrate_folio,
        .bmap           = fuse_bmap,
        .direct_IO      = fuse_direct_IO,
        .write_begin    = fuse_write_begin,
@@ -3245,7 +3342,9 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
        INIT_LIST_HEAD(&fi->write_files);
        INIT_LIST_HEAD(&fi->queued_writes);
        fi->writectr = 0;
+       fi->iocachectr = 0;
        init_waitqueue_head(&fi->page_waitq);
+       init_waitqueue_head(&fi->direct_io_waitq);
        fi->writepages = RB_ROOT;
 
        if (IS_ENABLED(CONFIG_FUSE_DAX))
index bcbe34488862752154ca2284386baacadf972744..b24084b60864ee57c82864cffda5048dc7f45fb7 100644 (file)
@@ -76,6 +76,16 @@ struct fuse_submount_lookup {
        struct fuse_forget_link *forget;
 };
 
+/** Container for data related to mapping to backing file */
+struct fuse_backing {
+       struct file *file;
+       struct cred *cred;
+
+       /** refcount */
+       refcount_t count;
+       struct rcu_head rcu;
+};
+
 /** FUSE inode */
 struct fuse_inode {
        /** Inode data */
@@ -111,7 +121,7 @@ struct fuse_inode {
        u64 attr_version;
 
        union {
-               /* Write related fields (regular file only) */
+               /* read/write io cache (regular file only) */
                struct {
                        /* Files usable in writepage.  Protected by fi->lock */
                        struct list_head write_files;
@@ -123,9 +133,15 @@ struct fuse_inode {
                         * (FUSE_NOWRITE) means more writes are blocked */
                        int writectr;
 
+                       /** Number of files/maps using page cache */
+                       int iocachectr;
+
                        /* Waitq for writepage completion */
                        wait_queue_head_t page_waitq;
 
+                       /* waitq for direct-io completion */
+                       wait_queue_head_t direct_io_waitq;
+
                        /* List of writepage requests (pending or sent) */
                        struct rb_root writepages;
                };
@@ -173,6 +189,10 @@ struct fuse_inode {
 #endif
        /** Submount specific lookup tracking */
        struct fuse_submount_lookup *submount_lookup;
+#ifdef CONFIG_FUSE_PASSTHROUGH
+       /** Reference to backing file in passthrough mode */
+       struct fuse_backing *fb;
+#endif
 };
 
 /** FUSE inode state bits */
@@ -187,19 +207,21 @@ enum {
        FUSE_I_BAD,
        /* Has btime */
        FUSE_I_BTIME,
+       /* Wants or already has page cache IO */
+       FUSE_I_CACHE_IO_MODE,
 };
 
 struct fuse_conn;
 struct fuse_mount;
-struct fuse_release_args;
+union fuse_file_args;
 
 /** FUSE specific file data */
 struct fuse_file {
        /** Fuse connection for this file */
        struct fuse_mount *fm;
 
-       /* Argument space reserved for release */
-       struct fuse_release_args *release_args;
+       /* Argument space reserved for open/release */
+       union fuse_file_args *args;
 
        /** Kernel file handle guaranteed to be unique */
        u64 kh;
@@ -221,12 +243,6 @@ struct fuse_file {
 
        /* Readdir related */
        struct {
-               /*
-                * Protects below fields against (crazy) parallel readdir on
-                * same open file.  Uncontended in the normal case.
-                */
-               struct mutex lock;
-
                /* Dir stream position */
                loff_t pos;
 
@@ -244,6 +260,15 @@ struct fuse_file {
        /** Wait queue head for poll */
        wait_queue_head_t poll_wait;
 
+       /** Does file hold a fi->iocachectr refcount? */
+       enum { IOM_NONE, IOM_CACHED, IOM_UNCACHED } iomode;
+
+#ifdef CONFIG_FUSE_PASSTHROUGH
+       /** Reference to backing file in passthrough mode */
+       struct file *passthrough;
+       const struct cred *cred;
+#endif
+
        /** Has flock been performed on this file? */
        bool flock:1;
 };
@@ -283,6 +308,7 @@ struct fuse_args {
        bool page_replace:1;
        bool may_block:1;
        bool is_ext:1;
+       bool is_pinned:1;
        struct fuse_in_arg in_args[3];
        struct fuse_arg out_args[2];
        void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
@@ -295,6 +321,19 @@ struct fuse_args_pages {
        unsigned int num_pages;
 };
 
+struct fuse_release_args {
+       struct fuse_args args;
+       struct fuse_release_in inarg;
+       struct inode *inode;
+};
+
+union fuse_file_args {
+       /* Used during open() */
+       struct fuse_open_out open_outarg;
+       /* Used during release() */
+       struct fuse_release_args release_args;
+};
+
 #define FUSE_ARGS(args) struct fuse_args args = {}
 
 /** The request IO state (for asynchronous processing) */
@@ -818,6 +857,12 @@ struct fuse_conn {
        /* Is statx not implemented by fs? */
        unsigned int no_statx:1;
 
+       /** Passthrough support for read/write IO */
+       unsigned int passthrough:1;
+
+       /** Maximum stack depth for passthrough backing files */
+       int max_stack_depth;
+
        /** The number of requests waiting for completion */
        atomic_t num_waiting;
 
@@ -867,6 +912,11 @@ struct fuse_conn {
 
        /* New writepages go into this bucket */
        struct fuse_sync_bucket __rcu *curr_bucket;
+
+#ifdef CONFIG_FUSE_PASSTHROUGH
+       /** IDR for backing files ids */
+       struct idr backing_files_map;
+#endif
 };
 
 /*
@@ -940,7 +990,6 @@ static inline bool fuse_stale_inode(const struct inode *inode, int generation,
 
 static inline void fuse_make_bad(struct inode *inode)
 {
-       remove_inode_hash(inode);
        set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
 }
 
@@ -1032,14 +1081,9 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
                         size_t count, int opcode);
 
 
-/**
- * Send OPEN or OPENDIR request
- */
-int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
-
-struct fuse_file *fuse_file_alloc(struct fuse_mount *fm);
+struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release);
 void fuse_file_free(struct fuse_file *ff);
-void fuse_finish_open(struct inode *inode, struct file *file);
+int fuse_finish_open(struct inode *inode, struct file *file);
 
 void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
                       unsigned int flags);
@@ -1349,11 +1393,82 @@ int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa);
 int fuse_fileattr_set(struct mnt_idmap *idmap,
                      struct dentry *dentry, struct fileattr *fa);
 
-/* file.c */
+/* iomode.c */
+int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff);
+int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb);
+void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff);
+
+int fuse_file_io_open(struct file *file, struct inode *inode);
+void fuse_file_io_release(struct fuse_file *ff, struct inode *inode);
 
+/* file.c */
 struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
                                 unsigned int open_flags, bool isdir);
 void fuse_file_release(struct inode *inode, struct fuse_file *ff,
                       unsigned int open_flags, fl_owner_t id, bool isdir);
 
+/* passthrough.c */
+static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi)
+{
+#ifdef CONFIG_FUSE_PASSTHROUGH
+       return READ_ONCE(fi->fb);
+#else
+       return NULL;
+#endif
+}
+
+static inline struct fuse_backing *fuse_inode_backing_set(struct fuse_inode *fi,
+                                                         struct fuse_backing *fb)
+{
+#ifdef CONFIG_FUSE_PASSTHROUGH
+       return xchg(&fi->fb, fb);
+#else
+       return NULL;
+#endif
+}
+
+#ifdef CONFIG_FUSE_PASSTHROUGH
+struct fuse_backing *fuse_backing_get(struct fuse_backing *fb);
+void fuse_backing_put(struct fuse_backing *fb);
+#else
+
+static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
+{
+       return NULL;
+}
+
+static inline void fuse_backing_put(struct fuse_backing *fb)
+{
+}
+#endif
+
+void fuse_backing_files_init(struct fuse_conn *fc);
+void fuse_backing_files_free(struct fuse_conn *fc);
+int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map);
+int fuse_backing_close(struct fuse_conn *fc, int backing_id);
+
+struct fuse_backing *fuse_passthrough_open(struct file *file,
+                                          struct inode *inode,
+                                          int backing_id);
+void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb);
+
+static inline struct file *fuse_file_passthrough(struct fuse_file *ff)
+{
+#ifdef CONFIG_FUSE_PASSTHROUGH
+       return ff->passthrough;
+#else
+       return NULL;
+#endif
+}
+
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t fuse_passthrough_splice_read(struct file *in, loff_t *ppos,
+                                    struct pipe_inode_info *pipe,
+                                    size_t len, unsigned int flags);
+ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe,
+                                     struct file *out, loff_t *ppos,
+                                     size_t len, unsigned int flags);
+ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
+
 #endif /* _FS_FUSE_I_H */
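Between fuse_backing_open()/fuse_backing_close() and the fuse_passthrough_*() hooks, this header sketches a complete server-side protocol. Below is a minimal userspace sketch of the registration step; it assumes the uapi pieces that accompany this series (the FUSE_DEV_IOC_BACKING_OPEN ioctl, struct fuse_backing_map, and a backing_id field in struct fuse_open_out), of which only the fd/flags members of fuse_backing_map are directly confirmed by the hunks in this section:

        #include <fcntl.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <linux/fuse.h>

        /* Register a backing file on the fuse device fd.  On success the
         * kernel returns a backing id > 0 and keeps its own reference to
         * the file (fget), so the server may close its fd afterwards. */
        static int register_backing(int fuse_dev_fd, const char *path)
        {
                struct fuse_backing_map map = { .fd = open(path, O_RDONLY) };
                int ret;

                if (map.fd < 0)
                        return -1;
                ret = ioctl(fuse_dev_fd, FUSE_DEV_IOC_BACKING_OPEN, &map);
                close(map.fd);
                return ret;
        }

The OPEN reply would then carry FOPEN_PASSTHROUGH in open_flags and the returned id in backing_id, which the kernel resolves in fuse_file_passthrough_open() via ff->args->open_outarg.backing_id.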
index 516ea2979a90ff2d0eff63a71dc6b8edc4c91b98..3a5d888783353cce48e18243ea386e97c788e12a 100644 (file)
@@ -111,6 +111,9 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
        if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
                goto out_free_forget;
 
+       if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+               fuse_inode_backing_set(fi, NULL);
+
        return &fi->inode;
 
 out_free_forget:
@@ -129,6 +132,9 @@ static void fuse_free_inode(struct inode *inode)
 #ifdef CONFIG_FUSE_DAX
        kfree(fi->dax);
 #endif
+       if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+               fuse_backing_put(fuse_inode_backing(fi));
+
        kmem_cache_free(fuse_inode_cachep, fi);
 }
 
@@ -469,8 +475,11 @@ retry:
        } else if (fuse_stale_inode(inode, generation, attr)) {
                /* nodeid was reused, any I/O on the old inode should fail */
                fuse_make_bad(inode);
-               iput(inode);
-               goto retry;
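+               /* The root inode is assumed to stay hashed for the
+                * lifetime of the sb: never unhash or drop it, even if
+                * its nodeid appears stale. */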
+               if (inode != d_inode(sb->s_root)) {
+                       remove_inode_hash(inode);
+                       iput(inode);
+                       goto retry;
+               }
        }
        fi = get_fuse_inode(inode);
        spin_lock(&fi->lock);
@@ -924,6 +933,9 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
        fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
        fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
 
+       if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+               fuse_backing_files_init(fc);
+
        INIT_LIST_HEAD(&fc->mounts);
        list_add(&fm->fc_entry, &fc->mounts);
        fm->fc = fc;
@@ -954,6 +966,8 @@ void fuse_conn_put(struct fuse_conn *fc)
                        WARN_ON(atomic_read(&bucket->count) != 1);
                        kfree(bucket);
                }
+               if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+                       fuse_backing_files_free(fc);
                call_rcu(&fc->rcu, delayed_release);
        }
 }
@@ -974,7 +988,7 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
        attr.mode = mode;
        attr.ino = FUSE_ROOT_ID;
        attr.nlink = 1;
-       return fuse_iget(sb, 1, 0, &attr, 0, 0);
+       return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0);
 }
 
 struct fuse_inode_handle {
@@ -1117,6 +1131,11 @@ static struct dentry *fuse_get_parent(struct dentry *child)
        return parent;
 }
 
+/* only for fid encoding; no support for file handle */
+static const struct export_operations fuse_export_fid_operations = {
+       .encode_fh      = fuse_encode_fh,
+};
+
 static const struct export_operations fuse_export_operations = {
        .fh_to_dentry   = fuse_fh_to_dentry,
        .fh_to_parent   = fuse_fh_to_parent,
@@ -1291,6 +1310,26 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
                                fc->create_supp_group = 1;
                        if (flags & FUSE_DIRECT_IO_ALLOW_MMAP)
                                fc->direct_io_allow_mmap = 1;
+                       /*
+                        * max_stack_depth is the max stack depth of FUSE fs,
+                        * so it has to be at least 1 to support passthrough
+                        * to backing files.
+                        *
+                        * With max_stack_depth > 1, the backing files can
+                        * themselves be on a stacked fs (e.g. overlayfs), and
+                        * with max_stack_depth == 1, the FUSE fs can be the
+                        * underlying fs of a stacked fs (e.g. overlayfs).
+                        */
+                       if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) &&
+                           (flags & FUSE_PASSTHROUGH) &&
+                           arg->max_stack_depth > 0 &&
+                           arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH) {
+                               fc->passthrough = 1;
+                               fc->max_stack_depth = arg->max_stack_depth;
+                               fm->sb->s_stack_depth = arg->max_stack_depth;
+                       }
+                       if (flags & FUSE_NO_EXPORT_SUPPORT)
+                               fm->sb->s_export_op = &fuse_export_fid_operations;
                } else {
                        ra_pages = fc->max_read / PAGE_SIZE;
                        fc->no_lock = 1;
@@ -1337,7 +1376,8 @@ void fuse_send_init(struct fuse_mount *fm)
                FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
                FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
                FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
-               FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP;
+               FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP |
+               FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND;
 #ifdef CONFIG_FUSE_DAX
        if (fm->fc->dax)
                flags |= FUSE_MAP_ALIGNMENT;
@@ -1346,6 +1386,8 @@ void fuse_send_init(struct fuse_mount *fm)
 #endif
        if (fm->fc->auto_submounts)
                flags |= FUSE_SUBMOUNTS;
+       if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
+               flags |= FUSE_PASSTHROUGH;
 
        ia->in.flags = flags;
        ia->in.flags2 = flags >> 32;
@@ -1496,8 +1538,8 @@ static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
                .ctimensec      = ctime.tv_nsec,
                .mode           = fi->inode.i_mode,
                .nlink          = fi->inode.i_nlink,
-               .uid            = fi->inode.i_uid.val,
-               .gid            = fi->inode.i_gid.val,
+               .uid            = __kuid_val(fi->inode.i_uid),
+               .gid            = __kgid_val(fi->inode.i_gid),
                .rdev           = fi->inode.i_rdev,
                .blksize        = 1u << fi->inode.i_blkbits,
        };
@@ -1534,6 +1576,7 @@ static int fuse_fill_super_submount(struct super_block *sb,
        sb->s_bdi = bdi_get(parent_sb->s_bdi);
 
        sb->s_xattr = parent_sb->s_xattr;
+       sb->s_export_op = parent_sb->s_export_op;
        sb->s_time_gran = parent_sb->s_time_gran;
        sb->s_blocksize = parent_sb->s_blocksize;
        sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
diff --git a/fs/fuse/iomode.c b/fs/fuse/iomode.c
new file mode 100644 (file)
index 0000000..c653ddc
--- /dev/null
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FUSE inode io modes.
+ *
+ * Copyright (c) 2024 CTERA Networks.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+
+/*
+ * Return true if need to wait for new opens in caching mode.
+ */
+static inline bool fuse_is_io_cache_wait(struct fuse_inode *fi)
+{
+       return READ_ONCE(fi->iocachectr) < 0 && !fuse_inode_backing(fi);
+}
+
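+/*
+ * fi->iocachectr sign convention (see the helpers below): positive values
+ * count open files in cached io mode, negative values count open files in
+ * uncached io mode (parallel dio / passthrough).  The two modes are
+ * mutually exclusive.
+ */
+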
+/*
+ * Start cached io mode.
+ *
+ * Blocks new parallel dio writes and waits for the in-progress parallel dio
+ * writes to complete.
+ */
+int fuse_file_cached_io_start(struct inode *inode, struct fuse_file *ff)
+{
+       struct fuse_inode *fi = get_fuse_inode(inode);
+
+       /* There are no io modes if server does not implement open */
+       if (!ff->args)
+               return 0;
+
+       spin_lock(&fi->lock);
+       /*
+        * Setting the bit advises new direct-io writes to use an exclusive
+        * lock - without it the wait below might be forever.
+        */
+       while (fuse_is_io_cache_wait(fi)) {
+               set_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+               spin_unlock(&fi->lock);
+               wait_event(fi->direct_io_waitq, !fuse_is_io_cache_wait(fi));
+               spin_lock(&fi->lock);
+       }
+
+       /*
+        * Check if inode entered passthrough io mode while waiting for parallel
+        * dio write completion.
+        */
+       if (fuse_inode_backing(fi)) {
+               clear_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+               spin_unlock(&fi->lock);
+               return -ETXTBSY;
+       }
+
+       WARN_ON(ff->iomode == IOM_UNCACHED);
+       if (ff->iomode == IOM_NONE) {
+               ff->iomode = IOM_CACHED;
+               if (fi->iocachectr == 0)
+                       set_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+               fi->iocachectr++;
+       }
+       spin_unlock(&fi->lock);
+       return 0;
+}
+
+static void fuse_file_cached_io_end(struct inode *inode, struct fuse_file *ff)
+{
+       struct fuse_inode *fi = get_fuse_inode(inode);
+
+       spin_lock(&fi->lock);
+       WARN_ON(fi->iocachectr <= 0);
+       WARN_ON(ff->iomode != IOM_CACHED);
+       ff->iomode = IOM_NONE;
+       fi->iocachectr--;
+       if (fi->iocachectr == 0)
+               clear_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+       spin_unlock(&fi->lock);
+}
+
+/* Start strictly uncached io mode where cache access is not allowed */
+int fuse_file_uncached_io_start(struct inode *inode, struct fuse_file *ff, struct fuse_backing *fb)
+{
+       struct fuse_inode *fi = get_fuse_inode(inode);
+       struct fuse_backing *oldfb;
+       int err = 0;
+
+       spin_lock(&fi->lock);
+       /* deny conflicting backing files on same fuse inode */
+       oldfb = fuse_inode_backing(fi);
+       if (oldfb && oldfb != fb) {
+               err = -EBUSY;
+               goto unlock;
+       }
+       if (fi->iocachectr > 0) {
+               err = -ETXTBSY;
+               goto unlock;
+       }
+       WARN_ON(ff->iomode != IOM_NONE);
+       fi->iocachectr--;
+       ff->iomode = IOM_UNCACHED;
+
+       /* fuse inode holds a single refcount of backing file */
+       if (!oldfb) {
+               oldfb = fuse_inode_backing_set(fi, fb);
+               WARN_ON_ONCE(oldfb != NULL);
+       } else {
+               fuse_backing_put(fb);
+       }
+unlock:
+       spin_unlock(&fi->lock);
+       return err;
+}
+
+void fuse_file_uncached_io_end(struct inode *inode, struct fuse_file *ff)
+{
+       struct fuse_inode *fi = get_fuse_inode(inode);
+       struct fuse_backing *oldfb = NULL;
+
+       spin_lock(&fi->lock);
+       WARN_ON(fi->iocachectr >= 0);
+       WARN_ON(ff->iomode != IOM_UNCACHED);
+       ff->iomode = IOM_NONE;
+       fi->iocachectr++;
+       if (!fi->iocachectr) {
+               wake_up(&fi->direct_io_waitq);
+               oldfb = fuse_inode_backing_set(fi, NULL);
+       }
+       spin_unlock(&fi->lock);
+       if (oldfb)
+               fuse_backing_put(oldfb);
+}
+
+/*
+ * Open flags that are allowed in combination with FOPEN_PASSTHROUGH.
+ * A combination of FOPEN_PASSTHROUGH and FOPEN_DIRECT_IO means that read/write
+ * operations go directly to the server, but mmap is done on the backing file.
+ * FOPEN_PASSTHROUGH mode should not co-exist with any users of the fuse inode
+ * page cache, so FOPEN_KEEP_CACHE is a strange and undesired combination.
+ */
+#define FOPEN_PASSTHROUGH_MASK \
+       (FOPEN_PASSTHROUGH | FOPEN_DIRECT_IO | FOPEN_PARALLEL_DIRECT_WRITES | \
+        FOPEN_NOFLUSH)
+
+static int fuse_file_passthrough_open(struct inode *inode, struct file *file)
+{
+       struct fuse_file *ff = file->private_data;
+       struct fuse_conn *fc = get_fuse_conn(inode);
+       struct fuse_backing *fb;
+       int err;
+
+       /* Check allowed conditions for file open in passthrough mode */
+       if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) || !fc->passthrough ||
+           (ff->open_flags & ~FOPEN_PASSTHROUGH_MASK))
+               return -EINVAL;
+
+       fb = fuse_passthrough_open(file, inode,
+                                  ff->args->open_outarg.backing_id);
+       if (IS_ERR(fb))
+               return PTR_ERR(fb);
+
+       /* First passthrough file open denies caching inode io mode */
+       err = fuse_file_uncached_io_start(inode, ff, fb);
+       if (!err)
+               return 0;
+
+       fuse_passthrough_release(ff, fb);
+       fuse_backing_put(fb);
+
+       return err;
+}
+
+/* Request access to submit new io to inode via open file */
+int fuse_file_io_open(struct file *file, struct inode *inode)
+{
+       struct fuse_file *ff = file->private_data;
+       struct fuse_inode *fi = get_fuse_inode(inode);
+       int err;
+
+       /*
+        * io modes are not relevant with DAX and with server that does not
+        * implement open.
+        */
+       if (FUSE_IS_DAX(inode) || !ff->args)
+               return 0;
+
+       /*
+        * Server is expected to use FOPEN_PASSTHROUGH for all opens of an inode
+        * which is already open for passthrough.
+        */
+       err = -EINVAL;
+       if (fuse_inode_backing(fi) && !(ff->open_flags & FOPEN_PASSTHROUGH))
+               goto fail;
+
+       /*
+        * FOPEN_PARALLEL_DIRECT_WRITES requires FOPEN_DIRECT_IO.
+        */
+       if (!(ff->open_flags & FOPEN_DIRECT_IO))
+               ff->open_flags &= ~FOPEN_PARALLEL_DIRECT_WRITES;
+
+       /*
+        * First passthrough file open denies caching inode io mode.
+        * First caching file open enters caching inode io mode.
+        *
+        * Note that if the user opens a file with O_DIRECT, but the server
+        * did not specify FOPEN_DIRECT_IO, a later fcntl() could remove
+        * O_DIRECT, so we put the inode in caching mode to prevent parallel
+        * dio.
+        */
+       if ((ff->open_flags & FOPEN_DIRECT_IO) &&
+           !(ff->open_flags & FOPEN_PASSTHROUGH))
+               return 0;
+
+       if (ff->open_flags & FOPEN_PASSTHROUGH)
+               err = fuse_file_passthrough_open(inode, file);
+       else
+               err = fuse_file_cached_io_start(inode, ff);
+       if (err)
+               goto fail;
+
+       return 0;
+
+fail:
+       pr_debug("failed to open file in requested io mode (open_flags=0x%x, err=%i).\n",
+                ff->open_flags, err);
+       /*
+        * The file open mode determines the inode io mode.
+        * Using incorrect open mode is a server mistake, which results in
+        * user visible failure of open() with EIO error.
+        */
+       return -EIO;
+}
+
+/* No more pending io and no new io possible to inode via open/mmapped file */
+void fuse_file_io_release(struct fuse_file *ff, struct inode *inode)
+{
+       /*
+        * Last parallel dio close allows caching inode io mode.
+        * Last caching file close exits caching inode io mode.
+        */
+       switch (ff->iomode) {
+       case IOM_NONE:
+               /* Nothing to do */
+               break;
+       case IOM_UNCACHED:
+               fuse_file_uncached_io_end(inode, ff);
+               break;
+       case IOM_CACHED:
+               fuse_file_cached_io_end(inode, ff);
+               break;
+       }
+}
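Net effect of fuse_file_io_open() above (DAX inodes and servers that do not implement open bypass io modes entirely):

        server open_flags                      resulting io mode
        FOPEN_PASSTHROUGH (+ allowed bits)     IOM_UNCACHED, fi->iocachectr < 0
        FOPEN_DIRECT_IO without passthrough    IOM_NONE, no io mode claimed
        any cached open                        IOM_CACHED, fi->iocachectr > 0

Mixing passthrough and non-passthrough opens on the same inode is a server mistake and surfaces to the user as open() failing with EIO.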
diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c
new file mode 100644 (file)
index 0000000..1567f03
--- /dev/null
@@ -0,0 +1,355 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FUSE passthrough to backing file.
+ *
+ * Copyright (c) 2023 CTERA Networks.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/file.h>
+#include <linux/backing-file.h>
+#include <linux/splice.h>
+
+static void fuse_file_accessed(struct file *file)
+{
+       struct inode *inode = file_inode(file);
+
+       fuse_invalidate_atime(inode);
+}
+
+static void fuse_file_modified(struct file *file)
+{
+       struct inode *inode = file_inode(file);
+
+       fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
+}
+
+ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+       struct file *file = iocb->ki_filp;
+       struct fuse_file *ff = file->private_data;
+       struct file *backing_file = fuse_file_passthrough(ff);
+       size_t count = iov_iter_count(iter);
+       ssize_t ret;
+       struct backing_file_ctx ctx = {
+               .cred = ff->cred,
+               .user_file = file,
+               .accessed = fuse_file_accessed,
+       };
+
+       pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu\n", __func__,
+                backing_file, iocb->ki_pos, count);
+
+       if (!count)
+               return 0;
+
+       ret = backing_file_read_iter(backing_file, iter, iocb, iocb->ki_flags,
+                                    &ctx);
+
+       return ret;
+}
+
+ssize_t fuse_passthrough_write_iter(struct kiocb *iocb,
+                                   struct iov_iter *iter)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       struct fuse_file *ff = file->private_data;
+       struct file *backing_file = fuse_file_passthrough(ff);
+       size_t count = iov_iter_count(iter);
+       ssize_t ret;
+       struct backing_file_ctx ctx = {
+               .cred = ff->cred,
+               .user_file = file,
+               .end_write = fuse_file_modified,
+       };
+
+       pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu\n", __func__,
+                backing_file, iocb->ki_pos, count);
+
+       if (!count)
+               return 0;
+
+       inode_lock(inode);
+       ret = backing_file_write_iter(backing_file, iter, iocb, iocb->ki_flags,
+                                     &ctx);
+       inode_unlock(inode);
+
+       return ret;
+}
+
+ssize_t fuse_passthrough_splice_read(struct file *in, loff_t *ppos,
+                                    struct pipe_inode_info *pipe,
+                                    size_t len, unsigned int flags)
+{
+       struct fuse_file *ff = in->private_data;
+       struct file *backing_file = fuse_file_passthrough(ff);
+       struct backing_file_ctx ctx = {
+               .cred = ff->cred,
+               .user_file = in,
+               .accessed = fuse_file_accessed,
+       };
+
+       pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu, flags=0x%x\n", __func__,
+                backing_file, ppos ? *ppos : 0, len, flags);
+
+       return backing_file_splice_read(backing_file, ppos, pipe, len, flags,
+                                       &ctx);
+}
+
+ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe,
+                                     struct file *out, loff_t *ppos,
+                                     size_t len, unsigned int flags)
+{
+       struct fuse_file *ff = out->private_data;
+       struct file *backing_file = fuse_file_passthrough(ff);
+       struct inode *inode = file_inode(out);
+       ssize_t ret;
+       struct backing_file_ctx ctx = {
+               .cred = ff->cred,
+               .user_file = out,
+               .end_write = fuse_file_modified,
+       };
+
+       pr_debug("%s: backing_file=0x%p, pos=%lld, len=%zu, flags=0x%x\n", __func__,
+                backing_file, ppos ? *ppos : 0, len, flags);
+
+       inode_lock(inode);
+       ret = backing_file_splice_write(pipe, backing_file, ppos, len, flags,
+                                       &ctx);
+       inode_unlock(inode);
+
+       return ret;
+}
+
+ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       struct fuse_file *ff = file->private_data;
+       struct file *backing_file = fuse_file_passthrough(ff);
+       struct backing_file_ctx ctx = {
+               .cred = ff->cred,
+               .user_file = file,
+               .accessed = fuse_file_accessed,
+       };
+
+       pr_debug("%s: backing_file=0x%p, start=%lu, end=%lu\n", __func__,
+                backing_file, vma->vm_start, vma->vm_end);
+
+       return backing_file_mmap(backing_file, vma, &ctx);
+}
+
+struct fuse_backing *fuse_backing_get(struct fuse_backing *fb)
+{
+       if (fb && refcount_inc_not_zero(&fb->count))
+               return fb;
+       return NULL;
+}
+
+static void fuse_backing_free(struct fuse_backing *fb)
+{
+       pr_debug("%s: fb=0x%p\n", __func__, fb);
+
+       if (fb->file)
+               fput(fb->file);
+       put_cred(fb->cred);
+       kfree_rcu(fb, rcu);
+}
+
+void fuse_backing_put(struct fuse_backing *fb)
+{
+       if (fb && refcount_dec_and_test(&fb->count))
+               fuse_backing_free(fb);
+}
+
+void fuse_backing_files_init(struct fuse_conn *fc)
+{
+       idr_init(&fc->backing_files_map);
+}
+
+static int fuse_backing_id_alloc(struct fuse_conn *fc, struct fuse_backing *fb)
+{
+       int id;
+
+       idr_preload(GFP_KERNEL);
+       spin_lock(&fc->lock);
+       /* FIXME: xarray might be space inefficient */
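+       /* Start ids at 1 so that 0 is never a valid backing id; end == 0
+        * means no upper limit for idr_alloc_cyclic(). */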
+       id = idr_alloc_cyclic(&fc->backing_files_map, fb, 1, 0, GFP_ATOMIC);
+       spin_unlock(&fc->lock);
+       idr_preload_end();
+
+       WARN_ON_ONCE(id == 0);
+       return id;
+}
+
+static struct fuse_backing *fuse_backing_id_remove(struct fuse_conn *fc,
+                                                  int id)
+{
+       struct fuse_backing *fb;
+
+       spin_lock(&fc->lock);
+       fb = idr_remove(&fc->backing_files_map, id);
+       spin_unlock(&fc->lock);
+
+       return fb;
+}
+
+static int fuse_backing_id_free(int id, void *p, void *data)
+{
+       struct fuse_backing *fb = p;
+
+       WARN_ON_ONCE(refcount_read(&fb->count) != 1);
+       fuse_backing_free(fb);
+       return 0;
+}
+
+void fuse_backing_files_free(struct fuse_conn *fc)
+{
+       idr_for_each(&fc->backing_files_map, fuse_backing_id_free, NULL);
+       idr_destroy(&fc->backing_files_map);
+}
+
+int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map)
+{
+       struct file *file;
+       struct super_block *backing_sb;
+       struct fuse_backing *fb = NULL;
+       int res;
+
+       pr_debug("%s: fd=%d flags=0x%x\n", __func__, map->fd, map->flags);
+
+       /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
+       res = -EPERM;
+       if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
+               goto out;
+
+       res = -EINVAL;
+       if (map->flags)
+               goto out;
+
+       file = fget(map->fd);
+       res = -EBADF;
+       if (!file)
+               goto out;
+
+       res = -EOPNOTSUPP;
+       if (!file->f_op->read_iter || !file->f_op->write_iter)
+               goto out_fput;
+
+       backing_sb = file_inode(file)->i_sb;
+       res = -ELOOP;
+       if (backing_sb->s_stack_depth >= fc->max_stack_depth)
+               goto out_fput;
+
+       fb = kmalloc(sizeof(*fb), GFP_KERNEL);
+       res = -ENOMEM;
+       if (!fb)
+               goto out_fput;
+
+       fb->file = file;
+       fb->cred = prepare_creds();
+       refcount_set(&fb->count, 1);
+
+       res = fuse_backing_id_alloc(fc, fb);
+       if (res < 0) {
+               fuse_backing_free(fb);
+               fb = NULL;
+       }
+
+out:
+       pr_debug("%s: fb=0x%p, ret=%i\n", __func__, fb, res);
+
+       return res;
+
+out_fput:
+       fput(file);
+       goto out;
+}
+
+int fuse_backing_close(struct fuse_conn *fc, int backing_id)
+{
+       struct fuse_backing *fb = NULL;
+       int err;
+
+       pr_debug("%s: backing_id=%d\n", __func__, backing_id);
+
+       /* TODO: relax CAP_SYS_ADMIN once backing files are visible to lsof */
+       err = -EPERM;
+       if (!fc->passthrough || !capable(CAP_SYS_ADMIN))
+               goto out;
+
+       err = -EINVAL;
+       if (backing_id <= 0)
+               goto out;
+
+       err = -ENOENT;
+       fb = fuse_backing_id_remove(fc, backing_id);
+       if (!fb)
+               goto out;
+
+       fuse_backing_put(fb);
+       err = 0;
+out:
+       pr_debug("%s: fb=0x%p, err=%i\n", __func__, fb, err);
+
+       return err;
+}
+
+/*
+ * Setup passthrough to a backing file.
+ *
+ * Returns an fb object with elevated refcount to be stored in fuse inode.
+ */
+struct fuse_backing *fuse_passthrough_open(struct file *file,
+                                          struct inode *inode,
+                                          int backing_id)
+{
+       struct fuse_file *ff = file->private_data;
+       struct fuse_conn *fc = ff->fm->fc;
+       struct fuse_backing *fb = NULL;
+       struct file *backing_file;
+       int err;
+
+       err = -EINVAL;
+       if (backing_id <= 0)
+               goto out;
+
+       rcu_read_lock();
+       fb = idr_find(&fc->backing_files_map, backing_id);
+       fb = fuse_backing_get(fb);
+       rcu_read_unlock();
+
+       err = -ENOENT;
+       if (!fb)
+               goto out;
+
+       /* Allocate backing file per fuse file to store fuse path */
+       backing_file = backing_file_open(&file->f_path, file->f_flags,
+                                        &fb->file->f_path, fb->cred);
+       err = PTR_ERR(backing_file);
+       if (IS_ERR(backing_file)) {
+               fuse_backing_put(fb);
+               goto out;
+       }
+
+       err = 0;
+       ff->passthrough = backing_file;
+       ff->cred = get_cred(fb->cred);
+out:
+       pr_debug("%s: backing_id=%d, fb=0x%p, backing_file=0x%p, err=%i\n", __func__,
+                backing_id, fb, ff->passthrough, err);
+
+       return err ? ERR_PTR(err) : fb;
+}
+
+void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb)
+{
+       pr_debug("%s: fb=0x%p, backing_file=0x%p\n", __func__,
+                fb, ff->passthrough);
+
+       fput(ff->passthrough);
+       ff->passthrough = NULL;
+       put_cred(ff->cred);
+       ff->cred = NULL;
+}
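Refcount lifecycle of a struct fuse_backing, as implemented across iomode.c and passthrough.c above:

        fuse_backing_open()             count = 1, reference held by the IDR
        fuse_passthrough_open()         count++ via fuse_backing_get()
        fuse_file_uncached_io_start()   that reference moves into fi->fb, or
                                        is dropped if fi->fb is already set
        fuse_file_uncached_io_end()     fi->fb reference dropped on the last
                                        uncached close
        fuse_backing_close()            IDR reference dropped

The per-file pieces (ff->passthrough, ff->cred) are independent of fb's count and are torn down by fuse_passthrough_release().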
index c66a54d6c7d3042c5304614758e5e15dc946690e..0377b6dc24c80db8eb90750cf97855282e1ea7f1 100644 (file)
@@ -592,15 +592,11 @@ int fuse_readdir(struct file *file, struct dir_context *ctx)
        if (fuse_is_bad(inode))
                return -EIO;
 
-       mutex_lock(&ff->readdir.lock);
-
        err = UNCACHED;
        if (ff->open_flags & FOPEN_CACHE_DIR)
                err = fuse_readdir_cached(file, ctx);
        if (err == UNCACHED)
                err = fuse_readdir_uncached(file, ctx);
 
-       mutex_unlock(&ff->readdir.lock);
-
        return err;
 }
index 5f1be1da92ce942cc1c8828bed888296a87c4d4e..322af827a2329f9af0ee445116f1ef84114b2b4c 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/fs_context.h>
 #include <linux/fs_parser.h>
 #include <linux/highmem.h>
+#include <linux/cleanup.h>
 #include <linux/uio.h>
 #include "fuse_i.h"
 
@@ -31,6 +32,9 @@
 static DEFINE_MUTEX(virtio_fs_mutex);
 static LIST_HEAD(virtio_fs_instances);
 
+/* The /sys/fs/virtiofs/ kset */
+static struct kset *virtio_fs_kset;
+
 enum {
        VQ_HIPRIO,
        VQ_REQUEST
@@ -55,7 +59,7 @@ struct virtio_fs_vq {
 
 /* A virtio-fs device instance */
 struct virtio_fs {
-       struct kref refcount;
+       struct kobject kobj;
        struct list_head list;    /* on virtio_fs_instances */
        char *tag;
        struct virtio_fs_vq *vqs;
@@ -161,18 +165,40 @@ static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
                complete(&fsvq->in_flight_zero);
 }
 
-static void release_virtio_fs_obj(struct kref *ref)
+static ssize_t tag_show(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);
+
+       return sysfs_emit(buf, "%s\n", fs->tag);
+}
+
+static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag);
+
+static struct attribute *virtio_fs_attrs[] = {
+       &virtio_fs_tag_attr.attr,
+       NULL
+};
+ATTRIBUTE_GROUPS(virtio_fs);
+
+static void virtio_fs_ktype_release(struct kobject *kobj)
 {
-       struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
+       struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);
 
        kfree(vfs->vqs);
        kfree(vfs);
 }
 
+static const struct kobj_type virtio_fs_ktype = {
+       .release = virtio_fs_ktype_release,
+       .sysfs_ops = &kobj_sysfs_ops,
+       .default_groups = virtio_fs_groups,
+};
+
 /* Make sure virtiofs_mutex is held */
 static void virtio_fs_put(struct virtio_fs *fs)
 {
-       kref_put(&fs->refcount, release_virtio_fs_obj);
+       kobject_put(&fs->kobj);
 }
 
 static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
@@ -243,25 +269,46 @@ static void virtio_fs_start_all_queues(struct virtio_fs *fs)
 }
 
 /* Add a new instance to the list or return -EEXIST if tag name exists */
-static int virtio_fs_add_instance(struct virtio_fs *fs)
+static int virtio_fs_add_instance(struct virtio_device *vdev,
+                                 struct virtio_fs *fs)
 {
        struct virtio_fs *fs2;
-       bool duplicate = false;
+       int ret;
 
        mutex_lock(&virtio_fs_mutex);
 
        list_for_each_entry(fs2, &virtio_fs_instances, list) {
-               if (strcmp(fs->tag, fs2->tag) == 0)
-                       duplicate = true;
+               if (strcmp(fs->tag, fs2->tag) == 0) {
+                       mutex_unlock(&virtio_fs_mutex);
+                       return -EEXIST;
+               }
        }
 
-       if (!duplicate)
-               list_add_tail(&fs->list, &virtio_fs_instances);
+       /* Use the virtio_device's index as a unique identifier; there is no
+        * need to allocate our own identifiers because the virtio_fs instance
+        * is only visible to userspace as long as the underlying virtio_device
+        * exists.
+        */
+       fs->kobj.kset = virtio_fs_kset;
+       ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index);
+       if (ret < 0) {
+               mutex_unlock(&virtio_fs_mutex);
+               return ret;
+       }
+
+       ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device");
+       if (ret < 0) {
+               kobject_del(&fs->kobj);
+               mutex_unlock(&virtio_fs_mutex);
+               return ret;
+       }
+
+       list_add_tail(&fs->list, &virtio_fs_instances);
 
        mutex_unlock(&virtio_fs_mutex);
 
-       if (duplicate)
-               return -EEXIST;
+       kobject_uevent(&fs->kobj, KOBJ_ADD);
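+       /*
+        * Userspace now sees /sys/fs/virtiofs/<vdev index>/tag and a
+        * "device" symlink to the underlying virtio device, and gets a
+        * KOBJ_ADD uevent carrying TAG=<tag>.
+        */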
+
        return 0;
 }
 
@@ -274,7 +321,7 @@ static struct virtio_fs *virtio_fs_find_instance(const char *tag)
 
        list_for_each_entry(fs, &virtio_fs_instances, list) {
                if (strcmp(fs->tag, tag) == 0) {
-                       kref_get(&fs->refcount);
+                       kobject_get(&fs->kobj);
                        goto found;
                }
        }
@@ -323,6 +370,16 @@ static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
                return -ENOMEM;
        memcpy(fs->tag, tag_buf, len);
        fs->tag[len] = '\0';
+
+       /* While the VIRTIO specification allows any character, newlines are
+        * awkward on mount(8) command-lines and cause problems in the sysfs
+        * "tag" attr and uevent TAG= properties. Forbid them.
+        */
+       if (strchr(fs->tag, '\n')) {
+               dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n");
+               return -EINVAL;
+       }
+
        return 0;
 }
 
@@ -345,7 +402,7 @@ static void virtio_fs_hiprio_done_work(struct work_struct *work)
                        kfree(req);
                        dec_in_flight_req(fsvq);
                }
-       } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+       } while (!virtqueue_enable_cb(vq));
        spin_unlock(&fsvq->lock);
 }
 
@@ -627,7 +684,7 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
                        list_move_tail(&req->list, &reqs);
                        spin_unlock(&fpq->lock);
                }
-       } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+       } while (!virtqueue_enable_cb(vq));
        spin_unlock(&fsvq->lock);
 
        /* End requests */
@@ -795,8 +852,11 @@ static void virtio_fs_cleanup_dax(void *data)
        put_dax(dax_dev);
 }
 
+DEFINE_FREE(cleanup_dax, struct dax_device *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T))
+
 static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
 {
+       struct dax_device *dax_dev __free(cleanup_dax) = NULL;
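+       /* __free(cleanup_dax): dax_dev is put via virtio_fs_cleanup_dax()
+        * automatically on any early return below; no_free_ptr() on the
+        * success path disarms this and transfers ownership. */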
        struct virtio_shm_region cache_reg;
        struct dev_pagemap *pgmap;
        bool have_cache;
@@ -804,6 +864,12 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
        if (!IS_ENABLED(CONFIG_FUSE_DAX))
                return 0;
 
+       dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
+       if (IS_ERR(dax_dev)) {
+               int rc = PTR_ERR(dax_dev);
+               return rc == -EOPNOTSUPP ? 0 : rc;
+       }
+
        /* Get cache region */
        have_cache = virtio_get_shm_region(vdev, &cache_reg,
                                           (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
@@ -849,10 +915,7 @@ static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
        dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
                __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
 
-       fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
-       if (IS_ERR(fs->dax_dev))
-               return PTR_ERR(fs->dax_dev);
-
+       fs->dax_dev = no_free_ptr(dax_dev);
        return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
                                        fs->dax_dev);
 }
@@ -865,7 +928,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
        fs = kzalloc(sizeof(*fs), GFP_KERNEL);
        if (!fs)
                return -ENOMEM;
-       kref_init(&fs->refcount);
+       kobject_init(&fs->kobj, &virtio_fs_ktype);
        vdev->priv = fs;
 
        ret = virtio_fs_read_tag(vdev, fs);
@@ -887,7 +950,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
         */
        virtio_device_ready(vdev);
 
-       ret = virtio_fs_add_instance(fs);
+       ret = virtio_fs_add_instance(vdev, fs);
        if (ret < 0)
                goto out_vqs;
 
@@ -896,11 +959,10 @@ static int virtio_fs_probe(struct virtio_device *vdev)
 out_vqs:
        virtio_reset_device(vdev);
        virtio_fs_cleanup_vqs(vdev);
-       kfree(fs->vqs);
 
 out:
        vdev->priv = NULL;
-       kfree(fs);
+       kobject_put(&fs->kobj);
        return ret;
 }
 
@@ -924,6 +986,8 @@ static void virtio_fs_remove(struct virtio_device *vdev)
        mutex_lock(&virtio_fs_mutex);
        /* This device is going away. No one should get new reference */
        list_del_init(&fs->list);
+       sysfs_remove_link(&fs->kobj, "device");
+       kobject_del(&fs->kobj);
        virtio_fs_stop_all_queues(fs);
        virtio_fs_drain_all_queues_locked(fs);
        virtio_reset_device(vdev);
@@ -1510,21 +1574,56 @@ static struct file_system_type virtio_fs_type = {
        .kill_sb        = virtio_kill_sb,
 };
 
+static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)
+{
+       const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj);
+
+       add_uevent_var(env, "TAG=%s", fs->tag);
+       return 0;
+}
+
+static const struct kset_uevent_ops virtio_fs_uevent_ops = {
+       .uevent = virtio_fs_uevent,
+};
+
+static int __init virtio_fs_sysfs_init(void)
+{
+       virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops,
+                                            fs_kobj);
+       if (!virtio_fs_kset)
+               return -ENOMEM;
+       return 0;
+}
+
+static void virtio_fs_sysfs_exit(void)
+{
+       kset_unregister(virtio_fs_kset);
+       virtio_fs_kset = NULL;
+}
+
 static int __init virtio_fs_init(void)
 {
        int ret;
 
-       ret = register_virtio_driver(&virtio_fs_driver);
+       ret = virtio_fs_sysfs_init();
        if (ret < 0)
                return ret;
 
+       ret = register_virtio_driver(&virtio_fs_driver);
+       if (ret < 0)
+               goto sysfs_exit;
+
        ret = register_filesystem(&virtio_fs_type);
-       if (ret < 0) {
-               unregister_virtio_driver(&virtio_fs_driver);
-               return ret;
-       }
+       if (ret < 0)
+               goto unregister_virtio_driver;
 
        return 0;
+
+unregister_virtio_driver:
+       unregister_virtio_driver(&virtio_fs_driver);
+sysfs_exit:
+       virtio_fs_sysfs_exit();
+       return ret;
 }
 module_init(virtio_fs_init);
 
@@ -1532,6 +1631,7 @@ static void __exit virtio_fs_exit(void)
 {
        unregister_filesystem(&virtio_fs_type);
        unregister_virtio_driver(&virtio_fs_driver);
+       virtio_fs_sysfs_exit();
 }
 module_exit(virtio_fs_exit);
 
index d290f007b3d13132319ea4eec2774ab9622d7ff9..3a41f83a4ba5598536d57b8a5230f34da66621aa 100644 (file)
@@ -2033,7 +2033,7 @@ static int __remove_privs(struct mnt_idmap *idmap,
        return notify_change(idmap, dentry, &newattrs, NULL);
 }
 
-static int __file_remove_privs(struct file *file, unsigned int flags)
+int file_remove_privs_flags(struct file *file, unsigned int flags)
 {
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = file_inode(file);
@@ -2058,6 +2058,7 @@ static int __file_remove_privs(struct file *file, unsigned int flags)
                inode_has_no_xattr(inode);
        return error;
 }
+EXPORT_SYMBOL_GPL(file_remove_privs_flags);
 
 /**
  * file_remove_privs - remove special file privileges (suid, capabilities)
@@ -2070,7 +2071,7 @@ static int __file_remove_privs(struct file *file, unsigned int flags)
  */
 int file_remove_privs(struct file *file)
 {
-       return __file_remove_privs(file, 0);
+       return file_remove_privs_flags(file, 0);
 }
 EXPORT_SYMBOL(file_remove_privs);
 
@@ -2163,7 +2164,7 @@ static int file_modified_flags(struct file *file, int flags)
         * Clear the security bits if the process is not being run by root.
         * This keeps people from modifying setuid and setgid binaries.
         */
-       ret = __file_remove_privs(file, flags);
+       ret = file_remove_privs_flags(file, flags);
        if (ret)
                return ret;
 
index ad572f7ee897b9d26d2439a6a1178332d2a2e547..43a651ed826413d8ea60dcb240950e2a074aca28 100644 (file)
@@ -83,8 +83,10 @@ static int fscache_begin_operation(struct netfs_cache_resources *cres,
        cres->debug_id          = cookie->debug_id;
        cres->inval_counter     = cookie->inval_counter;
 
-       if (!fscache_begin_cookie_access(cookie, why))
+       if (!fscache_begin_cookie_access(cookie, why)) {
+               cres->cache_priv = NULL;
                return -ENOBUFS;
+       }
 
 again:
        spin_lock(&cookie->lock);
index fbdc9ca80f714bdf3d3cad54e63d7c858612e5f1..de77848ae6545566a5c38501516f3769e142f4cd 100644 (file)
@@ -73,14 +73,9 @@ const struct rpc_program nfs_program = {
        .number                 = NFS_PROGRAM,
        .nrvers                 = ARRAY_SIZE(nfs_version),
        .version                = nfs_version,
-       .stats                  = &nfs_rpcstat,
        .pipe_dir_name          = NFS_PIPE_DIRNAME,
 };
 
-struct rpc_stat nfs_rpcstat = {
-       .program                = &nfs_program
-};
-
 static struct nfs_subversion *find_nfs_version(unsigned int version)
 {
        struct nfs_subversion *nfs;
@@ -502,6 +497,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
                          const struct nfs_client_initdata *cl_init,
                          rpc_authflavor_t flavor)
 {
+       struct nfs_net          *nn = net_generic(clp->cl_net, nfs_net_id);
        struct rpc_clnt         *clnt = NULL;
        struct rpc_create_args args = {
                .net            = clp->cl_net,
@@ -513,6 +509,7 @@ int nfs_create_rpc_client(struct nfs_client *clp,
                .servername     = clp->cl_hostname,
                .nodename       = cl_init->nodename,
                .program        = &nfs_program,
+               .stats          = &nn->rpcstats,
                .version        = clp->rpc_ops->version,
                .authflavor     = flavor,
                .cred           = cl_init->cred,
@@ -1182,6 +1179,8 @@ void nfs_clients_init(struct net *net)
 #endif
        spin_lock_init(&nn->nfs_client_lock);
        nn->boot_time = ktime_get_real();
+       memset(&nn->rpcstats, 0, sizeof(nn->rpcstats));
+       nn->rpcstats.program = &nfs_program;
 
        nfs_netns_sysfs_setup(nn, net);
 }
index d4a42ce0c7e3dfeef19884be272a8ddd449c143f..6bace5fece04e29fa37839a705b0734a0e0d75ca 100644 (file)
@@ -181,7 +181,6 @@ static int nfs_delegation_claim_opens(struct inode *inode,
        struct nfs_open_context *ctx;
        struct nfs4_state_owner *sp;
        struct nfs4_state *state;
-       unsigned int seq;
        int err;
 
 again:
@@ -202,12 +201,9 @@ again:
                sp = state->owner;
                /* Block nfs4_proc_unlck */
                mutex_lock(&sp->so_delegreturn_mutex);
-               seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
                err = nfs4_open_delegation_recall(ctx, state, stateid);
                if (!err)
                        err = nfs_delegation_claim_locks(state, stateid);
-               if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
-                       err = -EAGAIN;
                mutex_unlock(&sp->so_delegreturn_mutex);
                put_nfs_open_context(ctx);
                if (err != 0)
index 7af5d270de2876270aaf83aecac7d2ee19e17e76..bb2f583eb28bf1de5d96eb1b3ee5d154d718b9a0 100644 (file)
@@ -606,6 +606,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 
        trace_nfs_direct_commit_complete(dreq);
 
+       spin_lock(&dreq->lock);
        if (status < 0) {
                /* Errors in commit are fatal */
                dreq->error = status;
@@ -613,6 +614,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
        } else {
                status = dreq->error;
        }
+       spin_unlock(&dreq->lock);
 
        nfs_init_cinfo_from_dreq(&cinfo, dreq);
 
@@ -625,7 +627,10 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
                        spin_unlock(&dreq->lock);
                        nfs_release_request(req);
                } else if (!nfs_write_match_verf(verf, req)) {
-                       dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+                       spin_lock(&dreq->lock);
+                       if (dreq->flags == 0)
+                               dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+                       spin_unlock(&dreq->lock);
                        /*
                         * Despite the reboot, the write was successful,
                         * so reset wb_nio.
@@ -667,10 +672,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
        LIST_HEAD(mds_list);
 
        nfs_init_cinfo_from_dreq(&cinfo, dreq);
+       nfs_commit_begin(cinfo.mds);
        nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
        res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
-       if (res < 0) /* res == -ENOMEM */
-               nfs_direct_write_reschedule(dreq);
+       if (res < 0) { /* res == -ENOMEM */
+               spin_lock(&dreq->lock);
+               if (dreq->flags == 0)
+                       dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+               spin_unlock(&dreq->lock);
+       }
+       if (nfs_commit_end(cinfo.mds))
+               nfs_direct_write_complete(dreq);
 }
 
 static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
index acf4b88889dc3814ebfe06d3883c96f429415f2a..4fa304fa5bc4b2346458877c39a558936a49317a 100644 (file)
@@ -35,6 +35,7 @@
 #include "../internal.h"
 #include "../nfs4session.h"
 #include "filelayout.h"
+#include "../nfs4trace.h"
 
 #define NFSDBG_FACILITY                NFSDBG_PNFS_LD
 
@@ -172,6 +173,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
                dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
                if (!dsaddr->ds_list[i])
                        goto out_err_drain_dsaddrs;
+               trace_fl_getdevinfo(server, &pdev->dev_id, dsaddr->ds_list[i]->ds_remotestr);
 
                /* If DS was already in cache, free ds addrs */
                while (!list_empty(&dsaddrs)) {
index ef817a0475ffa6508a0d663b3fa4f7f29ebd58af..3e724cb7ef01d87f3d49c57ab83ca81fd781c4f9 100644 (file)
@@ -2016,7 +2016,7 @@ static void ff_layout_cancel_io(struct pnfs_layout_segment *lseg)
        for (idx = 0; idx < flseg->mirror_array_cnt; idx++) {
                mirror = flseg->mirror_array[idx];
                mirror_ds = mirror->mirror_ds;
-               if (!mirror_ds)
+               if (IS_ERR_OR_NULL(mirror_ds))
                        continue;
                ds = mirror->mirror_ds->ds;
                if (!ds)
index 853e8d609bb3bcc81a505fa7cf03e1658a24ffc8..d0a0956f8a13462ab305e18c437719b0ed612158 100644 (file)
@@ -652,6 +652,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
                ctx->fscache_uniq = NULL;
                break;
        case Opt_fscache:
+               trace_nfs_mount_assign(param->key, param->string);
                ctx->options |= NFS_OPTION_FSCACHE;
                kfree(ctx->fscache_uniq);
                ctx->fscache_uniq = param->string;
index 2d1bfee225c3693d4443c62463944ecf04439bca..ddc1ee0319554cfc381bc920acb248766938bde7 100644 (file)
@@ -301,11 +301,11 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
        struct inode *inode = sreq->rreq->inode;
        struct nfs_open_context *ctx = sreq->rreq->netfs_priv;
        struct page *page;
+       unsigned long idx;
        int err;
        pgoff_t start = (sreq->start + sreq->transferred) >> PAGE_SHIFT;
        pgoff_t last = ((sreq->start + sreq->len -
                         sreq->transferred - 1) >> PAGE_SHIFT);
-       XA_STATE(xas, &sreq->rreq->mapping->i_pages, start);
 
        nfs_pageio_init_read(&pgio, inode, false,
                             &nfs_async_read_completion_ops);
@@ -316,19 +316,14 @@ static void nfs_netfs_issue_read(struct netfs_io_subrequest *sreq)
 
        pgio.pg_netfs = netfs; /* used in completion */
 
-       xas_lock(&xas);
-       xas_for_each(&xas, page, last) {
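+       /* xa_for_each_range() takes the RCU read lock around each lookup
+        * internally and the loop body may sleep, so the xas_lock()/
+        * xas_pause() choreography around nfs_read_add_folio() can go. */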
+       xa_for_each_range(&sreq->rreq->mapping->i_pages, idx, page, start, last) {
                /* nfs_read_add_folio() may schedule() due to pNFS layout and other RPCs  */
-               xas_pause(&xas);
-               xas_unlock(&xas);
                err = nfs_read_add_folio(&pgio, ctx, page_folio(page));
                if (err < 0) {
                        netfs->error = err;
                        goto out;
                }
-               xas_lock(&xas);
        }
-       xas_unlock(&xas);
 out:
        nfs_pageio_complete_read(&pgio);
        nfs_netfs_put(netfs);
index 93ea49a7eb61b44d217728cf4333db39dfa37871..c709c296ea9a49e0ccecdae975a5287aea5759fe 100644 (file)
@@ -2426,12 +2426,16 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
 
 static int nfs_net_init(struct net *net)
 {
+       struct nfs_net *nn = net_generic(net, nfs_net_id);
+
        nfs_clients_init(net);
+       rpc_proc_register(net, &nn->rpcstats);
        return nfs_fs_proc_net_init(net);
 }
 
 static void nfs_net_exit(struct net *net)
 {
+       rpc_proc_unregister(net, "nfs");
        nfs_fs_proc_net_exit(net);
        nfs_clients_exit(net);
 }
@@ -2486,15 +2490,12 @@ static int __init init_nfs_fs(void)
        if (err)
                goto out1;
 
-       rpc_proc_register(&init_net, &nfs_rpcstat);
-
        err = register_nfs_fs();
        if (err)
                goto out0;
 
        return 0;
 out0:
-       rpc_proc_unregister(&init_net, "nfs");
        nfs_destroy_directcache();
 out1:
        nfs_destroy_writepagecache();
@@ -2524,7 +2525,6 @@ static void __exit exit_nfs_fs(void)
        nfs_destroy_inodecache();
        nfs_destroy_nfspagecache();
        unregister_pernet_subsys(&nfs_net_ops);
-       rpc_proc_unregister(&init_net, "nfs");
        unregister_nfs_fs();
        nfs_fs_proc_exit();
        nfsiod_stop();
index e3722ce6722e247e0da7c55a8bb3dd049c6cbcb6..06253695fe53f01708aed390004cbbd9651c5d57 100644 (file)
@@ -449,8 +449,6 @@ int nfs_try_get_tree(struct fs_context *);
 int nfs_get_tree_common(struct fs_context *);
 void nfs_kill_super(struct super_block *);
 
-extern struct rpc_stat nfs_rpcstat;
-
 extern int __init register_nfs_fs(void);
 extern void __exit unregister_nfs_fs(void);
 extern bool nfs_sb_active(struct super_block *sb);
index c8374f74dce1142289ed717c5281b4600a2a44e5..a68b21603ea9a867ba513e2a667b08fbc6d80dd8 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/nfs4.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <linux/sunrpc/stats.h>
 
 struct bl_dev_msg {
        int32_t status;
@@ -34,6 +35,7 @@ struct nfs_net {
        struct nfs_netns_client *nfs_client;
        spinlock_t nfs_client_lock;
        ktime_t boot_time;
+       struct rpc_stat rpcstats;
 #ifdef CONFIG_PROC_FS
        struct proc_dir_entry *proc_nfsfs;
 #endif
index 674c012868b1a250b869e6586496db24915ce7da..b0c8a39c2bbdeab011a468a6b267d20a8d8ce91e 100644 (file)
@@ -111,6 +111,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
        cl_init.hostname = buf;
 
        switch (ds_proto) {
+       case XPRT_TRANSPORT_RDMA:
        case XPRT_TRANSPORT_TCP:
        case XPRT_TRANSPORT_TCP_TLS:
                if (mds_clp->cl_nconnect > 1)
index b59876b01a1e3c15206b7d12528e3e42312d3a42..0282d93c8bccb386d744017e9d2741abcb9a5f13 100644 (file)
@@ -55,11 +55,14 @@ int nfs42_proc_removexattr(struct inode *inode, const char *name);
  * They would be 7 bytes long in the eventual buffer ("user.x\0"), and
  * 8 bytes long XDR-encoded.
  *
- * Include the trailing eof word as well.
+ * Include the trailing eof word as well and make the result a multiple
+ * of 4 bytes.
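+ *
+ * E.g. with XATTR_USER_PREFIX_LEN + 2 == 7:
+ *   buflen = 7: 8 * 7 / 7 + 4 = 12       (already a multiple of 4)
+ *   buflen = 8: 8 * 8 / 7 + 4 = 13 -> 16 (rounded up by (size + 3) & ~3)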
  */
 static inline u32 nfs42_listxattr_xdrsize(u32 buflen)
 {
-       return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4;
+       u32 size = 8 * buflen / (XATTR_USER_PREFIX_LEN + 2) + 4;
+
+       return (size + 3) & ~3;
 }
 #endif /* CONFIG_NFS_V4_2 */
 #endif /* __LINUX_FS_NFS_NFS4_2_H */
index 6ff41ceb9f1c770cfe0af2f032bfa3f23d95e290..7024230f0d1db2f2b464ddd1c79a764e389f437e 100644 (file)
@@ -120,7 +120,6 @@ struct nfs4_state_owner {
        unsigned long        so_flags;
        struct list_head     so_states;
        struct nfs_seqid_counter so_seqid;
-       seqcount_spinlock_t  so_reclaim_seqcount;
        struct mutex         so_delegreturn_mutex;
 };
 
index 11e3a285594c231c6b887dc53edbe52ef8519b23..84573df5cf5ae57616efb4d3c1b8c7556d30bc47 100644 (file)
@@ -924,6 +924,7 @@ static int nfs4_set_client(struct nfs_server *server,
        else
                cl_init.max_connect = max_connect;
        switch (proto) {
+       case XPRT_TRANSPORT_RDMA:
        case XPRT_TRANSPORT_TCP:
        case XPRT_TRANSPORT_TCP_TLS:
                cl_init.nconnect = nconnect;
@@ -1000,6 +1001,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
        cl_init.hostname = buf;
 
        switch (ds_proto) {
+       case XPRT_TRANSPORT_RDMA:
        case XPRT_TRANSPORT_TCP:
        case XPRT_TRANSPORT_TCP_TLS:
                if (mds_clp->cl_nconnect > 1) {
index 815996cb27fc4589bed01827c086b32e766f0bc0..ea390db94b622f5332f5126ea15f95bc8131344e 100644 (file)
@@ -3069,10 +3069,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
        fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx);
        struct inode *dir = d_inode(opendata->dir);
        unsigned long dir_verifier;
-       unsigned int seq;
        int ret;
 
-       seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
        dir_verifier = nfs_save_change_attribute(dir);
 
        ret = _nfs4_proc_open(opendata, ctx);
@@ -3125,11 +3123,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
        if (ret != 0)
                goto out;
 
-       if (d_inode(dentry) == state->inode) {
+       if (d_inode(dentry) == state->inode)
                nfs_inode_attach_open_context(ctx);
-               if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
-                       nfs4_schedule_stateid_recovery(server, state);
-       }
 
 out:
        if (!opendata->cancelled) {
@@ -8973,10 +8968,12 @@ try_again:
                return;
 
        status = task->tk_status;
-       if (status == 0)
+       if (status == 0) {
                status = nfs4_detect_session_trunking(adata->clp,
                                task->tk_msg.rpc_resp, xprt);
-
+               trace_nfs4_trunked_exchange_id(adata->clp,
+                       xprt->address_strings[RPC_DISPLAY_ADDR], status);
+       }
        if (status == 0)
                rpc_clnt_xprt_switch_add_xprt(clnt, xprt);
        else if (status != -NFS4ERR_DELAY && rpc_clnt_xprt_switch_has_addr(clnt,
@@ -10618,29 +10615,33 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
 static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
 {
        ssize_t error, error2, error3;
+       size_t left = size;
 
-       error = generic_listxattr(dentry, list, size);
+       error = generic_listxattr(dentry, list, left);
        if (error < 0)
                return error;
        if (list) {
                list += error;
-               size -= error;
+               left -= error;
        }
 
-       error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
+       error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, left);
        if (error2 < 0)
                return error2;
 
        if (list) {
                list += error2;
-               size -= error2;
+               left -= error2;
        }
 
-       error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size);
+       error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left);
        if (error3 < 0)
                return error3;
 
-       return error + error2 + error3;
+       error += error2 + error3;
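+       /*
+        * With a NULL list (the size == 0 probe), each helper reported only
+        * the length it needs, so return the total.  Otherwise a combined
+        * length larger than the buffer must fail with -ERANGE, e.g.
+        * size = 16 with error = 20 now errors out instead of returning 20.
+        */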
+       if (size && error > size)
+               return -ERANGE;
+       return error;
 }
 
 static void nfs4_enable_swap(struct inode *inode)
index 8cfabdbda33694912652eeb736126c673bdb745a..662e86ea3a2ddadb7331f9e122c537ca075830e9 100644 (file)
@@ -513,7 +513,6 @@ nfs4_alloc_state_owner(struct nfs_server *server,
        nfs4_init_seqid_counter(&sp->so_seqid);
        atomic_set(&sp->so_count, 1);
        INIT_LIST_HEAD(&sp->so_lru);
-       seqcount_spinlock_init(&sp->so_reclaim_seqcount, &sp->so_lock);
        mutex_init(&sp->so_delegreturn_mutex);
        return sp;
 }
@@ -1667,7 +1666,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp,
         * server that doesn't support a grace period.
         */
        spin_lock(&sp->so_lock);
-       raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
 restart:
        list_for_each_entry(state, &sp->so_states, open_states) {
                if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
@@ -1735,7 +1733,6 @@ restart:
                spin_lock(&sp->so_lock);
                goto restart;
        }
-       raw_write_seqcount_end(&sp->so_reclaim_seqcount);
        spin_unlock(&sp->so_lock);
 #ifdef CONFIG_NFS_V4_2
        if (found_ssc_copy_state)
@@ -1745,7 +1742,6 @@ restart:
 out_err:
        nfs4_put_open_state(state);
        spin_lock(&sp->so_lock);
-       raw_write_seqcount_end(&sp->so_reclaim_seqcount);
        spin_unlock(&sp->so_lock);
        return status;
 }
@@ -1928,9 +1924,12 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
        struct nfs_server *server;
        struct rb_node *pos;
        LIST_HEAD(freeme);
-       int status = 0;
        int lost_locks = 0;
+       int status;
 
+       status = nfs4_begin_drain_session(clp);
+       if (status < 0)
+               return status;
 restart:
        rcu_read_lock();
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
@@ -2694,6 +2693,9 @@ static void nfs4_state_manager(struct nfs_client *clp)
                /* Detect expired delegations... */
                if (test_and_clear_bit(NFS4CLNT_DELEGATION_EXPIRED, &clp->cl_state)) {
                        section = "detect expired delegations";
+                       status = nfs4_begin_drain_session(clp);
+                       if (status < 0)
+                               goto out_error;
                        nfs_reap_expired_delegations(clp);
                        continue;
                }
index d09bcfd7db89488eec9291c4e985e13953f079d3..8da5a9c000f42c3eda0d3b1f35ca45be96a253ae 100644 (file)
@@ -145,6 +145,7 @@ static int do_nfs4_mount(struct nfs_server *server,
                         const char *export_path)
 {
        struct nfs_fs_context *root_ctx;
+       struct nfs_fs_context *ctx;
        struct fs_context *root_fc;
        struct vfsmount *root_mnt;
        struct dentry *dentry;
@@ -157,6 +158,12 @@ static int do_nfs4_mount(struct nfs_server *server,
                .dirfd  = -1,
        };
 
+       struct fs_parameter param_fsc = {
+               .key    = "fsc",
+               .type   = fs_value_is_string,
+               .dirfd  = -1,
+       };
+
        if (IS_ERR(server))
                return PTR_ERR(server);
 
@@ -168,9 +175,26 @@ static int do_nfs4_mount(struct nfs_server *server,
        kfree(root_fc->source);
        root_fc->source = NULL;
 
+       ctx = nfs_fc2context(fc);
        root_ctx = nfs_fc2context(root_fc);
        root_ctx->internal = true;
        root_ctx->server = server;
+
+       if (ctx->fscache_uniq) {
+               len = strlen(ctx->fscache_uniq);
+               param_fsc.size = len;
+               param_fsc.string = kmemdup_nul(ctx->fscache_uniq, len, GFP_KERNEL);
+               if (param_fsc.string == NULL) {
+                       put_fs_context(root_fc);
+                       return -ENOMEM;
+               }
+               ret = vfs_parse_fs_param(root_fc, &param_fsc);
+               kfree(param_fsc.string);
+               if (ret < 0) {
+                       put_fs_context(root_fc);
+                       return ret;
+               }
+       }
        /* We leave export_path unset as it's not used to find the root. */
 
        len = strlen(hostname) + 5;
index d9ac556bebcf685c50a026594823d434202ab903..d22c6670f770f18949ed908e8e64572c174aeb41 100644 (file)
@@ -28,4 +28,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist);
 EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error);
 EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error);
 EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(fl_getdevinfo);
 #endif
index fd7cb15b08b27628f205cdb4284b9035a7d3172a..10985a4b8259dd543d4b499dde282f15ce7afaab 100644 (file)
@@ -77,6 +77,36 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session);
 DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence);
 DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete);
 
+TRACE_EVENT(nfs4_trunked_exchange_id,
+               TP_PROTO(
+                       const struct nfs_client *clp,
+                       const char *addr,
+                       int error
+               ),
+
+               TP_ARGS(clp, addr, error),
+
+               TP_STRUCT__entry(
+                       __string(main_addr, clp->cl_hostname)
+                       __string(trunk_addr, addr)
+                       __field(unsigned long, error)
+               ),
+
+               TP_fast_assign(
+                       __entry->error = error < 0 ? -error : 0;
+                       __assign_str(main_addr, clp->cl_hostname);
+                       __assign_str(trunk_addr, addr);
+               ),
+
+               TP_printk(
+                       "error=%ld (%s) main_addr=%s trunk_addr=%s",
+                       -__entry->error,
+                       show_nfs4_status(__entry->error),
+                       __get_str(main_addr),
+                       __get_str(trunk_addr)
+               )
+);
+
 TRACE_EVENT(nfs4_sequence_done,
                TP_PROTO(
                        const struct nfs4_session *session,
@@ -1991,6 +2021,34 @@ DECLARE_EVENT_CLASS(nfs4_deviceid_status,
 DEFINE_PNFS_DEVICEID_STATUS(nfs4_getdeviceinfo);
 DEFINE_PNFS_DEVICEID_STATUS(nfs4_find_deviceid);
 
+TRACE_EVENT(fl_getdevinfo,
+               TP_PROTO(
+                       const struct nfs_server *server,
+                       const struct nfs4_deviceid *deviceid,
+                       char *ds_remotestr
+               ),
+               TP_ARGS(server, deviceid, ds_remotestr),
+
+               TP_STRUCT__entry(
+                       __string(mds_addr, server->nfs_client->cl_hostname)
+                       __array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE)
+                       __string(ds_ips, ds_remotestr)
+               ),
+
+               TP_fast_assign(
+                       __assign_str(mds_addr, server->nfs_client->cl_hostname);
+                       __assign_str(ds_ips, ds_remotestr);
+                       memcpy(__entry->deviceid, deviceid->data,
+                              NFS4_DEVICEID4_SIZE);
+               ),
+               TP_printk(
+                       "deviceid=%s, mds_addr=%s, ds_ips=%s",
+                       __print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE),
+                       __get_str(mds_addr),
+                       __get_str(ds_ips)
+               )
+);
+
 DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
                TP_PROTO(
                        const struct nfs_pgio_header *hdr
index 7600100ba26f02884812653db5d0231c4e63d0d2..432612d22437423ce5e0d38768c73de47f500074 100644 (file)
@@ -175,10 +175,10 @@ static int __init root_nfs_cat(char *dest, const char *src,
        size_t len = strlen(dest);
 
        if (len && dest[len - 1] != ',')
-               if (strlcat(dest, ",", destlen) > destlen)
+               if (strlcat(dest, ",", destlen) >= destlen)
                        return -1;
 
-       if (strlcat(dest, src, destlen) > destlen)
+       if (strlcat(dest, src, destlen) >= destlen)
                return -1;
        return 0;
 }
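
The root_nfs_cat() fix above turns on the boundary case of strlcat(): it
returns the total length of the string it tried to create, so a return value
equal to destlen already means the result was truncated to destlen - 1
characters plus the NUL terminator, which the old strict ">" test missed. A
self-contained sketch of that off-by-one, using a stand-in implementation
(named strlcat_demo because the C library does not provide strlcat):

    #include <stdio.h>
    #include <string.h>

    /* Minimal stand-in with the kernel/BSD strlcat contract: returns the
     * total length of the string it tried to create. */
    static size_t strlcat_demo(char *dst, const char *src, size_t size)
    {
            size_t dlen = strlen(dst), slen = strlen(src);

            if (dlen < size)
                    snprintf(dst + dlen, size - dlen, "%s", src);
            return dlen + slen;
    }

    int main(void)
    {
            char dest[8] = "abc";
            size_t r = strlcat_demo(dest, ",defg", sizeof(dest));

            /* r == 8 == sizeof(dest): dest was truncated to "abc,def",
             * yet the old "r > destlen" test would have reported success. */
            printf("r=%zu dest=\"%s\"\n", r, dest);
            return 0;
    }
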
index 0c0fed1ecd0bf0d0938dec7b7c8cdca30e65954c..a5cc6199127f5876faae1c27e36c128d9eab9996 100644 (file)
@@ -1999,6 +1999,14 @@ pnfs_update_layout(struct inode *ino,
        }
 
 lookup_again:
+       if (!nfs4_valid_open_stateid(ctx->state)) {
+               trace_pnfs_update_layout(ino, pos, count,
+                                        iomode, lo, lseg,
+                                        PNFS_UPDATE_LAYOUT_INVALID_OPEN);
+               lseg = ERR_PTR(-EIO);
+               goto out;
+       }
+
        lseg = ERR_PTR(nfs4_client_recover_expired_lease(clp));
        if (IS_ERR(lseg))
                goto out;
index afd23910f3bffc52b7d4505e4bb7b6eaa0b632fa..88e061bd711b746afcd46878e518f870fae19b0c 100644 (file)
@@ -919,6 +919,8 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
        dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
 
        list_for_each_entry(da, &ds->ds_addrs, da_node) {
+               char servername[48];
+
                dprintk("%s: DS %s: trying address %s\n",
                        __func__, ds->ds_remotestr, da->da_remotestr);
 
@@ -929,6 +931,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
                                .dstaddr = (struct sockaddr *)&da->da_addr,
                                .addrlen = da->da_addrlen,
                                .servername = clp->cl_hostname,
+                               .xprtsec = clp->cl_xprtsec,
                        };
                        struct nfs4_add_xprt_data xprtdata = {
                                .clp = clp,
@@ -938,10 +941,45 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
                                .data = &xprtdata,
                        };
 
-                       if (da->da_transport != clp->cl_proto)
+                       if (da->da_transport != clp->cl_proto &&
+                                       clp->cl_proto != XPRT_TRANSPORT_TCP_TLS)
                                continue;
+                       if (da->da_transport == XPRT_TRANSPORT_TCP &&
+                               mds_srv->nfs_client->cl_proto ==
+                                       XPRT_TRANSPORT_TCP_TLS) {
+                               struct sockaddr *addr =
+                                       (struct sockaddr *)&da->da_addr;
+                               struct sockaddr_in *sin =
+                                       (struct sockaddr_in *)&da->da_addr;
+                               struct sockaddr_in6 *sin6 =
+                                       (struct sockaddr_in6 *)&da->da_addr;
+
+                               /* For NFS with TLS we need to supply the
+                                * correct servername of the trunked transport,
+                                * not the servername of the main transport
+                                * stored in clp->cl_hostname, and set the
+                                * protocol to indicate the use of TLS.
+                                */
+                               servername[0] = '\0';
+                               switch (addr->sa_family) {
+                               case AF_INET:
+                                       snprintf(servername, sizeof(servername),
+                                               "%pI4", &sin->sin_addr.s_addr);
+                                       break;
+                               case AF_INET6:
+                                       snprintf(servername, sizeof(servername),
+                                               "%pI6", &sin6->sin6_addr);
+                                       break;
+                               default:
+                                       /* do not consider this address */
+                                       continue;
+                               }
+                               xprt_args.ident = XPRT_TRANSPORT_TCP_TLS;
+                               xprt_args.servername = servername;
+                       }
                        if (da->da_addr.ss_family != clp->cl_addr.ss_family)
                                continue;
+
                        /**
                        * Test this address for session trunking and
                        * add as an alias
@@ -953,6 +991,10 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
                        if (xprtdata.cred)
                                put_cred(xprtdata.cred);
                } else {
+                       if (da->da_transport == XPRT_TRANSPORT_TCP &&
+                               mds_srv->nfs_client->cl_proto ==
+                                       XPRT_TRANSPORT_TCP_TLS)
+                               da->da_transport = XPRT_TRANSPORT_TCP_TLS;
                        clp = nfs4_set_ds_client(mds_srv,
                                                &da->da_addr,
                                                da->da_addrlen,
index 7dc21a48e3e7b6a0b86f06148163450e79564f82..a142287d86f68ed411dce6f9c410e41948c570b2 100644 (file)
@@ -305,6 +305,8 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
        new = nfs_page_create_from_folio(ctx, folio, 0, aligned_len);
        if (IS_ERR(new)) {
                error = PTR_ERR(new);
+               if (nfs_netfs_folio_unlock(folio))
+                       folio_unlock(folio);
                goto out;
        }
 
index 075b31c93f87d0c8a4aa9699187e94bcdba0aeab..dc03f98f7616a8ba4f7c4d65cbfec0aabe98f942 100644 (file)
@@ -516,8 +516,16 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
        else
                nfs_show_nfsv4_options(m, nfss, showdefaults);
 
-       if (nfss->options & NFS_OPTION_FSCACHE)
+       if (nfss->options & NFS_OPTION_FSCACHE) {
+#ifdef CONFIG_NFS_FSCACHE
+               if (nfss->fscache_uniq)
+                       seq_printf(m, ",fsc=%s", nfss->fscache_uniq);
+               else
+                       seq_puts(m, ",fsc");
+#else
                seq_puts(m, ",fsc");
+#endif
+       }
 
        if (nfss->options & NFS_OPTION_MIGRATION)
                seq_puts(m, ",migration");
index 84bb852645728b3edf427c5ac1020e38f329f325..5de85d725fb95fdd19fe814aba607c5249267a6a 100644 (file)
@@ -667,10 +667,6 @@ static int nfs_writepage_locked(struct folio *folio,
        struct inode *inode = folio_file_mapping(folio)->host;
        int err;
 
-       if (wbc->sync_mode == WB_SYNC_NONE &&
-           NFS_SERVER(inode)->write_congested)
-               return AOP_WRITEPAGE_ACTIVATE;
-
        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
        nfs_pageio_init_write(&pgio, inode, 0, false,
                              &nfs_async_write_completion_ops);
@@ -1650,7 +1646,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
                                       !atomic_read(&cinfo->rpcs_out));
 }
 
-static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
 {
        atomic_inc(&cinfo->rpcs_out);
 }
index 7342de296ec3c6a8486da9f6bb0b16eeded7ca5c..89caef7513db3509b1a83090324b47536a3f8e5e 100644 (file)
@@ -525,54 +525,55 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
                ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh);
                if (ret < 0)
                        return ret;
-               desc_kaddr = kmap(desc_bh->b_page);
+               desc_kaddr = kmap_local_page(desc_bh->b_page);
                desc = nilfs_palloc_block_get_group_desc(
                        inode, group, desc_bh, desc_kaddr);
                n = nilfs_palloc_rest_groups_in_desc_block(inode, group,
                                                           maxgroup);
-               for (j = 0; j < n; j++, desc++, group++) {
+               for (j = 0; j < n; j++, desc++, group++, group_offset = 0) {
                        lock = nilfs_mdt_bgl_lock(inode, group);
-                       if (nilfs_palloc_group_desc_nfrees(desc, lock) > 0) {
-                               ret = nilfs_palloc_get_bitmap_block(
-                                       inode, group, 1, &bitmap_bh);
-                               if (ret < 0)
-                                       goto out_desc;
-                               bitmap_kaddr = kmap(bitmap_bh->b_page);
-                               bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
-                               pos = nilfs_palloc_find_available_slot(
-                                       bitmap, group_offset,
-                                       entries_per_group, lock);
-                               if (pos >= 0) {
-                                       /* found a free entry */
-                                       nilfs_palloc_group_desc_add_entries(
-                                               desc, lock, -1);
-                                       req->pr_entry_nr =
-                                               entries_per_group * group + pos;
-                                       kunmap(desc_bh->b_page);
-                                       kunmap(bitmap_bh->b_page);
-
-                                       req->pr_desc_bh = desc_bh;
-                                       req->pr_bitmap_bh = bitmap_bh;
-                                       return 0;
-                               }
-                               kunmap(bitmap_bh->b_page);
-                               brelse(bitmap_bh);
+                       if (nilfs_palloc_group_desc_nfrees(desc, lock) == 0)
+                               continue;
+
+                       kunmap_local(desc_kaddr);
+                       ret = nilfs_palloc_get_bitmap_block(inode, group, 1,
+                                                           &bitmap_bh);
+                       if (unlikely(ret < 0)) {
+                               brelse(desc_bh);
+                               return ret;
                        }
 
-                       group_offset = 0;
+                       desc_kaddr = kmap_local_page(desc_bh->b_page);
+                       desc = nilfs_palloc_block_get_group_desc(
+                               inode, group, desc_bh, desc_kaddr);
+
+                       bitmap_kaddr = kmap_local_page(bitmap_bh->b_page);
+                       bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
+                       pos = nilfs_palloc_find_available_slot(
+                               bitmap, group_offset, entries_per_group, lock);
+                       kunmap_local(bitmap_kaddr);
+                       if (pos >= 0)
+                               goto found;
+
+                       brelse(bitmap_bh);
                }
 
-               kunmap(desc_bh->b_page);
+               kunmap_local(desc_kaddr);
                brelse(desc_bh);
        }
 
        /* no entries left */
        return -ENOSPC;
 
- out_desc:
-       kunmap(desc_bh->b_page);
-       brelse(desc_bh);
-       return ret;
+found:
+       /* found a free entry */
+       nilfs_palloc_group_desc_add_entries(desc, lock, -1);
+       req->pr_entry_nr = entries_per_group * group + pos;
+       kunmap_local(desc_kaddr);
+
+       req->pr_desc_bh = desc_bh;
+       req->pr_bitmap_bh = bitmap_bh;
+       return 0;
 }
 
 /**
@@ -606,10 +607,10 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
        spinlock_t *lock;
 
        group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
-       desc_kaddr = kmap(req->pr_desc_bh->b_page);
+       desc_kaddr = kmap_local_page(req->pr_desc_bh->b_page);
        desc = nilfs_palloc_block_get_group_desc(inode, group,
                                                 req->pr_desc_bh, desc_kaddr);
-       bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
+       bitmap_kaddr = kmap_local_page(req->pr_bitmap_bh->b_page);
        bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
        lock = nilfs_mdt_bgl_lock(inode, group);
 
@@ -621,8 +622,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
        else
                nilfs_palloc_group_desc_add_entries(desc, lock, 1);
 
-       kunmap(req->pr_bitmap_bh->b_page);
-       kunmap(req->pr_desc_bh->b_page);
+       kunmap_local(bitmap_kaddr);
+       kunmap_local(desc_kaddr);
 
        mark_buffer_dirty(req->pr_desc_bh);
        mark_buffer_dirty(req->pr_bitmap_bh);
@@ -647,10 +648,10 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
        spinlock_t *lock;
 
        group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset);
-       desc_kaddr = kmap(req->pr_desc_bh->b_page);
+       desc_kaddr = kmap_local_page(req->pr_desc_bh->b_page);
        desc = nilfs_palloc_block_get_group_desc(inode, group,
                                                 req->pr_desc_bh, desc_kaddr);
-       bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page);
+       bitmap_kaddr = kmap_local_page(req->pr_bitmap_bh->b_page);
        bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh);
        lock = nilfs_mdt_bgl_lock(inode, group);
 
@@ -662,8 +663,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
        else
                nilfs_palloc_group_desc_add_entries(desc, lock, 1);
 
-       kunmap(req->pr_bitmap_bh->b_page);
-       kunmap(req->pr_desc_bh->b_page);
+       kunmap_local(bitmap_kaddr);
+       kunmap_local(desc_kaddr);
 
        brelse(req->pr_bitmap_bh);
        brelse(req->pr_desc_bh);
@@ -755,7 +756,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
                /* Get the first entry number of the group */
                group_min_nr = (__u64)group * epg;
 
-               bitmap_kaddr = kmap(bitmap_bh->b_page);
+               bitmap_kaddr = kmap_local_page(bitmap_bh->b_page);
                bitmap = bitmap_kaddr + bh_offset(bitmap_bh);
                lock = nilfs_mdt_bgl_lock(inode, group);
 
@@ -801,7 +802,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
                        entry_start = rounddown(group_offset, epb);
                } while (true);
 
-               kunmap(bitmap_bh->b_page);
+               kunmap_local(bitmap_kaddr);
                mark_buffer_dirty(bitmap_bh);
                brelse(bitmap_bh);
 
@@ -815,11 +816,11 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
                                           inode->i_ino);
                }
 
-               desc_kaddr = kmap_atomic(desc_bh->b_page);
+               desc_kaddr = kmap_local_page(desc_bh->b_page);
                desc = nilfs_palloc_block_get_group_desc(
                        inode, group, desc_bh, desc_kaddr);
                nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n);
-               kunmap_atomic(desc_kaddr);
+               kunmap_local(desc_kaddr);
                mark_buffer_dirty(desc_bh);
                nilfs_mdt_mark_dirty(inode);
                brelse(desc_bh);
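
The kmap()/kmap_atomic() conversions in this file, and in the nilfs2 files
below, all rely on the same kmap_local_page() contract: the mapping is
CPU-local, must be released in strict reverse (LIFO) order, and is undone
with kunmap_local() on the mapped address rather than on the page, which is
why each kunmap(bh->b_page) above becomes kunmap_local(kaddr). A minimal
sketch of the pairing (page_a and page_b are hypothetical):

    /* Strictly nested local kmaps; unmap in reverse order of mapping. */
    void *a = kmap_local_page(page_a);
    void *b = kmap_local_page(page_b);

    /* ... access both mappings; unlike kmap_atomic(), preemption
     * remains enabled while they are held ... */

    kunmap_local(b);
    kunmap_local(a);
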
index 7a8f166f2c8d841129b2208dd4bf701f9b885deb..383f0afa2cea367f5da038ed4875c88cb97ea5e1 100644 (file)
@@ -548,13 +548,10 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
  */
 void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
 {
-       down_write(&bmap->b_sem);
        memcpy(raw_inode->i_bmap, bmap->b_u.u_data,
               NILFS_INODE_BMAP_SIZE * sizeof(__le64));
        if (bmap->b_inode->i_ino == NILFS_DAT_INO)
                bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
-
-       up_write(&bmap->b_sem);
 }
 
 void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
index 13592e82eaf68b2d92851e0218a99bd7842af9c4..65659fa0372e6ca3ae53590b7dadd5e19cd2d728 100644 (file)
@@ -724,7 +724,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
                dat = nilfs_bmap_get_dat(btree);
                ret = nilfs_dat_translate(dat, ptr, &blocknr);
                if (ret < 0)
-                       goto out;
+                       goto dat_error;
                ptr = blocknr;
        }
        cnt = 1;
@@ -743,7 +743,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
                        if (dat) {
                                ret = nilfs_dat_translate(dat, ptr2, &blocknr);
                                if (ret < 0)
-                                       goto out;
+                                       goto dat_error;
                                ptr2 = blocknr;
                        }
                        if (ptr2 != ptr + cnt || ++cnt == maxblocks)
@@ -781,6 +781,11 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
  out:
        nilfs_btree_free_path(path);
        return ret;
+
+ dat_error:
+       if (ret == -ENOENT)
+               ret = -EINVAL;  /* Notify bmap layer of metadata corruption */
+       goto out;
 }
 
 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
index 39136637f7155bc7bee21e1fbb8c08ebab8621ad..69a5cced1e84b947683bcc1deab3363ea0be8fb9 100644 (file)
@@ -28,7 +28,7 @@ nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno)
 {
        __u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1;
 
-       do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
+       tcno = div64_ul(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
        return (unsigned long)tcno;
 }
 
@@ -187,35 +187,90 @@ static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
 }
 
 /**
- * nilfs_cpfile_get_checkpoint - get a checkpoint
- * @cpfile: inode of checkpoint file
- * @cno: checkpoint number
- * @create: create flag
- * @cpp: pointer to a checkpoint
- * @bhp: pointer to a buffer head
- *
- * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint
- * specified by @cno. A new checkpoint will be created if @cno is the current
- * checkpoint number and @create is nonzero.
- *
- * Return Value: On success, 0 is returned, and the checkpoint and the
- * buffer head of the buffer on which the checkpoint is located are stored in
- * the place pointed by @cpp and @bhp, respectively. On error, one of the
- * following negative error codes is returned.
+ * nilfs_cpfile_read_checkpoint - read a checkpoint entry in cpfile
+ * @cpfile: checkpoint file inode
+ * @cno:    number of checkpoint entry to read
+ * @root:   nilfs root object
+ * @ifile:  ifile's inode to read and attach to @root
  *
- * %-EIO - I/O error.
+ * This function imports checkpoint information from the checkpoint file and
+ * stores it in the inode file given by @ifile and in the nilfs root object
+ * given by @root.
  *
- * %-ENOMEM - Insufficient amount of memory available.
+ * Return: 0 on success, or one of the following negative error codes on failure.
+ * * %-EINVAL  - Invalid checkpoint.
+ * * %-ENOMEM  - Insufficient memory available.
+ * * %-EIO     - I/O error (including metadata corruption).
+ */
+int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno,
+                                struct nilfs_root *root, struct inode *ifile)
+{
+       struct buffer_head *cp_bh;
+       struct nilfs_checkpoint *cp;
+       void *kaddr;
+       int ret;
+
+       if (cno < 1 || cno > nilfs_mdt_cno(cpfile))
+               return -EINVAL;
+
+       down_read(&NILFS_MDT(cpfile)->mi_sem);
+       ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
+       if (unlikely(ret < 0)) {
+               if (ret == -ENOENT)
+                       ret = -EINVAL;
+               goto out_sem;
+       }
+
+       kaddr = kmap_local_page(cp_bh->b_page);
+       cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+       if (nilfs_checkpoint_invalid(cp)) {
+               ret = -EINVAL;
+               goto put_cp;
+       }
+
+       ret = nilfs_read_inode_common(ifile, &cp->cp_ifile_inode);
+       if (unlikely(ret)) {
+               /*
+                * Since this inode is on a checkpoint entry, treat errors
+                * as metadata corruption.
+                */
+               nilfs_err(cpfile->i_sb,
+                         "ifile inode (checkpoint number=%llu) corrupted",
+                         (unsigned long long)cno);
+               ret = -EIO;
+               goto put_cp;
+       }
+
+       /* Configure the nilfs root object */
+       atomic64_set(&root->inodes_count, le64_to_cpu(cp->cp_inodes_count));
+       atomic64_set(&root->blocks_count, le64_to_cpu(cp->cp_blocks_count));
+       root->ifile = ifile;
+
+put_cp:
+       kunmap_local(kaddr);
+       brelse(cp_bh);
+out_sem:
+       up_read(&NILFS_MDT(cpfile)->mi_sem);
+       return ret;
+}
+
+/**
+ * nilfs_cpfile_create_checkpoint - create a checkpoint entry on cpfile
+ * @cpfile: checkpoint file inode
+ * @cno:    number of checkpoint to set up
  *
- * %-ENOENT - No such checkpoint.
+ * This function creates a checkpoint with the number specified by @cno on
+ * cpfile.  If the specified checkpoint entry already exists due to a past
+ * failure, it will be reused without returning an error.
+ * In either case, the buffer of the block containing the checkpoint entry
+ * and the cpfile inode are made dirty for inclusion in the write log.
  *
- * %-EINVAL - invalid checkpoint.
+ * Return: 0 on success, or one of the following negative error codes on failure.
+ * * %-ENOMEM  - Insufficient memory available.
+ * * %-EIO     - I/O error (including metadata corruption).
+ * * %-EROFS   - Read-only filesystem.
  */
-int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
-                               __u64 cno,
-                               int create,
-                               struct nilfs_checkpoint **cpp,
-                               struct buffer_head **bhp)
+int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno)
 {
        struct buffer_head *header_bh, *cp_bh;
        struct nilfs_cpfile_header *header;
@@ -223,70 +278,128 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
        void *kaddr;
        int ret;
 
-       if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) ||
-                    (cno < nilfs_mdt_cno(cpfile) && create)))
-               return -EINVAL;
+       if (WARN_ON_ONCE(cno < 1))
+               return -EIO;
 
        down_write(&NILFS_MDT(cpfile)->mi_sem);
-
        ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
-       if (ret < 0)
+       if (unlikely(ret < 0)) {
+               if (ret == -ENOENT) {
+                       nilfs_error(cpfile->i_sb,
+                                   "checkpoint creation failed due to metadata corruption.");
+                       ret = -EIO;
+               }
                goto out_sem;
-       ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create, &cp_bh);
-       if (ret < 0)
+       }
+       ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 1, &cp_bh);
+       if (unlikely(ret < 0))
                goto out_header;
-       kaddr = kmap(cp_bh->b_page);
+
+       kaddr = kmap_local_page(cp_bh->b_page);
        cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
        if (nilfs_checkpoint_invalid(cp)) {
-               if (!create) {
-                       kunmap(cp_bh->b_page);
-                       brelse(cp_bh);
-                       ret = -ENOENT;
-                       goto out_header;
-               }
                /* a newly-created checkpoint */
                nilfs_checkpoint_clear_invalid(cp);
                if (!nilfs_cpfile_is_in_first(cpfile, cno))
                        nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
                                                                 kaddr, 1);
-               mark_buffer_dirty(cp_bh);
+               kunmap_local(kaddr);
 
-               kaddr = kmap_atomic(header_bh->b_page);
+               kaddr = kmap_local_page(header_bh->b_page);
                header = nilfs_cpfile_block_get_header(cpfile, header_bh,
                                                       kaddr);
                le64_add_cpu(&header->ch_ncheckpoints, 1);
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                mark_buffer_dirty(header_bh);
-               nilfs_mdt_mark_dirty(cpfile);
+       } else {
+               kunmap_local(kaddr);
        }
 
-       if (cpp != NULL)
-               *cpp = cp;
-       *bhp = cp_bh;
+       /* Force the buffer and the inode to become dirty */
+       mark_buffer_dirty(cp_bh);
+       brelse(cp_bh);
+       nilfs_mdt_mark_dirty(cpfile);
 
- out_header:
+out_header:
        brelse(header_bh);
 
- out_sem:
+out_sem:
        up_write(&NILFS_MDT(cpfile)->mi_sem);
        return ret;
 }
 
 /**
- * nilfs_cpfile_put_checkpoint - put a checkpoint
- * @cpfile: inode of checkpoint file
- * @cno: checkpoint number
- * @bh: buffer head
+ * nilfs_cpfile_finalize_checkpoint - fill in a checkpoint entry in cpfile
+ * @cpfile: checkpoint file inode
+ * @cno:    checkpoint number
+ * @root:   nilfs root object
+ * @blkinc: number of blocks added by this checkpoint
+ * @ctime:  checkpoint creation time
+ * @minor:  minor checkpoint flag
+ *
+ * This function completes the checkpoint entry numbered by @cno in the
+ * cpfile with the data given by the arguments @root, @blkinc, @ctime, and
+ * @minor.
  *
- * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint
- * specified by @cno. @bh must be the buffer head which has been returned by
- * a previous call to nilfs_cpfile_get_checkpoint() with @cno.
+ * Return: 0 on success, or one of the following negative error codes on failure.
+ * * %-ENOMEM  - Insufficient memory available.
+ * * %-EIO     - I/O error (including metadata corruption).
  */
-void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno,
-                                struct buffer_head *bh)
+int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno,
+                                    struct nilfs_root *root, __u64 blkinc,
+                                    time64_t ctime, bool minor)
 {
-       kunmap(bh->b_page);
-       brelse(bh);
+       struct buffer_head *cp_bh;
+       struct nilfs_checkpoint *cp;
+       void *kaddr;
+       int ret;
+
+       if (WARN_ON_ONCE(cno < 1))
+               return -EIO;
+
+       down_write(&NILFS_MDT(cpfile)->mi_sem);
+       ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
+       if (unlikely(ret < 0)) {
+               if (ret == -ENOENT)
+                       goto error;
+               goto out_sem;
+       }
+
+       kaddr = kmap_local_page(cp_bh->b_page);
+       cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
+       if (unlikely(nilfs_checkpoint_invalid(cp))) {
+               kunmap_local(kaddr);
+               brelse(cp_bh);
+               goto error;
+       }
+
+       cp->cp_snapshot_list.ssl_next = 0;
+       cp->cp_snapshot_list.ssl_prev = 0;
+       cp->cp_inodes_count = cpu_to_le64(atomic64_read(&root->inodes_count));
+       cp->cp_blocks_count = cpu_to_le64(atomic64_read(&root->blocks_count));
+       cp->cp_nblk_inc = cpu_to_le64(blkinc);
+       cp->cp_create = cpu_to_le64(ctime);
+       cp->cp_cno = cpu_to_le64(cno);
+
+       if (minor)
+               nilfs_checkpoint_set_minor(cp);
+       else
+               nilfs_checkpoint_clear_minor(cp);
+
+       nilfs_write_inode_common(root->ifile, &cp->cp_ifile_inode);
+       nilfs_bmap_write(NILFS_I(root->ifile)->i_bmap, &cp->cp_ifile_inode);
+
+       kunmap_local(kaddr);
+       brelse(cp_bh);
+out_sem:
+       up_write(&NILFS_MDT(cpfile)->mi_sem);
+       return ret;
+
+error:
+       nilfs_error(cpfile->i_sb,
+                   "checkpoint finalization failed due to metadata corruption.");
+       ret = -EIO;
+       goto out_sem;
 }
 
 /**
@@ -347,7 +460,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
                        continue;
                }
 
-               kaddr = kmap_atomic(cp_bh->b_page);
+               kaddr = kmap_local_page(cp_bh->b_page);
                cp = nilfs_cpfile_block_get_checkpoint(
                        cpfile, cno, cp_bh, kaddr);
                nicps = 0;
@@ -369,7 +482,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
                                                cpfile, cp_bh, kaddr, nicps);
                                if (count == 0) {
                                        /* make hole */
-                                       kunmap_atomic(kaddr);
+                                       kunmap_local(kaddr);
                                        brelse(cp_bh);
                                        ret =
                                          nilfs_cpfile_delete_checkpoint_block(
@@ -384,18 +497,18 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
                        }
                }
 
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                brelse(cp_bh);
        }
 
        if (tnicps > 0) {
-               kaddr = kmap_atomic(header_bh->b_page);
+               kaddr = kmap_local_page(header_bh->b_page);
                header = nilfs_cpfile_block_get_header(cpfile, header_bh,
                                                       kaddr);
                le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
                mark_buffer_dirty(header_bh);
                nilfs_mdt_mark_dirty(cpfile);
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
        }
 
        brelse(header_bh);
@@ -447,7 +560,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
                }
                ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
 
-               kaddr = kmap_atomic(bh->b_page);
+               kaddr = kmap_local_page(bh->b_page);
                cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
                for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
                        if (!nilfs_checkpoint_invalid(cp)) {
@@ -457,7 +570,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
                                n++;
                        }
                }
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                brelse(bh);
        }
 
@@ -491,10 +604,10 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
                ret = nilfs_cpfile_get_header_block(cpfile, &bh);
                if (ret < 0)
                        goto out;
-               kaddr = kmap_atomic(bh->b_page);
+               kaddr = kmap_local_page(bh->b_page);
                header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
                curr = le64_to_cpu(header->ch_snapshot_list.ssl_next);
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                brelse(bh);
                if (curr == 0) {
                        ret = 0;
@@ -512,7 +625,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
                        ret = 0; /* No snapshots (started from a hole block) */
                goto out;
        }
-       kaddr = kmap_atomic(bh->b_page);
+       kaddr = kmap_local_page(bh->b_page);
        while (n < nci) {
                cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr);
                curr = ~(__u64)0; /* Terminator */
@@ -528,7 +641,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
 
                next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next);
                if (curr_blkoff != next_blkoff) {
-                       kunmap_atomic(kaddr);
+                       kunmap_local(kaddr);
                        brelse(bh);
                        ret = nilfs_cpfile_get_checkpoint_block(cpfile, next,
                                                                0, &bh);
@@ -536,12 +649,12 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
                                WARN_ON(ret == -ENOENT);
                                goto out;
                        }
-                       kaddr = kmap_atomic(bh->b_page);
+                       kaddr = kmap_local_page(bh->b_page);
                }
                curr = next;
                curr_blkoff = next_blkoff;
        }
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
        brelse(bh);
        *cnop = curr;
        ret = n;
@@ -650,24 +763,24 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
        ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
        if (ret < 0)
                goto out_sem;
-       kaddr = kmap_atomic(cp_bh->b_page);
+       kaddr = kmap_local_page(cp_bh->b_page);
        cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
        if (nilfs_checkpoint_invalid(cp)) {
                ret = -ENOENT;
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                goto out_cp;
        }
        if (nilfs_checkpoint_snapshot(cp)) {
                ret = 0;
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                goto out_cp;
        }
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
        if (ret < 0)
                goto out_cp;
-       kaddr = kmap_atomic(header_bh->b_page);
+       kaddr = kmap_local_page(header_bh->b_page);
        header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
        list = &header->ch_snapshot_list;
        curr_bh = header_bh;
@@ -679,13 +792,13 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
                prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev);
                curr = prev;
                if (curr_blkoff != prev_blkoff) {
-                       kunmap_atomic(kaddr);
+                       kunmap_local(kaddr);
                        brelse(curr_bh);
                        ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr,
                                                                0, &curr_bh);
                        if (ret < 0)
                                goto out_header;
-                       kaddr = kmap_atomic(curr_bh->b_page);
+                       kaddr = kmap_local_page(curr_bh->b_page);
                }
                curr_blkoff = prev_blkoff;
                cp = nilfs_cpfile_block_get_checkpoint(
@@ -693,7 +806,7 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
                list = &cp->cp_snapshot_list;
                prev = le64_to_cpu(list->ssl_prev);
        }
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        if (prev != 0) {
                ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0,
@@ -705,29 +818,29 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
                get_bh(prev_bh);
        }
 
-       kaddr = kmap_atomic(curr_bh->b_page);
+       kaddr = kmap_local_page(curr_bh->b_page);
        list = nilfs_cpfile_block_get_snapshot_list(
                cpfile, curr, curr_bh, kaddr);
        list->ssl_prev = cpu_to_le64(cno);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
-       kaddr = kmap_atomic(cp_bh->b_page);
+       kaddr = kmap_local_page(cp_bh->b_page);
        cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
        cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr);
        cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev);
        nilfs_checkpoint_set_snapshot(cp);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
-       kaddr = kmap_atomic(prev_bh->b_page);
+       kaddr = kmap_local_page(prev_bh->b_page);
        list = nilfs_cpfile_block_get_snapshot_list(
                cpfile, prev, prev_bh, kaddr);
        list->ssl_next = cpu_to_le64(cno);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
-       kaddr = kmap_atomic(header_bh->b_page);
+       kaddr = kmap_local_page(header_bh->b_page);
        header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
        le64_add_cpu(&header->ch_nsnapshots, 1);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        mark_buffer_dirty(prev_bh);
        mark_buffer_dirty(curr_bh);
@@ -768,23 +881,23 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
        ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh);
        if (ret < 0)
                goto out_sem;
-       kaddr = kmap_atomic(cp_bh->b_page);
+       kaddr = kmap_local_page(cp_bh->b_page);
        cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
        if (nilfs_checkpoint_invalid(cp)) {
                ret = -ENOENT;
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                goto out_cp;
        }
        if (!nilfs_checkpoint_snapshot(cp)) {
                ret = 0;
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                goto out_cp;
        }
 
        list = &cp->cp_snapshot_list;
        next = le64_to_cpu(list->ssl_next);
        prev = le64_to_cpu(list->ssl_prev);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
        if (ret < 0)
@@ -808,29 +921,29 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
                get_bh(prev_bh);
        }
 
-       kaddr = kmap_atomic(next_bh->b_page);
+       kaddr = kmap_local_page(next_bh->b_page);
        list = nilfs_cpfile_block_get_snapshot_list(
                cpfile, next, next_bh, kaddr);
        list->ssl_prev = cpu_to_le64(prev);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
-       kaddr = kmap_atomic(prev_bh->b_page);
+       kaddr = kmap_local_page(prev_bh->b_page);
        list = nilfs_cpfile_block_get_snapshot_list(
                cpfile, prev, prev_bh, kaddr);
        list->ssl_next = cpu_to_le64(next);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
-       kaddr = kmap_atomic(cp_bh->b_page);
+       kaddr = kmap_local_page(cp_bh->b_page);
        cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr);
        cp->cp_snapshot_list.ssl_next = cpu_to_le64(0);
        cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0);
        nilfs_checkpoint_clear_snapshot(cp);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
-       kaddr = kmap_atomic(header_bh->b_page);
+       kaddr = kmap_local_page(header_bh->b_page);
        header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr);
        le64_add_cpu(&header->ch_nsnapshots, -1);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        mark_buffer_dirty(next_bh);
        mark_buffer_dirty(prev_bh);
@@ -889,13 +1002,13 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
        ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
        if (ret < 0)
                goto out;
-       kaddr = kmap_atomic(bh->b_page);
+       kaddr = kmap_local_page(bh->b_page);
        cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
        if (nilfs_checkpoint_invalid(cp))
                ret = -ENOENT;
        else
                ret = nilfs_checkpoint_snapshot(cp);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
        brelse(bh);
 
  out:
@@ -972,12 +1085,12 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat)
        ret = nilfs_cpfile_get_header_block(cpfile, &bh);
        if (ret < 0)
                goto out_sem;
-       kaddr = kmap_atomic(bh->b_page);
+       kaddr = kmap_local_page(bh->b_page);
        header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr);
        cpstat->cs_cno = nilfs_mdt_cno(cpfile);
        cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints);
        cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
        brelse(bh);
 
  out_sem:
index edabb2dc57567c9d8340c0db4a71d9c8908d871d..f5b1d59289ebf887704e1d01b1563f53f0a63596 100644 (file)
 #include <linux/nilfs2_ondisk.h>       /* nilfs_inode, nilfs_checkpoint */
 
 
-int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int,
-                               struct nilfs_checkpoint **,
-                               struct buffer_head **);
-void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *);
+int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno,
+                                struct nilfs_root *root, struct inode *ifile);
+int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno);
+int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno,
+                                    struct nilfs_root *root, __u64 blkinc,
+                                    time64_t ctime, bool minor);
 int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64);
 int nilfs_cpfile_delete_checkpoint(struct inode *, __u64);
 int nilfs_cpfile_change_cpmode(struct inode *, __u64, int);
index 9cf6ba58f5859fe2a258c203e70921322fafab0a..180fc8d36213df3b73a5be9ad744bfefd97f4b47 100644 (file)
@@ -91,13 +91,13 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req)
        struct nilfs_dat_entry *entry;
        void *kaddr;
 
-       kaddr = kmap_atomic(req->pr_entry_bh->b_page);
+       kaddr = kmap_local_page(req->pr_entry_bh->b_page);
        entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
                                             req->pr_entry_bh, kaddr);
        entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
        entry->de_end = cpu_to_le64(NILFS_CNO_MAX);
        entry->de_blocknr = cpu_to_le64(0);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        nilfs_palloc_commit_alloc_entry(dat, req);
        nilfs_dat_commit_entry(dat, req);
@@ -115,13 +115,13 @@ static void nilfs_dat_commit_free(struct inode *dat,
        struct nilfs_dat_entry *entry;
        void *kaddr;
 
-       kaddr = kmap_atomic(req->pr_entry_bh->b_page);
+       kaddr = kmap_local_page(req->pr_entry_bh->b_page);
        entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
                                             req->pr_entry_bh, kaddr);
        entry->de_start = cpu_to_le64(NILFS_CNO_MIN);
        entry->de_end = cpu_to_le64(NILFS_CNO_MIN);
        entry->de_blocknr = cpu_to_le64(0);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        nilfs_dat_commit_entry(dat, req);
 
@@ -145,12 +145,12 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
        struct nilfs_dat_entry *entry;
        void *kaddr;
 
-       kaddr = kmap_atomic(req->pr_entry_bh->b_page);
+       kaddr = kmap_local_page(req->pr_entry_bh->b_page);
        entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
                                             req->pr_entry_bh, kaddr);
        entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat));
        entry->de_blocknr = cpu_to_le64(blocknr);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        nilfs_dat_commit_entry(dat, req);
 }
@@ -167,12 +167,12 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
        if (ret < 0)
                return ret;
 
-       kaddr = kmap_atomic(req->pr_entry_bh->b_page);
+       kaddr = kmap_local_page(req->pr_entry_bh->b_page);
        entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
                                             req->pr_entry_bh, kaddr);
        start = le64_to_cpu(entry->de_start);
        blocknr = le64_to_cpu(entry->de_blocknr);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        if (blocknr == 0) {
                ret = nilfs_palloc_prepare_free_entry(dat, req);
@@ -202,7 +202,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
        sector_t blocknr;
        void *kaddr;
 
-       kaddr = kmap_atomic(req->pr_entry_bh->b_page);
+       kaddr = kmap_local_page(req->pr_entry_bh->b_page);
        entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
                                             req->pr_entry_bh, kaddr);
        end = start = le64_to_cpu(entry->de_start);
@@ -212,7 +212,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req,
        }
        entry->de_end = cpu_to_le64(end);
        blocknr = le64_to_cpu(entry->de_blocknr);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        if (blocknr == 0)
                nilfs_dat_commit_free(dat, req);
@@ -227,12 +227,12 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
        sector_t blocknr;
        void *kaddr;
 
-       kaddr = kmap_atomic(req->pr_entry_bh->b_page);
+       kaddr = kmap_local_page(req->pr_entry_bh->b_page);
        entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
                                             req->pr_entry_bh, kaddr);
        start = le64_to_cpu(entry->de_start);
        blocknr = le64_to_cpu(entry->de_blocknr);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        if (start == nilfs_mdt_cno(dat) && blocknr == 0)
                nilfs_palloc_abort_free_entry(dat, req);
@@ -362,7 +362,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
                }
        }
 
-       kaddr = kmap_atomic(entry_bh->b_page);
+       kaddr = kmap_local_page(entry_bh->b_page);
        entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
        if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
                nilfs_crit(dat->i_sb,
@@ -370,13 +370,13 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
                           __func__, (unsigned long long)vblocknr,
                           (unsigned long long)le64_to_cpu(entry->de_start),
                           (unsigned long long)le64_to_cpu(entry->de_end));
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                brelse(entry_bh);
                return -EINVAL;
        }
        WARN_ON(blocknr == 0);
        entry->de_blocknr = cpu_to_le64(blocknr);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        mark_buffer_dirty(entry_bh);
        nilfs_mdt_mark_dirty(dat);
@@ -426,7 +426,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
                }
        }
 
-       kaddr = kmap_atomic(entry_bh->b_page);
+       kaddr = kmap_local_page(entry_bh->b_page);
        entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
        blocknr = le64_to_cpu(entry->de_blocknr);
        if (blocknr == 0) {
@@ -436,7 +436,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
        *blocknrp = blocknr;
 
  out:
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
        brelse(entry_bh);
        return ret;
 }
@@ -457,10 +457,10 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz,
                                                   0, &entry_bh);
                if (ret < 0)
                        return ret;
-               kaddr = kmap_atomic(entry_bh->b_page);
+               kaddr = kmap_local_page(entry_bh->b_page);
                /* last virtual block number in this block */
                first = vinfo->vi_vblocknr;
-               do_div(first, entries_per_block);
+               first = div64_ul(first, entries_per_block);
                first *= entries_per_block;
                last = first + entries_per_block - 1;
                for (j = i, n = 0;
@@ -473,7 +473,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz,
                        vinfo->vi_end = le64_to_cpu(entry->de_end);
                        vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
                }
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                brelse(entry_bh);
        }
 
index 4c85914f2abc37a7d2ccb66115072196d3f60427..893ab36824cc2b7f1fa323c1c365aa56cf981e71 100644 (file)
@@ -66,7 +66,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
                dat = nilfs_bmap_get_dat(direct);
                ret = nilfs_dat_translate(dat, ptr, &blocknr);
                if (ret < 0)
-                       return ret;
+                       goto dat_error;
                ptr = blocknr;
        }
 
@@ -79,7 +79,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
                if (dat) {
                        ret = nilfs_dat_translate(dat, ptr2, &blocknr);
                        if (ret < 0)
-                               return ret;
+                               goto dat_error;
                        ptr2 = blocknr;
                }
                if (ptr2 != ptr + cnt)
@@ -87,6 +87,11 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
        }
        *ptrp = ptr;
        return cnt;
+
+ dat_error:
+       if (ret == -ENOENT)
+               ret = -EINVAL;  /* Notify bmap layer of metadata corruption */
+       return ret;
 }
 
 static __u64
index a8a4bc8490b4d8e8fa7ee62adfda33e600e2ed13..612e609158b520ccbb3f0ae97a7e9e26f5a644db 100644 (file)
@@ -15,6 +15,7 @@
 #include "mdt.h"
 #include "alloc.h"
 #include "ifile.h"
+#include "cpfile.h"
 
 /**
  * struct nilfs_ifile_info - on-memory private data of ifile
@@ -115,11 +116,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
                return ret;
        }
 
-       kaddr = kmap_atomic(req.pr_entry_bh->b_page);
+       kaddr = kmap_local_page(req.pr_entry_bh->b_page);
        raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr,
                                                 req.pr_entry_bh, kaddr);
        raw_inode->i_flags = 0;
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        mark_buffer_dirty(req.pr_entry_bh);
        brelse(req.pr_entry_bh);
@@ -173,14 +174,18 @@ int nilfs_ifile_count_free_inodes(struct inode *ifile,
  * nilfs_ifile_read - read or get ifile inode
  * @sb: super block instance
  * @root: root object
+ * @cno: number of checkpoint entry to read
  * @inode_size: size of an inode
- * @raw_inode: on-disk ifile inode
- * @inodep: buffer to store the inode
+ *
+ * Return: 0 on success, or one of the following negative error codes on failure.
+ * * %-EINVAL  - Invalid checkpoint.
+ * * %-ENOMEM  - Insufficient memory available.
+ * * %-EIO     - I/O error (including metadata corruption).
  */
 int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
-                    size_t inode_size, struct nilfs_inode *raw_inode,
-                    struct inode **inodep)
+                    __u64 cno, size_t inode_size)
 {
+       struct the_nilfs *nilfs;
        struct inode *ifile;
        int err;
 
@@ -201,13 +206,13 @@ int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
 
        nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache);
 
-       err = nilfs_read_inode_common(ifile, raw_inode);
+       nilfs = sb->s_fs_info;
+       err = nilfs_cpfile_read_checkpoint(nilfs->ns_cpfile, cno, root, ifile);
        if (err)
                goto failed;
 
        unlock_new_inode(ifile);
  out:
-       *inodep = ifile;
        return 0;
  failed:
        iget_failed(ifile);
index 35c5273f48219bbfd7b196b68cbea5e6e0a5ae8d..625545cc2a989fb8c13a5fe43c1047bd86ff92e4 100644 (file)
 static inline struct nilfs_inode *
 nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh)
 {
-       void *kaddr = kmap(ibh->b_page);
+       void *kaddr = kmap_local_page(ibh->b_page);
 
        return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr);
 }
 
-static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino,
-                                          struct buffer_head *ibh)
+static inline void nilfs_ifile_unmap_inode(struct nilfs_inode *raw_inode)
 {
-       kunmap(ibh->b_page);
+       kunmap_local(raw_inode);
 }
 
 int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **);
@@ -39,7 +38,6 @@ int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **);
 int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *);
 
 int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root,
-                    size_t inode_size, struct nilfs_inode *raw_inode,
-                    struct inode **inodep);
+                    __u64 cno, size_t inode_size);
 
 #endif /* _NILFS_IFILE_H */
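
nilfs_ifile_unmap_inode() now takes the raw inode pointer instead of the inode/buffer-head pair because kunmap_local() accepts any address within the mapped page, not just the value returned by kmap_local_page(). Callers no longer need to keep the buffer head around purely for the unmap, as this map/unmap pair shows:

        struct nilfs_inode *raw_inode;

        raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); /* kmap_local_page() inside */
        /* ... read or update fields of *raw_inode ... */
        nilfs_ifile_unmap_inode(raw_inode); /* kunmap_local() on an interior pointer */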
index 9c334c722fc1c1b3885d977efc61cc92463cb083..7340a01d80e1fddaebee1b0267d24b11bf0f1b54 100644 (file)
@@ -112,7 +112,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
                                           "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
                                           __func__, inode->i_ino,
                                           (unsigned long long)blkoff);
-                               err = 0;
+                               err = -EAGAIN;
                        }
                        nilfs_transaction_abort(inode->i_sb);
                        goto out;
@@ -520,7 +520,7 @@ static int __nilfs_read_inode(struct super_block *sb,
                        inode, inode->i_mode,
                        huge_decode_dev(le64_to_cpu(raw_inode->i_device_code)));
        }
-       nilfs_ifile_unmap_inode(root->ifile, ino, bh);
+       nilfs_ifile_unmap_inode(raw_inode);
        brelse(bh);
        up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
        nilfs_set_inode_flags(inode);
@@ -529,7 +529,7 @@ static int __nilfs_read_inode(struct super_block *sb,
        return 0;
 
  failed_unmap:
-       nilfs_ifile_unmap_inode(root->ifile, ino, bh);
+       nilfs_ifile_unmap_inode(raw_inode);
        brelse(bh);
 
  bad_inode:
@@ -759,8 +759,18 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode)
        return s_inode;
 }
 
+/**
+ * nilfs_write_inode_common - export common inode information to on-disk inode
+ * @inode:     inode object
+ * @raw_inode: on-disk inode
+ *
+ * This function writes standard information from the on-memory inode @inode
+ * to @raw_inode on ifile, cpfile or a super root block.  Since inode bmap
+ * data is not exported, nilfs_bmap_write() must be called separately during
+ * log writing.
+ */
 void nilfs_write_inode_common(struct inode *inode,
-                             struct nilfs_inode *raw_inode, int has_bmap)
+                             struct nilfs_inode *raw_inode)
 {
        struct nilfs_inode_info *ii = NILFS_I(inode);
 
@@ -778,21 +788,6 @@ void nilfs_write_inode_common(struct inode *inode,
        raw_inode->i_flags = cpu_to_le32(ii->i_flags);
        raw_inode->i_generation = cpu_to_le32(inode->i_generation);
 
-       if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
-               struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
-
-               /* zero-fill unused portion in the case of super root block */
-               raw_inode->i_xattr = 0;
-               raw_inode->i_pad = 0;
-               memset((void *)raw_inode + sizeof(*raw_inode), 0,
-                      nilfs->ns_inode_size - sizeof(*raw_inode));
-       }
-
-       if (has_bmap)
-               nilfs_bmap_write(ii->i_bmap, raw_inode);
-       else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
-               raw_inode->i_device_code =
-                       cpu_to_le64(huge_encode_dev(inode->i_rdev));
        /*
         * When extending inode, nilfs->ns_inode_size should be checked
         * for substitutions of appended fields.
@@ -813,14 +808,13 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
        if (flags & I_DIRTY_DATASYNC)
                set_bit(NILFS_I_INODE_SYNC, &ii->i_state);
 
-       nilfs_write_inode_common(inode, raw_inode, 0);
-               /*
-                * XXX: call with has_bmap = 0 is a workaround to avoid
-                * deadlock of bmap.  This delays update of i_bmap to just
-                * before writing.
-                */
+       nilfs_write_inode_common(inode, raw_inode);
+
+       if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+               raw_inode->i_device_code =
+                       cpu_to_le64(huge_encode_dev(inode->i_rdev));
 
-       nilfs_ifile_unmap_inode(ifile, ino, ibh);
+       nilfs_ifile_unmap_inode(raw_inode);
 }
 
 #define NILFS_MAX_TRUNCATE_BLOCKS      16384  /* 64MB for 4KB block */
index cfb6aca5ec383020b6d05c712d03b49bf6218b26..f1a01c191cf53ad17131fcf2a256f70b37d0d86c 100644 (file)
@@ -1111,7 +1111,7 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
        segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
 
        minseg = range[0] + segbytes - 1;
-       do_div(minseg, segbytes);
+       minseg = div64_ul(minseg, segbytes);
 
        if (range[1] < 4096)
                goto out;
@@ -1120,7 +1120,7 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
        if (maxseg < segbytes)
                goto out;
 
-       do_div(maxseg, segbytes);
+       maxseg = div64_ul(maxseg, segbytes);
        maxseg--;
 
        ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
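
These hunks also swap do_div() for div64_ul(). do_div(n, base) divides n in place, evaluates to the remainder, and truncates its divisor to 32 bits, which is easy to misuse when the divisor is an unsigned long and only the quotient is wanted; div64_ul() is an ordinary function returning the 64-bit-by-unsigned-long quotient. A sketch contrasting the two, with hypothetical bytes/segbytes variables:

        #include <linux/math64.h>

        u64 nsegs = bytes;                      /* u64 byte count */

        do_div(nsegs, segbytes);                /* old: in-place, divisor truncated to 32 bits */
        nsegs = div64_ul(bytes, segbytes);      /* new: explicit quotient, full divisor width */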
index e45c01a559c0136f7392b06d4e47e78cb5cdb34c..4f792a0ad0f0ffcacee9e3d19bb2e6a2239a629d 100644 (file)
@@ -47,12 +47,12 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
 
        set_buffer_mapped(bh);
 
-       kaddr = kmap_atomic(bh->b_page);
+       kaddr = kmap_local_page(bh->b_page);
        memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
        if (init_block)
                init_block(inode, bh, kaddr);
        flush_dcache_page(bh->b_page);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        set_buffer_uptodate(bh);
        mark_buffer_dirty(bh);
index 98cffaf0ac127750a40801e2ced8df73aea7b625..2e29b98ba8bab21e230d4b15d8706332019fd8a4 100644 (file)
@@ -256,7 +256,8 @@ extern struct inode *nilfs_new_inode(struct inode *, umode_t);
 extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 extern void nilfs_set_inode_flags(struct inode *);
 extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *);
-extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int);
+void nilfs_write_inode_common(struct inode *inode,
+                             struct nilfs_inode *raw_inode);
 struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root,
                            unsigned long ino);
 struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root,
index 5c2eba1987bd703b61abdd601641a1dfac856023..14e470fb88706a403f4e31954906f07492314482 100644 (file)
@@ -103,11 +103,11 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
        struct page *spage = sbh->b_page, *dpage = dbh->b_page;
        struct buffer_head *bh;
 
-       kaddr0 = kmap_atomic(spage);
-       kaddr1 = kmap_atomic(dpage);
+       kaddr0 = kmap_local_page(spage);
+       kaddr1 = kmap_local_page(dpage);
        memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
-       kunmap_atomic(kaddr1);
-       kunmap_atomic(kaddr0);
+       kunmap_local(kaddr1);
+       kunmap_local(kaddr0);
 
        dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
        dbh->b_blocknr = sbh->b_blocknr;
index a9b8d77c8c1d55b551582b826dafdcdcd047d13a..49a70c68bf3c06993cf8f6d0dc6f7f41d2a6ebf1 100644 (file)
@@ -482,9 +482,9 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
        if (unlikely(!bh_org))
                return -EIO;
 
-       kaddr = kmap_atomic(page);
+       kaddr = kmap_local_page(page);
        memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
        brelse(bh_org);
        return 0;
 }
index 6e59dc19a732498a64ebdbdea43fd14c88e2f0aa..dc431b4c34c96c9e17a1ae89383f6dcfe76b3148 100644 (file)
@@ -220,9 +220,9 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf,
                crc = crc32_le(crc, bh->b_data, bh->b_size);
        }
        list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
-               kaddr = kmap_atomic(bh->b_page);
+               kaddr = kmap_local_page(bh->b_page);
                crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size);
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
        }
        raw_sum->ss_datasum = cpu_to_le32(crc);
 }
index 2bfb08052d399972dee9fd49583b77b95104ac83..aa5290cb7467cf2d756ec042b41b57e30c4336ae 100644 (file)
@@ -880,76 +880,6 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
        nilfs_mdt_clear_dirty(nilfs->ns_dat);
 }
 
-static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
-{
-       struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
-       struct buffer_head *bh_cp;
-       struct nilfs_checkpoint *raw_cp;
-       int err;
-
-       /* XXX: this interface will be changed */
-       err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
-                                         &raw_cp, &bh_cp);
-       if (likely(!err)) {
-               /*
-                * The following code is duplicated with cpfile.  But, it is
-                * needed to collect the checkpoint even if it was not newly
-                * created.
-                */
-               mark_buffer_dirty(bh_cp);
-               nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
-               nilfs_cpfile_put_checkpoint(
-                       nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
-       } else if (err == -EINVAL || err == -ENOENT) {
-               nilfs_error(sci->sc_super,
-                           "checkpoint creation failed due to metadata corruption.");
-               err = -EIO;
-       }
-       return err;
-}
-
-static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
-{
-       struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
-       struct buffer_head *bh_cp;
-       struct nilfs_checkpoint *raw_cp;
-       int err;
-
-       err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
-                                         &raw_cp, &bh_cp);
-       if (unlikely(err)) {
-               if (err == -EINVAL || err == -ENOENT) {
-                       nilfs_error(sci->sc_super,
-                                   "checkpoint finalization failed due to metadata corruption.");
-                       err = -EIO;
-               }
-               goto failed_ibh;
-       }
-       raw_cp->cp_snapshot_list.ssl_next = 0;
-       raw_cp->cp_snapshot_list.ssl_prev = 0;
-       raw_cp->cp_inodes_count =
-               cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
-       raw_cp->cp_blocks_count =
-               cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
-       raw_cp->cp_nblk_inc =
-               cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
-       raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
-       raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);
-
-       if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
-               nilfs_checkpoint_clear_minor(raw_cp);
-       else
-               nilfs_checkpoint_set_minor(raw_cp);
-
-       nilfs_write_inode_common(sci->sc_root->ifile,
-                                &raw_cp->cp_ifile_inode, 1);
-       nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
-       return 0;
-
- failed_ibh:
-       return err;
-}
-
 static void nilfs_fill_in_file_bmap(struct inode *ifile,
                                    struct nilfs_inode_info *ii)
 
@@ -963,7 +893,7 @@ static void nilfs_fill_in_file_bmap(struct inode *ifile,
                raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
                                                  ibh);
                nilfs_bmap_write(ii->i_bmap, raw_inode);
-               nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
+               nilfs_ifile_unmap_inode(raw_inode);
        }
 }
 
@@ -977,6 +907,33 @@ static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
        }
 }
 
+/**
+ * nilfs_write_root_mdt_inode - export root metadata inode information to
+ *                              the on-disk inode
+ * @inode:     inode object of the root metadata file
+ * @raw_inode: on-disk inode
+ *
+ * nilfs_write_root_mdt_inode() writes inode information and bmap data of
+ * @inode to the inode area of the metadata file allocated on the super root
+ * block created to finalize the log.  Since super root blocks are configured
+ * each time, this function zero-fills the unused area of @raw_inode.
+ */
+static void nilfs_write_root_mdt_inode(struct inode *inode,
+                                      struct nilfs_inode *raw_inode)
+{
+       struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
+
+       nilfs_write_inode_common(inode, raw_inode);
+
+       /* zero-fill unused portion of raw_inode */
+       raw_inode->i_xattr = 0;
+       raw_inode->i_pad = 0;
+       memset((void *)raw_inode + sizeof(*raw_inode), 0,
+              nilfs->ns_inode_size - sizeof(*raw_inode));
+
+       nilfs_bmap_write(NILFS_I(inode)->i_bmap, raw_inode);
+}
+
 static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
                                             struct the_nilfs *nilfs)
 {
@@ -998,12 +955,13 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
                              nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
        raw_sr->sr_flags = 0;
 
-       nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
-                                NILFS_SR_DAT_OFFSET(isz), 1);
-       nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
-                                NILFS_SR_CPFILE_OFFSET(isz), 1);
-       nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
-                                NILFS_SR_SUFILE_OFFSET(isz), 1);
+       nilfs_write_root_mdt_inode(nilfs->ns_dat, (void *)raw_sr +
+                                  NILFS_SR_DAT_OFFSET(isz));
+       nilfs_write_root_mdt_inode(nilfs->ns_cpfile, (void *)raw_sr +
+                                  NILFS_SR_CPFILE_OFFSET(isz));
+       nilfs_write_root_mdt_inode(nilfs->ns_sufile, (void *)raw_sr +
+                                  NILFS_SR_SUFILE_OFFSET(isz));
+
        memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
        set_buffer_uptodate(bh_sr);
        unlock_buffer(bh_sr);
@@ -1230,7 +1188,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
                        break;
                nilfs_sc_cstage_inc(sci);
                /* Creating a checkpoint */
-               err = nilfs_segctor_create_checkpoint(sci);
+               err = nilfs_cpfile_create_checkpoint(nilfs->ns_cpfile,
+                                                    nilfs->ns_cno);
                if (unlikely(err))
                        break;
                fallthrough;
@@ -2101,7 +2060,11 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
 
                if (mode == SC_LSEG_SR &&
                    nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) {
-                       err = nilfs_segctor_fill_in_checkpoint(sci);
+                       err = nilfs_cpfile_finalize_checkpoint(
+                               nilfs->ns_cpfile, nilfs->ns_cno, sci->sc_root,
+                               sci->sc_nblk_inc + sci->sc_nblk_this_inc,
+                               sci->sc_seg_ctime,
+                               !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags));
                        if (unlikely(err))
                                goto failed_to_write;
 
index 0a8119456c21364243af515beaed687c01ea40d8..6748218be7c591e21af3740cb756dcec2df7818a 100644 (file)
@@ -48,7 +48,7 @@ nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum)
 {
        __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset;
 
-       do_div(t, nilfs_sufile_segment_usages_per_block(sufile));
+       t = div64_ul(t, nilfs_sufile_segment_usages_per_block(sufile));
        return (unsigned long)t;
 }
 
@@ -107,11 +107,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
        struct nilfs_sufile_header *header;
        void *kaddr;
 
-       kaddr = kmap_atomic(header_bh->b_page);
+       kaddr = kmap_local_page(header_bh->b_page);
        header = kaddr + bh_offset(header_bh);
        le64_add_cpu(&header->sh_ncleansegs, ncleanadd);
        le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        mark_buffer_dirty(header_bh);
 }
@@ -315,10 +315,10 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
        ret = nilfs_sufile_get_header_block(sufile, &header_bh);
        if (ret < 0)
                goto out_sem;
-       kaddr = kmap_atomic(header_bh->b_page);
+       kaddr = kmap_local_page(header_bh->b_page);
        header = kaddr + bh_offset(header_bh);
        last_alloc = le64_to_cpu(header->sh_last_alloc);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        nsegments = nilfs_sufile_get_nsegments(sufile);
        maxsegnum = sui->allocmax;
@@ -352,7 +352,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
                                                           &su_bh);
                if (ret < 0)
                        goto out_header;
-               kaddr = kmap_atomic(su_bh->b_page);
+               kaddr = kmap_local_page(su_bh->b_page);
                su = nilfs_sufile_block_get_segment_usage(
                        sufile, segnum, su_bh, kaddr);
 
@@ -363,14 +363,14 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
                                continue;
                        /* found a clean segment */
                        nilfs_segment_usage_set_dirty(su);
-                       kunmap_atomic(kaddr);
+                       kunmap_local(kaddr);
 
-                       kaddr = kmap_atomic(header_bh->b_page);
+                       kaddr = kmap_local_page(header_bh->b_page);
                        header = kaddr + bh_offset(header_bh);
                        le64_add_cpu(&header->sh_ncleansegs, -1);
                        le64_add_cpu(&header->sh_ndirtysegs, 1);
                        header->sh_last_alloc = cpu_to_le64(segnum);
-                       kunmap_atomic(kaddr);
+                       kunmap_local(kaddr);
 
                        sui->ncleansegs--;
                        mark_buffer_dirty(header_bh);
@@ -384,7 +384,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
                        goto out_header;
                }
 
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                brelse(su_bh);
        }
 
@@ -406,16 +406,16 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
        struct nilfs_segment_usage *su;
        void *kaddr;
 
-       kaddr = kmap_atomic(su_bh->b_page);
+       kaddr = kmap_local_page(su_bh->b_page);
        su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
        if (unlikely(!nilfs_segment_usage_clean(su))) {
                nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean",
                           __func__, (unsigned long long)segnum);
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                return;
        }
        nilfs_segment_usage_set_dirty(su);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        nilfs_sufile_mod_counter(header_bh, -1, 1);
        NILFS_SUI(sufile)->ncleansegs--;
@@ -432,11 +432,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
        void *kaddr;
        int clean, dirty;
 
-       kaddr = kmap_atomic(su_bh->b_page);
+       kaddr = kmap_local_page(su_bh->b_page);
        su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
        if (su->su_flags == cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)) &&
            su->su_nblocks == cpu_to_le32(0)) {
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                return;
        }
        clean = nilfs_segment_usage_clean(su);
@@ -446,7 +446,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
        su->su_lastmod = cpu_to_le64(0);
        su->su_nblocks = cpu_to_le32(0);
        su->su_flags = cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY));
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
        NILFS_SUI(sufile)->ncleansegs -= clean;
@@ -463,12 +463,12 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
        void *kaddr;
        int sudirty;
 
-       kaddr = kmap_atomic(su_bh->b_page);
+       kaddr = kmap_local_page(su_bh->b_page);
        su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
        if (nilfs_segment_usage_clean(su)) {
                nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean",
                           __func__, (unsigned long long)segnum);
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                return;
        }
        if (unlikely(nilfs_segment_usage_error(su)))
@@ -481,7 +481,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
                           (unsigned long long)segnum);
 
        nilfs_segment_usage_set_clean(su);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
        mark_buffer_dirty(su_bh);
 
        nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
@@ -509,12 +509,12 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
        if (ret)
                goto out_sem;
 
-       kaddr = kmap_atomic(bh->b_page);
+       kaddr = kmap_local_page(bh->b_page);
        su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
        if (unlikely(nilfs_segment_usage_error(su))) {
                struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
 
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                brelse(bh);
                if (nilfs_segment_is_active(nilfs, segnum)) {
                        nilfs_error(sufile->i_sb,
@@ -532,7 +532,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
                ret = -EIO;
        } else {
                nilfs_segment_usage_set_dirty(su);
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                mark_buffer_dirty(bh);
                nilfs_mdt_mark_dirty(sufile);
                brelse(bh);
@@ -562,7 +562,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
        if (ret < 0)
                goto out_sem;
 
-       kaddr = kmap_atomic(bh->b_page);
+       kaddr = kmap_local_page(bh->b_page);
        su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
        if (modtime) {
                /*
@@ -573,7 +573,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
                su->su_lastmod = cpu_to_le64(modtime);
        }
        su->su_nblocks = cpu_to_le32(nblocks);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        mark_buffer_dirty(bh);
        nilfs_mdt_mark_dirty(sufile);
@@ -614,7 +614,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
        if (ret < 0)
                goto out_sem;
 
-       kaddr = kmap_atomic(header_bh->b_page);
+       kaddr = kmap_local_page(header_bh->b_page);
        header = kaddr + bh_offset(header_bh);
        sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile);
        sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs);
@@ -624,7 +624,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
        spin_lock(&nilfs->ns_last_segment_lock);
        sustat->ss_prot_seq = nilfs->ns_prot_seq;
        spin_unlock(&nilfs->ns_last_segment_lock);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
        brelse(header_bh);
 
  out_sem:
@@ -640,15 +640,15 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
        void *kaddr;
        int suclean;
 
-       kaddr = kmap_atomic(su_bh->b_page);
+       kaddr = kmap_local_page(su_bh->b_page);
        su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
        if (nilfs_segment_usage_error(su)) {
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                return;
        }
        suclean = nilfs_segment_usage_clean(su);
        nilfs_segment_usage_set_error(su);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        if (suclean) {
                nilfs_sufile_mod_counter(header_bh, -1, 0);
@@ -717,7 +717,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
                        /* hole */
                        continue;
                }
-               kaddr = kmap_atomic(su_bh->b_page);
+               kaddr = kmap_local_page(su_bh->b_page);
                su = nilfs_sufile_block_get_segment_usage(
                        sufile, segnum, su_bh, kaddr);
                su2 = su;
@@ -726,7 +726,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
                             ~BIT(NILFS_SEGMENT_USAGE_ERROR)) ||
                            nilfs_segment_is_active(nilfs, segnum + j)) {
                                ret = -EBUSY;
-                               kunmap_atomic(kaddr);
+                               kunmap_local(kaddr);
                                brelse(su_bh);
                                goto out_header;
                        }
@@ -738,7 +738,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile,
                                nc++;
                        }
                }
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                if (nc > 0) {
                        mark_buffer_dirty(su_bh);
                        ncleaned += nc;
@@ -823,10 +823,10 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
                sui->allocmin = 0;
        }
 
-       kaddr = kmap_atomic(header_bh->b_page);
+       kaddr = kmap_local_page(header_bh->b_page);
        header = kaddr + bh_offset(header_bh);
        header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 
        mark_buffer_dirty(header_bh);
        nilfs_mdt_mark_dirty(sufile);
@@ -891,7 +891,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
                        continue;
                }
 
-               kaddr = kmap_atomic(su_bh->b_page);
+               kaddr = kmap_local_page(su_bh->b_page);
                su = nilfs_sufile_block_get_segment_usage(
                        sufile, segnum, su_bh, kaddr);
                for (j = 0; j < n;
@@ -904,7 +904,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
                                si->sui_flags |=
                                        BIT(NILFS_SEGMENT_USAGE_ACTIVE);
                }
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                brelse(su_bh);
        }
        ret = nsegs;
@@ -973,7 +973,7 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
                goto out_header;
 
        for (;;) {
-               kaddr = kmap_atomic(bh->b_page);
+               kaddr = kmap_local_page(bh->b_page);
                su = nilfs_sufile_block_get_segment_usage(
                        sufile, sup->sup_segnum, bh, kaddr);
 
@@ -1010,7 +1010,7 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf,
                        su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags);
                }
 
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
 
                sup = (void *)sup + supsz;
                if (sup >= supend)
@@ -1115,7 +1115,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
                        continue;
                }
 
-               kaddr = kmap_atomic(su_bh->b_page);
+               kaddr = kmap_local_page(su_bh->b_page);
                su = nilfs_sufile_block_get_segment_usage(sufile, segnum,
                                su_bh, kaddr);
                for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) {
@@ -1145,7 +1145,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
                        }
 
                        if (nblocks >= minlen) {
-                               kunmap_atomic(kaddr);
+                               kunmap_local(kaddr);
 
                                ret = blkdev_issue_discard(nilfs->ns_bdev,
                                                start * sects_per_block,
@@ -1157,7 +1157,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
                                }
 
                                ndiscarded += nblocks;
-                               kaddr = kmap_atomic(su_bh->b_page);
+                               kaddr = kmap_local_page(su_bh->b_page);
                                su = nilfs_sufile_block_get_segment_usage(
                                        sufile, segnum, su_bh, kaddr);
                        }
@@ -1166,7 +1166,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range)
                        start = seg_start;
                        nblocks = seg_end - seg_start + 1;
                }
-               kunmap_atomic(kaddr);
+               kunmap_local(kaddr);
                put_bh(su_bh);
        }
 
@@ -1246,10 +1246,10 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
                goto failed;
 
        sui = NILFS_SUI(sufile);
-       kaddr = kmap_atomic(header_bh->b_page);
+       kaddr = kmap_local_page(header_bh->b_page);
        header = kaddr + bh_offset(header_bh);
        sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
        brelse(header_bh);
 
        sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
index df8674173b22024b4aef2bcd430d6a23b92986b0..ac24ed109ce93563e6c9c932709813bf1aec72dc 100644 (file)
@@ -448,7 +448,7 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
 
        sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
        newnsegs = sb2off >> nilfs->ns_blocksize_bits;
-       do_div(newnsegs, nilfs->ns_blocks_per_segment);
+       newnsegs = div64_ul(newnsegs, nilfs->ns_blocks_per_segment);
 
        ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
        up_write(&nilfs->ns_segctor_sem);
@@ -544,8 +544,6 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
 {
        struct the_nilfs *nilfs = sb->s_fs_info;
        struct nilfs_root *root;
-       struct nilfs_checkpoint *raw_cp;
-       struct buffer_head *bh_cp;
        int err = -ENOMEM;
 
        root = nilfs_find_or_create_root(
@@ -557,38 +555,19 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
                goto reuse; /* already attached checkpoint */
 
        down_read(&nilfs->ns_segctor_sem);
-       err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
-                                         &bh_cp);
+       err = nilfs_ifile_read(sb, root, cno, nilfs->ns_inode_size);
        up_read(&nilfs->ns_segctor_sem);
-       if (unlikely(err)) {
-               if (err == -ENOENT || err == -EINVAL) {
-                       nilfs_err(sb,
-                                 "Invalid checkpoint (checkpoint number=%llu)",
-                                 (unsigned long long)cno);
-                       err = -EINVAL;
-               }
+       if (unlikely(err))
                goto failed;
-       }
-
-       err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size,
-                              &raw_cp->cp_ifile_inode, &root->ifile);
-       if (err)
-               goto failed_bh;
-
-       atomic64_set(&root->inodes_count,
-                       le64_to_cpu(raw_cp->cp_inodes_count));
-       atomic64_set(&root->blocks_count,
-                       le64_to_cpu(raw_cp->cp_blocks_count));
-
-       nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
 
  reuse:
        *rootp = root;
        return 0;
 
- failed_bh:
-       nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp);
  failed:
+       if (err == -EINVAL)
+               nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)",
+                         (unsigned long long)cno);
        nilfs_put_root(root);
 
        return err;
index 71400496ed36519d2524ab552efed3e150899a52..2ae2c1bbf6d17c6aa10752679f5635329af362e4 100644 (file)
@@ -413,7 +413,7 @@ static u64 nilfs_max_segment_count(struct the_nilfs *nilfs)
 {
        u64 max_count = U64_MAX;
 
-       do_div(max_count, nilfs->ns_blocks_per_segment);
+       max_count = div64_ul(max_count, nilfs->ns_blocks_per_segment);
        return min_t(u64, max_count, ULONG_MAX);
 }
 
index 64a6ef638495c28aabaacdde2c5f0a882769bc8f..cb40cafbc06237fe7e72e913960376e118f59454 100644 (file)
@@ -1615,7 +1615,7 @@ update_holders:
 unlock:
        lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
 
-       /* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
+       /* ocfs2_unblock_lock request on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
        kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
 
        spin_unlock_irqrestore(&lockres->l_lock, flags);
index 8b6d15010703b30a8b1503e52aefbee35166a319..0da8e7bd32616fdc56f8687042cc8bade203af33 100644 (file)
@@ -2763,6 +2763,7 @@ const struct inode_operations ocfs2_file_iops = {
 const struct inode_operations ocfs2_special_file_iops = {
        .setattr        = ocfs2_setattr,
        .getattr        = ocfs2_getattr,
+       .listxattr      = ocfs2_listxattr,
        .permission     = ocfs2_permission,
        .get_inode_acl  = ocfs2_iop_get_acl,
        .set_acl        = ocfs2_iop_set_acl,
index e7314d6fb8c77ab5cc046405111986aed90cd2d3..8aabaed2c1cb9475d1df6b27fee89ed9cdb0afa4 100644 (file)
@@ -1711,12 +1711,12 @@ static int ocfs2_initialize_mem_caches(void)
        ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
                                        sizeof(struct ocfs2_dquot),
                                        0,
-                                       (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT),
+                                       SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
                                        NULL);
        ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache",
                                        sizeof(struct ocfs2_quota_chunk),
                                        0,
-                                       (SLAB_RECLAIM_ACCOUNT),
+                                       SLAB_RECLAIM_ACCOUNT,
                                        NULL);
        if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
            !ocfs2_qf_chunk_cachep) {
index 3b6982bf6bcf3ecd64f36fa118d4ec551f080acf..e75e173a918622b1c213c4e3862631fb29a2fa9a 100644 (file)
@@ -22,7 +22,7 @@ int op_cache_initialize(void)
        op_cache = kmem_cache_create("orangefs_op_cache",
                                     sizeof(struct orangefs_kernel_op_s),
                                     0,
-                                    ORANGEFS_CACHE_CREATE_FLAGS,
+                                    0,
                                     NULL);
 
        if (!op_cache) {
index 926d9c0a428a2d4977c6d68d644d9d449495df48..e2df7eeadc7aa2bb6d15ff6b3c363655bd333469 100644 (file)
@@ -93,16 +93,6 @@ enum orangefs_vfs_op_states {
        OP_VFS_STATE_GIVEN_UP = 16,
 };
 
-/*
- * orangefs kernel memory related flags
- */
-
-#if (defined CONFIG_DEBUG_SLAB)
-#define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE
-#else
-#define ORANGEFS_CACHE_CREATE_FLAGS 0
-#endif
-
 extern const struct xattr_handler * const orangefs_xattr_handlers[];
 
 extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu);
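
ORANGEFS_CACHE_CREATE_FLAGS existed only to pass SLAB_RED_ZONE when CONFIG_DEBUG_SLAB was set; with that config option gone along with the SLAB allocator, slab debugging is enabled centrally (e.g. via the slub_debug= boot parameter), so the per-cache flag collapses to 0 and the macro can be deleted. The call sites reduce to plain kmem_cache_create() calls, as in this sketch of the op cache:

        op_cache = kmem_cache_create("orangefs_op_cache",
                                     sizeof(struct orangefs_kernel_op_s),
                                     0,         /* align */
                                     0,         /* flags: no per-cache debug bits */
                                     NULL);     /* no constructor */
        if (!op_cache)
                return -ENOMEM;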
index 5254256a224d7a0a78f9673a42dc7df386370a4a..34849b4a3243ca5a2d27114d8a6f5f0a6074d329 100644 (file)
@@ -527,7 +527,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
        sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL);
        if (!ORANGEFS_SB(sb)) {
                d = ERR_PTR(-ENOMEM);
-               goto free_sb_and_op;
+               goto free_op;
        }
 
        ret = orangefs_fill_sb(sb,
@@ -644,7 +644,7 @@ int orangefs_inode_cache_initialize(void)
                                        "orangefs_inode_cache",
                                        sizeof(struct orangefs_inode_s),
                                        0,
-                                       ORANGEFS_CACHE_CREATE_FLAGS,
+                                       0,
                                        offsetof(struct orangefs_inode_s,
                                                link_target),
                                        sizeof_field(struct orangefs_inode_s,
index 8586e2f5d24390c91263ea1ee48e7c3b22199cd2..0762575a1e7080b30ba29fbdf69958eff27aea42 100644 (file)
@@ -234,11 +234,11 @@ static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
 {
        loff_t tmp;
 
-       if (WARN_ON_ONCE(pos != pos2))
+       if (pos != pos2)
                return -EIO;
-       if (WARN_ON_ONCE(pos < 0 || len < 0 || totlen < 0))
+       if (pos < 0 || len < 0 || totlen < 0)
                return -EIO;
-       if (WARN_ON_ONCE(check_add_overflow(pos, len, &tmp)))
+       if (check_add_overflow(pos, len, &tmp))
                return -EIO;
        return 0;
 }
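
Dropping the WARN_ON_ONCE() wrappers in ovl_verify_area() reflects that the values being checked can come from an untrusted layer, so a bad range is an expected -EIO condition rather than a kernel bug worth a backtrace. check_add_overflow(a, b, &d) stores a + b in d and returns true if the addition overflowed. A hedged sketch of the same validation as a standalone helper (range_end() is hypothetical):

        #include <linux/overflow.h>

        static int range_end(loff_t pos, loff_t len, loff_t *end)
        {
                if (pos < 0 || len < 0)
                        return -EIO;
                if (check_add_overflow(pos, len, end))
                        return -EIO;    /* pos + len overflows loff_t */
                return 0;               /* *end now holds pos + len */
        }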
index 32b1116ae137c61bc0eb915498dab969b1f4c50c..d80a1431ef7be0c848b55d9104b284806a2ef1f8 100644 (file)
@@ -32,7 +32,7 @@ config PROC_FS
 config PROC_KCORE
        bool "/proc/kcore support" if !ARM
        depends on PROC_FS && MMU
-       select CRASH_CORE
+       select VMCORE_INFO
        help
          Provides a virtual ELF core file of the live kernel.  This can
          be read with gdb and other ELF tools.  No modifications can be
index 6422e569b0808526b4cb63a9d932adfa03e1d886..8e08a9a1b7ed57dba829dd104929d2dcb7a30ecc 100644 (file)
@@ -10,7 +10,7 @@
  *     Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
  */
 
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
 #include <linux/mm.h>
 #include <linux/proc_fs.h>
 #include <linux/kcore.h>
index 3f78ebbb795fe237398789fb700d1b6d358859f5..23fbab954c20b62b7b67413763ca777001073d3b 100644 (file)
@@ -1352,8 +1352,7 @@ static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
        return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
 }
 
-static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
-                         struct pagemapread *pm)
+static int add_to_pagemap(pagemap_entry_t *pme, struct pagemapread *pm)
 {
        pm->buffer[pm->pos++] = *pme;
        if (pm->pos >= pm->len)
@@ -1380,7 +1379,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
                        hole_end = end;
 
                for (; addr < hole_end; addr += PAGE_SIZE) {
-                       err = add_to_pagemap(addr, &pme, pm);
+                       err = add_to_pagemap(&pme, pm);
                        if (err)
                                goto out;
                }
@@ -1392,7 +1391,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
                if (vma->vm_flags & VM_SOFTDIRTY)
                        pme = make_pme(0, PM_SOFT_DIRTY);
                for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
-                       err = add_to_pagemap(addr, &pme, pm);
+                       err = add_to_pagemap(&pme, pm);
                        if (err)
                                goto out;
                }
@@ -1519,7 +1518,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
                for (; addr != end; addr += PAGE_SIZE) {
                        pagemap_entry_t pme = make_pme(frame, flags);
 
-                       err = add_to_pagemap(addr, &pme, pm);
+                       err = add_to_pagemap(&pme, pm);
                        if (err)
                                break;
                        if (pm->show_pfn) {
@@ -1547,7 +1546,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
                pagemap_entry_t pme;
 
                pme = pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte));
-               err = add_to_pagemap(addr, &pme, pm);
+               err = add_to_pagemap(&pme, pm);
                if (err)
                        break;
        }
@@ -1597,7 +1596,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
        for (; addr != end; addr += PAGE_SIZE) {
                pagemap_entry_t pme = make_pme(frame, flags);
 
-               err = add_to_pagemap(addr, &pme, pm);
+               err = add_to_pagemap(&pme, pm);
                if (err)
                        return err;
                if (pm->show_pfn && (flags & PM_PRESENT))
@@ -1807,7 +1806,7 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
                if (p->masks_of_interest & PAGE_IS_FILE) {
                        swp = pte_to_swp_entry(pte);
                        if (is_pfn_swap_entry(swp) &&
-                           !PageAnon(pfn_swap_entry_to_page(swp)))
+                           !folio_test_anon(pfn_swap_entry_folio(swp)))
                                categories |= PAGE_IS_FILE;
                }
                if (pte_swp_soft_dirty(pte))
@@ -1873,7 +1872,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
                if (p->masks_of_interest & PAGE_IS_FILE) {
                        swp = pmd_to_swp_entry(pmd);
                        if (is_pfn_swap_entry(swp) &&
-                           !PageAnon(pfn_swap_entry_to_page(swp)))
+                           !folio_test_anon(pfn_swap_entry_folio(swp)))
                                categories |= PAGE_IS_FILE;
                }
        }
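
The swap-entry checks also move from page- to folio-based predicates: anon-ness is a per-folio property, and pfn_swap_entry_folio() resolves the folio once rather than letting PageAnon() chase the compound head on each call. The conversion, side by side:

        /* before: page-based test */
        if (is_pfn_swap_entry(swp) && !PageAnon(pfn_swap_entry_to_page(swp)))
                categories |= PAGE_IS_FILE;

        /* after: folio-based test */
        if (is_pfn_swap_entry(swp) && !folio_test_anon(pfn_swap_entry_folio(swp)))
                categories |= PAGE_IS_FILE;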
index ee05ab6b37e769a16a3b1e40cd820c17e0b76d85..71d9779c42b10aca8bd4e0b7b667fc62386e2305 100644 (file)
@@ -1515,11 +1515,29 @@ static int fs_bdev_thaw(struct block_device *bdev)
        return error;
 }
 
+static void fs_bdev_super_get(void *data)
+{
+       struct super_block *sb = data;
+
+       spin_lock(&sb_lock);
+       sb->s_count++;
+       spin_unlock(&sb_lock);
+}
+
+static void fs_bdev_super_put(void *data)
+{
+       struct super_block *sb = data;
+
+       put_super(sb);
+}
+
 const struct blk_holder_ops fs_holder_ops = {
        .mark_dead              = fs_bdev_mark_dead,
        .sync                   = fs_bdev_sync,
        .freeze                 = fs_bdev_freeze,
        .thaw                   = fs_bdev_thaw,
+       .get_holder             = fs_bdev_super_get,
+       .put_holder             = fs_bdev_super_put,
 };
 EXPORT_SYMBOL_GPL(fs_holder_ops);
 
index 959551ff9a951474844efbd7029aec1b65faf94e..60dcfafdc11a84d0d6ebe74ebc99c90ff145a35a 100644 (file)
@@ -50,45 +50,6 @@ static struct ctl_table vm_userfaultfd_table[] = {
 
 static struct kmem_cache *userfaultfd_ctx_cachep __ro_after_init;
 
-/*
- * Start with fault_pending_wqh and fault_wqh so they're more likely
- * to be in the same cacheline.
- *
- * Locking order:
- *     fd_wqh.lock
- *             fault_pending_wqh.lock
- *                     fault_wqh.lock
- *             event_wqh.lock
- *
- * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
- * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
- * also taken in IRQ context.
- */
-struct userfaultfd_ctx {
-       /* waitqueue head for the pending (i.e. not read) userfaults */
-       wait_queue_head_t fault_pending_wqh;
-       /* waitqueue head for the userfaults */
-       wait_queue_head_t fault_wqh;
-       /* waitqueue head for the pseudo fd to wakeup poll/read */
-       wait_queue_head_t fd_wqh;
-       /* waitqueue head for events */
-       wait_queue_head_t event_wqh;
-       /* a refile sequence protected by fault_pending_wqh lock */
-       seqcount_spinlock_t refile_seq;
-       /* pseudo fd refcounting */
-       refcount_t refcount;
-       /* userfaultfd syscall flags */
-       unsigned int flags;
-       /* features requested from the userspace */
-       unsigned int features;
-       /* released */
-       bool released;
-       /* memory mappings are changing because of non-cooperative event */
-       atomic_t mmap_changing;
-       /* mm with one ore more vmas attached to this userfaultfd_ctx */
-       struct mm_struct *mm;
-};
-
 struct userfaultfd_fork_ctx {
        struct userfaultfd_ctx *orig;
        struct userfaultfd_ctx *new;
@@ -724,12 +685,15 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
                ctx->flags = octx->flags;
                ctx->features = octx->features;
                ctx->released = false;
+               init_rwsem(&ctx->map_changing_lock);
                atomic_set(&ctx->mmap_changing, 0);
                ctx->mm = vma->vm_mm;
                mmgrab(ctx->mm);
 
                userfaultfd_ctx_get(octx);
+               down_write(&octx->map_changing_lock);
                atomic_inc(&octx->mmap_changing);
+               up_write(&octx->map_changing_lock);
                fctx->orig = octx;
                fctx->new = ctx;
                list_add_tail(&fctx->list, fcs);
@@ -776,7 +740,9 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
        if (ctx->features & UFFD_FEATURE_EVENT_REMAP) {
                vm_ctx->ctx = ctx;
                userfaultfd_ctx_get(ctx);
+               down_write(&ctx->map_changing_lock);
                atomic_inc(&ctx->mmap_changing);
+               up_write(&ctx->map_changing_lock);
        } else {
                /* Drop uffd context if remap feature not enabled */
                vma_start_write(vma);
@@ -822,7 +788,9 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
                return true;
 
        userfaultfd_ctx_get(ctx);
+       down_write(&ctx->map_changing_lock);
        atomic_inc(&ctx->mmap_changing);
+       up_write(&ctx->map_changing_lock);
        mmap_read_unlock(mm);
 
        msg_init(&ewq.msg);
@@ -864,7 +832,9 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
                return -ENOMEM;
 
        userfaultfd_ctx_get(ctx);
+       down_write(&ctx->map_changing_lock);
        atomic_inc(&ctx->mmap_changing);
+       up_write(&ctx->map_changing_lock);
        unmap_ctx->ctx = ctx;
        unmap_ctx->start = start;
        unmap_ctx->end = end;
@@ -1748,9 +1718,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
        if (uffdio_copy.mode & UFFDIO_COPY_MODE_WP)
                flags |= MFILL_ATOMIC_WP;
        if (mmget_not_zero(ctx->mm)) {
-               ret = mfill_atomic_copy(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
-                                       uffdio_copy.len, &ctx->mmap_changing,
-                                       flags);
+               ret = mfill_atomic_copy(ctx, uffdio_copy.dst, uffdio_copy.src,
+                                       uffdio_copy.len, flags);
                mmput(ctx->mm);
        } else {
                return -ESRCH;
@@ -1800,9 +1769,8 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
                goto out;
 
        if (mmget_not_zero(ctx->mm)) {
-               ret = mfill_atomic_zeropage(ctx->mm, uffdio_zeropage.range.start,
-                                          uffdio_zeropage.range.len,
-                                          &ctx->mmap_changing);
+               ret = mfill_atomic_zeropage(ctx, uffdio_zeropage.range.start,
+                                          uffdio_zeropage.range.len);
                mmput(ctx->mm);
        } else {
                return -ESRCH;
@@ -1857,9 +1825,8 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
                return -EINVAL;
 
        if (mmget_not_zero(ctx->mm)) {
-               ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
-                                         uffdio_wp.range.len, mode_wp,
-                                         &ctx->mmap_changing);
+               ret = mwriteprotect_range(ctx, uffdio_wp.range.start,
+                                         uffdio_wp.range.len, mode_wp);
                mmput(ctx->mm);
        } else {
                return -ESRCH;
@@ -1909,9 +1876,8 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
                flags |= MFILL_ATOMIC_WP;
 
        if (mmget_not_zero(ctx->mm)) {
-               ret = mfill_atomic_continue(ctx->mm, uffdio_continue.range.start,
-                                           uffdio_continue.range.len,
-                                           &ctx->mmap_changing, flags);
+               ret = mfill_atomic_continue(ctx, uffdio_continue.range.start,
+                                           uffdio_continue.range.len, flags);
                mmput(ctx->mm);
        } else {
                return -ESRCH;
@@ -1964,9 +1930,8 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long
                goto out;
 
        if (mmget_not_zero(ctx->mm)) {
-               ret = mfill_atomic_poison(ctx->mm, uffdio_poison.range.start,
-                                         uffdio_poison.range.len,
-                                         &ctx->mmap_changing, 0);
+               ret = mfill_atomic_poison(ctx, uffdio_poison.range.start,
+                                         uffdio_poison.range.len, 0);
                mmput(ctx->mm);
        } else {
                return -ESRCH;
@@ -2040,16 +2005,8 @@ static int userfaultfd_move(struct userfaultfd_ctx *ctx,
                return -EINVAL;
 
        if (mmget_not_zero(mm)) {
-               mmap_read_lock(mm);
-
-               /* Re-check after taking mmap_lock */
-               if (likely(!atomic_read(&ctx->mmap_changing)))
-                       ret = move_pages(ctx, mm, uffdio_move.dst, uffdio_move.src,
-                                        uffdio_move.len, uffdio_move.mode);
-               else
-                       ret = -EINVAL;
-
-               mmap_read_unlock(mm);
+               ret = move_pages(ctx, uffdio_move.dst, uffdio_move.src,
+                                uffdio_move.len, uffdio_move.mode);
                mmput(mm);
        } else {
                return -ESRCH;
@@ -2255,6 +2212,7 @@ static int new_userfaultfd(int flags)
        ctx->flags = flags;
        ctx->features = 0;
        ctx->released = false;
+       init_rwsem(&ctx->map_changing_lock);
        atomic_set(&ctx->mmap_changing, 0);
        ctx->mm = current->mm;
        /* prevent the mm struct to be freed */
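
The new ctx->map_changing_lock makes mmap_changing reliable without holding mmap_lock: every event generator above takes the rwsem for write around its atomic_inc(), so a reader holding it for read sees a stable value for the duration of its operation. The consumer side lives in mm/userfaultfd.c (not in this excerpt); the assumed pattern looks roughly like:

        /* sketch of the reader side; do_the_fill() is a hypothetical stand-in */
        down_read(&ctx->map_changing_lock);
        if (atomic_read(&ctx->mmap_changing)) {
                up_read(&ctx->map_changing_lock);
                return -EAGAIN;         /* layout changed, let userspace retry */
        }
        ret = do_the_fill(ctx, dst, src, len);
        up_read(&ctx->map_changing_lock);
        return ret;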
index 129a3a759976598efe88f390847565c7027cecd5..709830274b7565187bed9a427f128d9e94acb3a1 100644 (file)
@@ -69,6 +69,7 @@
  *
  *  - tlb_remove_page() / __tlb_remove_page()
  *  - tlb_remove_page_size() / __tlb_remove_page_size()
+ *  - __tlb_remove_folio_pages()
  *
  *    __tlb_remove_page_size() is the basic primitive that queues a page for
  *    freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a
  *    boolean indicating if the queue is (now) full and a call to
  *    tlb_flush_mmu() is required.
  *
  *    tlb_remove_page() and tlb_remove_page_size() imply the call to
  *    tlb_flush_mmu() when required and have no return value.
  *
+ *    __tlb_remove_folio_pages() is similar to __tlb_remove_page(), however,
+ *    instead of removing a single page, remove the given number of consecutive
+ *    pages that are all part of the same (large) folio: just like calling
+ *    __tlb_remove_page() on each page individually.
+ *
  *  - tlb_change_page_size()
  *
  *    call before __tlb_remove_page*() to set the current page-size; implies a
@@ -260,9 +266,10 @@ struct mmu_gather_batch {
  */
 #define MAX_GATHER_BATCH_COUNT (10000UL/MAX_GATHER_BATCH)
 
-extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
-                                  struct encoded_page *page,
-                                  int page_size);
+extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+               bool delay_rmap, int page_size);
+bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page,
+               unsigned int nr_pages, bool delay_rmap);
 
 #ifdef CONFIG_SMP
 /*
@@ -462,13 +469,14 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
                                        struct page *page, int page_size)
 {
-       if (__tlb_remove_page_size(tlb, encode_page(page, 0), page_size))
+       if (__tlb_remove_page_size(tlb, page, false, page_size))
                tlb_flush_mmu(tlb);
 }
 
-static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, unsigned int flags)
+static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb,
+               struct page *page, bool delay_rmap)
 {
-       return __tlb_remove_page_size(tlb, encode_page(page, flags), PAGE_SIZE);
+       return __tlb_remove_page_size(tlb, page, delay_rmap, PAGE_SIZE);
 }
 
 /* tlb_remove_page
@@ -592,7 +600,9 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
 }
 
 #ifndef __tlb_remove_tlb_entry
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
+{
+}
 #endif
 
 /**
@@ -608,6 +618,26 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
                __tlb_remove_tlb_entry(tlb, ptep, address);     \
        } while (0)
 
+/**
+ * tlb_remove_tlb_entries - remember unmapping of multiple consecutive ptes for
+ *                         later tlb invalidation.
+ *
+ * Similar to tlb_remove_tlb_entry(), but remember unmapping of multiple
+ * consecutive ptes instead of only a single one.
+ */
+static inline void tlb_remove_tlb_entries(struct mmu_gather *tlb,
+               pte_t *ptep, unsigned int nr, unsigned long address)
+{
+       tlb_flush_pte_range(tlb, address, PAGE_SIZE * nr);
+       for (;;) {
+               __tlb_remove_tlb_entry(tlb, ptep, address);
+               if (--nr == 0)
+                       break;
+               ptep++;
+               address += PAGE_SIZE;
+       }
+}
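
For illustration, the two new batched primitives are meant to be used together on a large folio; a minimal sketch (the calling context and names are assumed, not taken from this diff):

/*
 * Sketch only: a zap-style caller that has just cleared "nr" consecutive
 * PTEs mapping one large folio batches both the TLB bookkeeping and the
 * page freeing.
 */
static void unmap_large_folio(struct mmu_gather *tlb, struct folio *folio,
			      pte_t *pte, unsigned long addr, unsigned int nr)
{
	/* Remember all nr PTE slots for a single later TLB invalidation. */
	tlb_remove_tlb_entries(tlb, pte, nr, addr);

	/* Queue every page of the folio; flush if the batch is now full. */
	if (__tlb_remove_folio_pages(tlb, folio_page(folio, 0), nr, false))
		tlb_flush_mmu(tlb);
}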
+
 #define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)       \
        do {                                                    \
                unsigned long _sz = huge_page_size(h);          \
index 5dd3a61d673d4f9492da2162d15944e0faa636a9..f7749d0f2562f11fd8ca4112e0e7d99e79305780 100644 (file)
  * -fsanitize=thread produce unwanted sections (.eh_frame
  * and .init_array.*), but CONFIG_CONSTRUCTORS wants to
  * keep any .init_array.* sections.
- * https://bugs.llvm.org/show_bug.cgi?id=46478
+ * https://llvm.org/pr46478
  */
 #ifdef CONFIG_UNWIND_TABLES
 #define DISCARD_EH_FRAME
index 574cffc90730f5f3296262375a08474dab6ec59d..80e243611fe243aa9b048efe282d472efc89f941 100644 (file)
@@ -160,6 +160,12 @@ static inline void acomp_request_set_tfm(struct acomp_req *req,
        req->base.tfm = crypto_acomp_tfm(tfm);
 }
 
+static inline bool acomp_is_async(struct crypto_acomp *tfm)
+{
+       return crypto_comp_alg_common(tfm)->base.cra_flags &
+              CRYPTO_ALG_ASYNC;
+}
+
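A consumer can use this predicate to decide whether async completion plumbing is needed at all; a hedged sketch using the generic crypto wait helpers (assumed available via <crypto/acompress.h> and <linux/crypto.h>):

/* Sketch: compress synchronously from the caller's point of view. */
static int do_compress(struct crypto_acomp *tfm, struct acomp_req *req)
{
	DECLARE_CRYPTO_WAIT(wait);

	/* Only async transforms ever invoke the completion callback. */
	if (acomp_is_async(tfm))
		acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
					   crypto_req_done, &wait);

	return crypto_wait_req(crypto_acomp_compress(req), &wait);
}
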
 static inline struct crypto_acomp *crypto_acomp_reqtfm(struct acomp_req *req)
 {
        return __crypto_acomp_tfm(req->base.tfm);
index 59c707e4dea467be1320ef458c51ddbd1568cd71..58967593b6b4d170da49fe0df9ceac12d1cfbd4a 100644 (file)
@@ -87,8 +87,6 @@ static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg)
                !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY);
 }
 
-bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg);
-
 int crypto_grab_ahash(struct crypto_ahash_spawn *spawn,
                      struct crypto_instance *inst,
                      const char *name, u32 type, u32 mask);
index 462f8a34cdf872b17111abf68b4eeb9a8577b6b2..b7f308977c84371c1afecf5c50d0e6b1a6e77234 100644 (file)
@@ -10,6 +10,7 @@
 #ifndef _LINUX_PUBLIC_KEY_H
 #define _LINUX_PUBLIC_KEY_H
 
+#include <linux/errno.h>
 #include <linux/keyctl.h>
 #include <linux/oid_registry.h>
 
index 712782177c908e9f7abcf6a0ff4a32d72d1cdca7..7ae96c7bd72fdea68fb8ad84426977f5234d948b 100644 (file)
@@ -86,6 +86,7 @@
 #define ASPEED_CLK_MAC3RCLK            69
 #define ASPEED_CLK_MAC4RCLK            70
 #define ASPEED_CLK_I3C                 71
+#define ASPEED_CLK_FSI                 72
 
 /* Only list resets here that are not part of a clock gate + reset pair */
 #define ASPEED_RESET_ADC               55
index bc15108aa3c2ab527c25d688c66121208b35eac9..7666241520f8761bf31ced7374a75f04913fd8cd 100644 (file)
 #define CLK_MOUT_G3D_SWITCH            76
 #define CLK_GOUT_G3D_SWITCH            77
 #define CLK_DOUT_G3D_SWITCH            78
+#define CLK_MOUT_CPUCL0_DBG            79
+#define CLK_MOUT_CPUCL0_SWITCH         80
+#define CLK_GOUT_CPUCL0_DBG            81
+#define CLK_GOUT_CPUCL0_SWITCH         82
+#define CLK_DOUT_CPUCL0_DBG            83
+#define CLK_DOUT_CPUCL0_SWITCH         84
+#define CLK_MOUT_CPUCL1_DBG            85
+#define CLK_MOUT_CPUCL1_SWITCH         86
+#define CLK_GOUT_CPUCL1_DBG            87
+#define CLK_GOUT_CPUCL1_SWITCH         88
+#define CLK_DOUT_CPUCL1_DBG            89
+#define CLK_DOUT_CPUCL1_SWITCH         90
 
 /* CMU_APM */
 #define CLK_RCO_I3C_PMIC               1
 #define CLK_GOUT_CMGP_USI1_PCLK                14
 #define CLK_GOUT_SYSREG_CMGP_PCLK      15
 
+/* CMU_CPUCL0 */
+#define CLK_FOUT_CPUCL0_PLL            1
+#define CLK_MOUT_PLL_CPUCL0            2
+#define CLK_MOUT_CPUCL0_SWITCH_USER    3
+#define CLK_MOUT_CPUCL0_DBG_USER       4
+#define CLK_MOUT_CPUCL0_PLL            5
+#define CLK_DOUT_CPUCL0_CPU            6
+#define CLK_DOUT_CPUCL0_CMUREF         7
+#define CLK_DOUT_CPUCL0_PCLK           8
+#define CLK_DOUT_CLUSTER0_ACLK         9
+#define CLK_DOUT_CLUSTER0_ATCLK                10
+#define CLK_DOUT_CLUSTER0_PCLKDBG      11
+#define CLK_DOUT_CLUSTER0_PERIPHCLK    12
+#define CLK_GOUT_CLUSTER0_ATCLK                13
+#define CLK_GOUT_CLUSTER0_PCLK         14
+#define CLK_GOUT_CLUSTER0_PERIPHCLK    15
+#define CLK_GOUT_CLUSTER0_SCLK         16
+#define CLK_GOUT_CPUCL0_CMU_CPUCL0_PCLK        17
+#define CLK_GOUT_CLUSTER0_CPU          18
+#define CLK_CLUSTER0_SCLK              19
+
+/* CMU_CPUCL1 */
+#define CLK_FOUT_CPUCL1_PLL            1
+#define CLK_MOUT_PLL_CPUCL1            2
+#define CLK_MOUT_CPUCL1_SWITCH_USER    3
+#define CLK_MOUT_CPUCL1_DBG_USER       4
+#define CLK_MOUT_CPUCL1_PLL            5
+#define CLK_DOUT_CPUCL1_CPU            6
+#define CLK_DOUT_CPUCL1_CMUREF         7
+#define CLK_DOUT_CPUCL1_PCLK           8
+#define CLK_DOUT_CLUSTER1_ACLK         9
+#define CLK_DOUT_CLUSTER1_ATCLK                10
+#define CLK_DOUT_CLUSTER1_PCLKDBG      11
+#define CLK_DOUT_CLUSTER1_PERIPHCLK    12
+#define CLK_GOUT_CLUSTER1_ATCLK                13
+#define CLK_GOUT_CLUSTER1_PCLK         14
+#define CLK_GOUT_CLUSTER1_PERIPHCLK    15
+#define CLK_GOUT_CLUSTER1_SCLK         16
+#define CLK_GOUT_CPUCL1_CMU_CPUCL1_PCLK        17
+#define CLK_GOUT_CLUSTER1_CPU          18
+#define CLK_CLUSTER1_SCLK              19
+
 /* CMU_G3D */
 #define CLK_FOUT_G3D_PLL               1
 #define CLK_MOUT_G3D_PLL               2
index 79775a5134caa22f46d7b9eb826d3af90bee8708..b52f19a2b480f78f8dddc316d57d93a8fc8de6a5 100644 (file)
 
 #define CLK_RTCREF     33
 #define CLK_MSSPLL     34
+#define CLK_MSSPLL0    34
+#define CLK_MSSPLL1    35
+#define CLK_MSSPLL2    36
+#define CLK_MSSPLL3    37
+/* 38 is reserved for MSS PLL internals */
 
 /* Clock Conditioning Circuitry Clock IDs */
 
diff --git a/include/dt-bindings/clock/mobileye,eyeq5-clk.h b/include/dt-bindings/clock/mobileye,eyeq5-clk.h
new file mode 100644 (file)
index 0000000..26d8930
--- /dev/null
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (C) 2024 Mobileye Vision Technologies Ltd.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_MOBILEYE_EYEQ5_CLK_H
+#define _DT_BINDINGS_CLOCK_MOBILEYE_EYEQ5_CLK_H
+
+#define EQ5C_PLL_CPU   0
+#define EQ5C_PLL_VMP   1
+#define EQ5C_PLL_PMA   2
+#define EQ5C_PLL_VDI   3
+#define EQ5C_PLL_DDR0  4
+#define EQ5C_PLL_PCI   5
+#define EQ5C_PLL_PER   6
+#define EQ5C_PLL_PMAC  7
+#define EQ5C_PLL_MPC   8
+#define EQ5C_PLL_DDR1  9
+
+#define EQ5C_DIV_OSPI  10
+
+#endif
index dfefd5e8bf6e9d8a9a3e81a81eab26fd9b422f51..921a33f24d33a28d90744a889ee25e2f916ac451 100644 (file)
 #define GCC_USB30_PRIM_BCR                                     26
 #define GCC_USB30_SEC_BCR                                      27
 #define GCC_USB_PHY_CFG_AHB2PHY_BCR                            28
+#define GCC_VIDEO_AXIC_CLK_BCR                                 29
+#define GCC_VIDEO_AXI0_CLK_BCR                                 30
+#define GCC_VIDEO_AXI1_CLK_BCR                                 31
 
 /* GCC GDSCRs */
 #define PCIE_0_GDSC                                            0
index 754c54a6eb06a46dafeeb030df71dc6b96c16866..7850cdc62e2854939627552c8c34be6ff8625563 100644 (file)
@@ -86,5 +86,6 @@
 #define R8A779G0_CLK_CPEX              74
 #define R8A779G0_CLK_CBFUSA            75
 #define R8A779G0_CLK_R                 76
+#define R8A779G0_CLK_CP                        77
 
 #endif /* __DT_BINDINGS_CLOCK_R8A779G0_CPG_MSSR_H__ */
index 1d98a25b08a4890c8828a4b8aae9cef1ede33bc4..61759df4b2e7f62b546cf0e348381318a5121a07 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */
+/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
 /*
  * Copyright (c) 2023 Amlogic, Inc.
  * Author: hongyu chen1 <hongyu.chen1@amlogic.com>
index 4933019713672971fa8e2d63dd8a864cb2a725f8..0eb152889a896e959251b98d69970ec689a2c83f 100644 (file)
 /* ETHWARP resets */
 #define MT7988_ETHWARP_RST_SWITCH              0
 
+/* INFRA resets */
+#define MT7988_INFRA_RST0_PEXTP_MAC_SWRST      0
+#define MT7988_INFRA_RST1_THERM_CTRL_SWRST     1
+
 #endif  /* _DT_BINDINGS_RESET_CONTROLLER_MT7988 */
index 4b9d8fb393a8496da369a9f899cee00d4bab252b..eb4c369a79eb31b705aa78ec28437d3de69b97b7 100644 (file)
@@ -90,16 +90,6 @@ void kvm_vcpu_pmu_resync_el0(void);
                        vcpu->arch.pmu.events = *kvm_get_pmu_events();  \
        } while (0)
 
-/*
- * Evaluates as true when emulating PMUv3p5, and false otherwise.
- */
-#define kvm_pmu_is_3p5(vcpu) ({                                                \
-       u64 val = IDREG(vcpu->kvm, SYS_ID_AA64DFR0_EL1);                \
-       u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, val);        \
-                                                                       \
-       pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5;                          \
-})
-
 u8 kvm_arm_pmu_get_pmuver_limit(void);
 u64 kvm_pmu_evtyper_mask(struct kvm *kvm);
 int kvm_arm_set_default_pmu(struct kvm *kvm);
@@ -168,7 +158,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
 }
 
 #define kvm_vcpu_has_pmu(vcpu)         ({ false; })
-#define kvm_pmu_is_3p5(vcpu)           ({ false; })
 static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
index 8cc38e836f540e26411a5d410842cb1a5a5c2ff2..47035946648eafd0e4e88e8f49e0143c4ef58605 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/spinlock.h>
 #include <linux/static_key.h>
 #include <linux/types.h>
+#include <linux/xarray.h>
 #include <kvm/iodev.h>
 #include <linux/list.h>
 #include <linux/jump_label.h>
@@ -116,7 +117,7 @@ struct irq_ops {
 
 struct vgic_irq {
        raw_spinlock_t irq_lock;        /* Protects the content of the struct */
-       struct list_head lpi_list;      /* Used to link all LPIs together */
+       struct rcu_head rcu;
        struct list_head ap_list;
 
        struct kvm_vcpu *vcpu;          /* SGIs and PPIs: The VCPU
@@ -273,10 +274,10 @@ struct vgic_dist {
         */
        u64                     propbaser;
 
-       /* Protects the lpi_list and the count value below. */
+       /* Protects the lpi_list. */
        raw_spinlock_t          lpi_list_lock;
-       struct list_head        lpi_list_head;
-       int                     lpi_list_count;
+       struct xarray           lpi_xa;
+       atomic_t                lpi_count;
 
        /* LPI translation cache */
        struct list_head        lpi_translation_cache;
index a170c389dd746047943b0afdde2358e048662394..34829f2c517ac278e1655c5a646bbbfbe051506b 100644 (file)
@@ -1548,4 +1548,25 @@ static inline void acpi_use_parent_companion(struct device *dev)
        ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent));
 }
 
+#ifdef CONFIG_ACPI_HMAT
+int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
+                                  enum access_coordinate_class access);
+#else
+static inline int hmat_update_target_coordinates(int nid,
+                                                struct access_coordinate *coord,
+                                                enum access_coordinate_class access)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+bool acpi_node_backed_by_real_pxm(int nid);
+#else
+static inline bool acpi_node_backed_by_real_pxm(int nid)
+{
+       return false;
+}
+#endif
+
 #endif /*_LINUX_ACPI_H*/
index 7c0cf5031abe8796fc40c7307ddfde4832f2655d..0eb24d21aac2142cc94b4489ccd786df58a06400 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <linux/const.h>
 #include <vdso/bits.h>
+#include <uapi/linux/bits.h>
 #include <asm/bitsperlong.h>
 
 #define BIT_MASK(nr)           (UL(1) << ((nr) % BITS_PER_LONG))
 #define GENMASK_INPUT_CHECK(h, l) 0
 #endif
 
-#define __GENMASK(h, l) \
-       (((~UL(0)) - (UL(1) << (l)) + 1) & \
-        (~UL(0) >> (BITS_PER_LONG - 1 - (h))))
 #define GENMASK(h, l) \
        (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
-
-#define __GENMASK_ULL(h, l) \
-       (((~ULL(0)) - (ULL(1) << (l)) + 1) & \
-        (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
 #define GENMASK_ULL(h, l) \
        (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
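
Moving the __GENMASK()/__GENMASK_ULL() definitions behind <uapi/linux/bits.h> does not change what callers see; for instance:

/* GENMASK(h, l) still builds a contiguous mask from bit l up to bit h. */
#define EXAMPLE_FIELD_MASK	GENMASK(7, 4)		/* == 0xf0 */
#define EXAMPLE_WIDE_MASK	GENMASK_ULL(39, 21)	/* 64-bit wide variant */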
 
index f9b87c39cab0478aac030e50174dd5b3fd7c8f16..c3e8f7cf96be9e1c10169d2e7afe31696082eb8f 100644 (file)
@@ -1505,6 +1505,16 @@ struct blk_holder_ops {
         * Thaw the file system mounted on the block device.
         */
        int (*thaw)(struct block_device *bdev);
+
+       /*
+        * If needed, get a reference to the holder.
+        */
+       void (*get_holder)(void *holder);
+
+       /*
+        * Release the holder.
+        */
+       void (*put_holder)(void *holder);
 };
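
A refcounted holder could wire these up so the block layer keeps it alive across holder callbacks; a minimal sketch assuming a kref-backed holder type (my_holder and my_holder_release are illustrative, not part of this patch):

struct my_holder {
	struct kref ref;
	/* ... holder-private state ... */
};

static void my_holder_release(struct kref *ref);	/* frees the holder */

static void my_holder_get(void *holder)
{
	kref_get(&((struct my_holder *)holder)->ref);
}

static void my_holder_put(void *holder)
{
	kref_put(&((struct my_holder *)holder)->ref, my_holder_release);
}

static const struct blk_holder_ops my_holder_ops = {
	.get_holder	= my_holder_get,
	.put_holder	= my_holder_put,
};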
 
 /*
index 8a582d242f06729bc6b3a31ab31d0d331beae28d..20aa3c2d89f760a28e57d03bfd3baf62b560f61c 100644 (file)
@@ -11,7 +11,7 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
                   __u32 *size);
 int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size);
 
-#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE)
+#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO)
 extern unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX];
 void init_vmlinux_build_id(void);
 #else
index d504eb4b49abec9a1620ecc3bb3ae5651fd64f16..2cb15fe4fe12919fb11983c3ec2c89e96a68a5a8 100644 (file)
@@ -138,4 +138,10 @@ static inline int get_cpu_cacheinfo_id(int cpu, int level)
 #define use_arch_cache_info()  (false)
 #endif
 
+#ifndef CONFIG_ARCH_HAS_CPU_CACHE_ALIASING
+#define cpu_dcache_is_aliasing()       false
+#else
+#include <asm/cachetype.h>
+#endif
+
 #endif /* _LINUX_CACHEINFO_H */
index 1293c38ddb7f7390b26c78e36148b8f9e5ffb1c4..4a537260f6557d0ab27d4ba382ab96d37dfa180f 100644 (file)
@@ -1084,18 +1084,28 @@ void of_fixed_factor_clk_setup(struct device_node *node);
  * @hw:                handle between common and hardware-specific interfaces
  * @mult:      multiplier
  * @div:       divider
+ * @acc:       fixed accuracy in ppb
+ * @flags:     behavior modifying flags
  *
  * Clock with a fixed multiplier and divider. The output frequency is the
  * parent clock rate divided by div and multiplied by mult.
- * Implements .recalc_rate, .set_rate and .round_rate
+ * Implements .recalc_rate, .set_rate, .round_rate and .recalc_accuracy
+ *
+ * Flags:
+ * * CLK_FIXED_FACTOR_FIXED_ACCURACY - Use the value in @acc instead of the
+ *                                     parent clk accuracy.
  */
 
 struct clk_fixed_factor {
        struct clk_hw   hw;
        unsigned int    mult;
        unsigned int    div;
+       unsigned long   acc;
+       unsigned int    flags;
 };
 
+#define CLK_FIXED_FACTOR_FIXED_ACCURACY        BIT(0)
+
 #define to_clk_fixed_factor(_hw) container_of(_hw, struct clk_fixed_factor, hw)
 
 extern const struct clk_ops clk_fixed_factor_ops;
@@ -1106,10 +1116,24 @@ void clk_unregister_fixed_factor(struct clk *clk);
 struct clk_hw *clk_hw_register_fixed_factor(struct device *dev,
                const char *name, const char *parent_name, unsigned long flags,
                unsigned int mult, unsigned int div);
+struct clk_hw *clk_hw_register_fixed_factor_fwname(struct device *dev,
+               struct device_node *np, const char *name, const char *fw_name,
+               unsigned long flags, unsigned int mult, unsigned int div);
+struct clk_hw *clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev,
+               struct device_node *np, const char *name, const char *fw_name,
+               unsigned long flags, unsigned int mult, unsigned int div,
+               unsigned long acc);
 void clk_hw_unregister_fixed_factor(struct clk_hw *hw);
 struct clk_hw *devm_clk_hw_register_fixed_factor(struct device *dev,
                const char *name, const char *parent_name, unsigned long flags,
                unsigned int mult, unsigned int div);
+struct clk_hw *devm_clk_hw_register_fixed_factor_fwname(struct device *dev,
+               struct device_node *np, const char *name, const char *fw_name,
+               unsigned long flags, unsigned int mult, unsigned int div);
+struct clk_hw *devm_clk_hw_register_fixed_factor_with_accuracy_fwname(struct device *dev,
+               struct device_node *np, const char *name, const char *fw_name,
+               unsigned long flags, unsigned int mult, unsigned int div,
+               unsigned long acc);
 struct clk_hw *devm_clk_hw_register_fixed_factor_index(struct device *dev,
                const char *name, unsigned int index, unsigned long flags,
                unsigned int mult, unsigned int div);
index 06f1b292f8a00ad8f9221a2672baa71b9b6d41e7..00623f4de5e195d5a4b880db633b1bfd335f34fc 100644 (file)
@@ -201,6 +201,18 @@ bool clk_is_match(const struct clk *p, const struct clk *q);
  */
 int clk_rate_exclusive_get(struct clk *clk);
 
+/**
+ * devm_clk_rate_exclusive_get - devm variant of clk_rate_exclusive_get
+ * @dev: device the exclusivity is bound to
+ * @clk: clock source
+ *
+ * Calls clk_rate_exclusive_get() on @clk and registers a devm cleanup handler
+ * on @dev to call clk_rate_exclusive_put().
+ *
+ * Must not be called from within atomic context.
+ */
+int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk);
+
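A probe routine that must keep its clock at a fixed rate for the device's whole lifetime could use it like this (sketch; foo_probe and the rate are illustrative):

static int foo_probe(struct platform_device *pdev)
{
	struct clk *clk = devm_clk_get_enabled(&pdev->dev, NULL);
	int ret;

	if (IS_ERR(clk))
		return PTR_ERR(clk);

	/* Exclusivity is dropped automatically when the device unbinds. */
	ret = devm_clk_rate_exclusive_get(&pdev->dev, clk);
	if (ret)
		return ret;

	return clk_set_rate(clk, 100000000);	/* illustrative rate */
}
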
 /**
  * clk_rate_exclusive_put - release exclusivity over the rate control of a
  *                          producer
@@ -478,6 +490,22 @@ int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks,
 int __must_check devm_clk_bulk_get_all(struct device *dev,
                                       struct clk_bulk_data **clks);
 
+/**
+ * devm_clk_bulk_get_all_enable - Get and enable all clocks of the consumer (managed)
+ * @dev: device for clock "consumer"
+ * @clks: pointer to the clk_bulk_data table of consumer
+ *
+ * Returns success (0) or negative errno.
+ *
+ * This helper function allows drivers to get all clocks of the
+ * consumer and enable them in one managed operation. The clocks
+ * will automatically be disabled and freed when the device
+ * is unbound.
+ */
+int __must_check devm_clk_bulk_get_all_enable(struct device *dev,
+                                             struct clk_bulk_data **clks);
+
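Typical use collapses the usual get/prepare_enable/cleanup dance into one call; a sketch (bar_probe is illustrative):

static int bar_probe(struct platform_device *pdev)
{
	struct clk_bulk_data *clks;
	int ret;

	ret = devm_clk_bulk_get_all_enable(&pdev->dev, &clks);
	if (ret)
		return ret;

	/* All clocks are now enabled; nothing to undo in remove(). */
	return 0;
}
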
 /**
  * devm_clk_get - lookup and obtain a managed reference to a clock producer.
  * @dev: device for clock "consumer"
@@ -968,6 +996,12 @@ static inline int __must_check devm_clk_bulk_get_all(struct device *dev,
        return 0;
 }
 
+static inline int __must_check devm_clk_bulk_get_all_enable(struct device *dev,
+                                               struct clk_bulk_data **clks)
+{
+       return 0;
+}
+
 static inline struct clk *devm_get_clk_from_child(struct device *dev,
                                struct device_node *np, const char *con_id)
 {
index 63873b93deaa62634da9ed3fb19543ba0131abdf..9db877506ea836846a8ae2fbb64a4a4c5ac8733d 100644 (file)
@@ -6,12 +6,8 @@
 #include <linux/types.h>
 #include <linux/numa.h>
 
-/*
- * There is always at least global CMA area and a few optional
- * areas configured in kernel .config.
- */
 #ifdef CONFIG_CMA_AREAS
-#define MAX_CMA_AREAS  (1 + CONFIG_CMA_AREAS)
+#define MAX_CMA_AREAS  CONFIG_CMA_AREAS
 #endif
 
 #define CMA_MAX_NAME 64
index ddab1ef22beef34b5d81e0ee0573de2806372f74..49feac0162a5264a5b63b4a5e913bd821ccc66c3 100644 (file)
@@ -9,7 +9,7 @@
  * Clang prior to 17 is being silly and considers many __cleanup() variables
  * as unused (because they are, their sole purpose is to go out of scope).
  *
- * https://reviews.llvm.org/D152180
+ * https://github.com/llvm/llvm-project/commit/877210faa447f4cc7db87812f8ed80e398fedd61
  */
 #undef __cleanup
 #define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func)))
 #define __diag_str(s)          __diag_str1(s)
 #define __diag(s)              _Pragma(__diag_str(clang diagnostic s))
 
-#if CONFIG_CLANG_VERSION >= 110000
-#define __diag_clang_11(s)     __diag(s)
-#else
-#define __diag_clang_11(s)
-#endif
+#define __diag_clang_13(s)     __diag(s)
 
 #define __diag_ignore_all(option, comment) \
-       __diag_clang(11, ignore, option)
+       __diag_clang(13, ignore, option)
index 9eaeaafe0cad3a235ec62e52253cd962161daff6..23270b16e1dbf3c41740d06b1afd7755ea03dc7b 100644 (file)
 #include <linux/linkage.h>
 #include <linux/elfcore.h>
 #include <linux/elf.h>
-#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
-#include <asm/crash_core.h>
-#endif
 
-/* Location of a reserved region to hold the crash kernel.
- */
-extern struct resource crashk_res;
-extern struct resource crashk_low_res;
+struct kimage;
+
+#ifdef CONFIG_CRASH_DUMP
 
-#define CRASH_CORE_NOTE_NAME      "CORE"
-#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
-#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4)
-#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4)
+int crash_shrink_memory(unsigned long new_size);
+ssize_t crash_get_memory_size(void);
 
+#ifndef arch_kexec_protect_crashkres
 /*
- * The per-cpu notes area is a list of notes terminated by a "NULL"
- * note header.  For kdump, the code in vmcore.c runs in the context
- * of the second kernel to combine them into one note.
+ * Protection mechanism for crashkernel reserved memory after
+ * the kdump kernel is loaded.
+ *
+ * Provide an empty default implementation here -- architecture
+ * code may override this.
  */
-#define CRASH_CORE_NOTE_BYTES     ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +  \
-                                    CRASH_CORE_NOTE_NAME_BYTES +       \
-                                    CRASH_CORE_NOTE_DESC_BYTES)
-
-#define VMCOREINFO_BYTES          PAGE_SIZE
-#define VMCOREINFO_NOTE_NAME      "VMCOREINFO"
-#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
-#define VMCOREINFO_NOTE_SIZE      ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +  \
-                                    VMCOREINFO_NOTE_NAME_BYTES +       \
-                                    VMCOREINFO_BYTES)
-
-typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
-/* Per cpu memory for storing cpu states in case of system crash. */
-extern note_buf_t __percpu *crash_notes;
-
-void crash_update_vmcoreinfo_safecopy(void *ptr);
-void crash_save_vmcoreinfo(void);
-void arch_crash_save_vmcoreinfo(void);
-__printf(1, 2)
-void vmcoreinfo_append_str(const char *fmt, ...);
-phys_addr_t paddr_vmcoreinfo_note(void);
-
-#define VMCOREINFO_OSRELEASE(value) \
-       vmcoreinfo_append_str("OSRELEASE=%s\n", value)
-#define VMCOREINFO_BUILD_ID()                                          \
-       ({                                                              \
-               static_assert(sizeof(vmlinux_build_id) == 20);          \
-               vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \
-       })
-
-#define VMCOREINFO_PAGESIZE(value) \
-       vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
-#define VMCOREINFO_SYMBOL(name) \
-       vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
-#define VMCOREINFO_SYMBOL_ARRAY(name) \
-       vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
-#define VMCOREINFO_SIZE(name) \
-       vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
-                             (unsigned long)sizeof(name))
-#define VMCOREINFO_STRUCT_SIZE(name) \
-       vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
-                             (unsigned long)sizeof(struct name))
-#define VMCOREINFO_OFFSET(name, field) \
-       vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
-                             (unsigned long)offsetof(struct name, field))
-#define VMCOREINFO_TYPE_OFFSET(name, field) \
-       vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
-                             (unsigned long)offsetof(name, field))
-#define VMCOREINFO_LENGTH(name, value) \
-       vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value)
-#define VMCOREINFO_NUMBER(name) \
-       vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
-#define VMCOREINFO_CONFIG(name) \
-       vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
-
-extern unsigned char *vmcoreinfo_data;
-extern size_t vmcoreinfo_size;
-extern u32 *vmcoreinfo_note;
-
-Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
-                         void *data, size_t data_len);
-void final_note(Elf_Word *buf);
-
-int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
-               unsigned long long *crash_size, unsigned long long *crash_base,
-               unsigned long long *low_size, bool *high);
-
-#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
-#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE
-#define DEFAULT_CRASH_KERNEL_LOW_SIZE  (128UL << 20)
+static inline void arch_kexec_protect_crashkres(void) { }
 #endif
-#ifndef CRASH_ALIGN
-#define CRASH_ALIGN                    SZ_2M
+
+#ifndef arch_kexec_unprotect_crashkres
+static inline void arch_kexec_unprotect_crashkres(void) { }
 #endif
-#ifndef CRASH_ADDR_LOW_MAX
-#define CRASH_ADDR_LOW_MAX             SZ_4G
+
+#ifndef arch_crash_handle_hotplug_event
+static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
 #endif
-#ifndef CRASH_ADDR_HIGH_MAX
-#define CRASH_ADDR_HIGH_MAX            memblock_end_of_DRAM()
+
+int crash_check_update_elfcorehdr(void);
+
+#ifndef crash_hotplug_cpu_support
+static inline int crash_hotplug_cpu_support(void) { return 0; }
+#endif
+
+#ifndef crash_hotplug_memory_support
+static inline int crash_hotplug_memory_support(void) { return 0; }
 #endif
 
-void __init reserve_crashkernel_generic(char *cmdline,
-               unsigned long long crash_size,
-               unsigned long long crash_base,
-               unsigned long long crash_low_size,
-               bool high);
-#else
-static inline void __init reserve_crashkernel_generic(char *cmdline,
-               unsigned long long crash_size,
-               unsigned long long crash_base,
-               unsigned long long crash_low_size,
-               bool high)
-{}
+#ifndef crash_get_elfcorehdr_size
+static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
 #endif
 
 /* Alignment required for elf header segment */
@@ -144,4 +73,23 @@ struct kexec_segment;
 #define KEXEC_CRASH_HP_REMOVE_MEMORY           4
 #define KEXEC_CRASH_HP_INVALID_CPU             -1U
 
+extern void __crash_kexec(struct pt_regs *regs);
+extern void crash_kexec(struct pt_regs *regs);
+int kexec_should_crash(struct task_struct *p);
+int kexec_crash_loaded(void);
+void crash_save_cpu(struct pt_regs *regs, int cpu);
+extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
+
+#else /* !CONFIG_CRASH_DUMP */
+struct pt_regs;
+struct task_struct;
+struct kimage;
+static inline void __crash_kexec(struct pt_regs *regs) { }
+static inline void crash_kexec(struct pt_regs *regs) { }
+static inline int kexec_should_crash(struct task_struct *p) { return 0; }
+static inline int kexec_crash_loaded(void) { return 0; }
+static inline void crash_save_cpu(struct pt_regs *regs, int cpu) { }
+static inline int kimage_crash_copy_vmcoreinfo(struct kimage *image) { return 0; }
+#endif /* CONFIG_CRASH_DUMP */
+
 #endif /* LINUX_CRASH_CORE_H */
diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h
new file mode 100644 (file)
index 0000000..5a9df94
--- /dev/null
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_CRASH_RESERVE_H
+#define LINUX_CRASH_RESERVE_H
+
+#include <linux/linkage.h>
+#include <linux/elfcore.h>
+#include <linux/elf.h>
+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+#include <asm/crash_reserve.h>
+#endif
+
+/* Location of a reserved region to hold the crash kernel. */
+extern struct resource crashk_res;
+extern struct resource crashk_low_res;
+
+int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
+               unsigned long long *crash_size, unsigned long long *crash_base,
+               unsigned long long *low_size, bool *high);
+
+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+#ifndef DEFAULT_CRASH_KERNEL_LOW_SIZE
+#define DEFAULT_CRASH_KERNEL_LOW_SIZE  (128UL << 20)
+#endif
+#ifndef CRASH_ALIGN
+#define CRASH_ALIGN                    SZ_2M
+#endif
+#ifndef CRASH_ADDR_LOW_MAX
+#define CRASH_ADDR_LOW_MAX             SZ_4G
+#endif
+#ifndef CRASH_ADDR_HIGH_MAX
+#define CRASH_ADDR_HIGH_MAX            memblock_end_of_DRAM()
+#endif
+
+void __init reserve_crashkernel_generic(char *cmdline,
+               unsigned long long crash_size,
+               unsigned long long crash_base,
+               unsigned long long crash_low_size,
+               bool high);
+#else
+static inline void __init reserve_crashkernel_generic(char *cmdline,
+               unsigned long long crash_size,
+               unsigned long long crash_base,
+               unsigned long long crash_low_size,
+               bool high)
+{}
+#endif
+#endif /* LINUX_CRASH_RESERVE_H */
index 5881e4ac30be6a6e6985a006f1b7aeb309923b86..886d07294f4e7ccbe3753049983db4b7bc3f3986 100644 (file)
@@ -126,19 +126,61 @@ enum damos_action {
        NR_DAMOS_ACTIONS,
 };
 
+/**
+ * enum damos_quota_goal_metric - Represents the metric to be used as the goal
+ *
+ * @DAMOS_QUOTA_USER_INPUT:    User-input value.
+ * @DAMOS_QUOTA_SOME_MEM_PSI_US:       System level some memory PSI in us.
+ * @NR_DAMOS_QUOTA_GOAL_METRICS:       Number of DAMOS quota goal metrics.
+ *
+ * Metrics equal to or larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported.
+ */
+enum damos_quota_goal_metric {
+       DAMOS_QUOTA_USER_INPUT,
+       DAMOS_QUOTA_SOME_MEM_PSI_US,
+       NR_DAMOS_QUOTA_GOAL_METRICS,
+};
+
+/**
+ * struct damos_quota_goal - DAMOS scheme quota auto-tuning goal.
+ * @metric:            Metric to be used for representing the goal.
+ * @target_value:      Target value of @metric to achieve with the tuning.
+ * @current_value:     Current value of @metric.
+ * @last_psi_total:    Last measured total PSI
+ * @list:              List head for siblings.
+ *
+ * Data structure for getting the current score of the quota tuning goal.  The
+ * score is calculated by how close @current_value and @target_value are.  Then
+ * the score is fed into DAMON's internal feedback loop mechanism to get the
+ * auto-tuned quota.
+ *
+ * If @metric is DAMOS_QUOTA_USER_INPUT, @current_value should be manually
+ * entered by the user, probably inside the kdamond callbacks.  Otherwise,
+ * DAMON sets @current_value with self-measured value of @metric.
+ */
+struct damos_quota_goal {
+       enum damos_quota_goal_metric metric;
+       unsigned long target_value;
+       unsigned long current_value;
+       /* metric-dependent fields */
+       union {
+               u64 last_psi_total;
+       };
+       struct list_head list;
+};
+
 /**
  * struct damos_quota - Controls the aggressiveness of the given scheme.
+ * @reset_interval:    Charge reset interval in milliseconds.
  * @ms:                        Maximum milliseconds that the scheme can use.
  * @sz:                        Maximum bytes of memory that the action can be applied.
- * @reset_interval:    Charge reset interval in milliseconds.
+ * @goals:             Head of quota tuning goals (&damos_quota_goal) list.
+ * @esz:               Effective size quota in bytes.
  *
  * @weight_sz:         Weight of the region's size for prioritization.
  * @weight_nr_accesses:        Weight of the region's nr_accesses for prioritization.
  * @weight_age:                Weight of the region's age for prioritization.
  *
- * @get_score:         Feedback function for self-tuning quota.
- * @get_score_arg:     Parameter for @get_score
- *
  * To avoid consuming too much CPU time or IO resources for applying the
  * &struct damos->action to large memory, DAMON allows users to set time and/or
  * size quotas.  The quotas can be set by writing non-zero values to &ms and
@@ -151,42 +193,35 @@ enum damos_action {
  * throughput of the scheme's action.  DAMON then compares it against &sz and
  * uses the smaller one as the effective quota.
  *
+ * If @goals is not empty, DAMON calculates yet another size quota based on the
+ * goals using its internal feedback loop algorithm, for every @reset_interval.
+ * Then, if the new size quota is smaller than the effective quota, it uses the
+ * new size quota as the effective quota.
+ *
+ * The resulting effective size quota in bytes is set to @esz.
+ *
  * For selecting regions within the quota, DAMON prioritizes current scheme's
  * target memory regions using the &struct damon_operations->get_scheme_score.
  * You could customize the prioritization logic by setting &weight_sz,
  * &weight_nr_accesses, and &weight_age, because monitoring operations are
  * encouraged to respect those.
- *
- * If @get_score function pointer is set, DAMON calls it back with
- * @get_score_arg and get the return value of it for every @reset_interval.
- * Then, DAMON adjusts the effective quota using the return value as a feedback
- * score to the current quota, using its internal feedback loop algorithm.
- *
- * The feedback loop algorithem assumes the quota input and the feedback score
- * output are in a positive proportional relationship, and the goal of the
- * tuning is getting the feedback screo value of 10,000.  If @ms and/or @sz are
- * set together, those work as a hard limit quota.  If neither @ms nor @sz are
- * set, the mechanism starts from the quota of one byte.
  */
 struct damos_quota {
+       unsigned long reset_interval;
        unsigned long ms;
        unsigned long sz;
-       unsigned long reset_interval;
+       struct list_head goals;
+       unsigned long esz;
 
        unsigned int weight_sz;
        unsigned int weight_nr_accesses;
        unsigned int weight_age;
 
-       unsigned long (*get_score)(void *arg);
-       void *get_score_arg;
-
 /* private: */
        /* For throughput estimation */
        unsigned long total_charged_sz;
        unsigned long total_charged_ns;
 
-       unsigned long esz;      /* Effective size quota in bytes */
-
        /* For charging the quota */
        unsigned long charged_sz;
        unsigned long charged_from;
@@ -640,6 +675,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r)
 #define damon_for_each_scheme_safe(s, next, ctx) \
        list_for_each_entry_safe(s, next, &(ctx)->schemes, list)
 
+#define damos_for_each_quota_goal(goal, quota) \
+       list_for_each_entry(goal, &quota->goals, list)
+
+#define damos_for_each_quota_goal_safe(goal, next, quota) \
+       list_for_each_entry_safe(goal, next, &(quota)->goals, list)
+
 #define damos_for_each_filter(f, scheme) \
        list_for_each_entry(f, &(scheme)->filters, list)
 
@@ -673,6 +714,12 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type,
 void damos_add_filter(struct damos *s, struct damos_filter *f);
 void damos_destroy_filter(struct damos_filter *f);
 
+struct damos_quota_goal *damos_new_quota_goal(
+               enum damos_quota_goal_metric metric,
+               unsigned long target_value);
+void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g);
+void damos_destroy_quota_goal(struct damos_quota_goal *goal);
+
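Putting the new quota-goal pieces together, a caller could attach a PSI-driven auto-tuning goal to a scheme's quota roughly as follows (sketch; the target value is illustrative):

static int attach_psi_goal(struct damos_quota *quota)
{
	struct damos_quota_goal *goal;

	/* Aim the tuner at a system-level "some" memory PSI target. */
	goal = damos_new_quota_goal(DAMOS_QUOTA_SOME_MEM_PSI_US, 1000);
	if (!goal)
		return -ENOMEM;
	damos_add_quota_goal(quota, goal);
	return 0;
}
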
 struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
                        enum damos_action action,
                        unsigned long apply_interval_us,
index b463502b16e17fbc08ff3a982a9597f5d70cb06b..9d3e3327af4c055f0a6d7a907f2e1d68a345814d 100644 (file)
@@ -63,6 +63,8 @@ void kill_dax(struct dax_device *dax_dev);
 void dax_write_cache(struct dax_device *dax_dev, bool wc);
 bool dax_write_cache_enabled(struct dax_device *dax_dev);
 bool dax_synchronous(struct dax_device *dax_dev);
+void set_dax_nocache(struct dax_device *dax_dev);
+void set_dax_nomc(struct dax_device *dax_dev);
 void set_dax_synchronous(struct dax_device *dax_dev);
 size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
                void *addr, size_t bytes, struct iov_iter *i);
@@ -86,11 +88,7 @@ static inline void *dax_holder(struct dax_device *dax_dev)
 static inline struct dax_device *alloc_dax(void *private,
                const struct dax_operations *ops)
 {
-       /*
-        * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
-        * NULL is an error or expected.
-        */
-       return NULL;
+       return ERR_PTR(-EOPNOTSUPP);
 }
 static inline void put_dax(struct dax_device *dax_dev)
 {
@@ -109,6 +107,12 @@ static inline bool dax_synchronous(struct dax_device *dax_dev)
 {
        return true;
 }
+static inline void set_dax_nocache(struct dax_device *dax_dev)
+{
+}
+static inline void set_dax_nomc(struct dax_device *dax_dev)
+{
+}
 static inline void set_dax_synchronous(struct dax_device *dax_dev)
 {
 }
@@ -124,9 +128,6 @@ static inline size_t dax_recovery_write(struct dax_device *dax_dev,
 }
 #endif
 
-void set_dax_nocache(struct dax_device *dax_dev);
-void set_dax_nomc(struct dax_device *dax_dev);
-
 struct writeback_control;
 #if defined(CONFIG_BLOCK) && defined(CONFIG_FS_DAX)
 int dax_add_host(struct dax_device *dax_dev, struct gendisk *disk);
index e443be4d3b4b7e04bc350aff0b9d3a9e2fd4acbf..1e491c5dcac2d32080a0c72d058c4c5901c35597 100644 (file)
@@ -26,6 +26,11 @@ struct k3_udma_glue_tx_channel;
 struct k3_udma_glue_tx_channel *k3_udma_glue_request_tx_chn(struct device *dev,
                const char *name, struct k3_udma_glue_tx_channel_cfg *cfg);
 
+struct k3_udma_glue_tx_channel *
+k3_udma_glue_request_tx_chn_for_thread_id(struct device *dev,
+                                         struct k3_udma_glue_tx_channel_cfg *cfg,
+                                         struct device_node *udmax_np, u32 thread_id);
+
 void k3_udma_glue_release_tx_chn(struct k3_udma_glue_tx_channel *tx_chn);
 int k3_udma_glue_push_tx_chn(struct k3_udma_glue_tx_channel *tx_chn,
                             struct cppi5_host_desc_t *desc_tx,
@@ -109,6 +114,11 @@ struct k3_udma_glue_rx_channel *k3_udma_glue_request_rx_chn(
                const char *name,
                struct k3_udma_glue_rx_channel_cfg *cfg);
 
+struct k3_udma_glue_rx_channel *
+k3_udma_glue_request_remote_rx_chn_for_thread_id(struct device *dev,
+                                                struct k3_udma_glue_rx_channel_cfg *cfg,
+                                                struct device_node *udmax_np, u32 thread_id);
+
 void k3_udma_glue_release_rx_chn(struct k3_udma_glue_rx_channel *rx_chn);
 int k3_udma_glue_enable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn);
 void k3_udma_glue_disable_rx_chn(struct k3_udma_glue_rx_channel *rx_chn);
index f0d56f106b60556939f4b7dce9611d4a4c8a9d7b..d59b0947fba08ad5edd916feb32144ac0328d72b 100644 (file)
@@ -694,6 +694,11 @@ extern struct efi {
 
 extern struct mm_struct efi_mm;
 
+static inline bool mm_is_efi(struct mm_struct *mm)
+{
+       return IS_ENABLED(CONFIG_EFI) && mm == &efi_mm;
+}
+
 static inline int
 efi_guidcmp (efi_guid_t left, efi_guid_t right)
 {
diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h
new file mode 100644 (file)
index 0000000..624ff6f
--- /dev/null
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CXL protocol Error INJection support.
+ *
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Ben Cheatham <benjamin.cheatham@amd.com>
+ */
+#ifndef EINJ_CXL_H
+#define EINJ_CXL_H
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+struct pci_dev;
+struct seq_file;
+
+#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL)
+int einj_cxl_available_error_type_show(struct seq_file *m, void *v);
+int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type);
+int einj_cxl_inject_rch_error(u64 rcrb, u64 type);
+bool einj_cxl_is_initialized(void);
+#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */
+static inline int einj_cxl_available_error_type_show(struct seq_file *m,
+                                                    void *v)
+{
+       return -ENXIO;
+}
+
+static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type)
+{
+       return -ENXIO;
+}
+
+static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type)
+{
+       return -ENXIO;
+}
+
+static inline bool einj_cxl_is_initialized(void) { return false; }
+#endif /* CONFIG_ACPI_APEI_EINJ_CXL */
+
+#endif /* EINJ_CXL_H */
index 053137a0fe45678d0d2319f13953c2bbc77a588a..a357287eac1eab198ca1448ae678a9a2076c3859 100644 (file)
@@ -27,6 +27,7 @@
 
 #define F2FS_BYTES_TO_BLK(bytes)       ((bytes) >> F2FS_BLKSIZE_BITS)
 #define F2FS_BLK_TO_BYTES(blk)         ((blk) << F2FS_BLKSIZE_BITS)
+#define F2FS_BLK_END_BYTES(blk)                (F2FS_BLK_TO_BYTES((blk) + 1) - 1)
 
 /* 0, 1(node nid), 2(meta nid) are reserved node id */
 #define F2FS_RESERVED_NODE_NUM         3
 
 #define F2FS_ENC_UTF8_12_1     1
 
-#define F2FS_IO_SIZE(sbi)      BIT(F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */
-#define F2FS_IO_SIZE_KB(sbi)   BIT(F2FS_OPTION(sbi).write_io_size_bits + 2) /* KB */
-#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */
-#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1)
-#define F2FS_IO_ALIGNED(sbi)   (F2FS_IO_SIZE(sbi) > 1)
-
 /* This flag is used by node and meta inodes, and by recovery */
 #define GFP_F2FS_ZERO          (GFP_NOFS | __GFP_ZERO)
 
@@ -81,6 +76,7 @@ enum stop_cp_reason {
        STOP_CP_REASON_CORRUPTED_SUMMARY,
        STOP_CP_REASON_UPDATE_INODE,
        STOP_CP_REASON_FLUSH_FAIL,
+       STOP_CP_REASON_NO_SEGMENT,
        STOP_CP_REASON_MAX,
 };
 
index 3e378b1fb0bc8204214073f1f671e182ff4ebf32..e9a72fd0bfe78bc4e58f99a5bff69777b8676f38 100644 (file)
@@ -38,38 +38,6 @@ int fprop_global_init(struct fprop_global *p, gfp_t gfp);
 void fprop_global_destroy(struct fprop_global *p);
 bool fprop_new_period(struct fprop_global *p, int periods);
 
-/*
- *  ---- SINGLE ----
- */
-struct fprop_local_single {
-       /* the local events counter */
-       unsigned long events;
-       /* Period in which we last updated events */
-       unsigned int period;
-       raw_spinlock_t lock;    /* Protect period and numerator */
-};
-
-#define INIT_FPROP_LOCAL_SINGLE(name)                  \
-{      .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock),    \
-}
-
-int fprop_local_init_single(struct fprop_local_single *pl);
-void fprop_local_destroy_single(struct fprop_local_single *pl);
-void __fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl);
-void fprop_fraction_single(struct fprop_global *p,
-       struct fprop_local_single *pl, unsigned long *numerator,
-       unsigned long *denominator);
-
-static inline
-void fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __fprop_inc_single(p, pl);
-       local_irq_restore(flags);
-}
-
 /*
  * ---- PERCPU ----
  */
index d5d5a4ee24f02cf0d87bce080aafbd2bf88285bb..00fc429b0af0fb9bbab2382a9e347fdbac383981 100644 (file)
@@ -3074,6 +3074,7 @@ extern struct inode *new_inode_pseudo(struct super_block *sb);
 extern struct inode *new_inode(struct super_block *sb);
 extern void free_inode_nonrcu(struct inode *inode);
 extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
+extern int file_remove_privs_flags(struct file *file, unsigned int flags);
 extern int file_remove_privs(struct file *);
 int setattr_should_drop_sgid(struct mnt_idmap *idmap,
                             const struct inode *inode);
index 95421860397a236d101bf4c3bc7a934a515bf847..3ff4c277296fb6e9fbdd83923acaaa50384ddd5c 100644 (file)
@@ -40,12 +40,14 @@ union acpi_subtable_headers {
 
 int acpi_parse_entries_array(char *id, unsigned long table_size,
                             union fw_table_header *table_header,
+                            unsigned long max_length,
                             struct acpi_subtable_proc *proc,
                             int proc_num, unsigned int max_entries);
 
 int cdat_table_parse(enum acpi_cdat_type type,
                     acpi_tbl_entry_handler_arg handler_arg, void *arg,
-                    struct acpi_table_cdat *table_header);
+                    struct acpi_table_cdat *table_header,
+                    unsigned long length);
 
 /* CXL is the only non-ACPI consumer of the FIRMWARE_TABLE library */
 #if IS_ENABLED(CONFIG_ACPI) && !IS_ENABLED(CONFIG_CXL_BUS)
index 8474131647388b954812515930c81b0c83315f3a..f3512fddf3d738b2f085b037e07ef96b0e908642 100644 (file)
@@ -5,7 +5,7 @@
  * DOC: Generic radix trees/sparse arrays
  *
  * Very simple and minimalistic, supporting arbitrary size entries up to
- * PAGE_SIZE.
+ * GENRADIX_NODE_SIZE.
  *
  * A genradix is defined with the type it will store, like so:
  *
 
 struct genradix_root;
 
+#define GENRADIX_NODE_SHIFT    9
+#define GENRADIX_NODE_SIZE     (1U << GENRADIX_NODE_SHIFT)
+
 struct __genradix {
        struct genradix_root            *root;
 };
 
 /*
- * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE:
+ * NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE:
  */
 
 #define __GENRADIX_INITIALIZER                                 \
@@ -101,14 +104,14 @@ void __genradix_free(struct __genradix *);
 static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
 {
        if (__builtin_constant_p(obj_size))
-               BUILD_BUG_ON(obj_size > PAGE_SIZE);
+               BUILD_BUG_ON(obj_size > GENRADIX_NODE_SIZE);
        else
-               BUG_ON(obj_size > PAGE_SIZE);
+               BUG_ON(obj_size > GENRADIX_NODE_SIZE);
 
        if (!is_power_of_2(obj_size)) {
-               size_t objs_per_page = PAGE_SIZE / obj_size;
+               size_t objs_per_page = GENRADIX_NODE_SIZE / obj_size;
 
-               return (idx / objs_per_page) * PAGE_SIZE +
+               return (idx / objs_per_page) * GENRADIX_NODE_SIZE +
                        (idx % objs_per_page) * obj_size;
        } else {
                return idx * obj_size;
@@ -118,9 +121,9 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
 #define __genradix_cast(_radix)                (typeof((_radix)->type[0]) *)
 #define __genradix_obj_size(_radix)    sizeof((_radix)->type[0])
 #define __genradix_objs_per_page(_radix)                       \
-       (PAGE_SIZE / sizeof((_radix)->type[0]))
+       (GENRADIX_NODE_SIZE / sizeof((_radix)->type[0]))
 #define __genradix_page_remainder(_radix)                      \
-       (PAGE_SIZE % sizeof((_radix)->type[0]))
+       (GENRADIX_NODE_SIZE % sizeof((_radix)->type[0]))
 
 #define __genradix_idx_to_offset(_radix, _idx)                 \
        __idx_to_offset(_idx, __genradix_obj_size(_radix))
@@ -217,8 +220,8 @@ static inline void __genradix_iter_advance(struct genradix_iter *iter,
        iter->offset += obj_size;
 
        if (!is_power_of_2(obj_size) &&
-           (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE)
-               iter->offset = round_up(iter->offset, PAGE_SIZE);
+           (iter->offset & (GENRADIX_NODE_SIZE - 1)) + obj_size > GENRADIX_NODE_SIZE)
+               iter->offset = round_up(iter->offset, GENRADIX_NODE_SIZE);
 
        iter->pos++;
 }
@@ -235,8 +238,8 @@ static inline void __genradix_iter_rewind(struct genradix_iter *iter,
                return;
        }
 
-       if ((iter->offset & (PAGE_SIZE - 1)) == 0)
-               iter->offset -= PAGE_SIZE % obj_size;
+       if ((iter->offset & (GENRADIX_NODE_SIZE - 1)) == 0)
+               iter->offset -= GENRADIX_NODE_SIZE % obj_size;
 
        iter->offset -= obj_size;
        iter->pos--;
@@ -263,7 +266,7 @@ static inline void __genradix_iter_rewind(struct genradix_iter *iter,
        genradix_for_each_from(_radix, _iter, _p, 0)
 
 #define genradix_last_pos(_radix)                              \
-       (SIZE_MAX / PAGE_SIZE * __genradix_objs_per_page(_radix) - 1)
+       (SIZE_MAX / GENRADIX_NODE_SIZE * __genradix_objs_per_page(_radix) - 1)
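
For reference, the API these node-size changes sit under is unchanged; a genradix is still declared and used roughly like so (sketch; struct foo is illustrative, and the radix must have been genradix_init()ed first):

static GENRADIX(struct foo) foo_genradix;

static int store_foo(size_t idx, const struct foo *src)
{
	struct foo *p = genradix_ptr_alloc(&foo_genradix, idx, GFP_KERNEL);

	if (!p)
		return -ENOMEM;
	*p = *src;
	return 0;
}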
 
 /**
  * genradix_for_each_reverse - iterate over entry in a genradix, reverse order
index 937c2a9b6e545ef567207fb52102cbb79adcdcb3..c775ea3c60155820cecb92d4641490877bcb39bc 100644 (file)
@@ -342,7 +342,7 @@ void drain_all_pages(struct zone *zone);
 void drain_local_pages(struct zone *zone);
 
 void page_alloc_init_late(void);
-void setup_pcp_cacheinfo(void);
+void setup_pcp_cacheinfo(unsigned int cpu);
 
 /*
  * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
index 1b6053da8754e27d74032fd205997d91f05fc735..868c8fb1bbc1c2dabd708bc2c6485c2e42dee8fe 100644 (file)
@@ -21,44 +21,78 @@ typedef unsigned int __bitwise gfp_t;
  * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c
  */
 
+enum {
+       ___GFP_DMA_BIT,
+       ___GFP_HIGHMEM_BIT,
+       ___GFP_DMA32_BIT,
+       ___GFP_MOVABLE_BIT,
+       ___GFP_RECLAIMABLE_BIT,
+       ___GFP_HIGH_BIT,
+       ___GFP_IO_BIT,
+       ___GFP_FS_BIT,
+       ___GFP_ZERO_BIT,
+       ___GFP_UNUSED_BIT,      /* 0x200u unused */
+       ___GFP_DIRECT_RECLAIM_BIT,
+       ___GFP_KSWAPD_RECLAIM_BIT,
+       ___GFP_WRITE_BIT,
+       ___GFP_NOWARN_BIT,
+       ___GFP_RETRY_MAYFAIL_BIT,
+       ___GFP_NOFAIL_BIT,
+       ___GFP_NORETRY_BIT,
+       ___GFP_MEMALLOC_BIT,
+       ___GFP_COMP_BIT,
+       ___GFP_NOMEMALLOC_BIT,
+       ___GFP_HARDWALL_BIT,
+       ___GFP_THISNODE_BIT,
+       ___GFP_ACCOUNT_BIT,
+       ___GFP_ZEROTAGS_BIT,
+#ifdef CONFIG_KASAN_HW_TAGS
+       ___GFP_SKIP_ZERO_BIT,
+       ___GFP_SKIP_KASAN_BIT,
+#endif
+#ifdef CONFIG_LOCKDEP
+       ___GFP_NOLOCKDEP_BIT,
+#endif
+       ___GFP_LAST_BIT
+};
+
 /* Plain integer GFP bitmasks. Do not use this directly. */
-#define ___GFP_DMA             0x01u
-#define ___GFP_HIGHMEM         0x02u
-#define ___GFP_DMA32           0x04u
-#define ___GFP_MOVABLE         0x08u
-#define ___GFP_RECLAIMABLE     0x10u
-#define ___GFP_HIGH            0x20u
-#define ___GFP_IO              0x40u
-#define ___GFP_FS              0x80u
-#define ___GFP_ZERO            0x100u
+#define ___GFP_DMA             BIT(___GFP_DMA_BIT)
+#define ___GFP_HIGHMEM         BIT(___GFP_HIGHMEM_BIT)
+#define ___GFP_DMA32           BIT(___GFP_DMA32_BIT)
+#define ___GFP_MOVABLE         BIT(___GFP_MOVABLE_BIT)
+#define ___GFP_RECLAIMABLE     BIT(___GFP_RECLAIMABLE_BIT)
+#define ___GFP_HIGH            BIT(___GFP_HIGH_BIT)
+#define ___GFP_IO              BIT(___GFP_IO_BIT)
+#define ___GFP_FS              BIT(___GFP_FS_BIT)
+#define ___GFP_ZERO            BIT(___GFP_ZERO_BIT)
 /* 0x200u unused */
-#define ___GFP_DIRECT_RECLAIM  0x400u
-#define ___GFP_KSWAPD_RECLAIM  0x800u
-#define ___GFP_WRITE           0x1000u
-#define ___GFP_NOWARN          0x2000u
-#define ___GFP_RETRY_MAYFAIL   0x4000u
-#define ___GFP_NOFAIL          0x8000u
-#define ___GFP_NORETRY         0x10000u
-#define ___GFP_MEMALLOC                0x20000u
-#define ___GFP_COMP            0x40000u
-#define ___GFP_NOMEMALLOC      0x80000u
-#define ___GFP_HARDWALL                0x100000u
-#define ___GFP_THISNODE                0x200000u
-#define ___GFP_ACCOUNT         0x400000u
-#define ___GFP_ZEROTAGS                0x800000u
+#define ___GFP_DIRECT_RECLAIM  BIT(___GFP_DIRECT_RECLAIM_BIT)
+#define ___GFP_KSWAPD_RECLAIM  BIT(___GFP_KSWAPD_RECLAIM_BIT)
+#define ___GFP_WRITE           BIT(___GFP_WRITE_BIT)
+#define ___GFP_NOWARN          BIT(___GFP_NOWARN_BIT)
+#define ___GFP_RETRY_MAYFAIL   BIT(___GFP_RETRY_MAYFAIL_BIT)
+#define ___GFP_NOFAIL          BIT(___GFP_NOFAIL_BIT)
+#define ___GFP_NORETRY         BIT(___GFP_NORETRY_BIT)
+#define ___GFP_MEMALLOC                BIT(___GFP_MEMALLOC_BIT)
+#define ___GFP_COMP            BIT(___GFP_COMP_BIT)
+#define ___GFP_NOMEMALLOC      BIT(___GFP_NOMEMALLOC_BIT)
+#define ___GFP_HARDWALL                BIT(___GFP_HARDWALL_BIT)
+#define ___GFP_THISNODE                BIT(___GFP_THISNODE_BIT)
+#define ___GFP_ACCOUNT         BIT(___GFP_ACCOUNT_BIT)
+#define ___GFP_ZEROTAGS                BIT(___GFP_ZEROTAGS_BIT)
 #ifdef CONFIG_KASAN_HW_TAGS
-#define ___GFP_SKIP_ZERO       0x1000000u
-#define ___GFP_SKIP_KASAN      0x2000000u
+#define ___GFP_SKIP_ZERO       BIT(___GFP_SKIP_ZERO_BIT)
+#define ___GFP_SKIP_KASAN      BIT(___GFP_SKIP_KASAN_BIT)
 #else
 #define ___GFP_SKIP_ZERO       0
 #define ___GFP_SKIP_KASAN      0
 #endif
 #ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP       0x4000000u
+#define ___GFP_NOLOCKDEP       BIT(___GFP_NOLOCKDEP_BIT)
 #else
 #define ___GFP_NOLOCKDEP       0
 #endif
-/* If the above are modified, __GFP_BITS_SHIFT may need updating */
 
 /*
  * Physical address zone modifiers (see linux/mmzone.h - low four bits)
@@ -249,7 +283,7 @@ typedef unsigned int __bitwise gfp_t;
 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
 
 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT ___GFP_LAST_BIT
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
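
The point of deriving the masks from an enum is that the bit count can no longer drift out of sync with the flag list; the same pattern in miniature (illustrative, not kernel code):

enum { FLAG_A_BIT, FLAG_B_BIT, FLAG_LAST_BIT };

#define FLAG_A		BIT(FLAG_A_BIT)
#define FLAG_B		BIT(FLAG_B_BIT)
/* Adding a new flag above automatically grows the mask. */
#define FLAGS_MASK	((1U << FLAG_LAST_BIT) - 1)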
 
 /**
index 451c1dff0e873c14951c455d3871bb74b3b7f3d1..00341b56d2910d6801ca65ffccb6830b507056bf 100644 (file)
@@ -439,6 +439,13 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len)
        kunmap_local(addr);
 }
 
+/**
+ * memcpy_from_folio - Copy a range of bytes from a folio.
+ * @to: The memory to copy to.
+ * @folio: The folio to read from.
+ * @offset: The first byte in the folio to read.
+ * @len: The number of bytes to copy.
+ */
 static inline void memcpy_from_folio(char *to, struct folio *folio,
                size_t offset, size_t len)
 {
@@ -460,6 +467,13 @@ static inline void memcpy_from_folio(char *to, struct folio *folio,
        } while (len > 0);
 }
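
With the kernel-doc now in place, a typical caller looks like this (sketch; struct record is illustrative):

/* Copy a small on-disk record out of a possibly-highmem folio. */
static void read_record(struct folio *folio, size_t pos, struct record *rec)
{
	memcpy_from_folio((char *)rec, folio, pos, sizeof(*rec));
}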
 
+/**
+ * memcpy_to_folio - Copy a range of bytes to a folio.
+ * @folio: The folio to write to.
+ * @offset: The first byte in the folio to store to.
+ * @from: The memory to copy from.
+ * @len: The number of bytes to copy.
+ */
 static inline void memcpy_to_folio(struct folio *folio, size_t offset,
                const char *from, size_t len)
 {
index 5f4c74facf6a3dfbe40be9308d05dadb8bc0186c..9d7754ad5e9b080b5fa2f818a552558ed528e1ee 100644 (file)
@@ -43,6 +43,7 @@
 #define QM_MB_CMD_CQC_BT                0x5
 #define QM_MB_CMD_SQC_VFT_V2            0x6
 #define QM_MB_CMD_STOP_QP               0x8
+#define QM_MB_CMD_FLUSH_QM             0x9
 #define QM_MB_CMD_SRC                   0xc
 #define QM_MB_CMD_DST                   0xd
 
@@ -151,6 +152,7 @@ enum qm_cap_bits {
        QM_SUPPORT_DB_ISOLATION = 0x0,
        QM_SUPPORT_FUNC_QOS,
        QM_SUPPORT_STOP_QP,
+       QM_SUPPORT_STOP_FUNC,
        QM_SUPPORT_MB_COMMAND,
        QM_SUPPORT_SVA_PREFETCH,
        QM_SUPPORT_RPM,
@@ -161,6 +163,11 @@ struct qm_dev_alg {
        const char *alg;
 };
 
+struct qm_dev_dfx {
+       u32 dev_state;
+       u32 dev_timeout;
+};
+
 struct dfx_diff_registers {
        u32 *regs;
        u32 reg_offset;
@@ -189,6 +196,7 @@ struct qm_debug {
        struct dentry *debug_root;
        struct dentry *qm_d;
        struct debugfs_file files[DEBUG_FILE_NUM];
+       struct qm_dev_dfx dev_dfx;
        unsigned int *qm_last_words;
        /* ACC engines recording last regs */
        unsigned int *last_words;
@@ -523,7 +531,7 @@ void hisi_qm_uninit(struct hisi_qm *qm);
 int hisi_qm_start(struct hisi_qm *qm);
 int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r);
 int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg);
-int hisi_qm_stop_qp(struct hisi_qp *qp);
+void hisi_qm_stop_qp(struct hisi_qp *qp);
 int hisi_qp_send(struct hisi_qp *qp, const void *msg);
 void hisi_qm_debug_init(struct hisi_qm *qm);
 void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
index 5adb86af35fc4e6c8a24f5305d3d9bece035f52c..de0c89105076907bf906ae855e17b6f4ff8e3858 100644 (file)
@@ -265,10 +265,11 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 
 void folio_prep_large_rmappable(struct folio *folio);
 bool can_split_folio(struct folio *folio, int *pextra_pins);
-int split_huge_page_to_list(struct page *page, struct list_head *list);
+int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+               unsigned int new_order);
 static inline int split_huge_page(struct page *page)
 {
-       return split_huge_page_to_list(page, NULL);
+       return split_huge_page_to_list_to_order(page, NULL, 0);
 }
 void deferred_split_folio(struct folio *folio);
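
Callers can now split a large folio only part of the way down instead of always to base pages; a hedged sketch (assumes the folio is locked, as the split helpers require):

static int shrink_to_order2(struct folio *folio)
{
	int extra_pins;

	if (!can_split_folio(folio, &extra_pins))
		return -EBUSY;
	/* Split into order-2 (four-page) folios rather than single pages. */
	return split_folio_to_order(folio, 2);
}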
 
@@ -422,7 +423,8 @@ can_split_folio(struct folio *folio, int *pextra_pins)
        return false;
 }
 static inline int
-split_huge_page_to_list(struct page *page, struct list_head *list)
+split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+               unsigned int new_order)
 {
        return 0;
 }
@@ -519,17 +521,20 @@ static inline bool thp_migration_supported(void)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline int split_folio_to_list(struct folio *folio,
-               struct list_head *list)
+static inline int split_folio_to_list_to_order(struct folio *folio,
+               struct list_head *list, int new_order)
 {
-       return split_huge_page_to_list(&folio->page, list);
+       return split_huge_page_to_list_to_order(&folio->page, list, new_order);
 }
 
-static inline int split_folio(struct folio *folio)
+static inline int split_folio_to_order(struct folio *folio, int new_order)
 {
-       return split_folio_to_list(folio, NULL);
+       return split_folio_to_list_to_order(folio, NULL, new_order);
 }
 
+#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
+#define split_folio(f) split_folio_to_order(f, 0)
+
 /*
  * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
  * limitations in the implementation like arm64 MTE can override this to
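
A minimal sketch of the new split-to-order API, assuming 'folio' is a locked large folio eligible for splitting (error handling is illustrative); the compatibility macros preserve the old order-0 behaviour:

    /* Try to split down to order-2 pieces; fall back to order 0. */
    int err = split_folio_to_order(folio, 2);
    if (err)
            err = split_folio(folio);       /* expands to order 0, as before */
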
index c1ee640d87b11d6d9f89b573cef98270306f55a5..77b30a8c6076b6cf536baf490c613be9a70d8c61 100644 (file)
@@ -178,7 +178,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);
 
 extern int sysctl_hugetlb_shm_group;
-extern struct list_head huge_boot_pages;
+extern struct list_head huge_boot_pages[MAX_NUMNODES];
 
 /* arch callbacks */
 
index 652ecb7abedae4b5bc3c451410139168702a63f9..5e6cd43a6dbdd9fcd29e30d59492c736a149e25a 100644 (file)
@@ -24,8 +24,8 @@
 #include <uapi/linux/i2c.h>
 
 extern const struct bus_type i2c_bus_type;
-extern struct device_type i2c_adapter_type;
-extern struct device_type i2c_client_type;
+extern const struct device_type i2c_adapter_type;
+extern const struct device_type i2c_client_type;
 
 /* --- General options ------------------------------------------------        */
 
@@ -931,7 +931,7 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap)
 
 static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg)
 {
-       return (msg->addr << 1) | (msg->flags & I2C_M_RD ? 1 : 0);
+       return (msg->addr << 1) | (msg->flags & I2C_M_RD);
 }
 
 u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold);
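
The simplification above relies on I2C_M_RD being defined as 0x0001 in <uapi/linux/i2c.h>, so the masked flag already sits in bit 0 and the ternary was redundant. A worked example with an illustrative address:

    /* addr = 0x50, flags = I2C_M_RD:  (0x50 << 1) | 0x0001 = 0xa1 (read)
     * addr = 0x50, flags = 0:         (0x50 << 1) | 0x0000 = 0xa0 (write)
     */
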
index de6503c0edb8efcf86e0565a52b81e7a4e378014..c22ac465254bae06b148824651383e2cefb869f3 100644 (file)
@@ -514,7 +514,7 @@ void input_enable_softrepeat(struct input_dev *dev, int delay, int period);
 
 bool input_device_enabled(struct input_dev *dev);
 
-extern struct class input_class;
+extern const struct class input_class;
 
 /**
  * struct ff_device - force-feedback part of an input device
diff --git a/include/linux/input/navpoint.h b/include/linux/input/navpoint.h
deleted file mode 100644 (file)
index 5192ae3..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2012 Paul Parsons <lost.distance@yahoo.com>
- */
-
-struct navpoint_platform_data {
-       int             port;           /* PXA SSP port for pxa_ssp_request() */
-};
index 400cb6c02176e0b8d990d675c54030f5171299d9..060835bb82d52f0fff68b9bf7a8fd50f27ae5d25 100644 (file)
@@ -15,7 +15,8 @@
 
 #if !defined(__ASSEMBLY__)
 
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
+#include <linux/crash_reserve.h>
 #include <asm/io.h>
 #include <linux/range.h>
 
@@ -31,6 +32,7 @@ extern note_buf_t __percpu *crash_notes;
 #include <linux/module.h>
 #include <linux/highmem.h>
 #include <asm/kexec.h>
+#include <linux/crash_core.h>
 
 /* Verify architecture specific macros are defined */
 
@@ -378,13 +380,6 @@ extern struct page *kimage_alloc_control_pages(struct kimage *image,
 static inline int machine_kexec_post_load(struct kimage *image) { return 0; }
 #endif
 
-extern void __crash_kexec(struct pt_regs *);
-extern void crash_kexec(struct pt_regs *);
-int kexec_should_crash(struct task_struct *);
-int kexec_crash_loaded(void);
-void crash_save_cpu(struct pt_regs *regs, int cpu);
-extern int kimage_crash_copy_vmcoreinfo(struct kimage *image);
-
 extern struct kimage *kexec_image;
 extern struct kimage *kexec_crash_image;
 
@@ -408,24 +403,6 @@ bool kexec_load_permitted(int kexec_image_type);
 /* flag to track if kexec reboot is in progress */
 extern bool kexec_in_progress;
 
-int crash_shrink_memory(unsigned long new_size);
-ssize_t crash_get_memory_size(void);
-
-#ifndef arch_kexec_protect_crashkres
-/*
- * Protection mechanism for crashkernel reserved memory after
- * the kdump kernel is loaded.
- *
- * Provide an empty default implementation here -- architecture
- * code may override this
- */
-static inline void arch_kexec_protect_crashkres(void) { }
-#endif
-
-#ifndef arch_kexec_unprotect_crashkres
-static inline void arch_kexec_unprotect_crashkres(void) { }
-#endif
-
 #ifndef page_to_boot_pfn
 static inline unsigned long page_to_boot_pfn(struct page *page)
 {
@@ -482,24 +459,6 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
 static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
 #endif
 
-#ifndef arch_crash_handle_hotplug_event
-static inline void arch_crash_handle_hotplug_event(struct kimage *image) { }
-#endif
-
-int crash_check_update_elfcorehdr(void);
-
-#ifndef crash_hotplug_cpu_support
-static inline int crash_hotplug_cpu_support(void) { return 0; }
-#endif
-
-#ifndef crash_hotplug_memory_support
-static inline int crash_hotplug_memory_support(void) { return 0; }
-#endif
-
-#ifndef crash_get_elfcorehdr_size
-static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
-#endif
-
 extern bool kexec_file_dbg_print;
 
 #define kexec_dprintk(fmt, ...)                                        \
index 179df96b20f88d065d0c9be4d4ef643b71a801c6..48f31dcd318a08cd75bddeed9de265501c837c7b 100644 (file)
@@ -148,6 +148,11 @@ static inline bool kvm_is_error_hva(unsigned long addr)
 
 #endif
 
+static inline bool kvm_is_error_gpa(gpa_t gpa)
+{
+       return gpa == INVALID_GPA;
+}
+
 #define KVM_ERR_PTR_BAD_PAGE   (ERR_PTR(-ENOENT))
 
 static inline bool is_error_page(struct page *page)
@@ -238,7 +243,6 @@ struct kvm_async_pf {
        struct list_head link;
        struct list_head queue;
        struct kvm_vcpu *vcpu;
-       struct mm_struct *mm;
        gpa_t cr2_or_gpa;
        unsigned long addr;
        struct kvm_arch_async_pf arch;
@@ -1319,21 +1323,12 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  *
  * @gpc:          struct gfn_to_pfn_cache object.
  * @kvm:          pointer to kvm instance.
- * @vcpu:         vCPU to be used for marking pages dirty and to be woken on
- *                invalidation.
- * @usage:        indicates if the resulting host physical PFN is used while
- *                the @vcpu is IN_GUEST_MODE (in which case invalidation of 
- *                the cache from MMU notifiers---but not for KVM memslot
- *                changes!---will also force @vcpu to exit the guest and
- *                refresh the cache); and/or if the PFN used directly
- *                by KVM (and thus needs a kernel virtual mapping).
  *
  * This sets up a gfn_to_pfn_cache by initializing locks and assigning the
  * immutable attributes.  Note, the cache must be zero-allocated (or zeroed by
  * the caller before init).
  */
-void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
-                 struct kvm_vcpu *vcpu, enum pfn_cache_usage usage);
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm);
 
 /**
  * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
@@ -1353,6 +1348,22 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
  */
 int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
 
+/**
+ * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA.
+ *
+ * @gpc:          struct gfn_to_pfn_cache object.
+ * @hva:          userspace virtual address to map.
+ * @len:          sanity check; the range being accessed must fit a single page.
+ *
+ * @return:       0 for success.
+ *                -EINVAL for a mapping which would cross a page boundary.
+ *                -EFAULT for an unmappable host virtual address.
+ *
+ * The semantics of this function are the same as those of kvm_gpc_activate(). It
+ * merely bypasses a layer of address translation.
+ */
+int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len);
+
 /**
  * kvm_gpc_check - check validity of a gfn_to_pfn_cache.
  *
@@ -1399,6 +1410,16 @@ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len);
  */
 void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc);
 
+static inline bool kvm_gpc_is_gpa_active(struct gfn_to_pfn_cache *gpc)
+{
+       return gpc->active && !kvm_is_error_gpa(gpc->gpa);
+}
+
+static inline bool kvm_gpc_is_hva_active(struct gfn_to_pfn_cache *gpc)
+{
+       return gpc->active && kvm_is_error_gpa(gpc->gpa);
+}
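
A minimal sketch of how the HVA path is told apart from the GPA path, assuming 'gpc' was initialised with kvm_gpc_init() and 'hva' is a valid userspace address (names are illustrative):

    int ret = kvm_gpc_activate_hva(gpc, hva, sizeof(u64));

    /* An HVA-activated cache stores INVALID_GPA in gpc->gpa. */
    if (!ret)
            WARN_ON(!kvm_gpc_is_hva_active(gpc));
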
+
 void kvm_sigset_activate(struct kvm_vcpu *vcpu);
 void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
 
@@ -1505,9 +1526,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
 bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu);
 int kvm_arch_post_init_vm(struct kvm *kvm);
 void kvm_arch_pre_destroy_vm(struct kvm *kvm);
-int kvm_arch_create_vm_debugfs(struct kvm *kvm);
+void kvm_arch_create_vm_debugfs(struct kvm *kvm);
 
 #ifndef __KVM_HAVE_ARCH_VM_ALLOC
 /*
@@ -1788,11 +1810,21 @@ static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn)
        return (hpa_t)pfn << PAGE_SHIFT;
 }
 
-static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+static inline bool kvm_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa)
 {
        unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
 
-       return kvm_is_error_hva(hva);
+       return !kvm_is_error_hva(hva);
+}
+
+static inline void kvm_gpc_mark_dirty_in_slot(struct gfn_to_pfn_cache *gpc)
+{
+       lockdep_assert_held(&gpc->lock);
+
+       if (!gpc->memslot)
+               return;
+
+       mark_page_dirty_in_slot(gpc->kvm, gpc->memslot, gpa_to_gfn(gpc->gpa));
 }
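
A sketch of the intended caller pattern, with 'src' and 'len' illustrative; for an HVA-only cache gpc->memslot is NULL and the dirty marking is a no-op:

    read_lock(&gpc->lock);
    if (kvm_gpc_check(gpc, len)) {
            memcpy(gpc->khva, src, len);        /* illustrative write */
            kvm_gpc_mark_dirty_in_slot(gpc);    /* requires gpc->lock held */
    }
    read_unlock(&gpc->lock);
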
 
 enum kvm_stat_kind {
index 9d1f7835d8c13917ad171297752e072c04bec1b3..d93f6522b2c34c2e5e33b80266f43cb2f7775bde 100644 (file)
@@ -49,12 +49,6 @@ typedef u64            hfn_t;
 
 typedef hfn_t kvm_pfn_t;
 
-enum pfn_cache_usage {
-       KVM_GUEST_USES_PFN = BIT(0),
-       KVM_HOST_USES_PFN  = BIT(1),
-       KVM_GUEST_AND_HOST_USE_PFN = KVM_GUEST_USES_PFN | KVM_HOST_USES_PFN,
-};
-
 struct gfn_to_hva_cache {
        u64 generation;
        gpa_t gpa;
@@ -69,13 +63,11 @@ struct gfn_to_pfn_cache {
        unsigned long uhva;
        struct kvm_memory_slot *memslot;
        struct kvm *kvm;
-       struct kvm_vcpu *vcpu;
        struct list_head list;
        rwlock_t lock;
        struct mutex refresh_lock;
        void *khva;
        kvm_pfn_t pfn;
-       enum pfn_cache_usage usage;
        bool active;
        bool valid;
 };
index 059aa1fff41e9c36ea8e3aee1b03913e6762132a..5f4b0a39cf46a3784a22e0319aa213551d7f4b2c 100644 (file)
@@ -766,7 +766,7 @@ static inline size_t list_count_nodes(struct list_head *head)
  * @member:    the name of the list_head within the struct.
  */
 #define list_entry_is_head(pos, head, member)                          \
-       (&pos->member == (head))
+       list_is_head(&pos->member, (head))
 
 /**
  * list_for_each_entry -       iterate over list of given type
@@ -1195,4 +1195,19 @@ static inline void hlist_splice_init(struct hlist_head *from,
             pos && ({ n = pos->member.next; 1; });                     \
             pos = hlist_entry_safe(n, typeof(*pos), member))
 
+/**
+ * hlist_count_nodes - count nodes in the hlist
+ * @head:      the head for your hlist.
+ */
+static inline size_t hlist_count_nodes(struct hlist_head *head)
+{
+       struct hlist_node *pos;
+       size_t count = 0;
+
+       hlist_for_each(pos, head)
+               count++;
+
+       return count;
+}
+
 #endif
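
A minimal usage sketch for the new helper; 'head' is an illustrative hlist the caller owns:

    size_t depth = hlist_count_nodes(&head);
    pr_debug("bucket depth: %zu\n", depth);
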
index 7675a48a0701085ba4491070e614c9cc9ef23cdf..792b67ceb631b5aa992d8ac9cc2758440f03457d 100644 (file)
@@ -24,6 +24,8 @@ enum lru_status {
        LRU_SKIP,               /* item cannot be locked, skip */
        LRU_RETRY,              /* item not freeable. May drop the lock
                                   internally, but has to return locked. */
+       LRU_STOP,               /* stop lru list walking. May drop the lock
+                                  internally, but has to return locked. */
 };
 
 struct list_lru_one {
@@ -62,8 +64,6 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
 
 #define list_lru_init(lru)                             \
        __list_lru_init((lru), false, NULL, NULL)
-#define list_lru_init_key(lru, key)                    \
-       __list_lru_init((lru), false, (key), NULL)
 #define list_lru_init_memcg(lru, shrinker)             \
        __list_lru_init((lru), true, NULL, shrinker)
 
@@ -170,22 +170,6 @@ static inline unsigned long list_lru_count(struct list_lru *lru)
 void list_lru_isolate(struct list_lru_one *list, struct list_head *item);
 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head);
-/**
- * list_lru_putback: undo list_lru_isolate
- * @lru: the lru pointer.
- * @item: the item to put back.
- * @nid: the node id of the sublist to put the item back to.
- * @memcg: the cgroup of the sublist to put the item back to.
- *
- * Put back an isolated item into its original LRU. Note that unlike
- * list_lru_add, this does not increment the node LRU count (as
- * list_lru_isolate does not originally decrement this count).
- *
- * Since we might have dropped the LRU lock in between, recompute list_lru_one
- * from the node's id and memcg.
- */
-void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
-                     struct mem_cgroup *memcg);
 
 typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item,
                struct list_lru_one *list, spinlock_t *lock, void *cb_arg);
index 20ff87f8e001d2604bfa61b33edb8452c30e4135..394fd0a887ae75eae7c31cf948eafa1bb12a38fe 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/vm_event_item.h>
 #include <linux/hardirq.h>
 #include <linux/jump_label.h>
+#include <linux/kernel.h>
 #include <linux/page_counter.h>
 #include <linux/vmpressure.h>
 #include <linux/eventfd.h>
@@ -712,18 +713,16 @@ static inline void mem_cgroup_uncharge(struct folio *folio)
        __mem_cgroup_uncharge(folio);
 }
 
-void __mem_cgroup_uncharge_list(struct list_head *page_list);
-static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
+void __mem_cgroup_uncharge_folios(struct folio_batch *folios);
+static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios)
 {
        if (mem_cgroup_disabled())
                return;
-       __mem_cgroup_uncharge_list(page_list);
+       __mem_cgroup_uncharge_folios(folios);
 }
 
 void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages);
-
 void mem_cgroup_replace_folio(struct folio *old, struct folio *new);
-
 void mem_cgroup_migrate(struct folio *old, struct folio *new);
 
 /**
@@ -1162,7 +1161,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
        rcu_read_unlock();
 }
 
-void split_page_memcg(struct page *head, unsigned int nr);
+void split_page_memcg(struct page *head, int old_order, int new_order);
 
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                                gfp_t gfp_mask,
@@ -1294,7 +1293,7 @@ static inline void mem_cgroup_uncharge(struct folio *folio)
 {
 }
 
-static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
+static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios)
 {
 }
 
@@ -1620,7 +1619,7 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
 {
 }
 
-static inline void split_page_memcg(struct page *head, unsigned int nr)
+static inline void split_page_memcg(struct page *head, int old_order, int new_order)
 {
 }
 
@@ -1694,18 +1693,18 @@ static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio,
        return folio_lruvec_lock_irq(folio);
 }
 
-/* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio,
-               struct lruvec *locked_lruvec, unsigned long *flags)
+/* Don't lock again iff folio's lruvec locked */
+static inline void folio_lruvec_relock_irqsave(struct folio *folio,
+               struct lruvec **lruvecp, unsigned long *flags)
 {
-       if (locked_lruvec) {
-               if (folio_matches_lruvec(folio, locked_lruvec))
-                       return locked_lruvec;
+       if (*lruvecp) {
+               if (folio_matches_lruvec(folio, *lruvecp))
+                       return;
 
-               unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
+               unlock_page_lruvec_irqrestore(*lruvecp, *flags);
        }
 
-       return folio_lruvec_lock_irqsave(folio, flags);
+       *lruvecp = folio_lruvec_lock_irqsave(folio, flags);
 }
 
 #ifdef CONFIG_CGROUP_WRITEBACK
index f53cfdaaaa4166a453a1dd8e8ddd8317a7aa66d4..c0afee5d126ef65d420770e1f8669842c499c8de 100644 (file)
@@ -96,8 +96,17 @@ int set_memory_block_size_order(unsigned int order);
 #define        MEM_GOING_ONLINE        (1<<3)
 #define        MEM_CANCEL_ONLINE       (1<<4)
 #define        MEM_CANCEL_OFFLINE      (1<<5)
+#define        MEM_PREPARE_ONLINE      (1<<6)
+#define        MEM_FINISH_OFFLINE      (1<<7)
 
 struct memory_notify {
+       /*
+        * The altmap_start_pfn and altmap_nr_pages fields are designated for
+        * specifying the altmap range and are exclusively intended for use in
+        * MEM_PREPARE_ONLINE/MEM_FINISH_OFFLINE notifiers.
+        */
+       unsigned long altmap_start_pfn;
+       unsigned long altmap_nr_pages;
        unsigned long start_pfn;
        unsigned long nr_pages;
        int status_change_nid_normal;
@@ -114,6 +123,7 @@ struct mem_section;
 #define DEFAULT_CALLBACK_PRI   0
 #define SLAB_CALLBACK_PRI      1
 #define HMAT_CALLBACK_PRI      2
+#define CXL_CALLBACK_PRI       5
 #define MM_COMPUTE_BATCH_PRI   10
 #define CPUSET_CALLBACK_PRI    10
 #define MEMTIER_HOTPLUG_PRI    100
index 7d207658349416b0304722dfedf642f506d4c356..7a9ff464608d702fa6e8c1f173e627f07d437bac 100644 (file)
@@ -106,6 +106,22 @@ typedef int __bitwise mhp_t;
  * implies the node id (nid).
  */
 #define MHP_NID_IS_MGID                ((__force mhp_t)BIT(2))
+/*
+ * The hotplugged memory is completely inaccessible while the memory is
+ * offline. The memory provider will handle MEM_PREPARE_ONLINE /
+ * MEM_FINISH_OFFLINE notifications and make the memory accessible.
+ *
+ * This flag is only relevant when used along with MHP_MEMMAP_ON_MEMORY,
+ * because the altmap cannot be written (e.g., poisoned) when adding
+ * memory -- before it is set online.
+ *
+ * This allows for adding memory with an altmap that is not currently
+ * made available by a hypervisor. When onlining that memory, the
+ * hypervisor can be instructed to make that memory available, and
+ * the onlining phase will not require any memory allocations, which is
+ * helpful in low-memory situations.
+ */
+#define MHP_OFFLINE_INACCESSIBLE       ((__force mhp_t)BIT(3))
 
 /*
  * Extended parameters for memory hotplug:
@@ -121,6 +137,7 @@ struct mhp_params {
 
 bool mhp_range_allowed(u64 start, u64 size, bool need_mapping);
 struct range mhp_get_pluggable_range(bool need_mapping);
+bool mhp_supports_memmap_on_memory(void);
 
 /*
  * Zone resizing functions
@@ -154,7 +171,7 @@ extern void adjust_present_page_count(struct page *page,
                                      long nr_pages);
 /* VM interface that may be used by firmware interface */
 extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
-                                    struct zone *zone);
+                                    struct zone *zone, bool mhp_off_inaccessible);
 extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
 extern int online_pages(unsigned long pfn, unsigned long nr_pages,
                        struct zone *zone, struct memory_group *group);
@@ -262,6 +279,11 @@ static inline bool movable_node_is_enabled(void)
        return false;
 }
 
+static inline bool mhp_supports_memmap_on_memory(void)
+{
+       return false;
+}
+
 static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {}
 static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {}
 static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {}
index 7be1e32e6d421d8003fb6466679e6fc7729f5c7d..16c5cc807ff6b4f855e913c56f0f58d5f7a77a3a 100644 (file)
@@ -95,6 +95,19 @@ static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
                              (void *) size);
 }
 
+void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data);
+void mempool_kvfree(void *element, void *pool_data);
+
+static inline int mempool_init_kvmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+       return mempool_init(pool, min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size);
+}
+
+static inline mempool_t *mempool_create_kvmalloc_pool(int min_nr, size_t size)
+{
+       return mempool_create(min_nr, mempool_kvmalloc, mempool_kvfree, (void *) size);
+}
+
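
A minimal sketch of the new kvmalloc-backed pool; min_nr and size are illustrative:

    mempool_t *pool = mempool_create_kvmalloc_pool(4, 64 * 1024);
    void *buf = mempool_alloc(pool, GFP_KERNEL);

    /* ... use buf ... */

    mempool_free(buf, pool);
    mempool_destroy(pool);
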
 /*
  * A mempool_alloc_t and mempool_free_t for a simple page allocator that
  * allocates pages of the order specified by pool_data
index 744c830f4b132c318b2a5cf671ad2d0d59412dc7..3f7143ade32c049d55f9f2025cdfb712dda9b613 100644 (file)
@@ -25,6 +25,7 @@ struct vmem_altmap {
        unsigned long free;
        unsigned long align;
        unsigned long alloc;
+       bool inaccessible;
 };
 
 /*
@@ -108,7 +109,7 @@ struct dev_pagemap_ops {
  * @altmap: pre-allocated/reserved memory for vmemmap allocations
  * @ref: reference count that pins the devm_memremap_pages() mapping
  * @done: completion for @ref
- * @type: memory type: see MEMORY_* in memory_hotplug.h
+ * @type: memory type: see MEMORY_* above in memremap.h
  * @flags: PGMAP_* flags to specify detailed behavior
  * @vmemmap_shift: structural definition of how the vmemmap page metadata
  *      is populated, specifically the metadata page order.
index 44077837385f89090c457472fa1d43fcd1a7cb16..d52daf45861b9a51f9dc5e9e29f2700aa3146adb 100644 (file)
@@ -35,31 +35,33 @@ static __always_inline
 void min_heapify(struct min_heap *heap, int pos,
                const struct min_heap_callbacks *func)
 {
-       void *left, *right, *parent, *smallest;
+       void *left, *right;
        void *data = heap->data;
+       void *root = data + pos * func->elem_size;
+       int i = pos, j;
 
+       /* Find the sift-down path all the way to the leaves. */
        for (;;) {
-               if (pos * 2 + 1 >= heap->nr)
+               if (i * 2 + 2 >= heap->nr)
                        break;
+               left = data + (i * 2 + 1) * func->elem_size;
+               right = data + (i * 2 + 2) * func->elem_size;
+               i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2;
+       }
 
-               left = data + ((pos * 2 + 1) * func->elem_size);
-               parent = data + (pos * func->elem_size);
-               smallest = parent;
-               if (func->less(left, smallest))
-                       smallest = left;
-
-               if (pos * 2 + 2 < heap->nr) {
-                       right = data + ((pos * 2 + 2) * func->elem_size);
-                       if (func->less(right, smallest))
-                               smallest = right;
-               }
-               if (smallest == parent)
-                       break;
-               func->swp(smallest, parent);
-               if (smallest == left)
-                       pos = (pos * 2) + 1;
-               else
-                       pos = (pos * 2) + 2;
+       /* Special case for the last leaf with no sibling. */
+       if (i * 2 + 2 == heap->nr)
+               i = i * 2 + 1;
+
+       /* Backtrack to the correct location. */
+       while (i != pos && func->less(root, data + i * func->elem_size))
+               i = (i - 1) / 2;
+
+       /* Shift the element into its correct place. */
+       j = i;
+       while (i != pos) {
+               i = (i - 1) / 2;
+               func->swp(data + i * func->elem_size, data + j * func->elem_size);
        }
 }
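
The rewrite above is the classic bottom-up sift-down: walk the smaller-child path all the way to a leaf (one comparison per level), backtrack to where the root element belongs, then shift it into place, roughly halving comparisons versus the old top-down variant; the loop-bound change in min_heapify_all() below starts from the last parent (nr / 2 - 1) instead of one past it. A minimal sketch of driving it, with illustrative callbacks:

    static bool int_less(const void *a, const void *b)
    {
            return *(const int *)a < *(const int *)b;
    }

    static void int_swp(void *a, void *b)
    {
            swap(*(int *)a, *(int *)b);
    }

    static const struct min_heap_callbacks cb = {
            .elem_size = sizeof(int),
            .less = int_less,
            .swp = int_swp,
    };

    int vals[] = { 5, 2, 8, 1 };
    struct min_heap heap = { .data = vals, .nr = 4, .size = 4 };

    min_heapify_all(&heap, &cb);        /* vals[0] is now 1 */
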
 
@@ -70,7 +72,7 @@ void min_heapify_all(struct min_heap *heap,
 {
        int i;
 
-       for (i = heap->nr / 2; i >= 0; i--)
+       for (i = heap->nr / 2 - 1; i >= 0; i--)
                min_heapify(heap, i, func);
 }
 
index 49f660563e4916a424a45a7b3084de23588ed9f9..c940b329a475fbe37ede4e5b8c9a34826df9b060 100644 (file)
@@ -12677,6 +12677,11 @@ struct mlx5_ifc_modify_page_track_obj_in_bits {
        struct mlx5_ifc_page_track_bits obj_context;
 };
 
+struct mlx5_ifc_query_page_track_obj_out_bits {
+       struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
+       struct mlx5_ifc_page_track_bits obj_context;
+};
+
 struct mlx5_ifc_msecq_reg_bits {
        u8         reserved_at_0[0x20];
 
index f5a97dec51694894a979dd1d045a6b982622e09c..2c0910bc3e4a2cacca59a9af5b74cf8a114dab03 100644 (file)
@@ -36,6 +36,7 @@ struct anon_vma;
 struct anon_vma_chain;
 struct user_struct;
 struct pt_regs;
+struct folio_batch;
 
 extern int sysctl_page_lock_unfairness;
 
@@ -226,7 +227,6 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
 /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */
 #define PAGE_ALIGNED(addr)     IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
 
-#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
 static inline struct folio *lru_to_folio(struct list_head *head)
 {
        return list_entry((head)->prev, struct folio, lru);
@@ -391,6 +391,20 @@ extern unsigned int kobjsize(const void *objp);
 # define VM_UFFD_MINOR         VM_NONE
 #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
 
+/*
+ * This flag is used to connect VFIO to arch specific KVM code. It
+ * indicates that the memory under this VMA is safe for use with any
+ * non-cacheable memory type inside KVM. Some VFIO devices, on some
+ * platforms, are thought to be unsafe and can cause machine crashes
+ * if KVM does not lock down the memory type.
+ */
+#ifdef CONFIG_64BIT
+#define VM_ALLOW_ANY_UNCACHED_BIT      39
+#define VM_ALLOW_ANY_UNCACHED          BIT(VM_ALLOW_ANY_UNCACHED_BIT)
+#else
+#define VM_ALLOW_ANY_UNCACHED          VM_NONE
+#endif
+
 /* Bits set in the VMA until the stack is in its final location */
 #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
 
@@ -781,6 +795,11 @@ static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
        return NULL;
 }
 
+static inline void vma_assert_locked(struct vm_area_struct *vma)
+{
+       mmap_assert_locked(vma->vm_mm);
+}
+
 static inline void release_fault_lock(struct vm_fault *vmf)
 {
        mmap_read_unlock(vmf->vma->vm_mm);
@@ -1178,7 +1197,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
  * How many times the entire folio is mapped as a single unit (eg by a
  * PMD or PUD entry).  This is probably not what you want, except for
  * debugging purposes - it does not include PTE-mapped sub-pages; look
- * at folio_mapcount() or page_mapcount() or total_mapcount() instead.
+ * at folio_mapcount() or page_mapcount() instead.
  */
 static inline int folio_entire_mapcount(struct folio *folio)
 {
@@ -1238,13 +1257,6 @@ static inline int folio_mapcount(struct folio *folio)
        return folio_total_mapcount(folio);
 }
 
-static inline int total_mapcount(struct page *page)
-{
-       if (likely(!PageCompound(page)))
-               return atomic_read(&page->_mapcount) + 1;
-       return folio_total_mapcount(page_folio(page));
-}
-
 static inline bool folio_large_is_mapped(struct folio *folio)
 {
        /*
@@ -1514,6 +1526,8 @@ static inline void folio_put_refs(struct folio *folio, int refs)
                __folio_put(folio);
 }
 
+void folios_put_refs(struct folio_batch *folios, unsigned int *refs);
+
 /*
  * union release_pages_arg - an array of pages or folios
  *
@@ -1536,18 +1550,19 @@ void release_pages(release_pages_arg, int nr);
 /**
  * folios_put - Decrement the reference count on an array of folios.
  * @folios: The folios.
- * @nr: How many folios there are.
  *
- * Like folio_put(), but for an array of folios.  This is more efficient
- * than writing the loop yourself as it will optimise the locks which
- * need to be taken if the folios are freed.
+ * Like folio_put(), but for a batch of folios.  This is more efficient
+ * than writing the loop yourself as it will optimise the locks which need
+ * to be taken if the folios are freed.  The folio batch is returned
+ * empty and ready to be reused for another batch; there is no need to
+ * reinitialise it.
  *
  * Context: May be called in process or interrupt context, but not in NMI
  * context.  May be called while holding a spinlock.
  */
-static inline void folios_put(struct folio **folios, unsigned int nr)
+static inline void folios_put(struct folio_batch *folios)
 {
-       release_pages(folios, nr);
+       folios_put_refs(folios, NULL);
 }
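
A minimal sketch, assuming 'fbatch' is a populated struct folio_batch:

    folios_put(&fbatch);
    /* fbatch is now empty; no folio_batch_reinit() is needed. */
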
 
 static inline void put_page(struct page *page)
@@ -1640,13 +1655,11 @@ static inline int page_zone_id(struct page *page)
 }
 
 #ifdef NODE_NOT_IN_PAGE_FLAGS
-extern int page_to_nid(const struct page *page);
+int page_to_nid(const struct page *page);
 #else
 static inline int page_to_nid(const struct page *page)
 {
-       struct page *p = (struct page *)page;
-
-       return (PF_POISONED_CHECK(p)->flags >> NODES_PGSHIFT) & NODES_MASK;
+       return (PF_POISONED_CHECK(page)->flags >> NODES_PGSHIFT) & NODES_MASK;
 }
 #endif
 
@@ -2065,6 +2078,13 @@ static inline long folio_nr_pages(struct folio *folio)
 #endif
 }
 
+/* Only hugetlbfs can allocate folios larger than MAX_ORDER */
+#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
+#define MAX_FOLIO_NR_PAGES     (1UL << PUD_ORDER)
+#else
+#define MAX_FOLIO_NR_PAGES     MAX_ORDER_NR_PAGES
+#endif
+
 /*
  * compound_nr() returns the number of pages in this potentially compound
  * page.  compound_nr() can be called on a tail page, and is defined to
@@ -2595,19 +2615,19 @@ static inline void dec_mm_counter(struct mm_struct *mm, int member)
        mm_trace_rss_stat(mm, member);
 }
 
-/* Optimized variant when page is already known not to be PageAnon */
-static inline int mm_counter_file(struct page *page)
+/* Optimized variant when folio is already known not to be anon */
+static inline int mm_counter_file(struct folio *folio)
 {
-       if (PageSwapBacked(page))
+       if (folio_test_swapbacked(folio))
                return MM_SHMEMPAGES;
        return MM_FILEPAGES;
 }
 
-static inline int mm_counter(struct page *page)
+static inline int mm_counter(struct folio *folio)
 {
-       if (PageAnon(page))
+       if (folio_test_anon(folio))
                return MM_ANONPAGES;
-       return mm_counter_file(page);
+       return mm_counter_file(folio);
 }
 
 static inline unsigned long get_mm_rss(struct mm_struct *mm)
index 8b611e13153e68d944126f7cf57bca8a2bc69290..5240bd7bca338c5f490752fcf5d43ac8b7d78a02 100644 (file)
@@ -210,8 +210,8 @@ struct page {
  *
  * An 'encoded_page' pointer is a pointer to a regular 'struct page', but
  * with the low bits of the pointer indicating extra context-dependent
- * information. Not super-common, but happens in mmu_gather and mlock
- * handling, and this acts as a type system check on that use.
+ * information. Only used in mmu_gather handling, and this acts as a type
+ * system check on that use.
  *
  * We only really have two guaranteed bits in general, although you could
  * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
@@ -220,21 +220,46 @@ struct page {
  * Use the supplied helper functions to encode/decode the pointer and bits.
  */
 struct encoded_page;
-#define ENCODE_PAGE_BITS 3ul
+
+#define ENCODED_PAGE_BITS                      3ul
+
+/* Perform rmap removal after we have flushed the TLB. */
+#define ENCODED_PAGE_BIT_DELAY_RMAP            1ul
+
+/*
+ * The next item in an encoded_page array is the "nr_pages" argument, specifying
+ * the number of consecutive pages starting from this page, that all belong to
+ * the same folio. For example, "nr_pages" corresponds to the number of folio
+ * references that must be dropped. If this bit is not set, "nr_pages" is
+ * implicitly 1.
+ */
+#define ENCODED_PAGE_BIT_NR_PAGES_NEXT         2ul
+
 static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags)
 {
-       BUILD_BUG_ON(flags > ENCODE_PAGE_BITS);
+       BUILD_BUG_ON(flags > ENCODED_PAGE_BITS);
        return (struct encoded_page *)(flags | (unsigned long)page);
 }
 
 static inline unsigned long encoded_page_flags(struct encoded_page *page)
 {
-       return ENCODE_PAGE_BITS & (unsigned long)page;
+       return ENCODED_PAGE_BITS & (unsigned long)page;
 }
 
 static inline struct page *encoded_page_ptr(struct encoded_page *page)
 {
-       return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page);
+       return (struct page *)(~ENCODED_PAGE_BITS & (unsigned long)page);
+}
+
+static __always_inline struct encoded_page *encode_nr_pages(unsigned long nr)
+{
+       VM_WARN_ON_ONCE((nr << 2) >> 2 != nr);
+       return (struct encoded_page *)(nr << 2);
+}
+
+static __always_inline unsigned long encoded_nr_pages(struct encoded_page *page)
+{
+       return ((unsigned long)page) >> 2;
 }
 
 /*
@@ -394,12 +419,13 @@ FOLIO_MATCH(compound_head, _head_2a);
 
 /**
  * struct ptdesc -    Memory descriptor for page tables.
- * @__page_flags:     Same as page flags. Unused for page tables.
+ * @__page_flags:     Same as page flags. Powerpc only.
  * @pt_rcu_head:      For freeing page table pages.
  * @pt_list:          List of used page tables. Used for s390 and x86.
  * @_pt_pad_1:        Padding that aliases with page's compound head.
  * @pmd_huge_pte:     Protected by ptdesc->ptl, used for THPs.
  * @__page_mapping:   Aliases with page->mapping. Unused for page tables.
+ * @pt_index:         Used for s390 gmap.
  * @pt_mm:            Used for x86 pgds.
  * @pt_frag_refcount: For fragmented page table tracking. Powerpc only.
  * @_pt_pad_2:        Padding to ensure proper alignment.
@@ -425,6 +451,7 @@ struct ptdesc {
        unsigned long __page_mapping;
 
        union {
+               pgoff_t pt_index;
                struct mm_struct *pt_mm;
                atomic_t pt_frag_refcount;
        };
@@ -450,6 +477,7 @@ TABLE_MATCH(flags, __page_flags);
 TABLE_MATCH(compound_head, pt_list);
 TABLE_MATCH(compound_head, _pt_pad_1);
 TABLE_MATCH(mapping, __page_mapping);
+TABLE_MATCH(index, pt_index);
 TABLE_MATCH(rcu_head, pt_rcu_head);
 TABLE_MATCH(page_type, __page_type);
 TABLE_MATCH(_refcount, __page_refcount);
index 7c3e7b0b0e8fd6e91140d50432cf98eecb253193..39a7714605a7965d1792f557e3ee40a6cf73c4e2 100644 (file)
@@ -10,7 +10,7 @@ struct vm_area_struct;
 struct mm_struct;
 struct vma_iterator;
 
-void dump_page(struct page *page, const char *reason);
+void dump_page(const struct page *page, const char *reason);
 void dump_vma(const struct vm_area_struct *vma);
 void dump_mm(const struct mm_struct *mm);
 void vma_iter_dump_tree(const struct vma_iterator *vmi);
index f2b7a3f040999e8a1506306d0c30800a2a7d3ec2..bbaec80c78c5055b348edb3d641f8480a5f9b15e 100644 (file)
@@ -11,7 +11,7 @@
 #endif
 
 #ifndef leave_mm
-static inline void leave_mm(int cpu) { }
+static inline void leave_mm(void) { }
 #endif
 
 /*
index a497f189d98818bcda37458746ebb2bded7826e4..c11b7cde81efae9d9b57e266f531d4f1fd0d0592 100644 (file)
@@ -76,9 +76,12 @@ extern const char * const migratetype_names[MIGRATE_TYPES];
 #ifdef CONFIG_CMA
 #  define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA)
 #  define is_migrate_cma_page(_page) (get_pageblock_migratetype(_page) == MIGRATE_CMA)
+#  define is_migrate_cma_folio(folio, pfn)     (MIGRATE_CMA ==         \
+       get_pfnblock_flags_mask(&folio->page, pfn, MIGRATETYPE_MASK))
 #else
 #  define is_migrate_cma(migratetype) false
 #  define is_migrate_cma_page(_page) false
+#  define is_migrate_cma_folio(folio, pfn) false
 #endif
 
 static inline bool is_migrate_movable(int mt)
@@ -464,7 +467,7 @@ enum {
 #define NR_BLOOM_FILTERS       2
 
 struct lru_gen_mm_state {
-       /* set to max_seq after each iteration */
+       /* synced with max_seq after each iteration */
        unsigned long seq;
        /* where the current iteration continues after */
        struct list_head *head;
@@ -479,8 +482,8 @@ struct lru_gen_mm_state {
 struct lru_gen_mm_walk {
        /* the lruvec under reclaim */
        struct lruvec *lruvec;
-       /* unstable max_seq from lru_gen_folio */
-       unsigned long max_seq;
+       /* max_seq from lru_gen_folio: can be out of date */
+       unsigned long seq;
        /* the next address within an mm to scan */
        unsigned long next_addr;
        /* to batch promoted pages */
index 001b2ce83832ed2de2e25b3427aaad5788b2ac3b..89b1e0ed981144e832a7f17ced01eca5d8ca7cd0 100644 (file)
@@ -115,6 +115,14 @@ int module_finalize(const Elf_Ehdr *hdr,
                    const Elf_Shdr *sechdrs,
                    struct module *mod);
 
+#ifdef CONFIG_MODULES
+void flush_module_init_free_work(void);
+#else
+static inline void flush_module_init_free_work(void)
+{
+}
+#endif
+
 /* Any cleanup needed when module leaves. */
 void module_arch_cleanup(struct module *mod);
 
index c04f690871ca18f5f15cd9c4e8cfee3b4ef77d0f..9798c1a1d3b6514d91b83d9e55fff8c5e9451d5d 100644 (file)
@@ -13,6 +13,7 @@
  */
 #include <linux/sched.h>
 #include <linux/mutex.h>
+#include <linux/wait.h>
 
 typedef enum {
        FL_READY,
index d168c628c0d54fbc7d112c94cc5f638373b001c7..35e971be0950b6d66b3aa7f91b8f255a02872be8 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/dmaengine.h>
 
 struct lpc32xx_mlc_platform_data {
-       bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+       dma_filter_fn dma_filter;
 };
 
 #endif  /* __LINUX_MTD_LPC32XX_MLC_H */
index cf54a9f80460392cecace40c07b7fc4dcc3f3889..a044b806566b4327e2420709e10b97cbe74e11c0 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/dmaengine.h>
 
 struct lpc32xx_slc_platform_data {
-       bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+       dma_filter_fn dma_filter;
 };
 
 #endif  /* __LINUX_MTD_LPC32XX_SLC_H */
index 914a9f974baaae5010d473d0e5d8c6e79f88e191..8d10d9d2e8300fef430a8db9b3f7c37e3364434e 100644 (file)
@@ -223,7 +223,7 @@ struct mtd_part {
  * @partitions_lock: lock protecting accesses to the partition list. Protects
  *                  not only the master partition list, but also all
  *                  sub-partitions.
- * @suspended: et to 1 when the device is suspended, 0 otherwise
+ * @suspended: set to 1 when the device is suspended, 0 otherwise
  *
  * This struct is embedded in mtd_info and contains master-specific
  * properties/fields. The master is the root MTD device from the MTD partition
index badb4c1ac079e87dd9c1cd439220698b02553e4f..5c19ead604996d5e06b2a7a29d7ec21b93e0b24d 100644 (file)
 struct spinand_op;
 struct spinand_device;
 
-#define SPINAND_MAX_ID_LEN     4
+#define SPINAND_MAX_ID_LEN     5
 /*
  * For erase, write and read operation, we got the following timings :
  * tBERS (erase) 1ms to 4ms
index f5ce7b1011461385968723826f4052f3f492a4af..d59116ac82099d0b5985a7f2b57a2e69e9e860dc 100644 (file)
@@ -611,6 +611,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio);
 extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_commit_data *nfs_commitdata_alloc(void);
 extern void nfs_commit_free(struct nfs_commit_data *data);
+void nfs_commit_begin(struct nfs_mds_commit_info *cinfo);
 bool nfs_commit_end(struct nfs_mds_commit_info *cinfo);
 
 static inline bool nfs_have_writebacks(const struct inode *inode)
index 539b57fbf3ce397ae80bbe186547f77100830d1b..d09b9773b20c8251f5a2e9232bbed6b22f6bd708 100644 (file)
@@ -1820,13 +1820,6 @@ struct nfs_rpc_ops {
        void    (*disable_swap)(struct inode *inode);
 };
 
-/*
- *     NFS_CALL(getattr, inode, (fattr));
- * into
- *     NFS_PROTO(inode)->getattr(fattr);
- */
-#define NFS_CALL(op, inode, args)      NFS_PROTO(inode)->op args
-
 /*
  * Function vectors etc. for the NFS client
  */
index e92e378df000fb1eca1e082ebc889fe7849a19d2..f53438eae815dead4b6c88bfada6eaf0d7d8ec11 100644 (file)
@@ -216,13 +216,6 @@ void watchdog_update_hrtimer_threshold(u64 period);
 static inline void watchdog_update_hrtimer_threshold(u64 period) { }
 #endif
 
-struct ctl_table;
-int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *);
-int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *);
-int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *);
-int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *);
-int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *);
-
 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
 #include <asm/nmi.h>
 #endif
index 25b66d705ee2ec754021d5ea2e2f1bce15ef7dbe..dfc004e4bee747c9200fe970c79858b9ffc5f377 100644 (file)
@@ -34,6 +34,18 @@ struct access_coordinate {
        unsigned int write_latency;
 };
 
+/*
+ * ACCESS_COORDINATE_LOCAL correlates to ACCESS CLASS 0
+ *     - access_coordinate between target node and nearest initiator node
+ * ACCESS_COORDINATE_CPU correlates to ACCESS CLASS 1
+ *     - access_coordinate between target node and nearest CPU node
+ */
+enum access_coordinate_class {
+       ACCESS_COORDINATE_LOCAL,
+       ACCESS_COORDINATE_CPU,
+       ACCESS_COORDINATE_MAX
+};
+
 enum cache_indexing {
        NODE_CACHE_DIRECT_MAP,
        NODE_CACHE_INDEXED,
@@ -66,7 +78,7 @@ struct node_cache_attrs {
 #ifdef CONFIG_HMEM_REPORTING
 void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs);
 void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
-                        unsigned access);
+                        enum access_coordinate_class access);
 #else
 static inline void node_add_cache(unsigned int nid,
                                  struct node_cache_attrs *cache_attrs)
@@ -75,7 +87,7 @@ static inline void node_add_cache(unsigned int nid,
 
 static inline void node_set_perf_attrs(unsigned int nid,
                                       struct access_coordinate *coord,
-                                      unsigned access)
+                                      enum access_coordinate_class access)
 {
 }
 #endif
@@ -137,7 +149,7 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
 
 extern int register_memory_node_under_compute_node(unsigned int mem_nid,
                                                   unsigned int cpu_nid,
-                                                  unsigned access);
+                                                  enum access_coordinate_class access);
 #else
 static inline void node_dev_init(void)
 {
index 6a9ddf20e79abdf0341f0cf6210e5991231eeab8..df702b2c2ae3a61f465331c073118d3779b89250 100644 (file)
@@ -13,6 +13,7 @@
  */
 #include <linux/types.h>
 #include <linux/bitops.h>
+#include <linux/cleanup.h>
 #include <linux/errno.h>
 #include <linux/kobject.h>
 #include <linux/mod_devicetable.h>
@@ -134,6 +135,7 @@ static inline struct device_node *of_node_get(struct device_node *node)
 }
 static inline void of_node_put(struct device_node *node) { }
 #endif /* !CONFIG_OF_DYNAMIC */
+DEFINE_FREE(device_node, struct device_node *, if (_T) of_node_put(_T))
 
 /* Pointer for first entry in chain of all nodes. */
 extern struct device_node *of_root;
@@ -180,11 +182,6 @@ static inline bool is_of_node(const struct fwnode_handle *fwnode)
                        &__of_fwnode_handle_node->fwnode : NULL;        \
        })
 
-static inline bool of_have_populated_dt(void)
-{
-       return of_root != NULL;
-}
-
 static inline bool of_node_is_root(const struct device_node *node)
 {
        return node && (node->parent == NULL);
@@ -294,6 +291,8 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
                                             struct device_node *prev);
 extern struct device_node *of_get_next_available_child(
        const struct device_node *node, struct device_node *prev);
+extern struct device_node *of_get_next_reserved_child(
+       const struct device_node *node, struct device_node *prev);
 
 extern struct device_node *of_get_compatible_child(const struct device_node *parent,
                                        const char *compatible);
@@ -362,9 +361,6 @@ extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node,
                                                 int index);
 extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread);
 
-#define for_each_property_of_node(dn, pp) \
-       for (pp = dn->properties; pp != NULL; pp = pp->next)
-
 extern int of_n_addr_cells(struct device_node *np);
 extern int of_n_size_cells(struct device_node *np);
 extern const struct of_device_id *of_match_node(
@@ -402,7 +398,20 @@ extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align));
 extern int of_alias_get_id(struct device_node *np, const char *stem);
 extern int of_alias_get_highest_id(const char *stem);
 
-extern int of_machine_is_compatible(const char *compat);
+bool of_machine_compatible_match(const char *const *compats);
+
+/**
+ * of_machine_is_compatible - Test root of device tree for a given compatible value
+ * @compat: compatible string to look for in root node's compatible property.
+ *
+ * Return: true if the root node has the given value in its compatible property.
+ */
+static inline bool of_machine_is_compatible(const char *compat)
+{
+       const char *compats[] = { compat, NULL };
+
+       return of_machine_compatible_match(compats);
+}
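
A minimal sketch of the multi-compatible matcher; the board strings and the quirk helper are illustrative:

    static const char *const boards[] = {
            "vendor,board-a", "vendor,board-b", NULL
    };

    if (of_machine_compatible_match(boards))
            setup_board_quirks();       /* hypothetical helper */
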
 
 extern int of_add_property(struct device_node *np, struct property *prop);
 extern int of_remove_property(struct device_node *np, struct property *prop);
@@ -541,6 +550,12 @@ static inline struct device_node *of_get_next_available_child(
        return NULL;
 }
 
+static inline struct device_node *of_get_next_reserved_child(
+       const struct device_node *node, struct device_node *prev)
+{
+       return NULL;
+}
+
 static inline struct device_node *of_find_node_with_property(
        struct device_node *from, const char *prop_name)
 {
@@ -549,11 +564,6 @@ static inline struct device_node *of_find_node_with_property(
 
 #define of_fwnode_handle(node) NULL
 
-static inline bool of_have_populated_dt(void)
-{
-       return false;
-}
-
 static inline struct device_node *of_get_compatible_child(const struct device_node *parent,
                                        const char *compatible)
 {
@@ -808,6 +818,11 @@ static inline int of_remove_property(struct device_node *np, struct property *pr
        return 0;
 }
 
+static inline bool of_machine_compatible_match(const char *const *compats)
+{
+       return false;
+}
+
 static inline bool of_console_check(const struct device_node *dn, const char *name, int index)
 {
        return false;
@@ -892,6 +907,9 @@ static inline int of_prop_val_eq(struct property *p1, struct property *p2)
               !memcmp(p1->value, p2->value, (size_t)p1->length);
 }
 
+#define for_each_property_of_node(dn, pp) \
+       for (pp = dn->properties; pp != NULL; pp = pp->next)
+
 #if defined(CONFIG_OF) && defined(CONFIG_NUMA)
 extern int of_node_to_nid(struct device_node *np);
 #else
@@ -1428,9 +1446,25 @@ static inline int of_property_read_s32(const struct device_node *np,
 #define for_each_child_of_node(parent, child) \
        for (child = of_get_next_child(parent, NULL); child != NULL; \
             child = of_get_next_child(parent, child))
+
+#define for_each_child_of_node_scoped(parent, child) \
+       for (struct device_node *child __free(device_node) =            \
+            of_get_next_child(parent, NULL);                           \
+            child != NULL;                                             \
+            child = of_get_next_child(parent, child))
+
 #define for_each_available_child_of_node(parent, child) \
        for (child = of_get_next_available_child(parent, NULL); child != NULL; \
             child = of_get_next_available_child(parent, child))
+#define for_each_reserved_child_of_node(parent, child)                 \
+       for (child = of_get_next_reserved_child(parent, NULL); child != NULL; \
+            child = of_get_next_reserved_child(parent, child))
+
+#define for_each_available_child_of_node_scoped(parent, child) \
+       for (struct device_node *child __free(device_node) =            \
+            of_get_next_available_child(parent, NULL);                 \
+            child != NULL;                                             \
+            child = of_get_next_available_child(parent, child))
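
A sketch of the scoped iterator, which drops the child reference automatically via __free(device_node) even on early return; the compatible string is illustrative:

    for_each_child_of_node_scoped(parent, child) {
            if (of_device_is_compatible(child, "vendor,widget"))
                    return 0;           /* no of_node_put() needed */
    }
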
 
 #define for_each_of_cpu_node(cpu) \
        for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \
@@ -1634,6 +1668,21 @@ static inline bool of_device_is_system_power_controller(const struct device_node
        return of_property_read_bool(np, "system-power-controller");
 }
 
+/**
+ * of_have_populated_dt() - Has DT been populated by bootloader
+ *
+ * Return: True if a DTB has been populated by the bootloader and it isn't the
+ * empty builtin one. False otherwise.
+ */
+static inline bool of_have_populated_dt(void)
+{
+#ifdef CONFIG_OF
+       return of_property_present(of_root, "compatible");
+#else
+       return false;
+#endif
+}
+
 /*
  * Overlay support
  */
index 4d7756087b6b666aac2fdb4a707b734682518d97..a4bea62bfa290a7c1693934ecc120d93b78d52d1 100644 (file)
@@ -41,7 +41,7 @@ struct of_endpoint {
 bool of_graph_is_present(const struct device_node *node);
 int of_graph_parse_endpoint(const struct device_node *node,
                                struct of_endpoint *endpoint);
-int of_graph_get_endpoint_count(const struct device_node *np);
+unsigned int of_graph_get_endpoint_count(const struct device_node *np);
 struct device_node *of_graph_get_port_by_id(struct device_node *node, u32 id);
 struct device_node *of_graph_get_next_endpoint(const struct device_node *parent,
                                        struct device_node *previous);
@@ -68,7 +68,7 @@ static inline int of_graph_parse_endpoint(const struct device_node *node,
        return -ENOSYS;
 }
 
-static inline int of_graph_get_endpoint_count(const struct device_node *np)
+static inline unsigned int of_graph_get_endpoint_count(const struct device_node *np)
 {
        return 0;
 }
index 495b16b6b4d729360b2e85dac8e76a31ddce0dc6..0146daf3443066d8097181a5cdf49e87af24070d 100644 (file)
@@ -137,6 +137,7 @@ struct padata_shell {
  *             appropriate for one worker thread to do at once.
  * @max_threads: Max threads to use for the job, actual number may be less
  *               depending on task size and minimum chunk size.
+ * @numa_aware: Distribute jobs round-robin across NUMA nodes that have CPUs.
  */
 struct padata_mt_job {
        void (*thread_fn)(unsigned long start, unsigned long end, void *arg);
@@ -146,6 +147,7 @@ struct padata_mt_job {
        unsigned long           align;
        unsigned long           min_chunk;
        int                     max_threads;
+       bool                    numa_aware;
 };
 
 /**
@@ -178,10 +180,6 @@ struct padata_instance {
 
 #ifdef CONFIG_PADATA
 extern void __init padata_init(void);
-#else
-static inline void __init padata_init(void) {}
-#endif
-
 extern struct padata_instance *padata_alloc(const char *name);
 extern void padata_free(struct padata_instance *pinst);
 extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst);
@@ -192,4 +190,12 @@ extern void padata_do_serial(struct padata_priv *padata);
 extern void __init padata_do_multithreaded(struct padata_mt_job *job);
 extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
                              cpumask_var_t cpumask);
+#else
+static inline void __init padata_init(void) {}
+static inline void __init padata_do_multithreaded(struct padata_mt_job *job)
+{
+       job->thread_fn(job->start, job->start + job->size, job->fn_arg);
+}
+#endif
+
 #endif
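
A sketch of a caller, with illustrative field values; under CONFIG_PADATA=n the new stub simply runs thread_fn once over the whole range:

    struct padata_mt_job job = {
            .thread_fn   = init_range_fn,       /* hypothetical callback */
            .fn_arg      = ctx,
            .start       = 0,
            .size        = nr_items,
            .align       = 1,
            .min_chunk   = 64,
            .max_threads = 4,
            .numa_aware  = true,
    };

    padata_do_multithreaded(&job);
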
index 735cddc13d20e1fdb1cad4615a78034156d6f28d..652d77805e99dfb2adf31def151a26466eff571c 100644 (file)
@@ -237,7 +237,7 @@ static inline const struct page *page_fixed_fake_head(const struct page *page)
 }
 #endif
 
-static __always_inline int page_is_fake_head(struct page *page)
+static __always_inline int page_is_fake_head(const struct page *page)
 {
        return page_fixed_fake_head(page) != page;
 }
@@ -281,12 +281,12 @@ static inline unsigned long _compound_head(const struct page *page)
  */
 #define folio_page(folio, n)   nth_page(&(folio)->page, n)
 
-static __always_inline int PageTail(struct page *page)
+static __always_inline int PageTail(const struct page *page)
 {
        return READ_ONCE(page->compound_head) & 1 || page_is_fake_head(page);
 }
 
-static __always_inline int PageCompound(struct page *page)
+static __always_inline int PageCompound(const struct page *page)
 {
        return test_bit(PG_head, &page->flags) ||
               READ_ONCE(page->compound_head) & 1;
@@ -306,6 +306,16 @@ static inline void page_init_poison(struct page *page, size_t size)
 }
 #endif
 
+static const unsigned long *const_folio_flags(const struct folio *folio,
+               unsigned n)
+{
+       const struct page *page = &folio->page;
+
+       VM_BUG_ON_PGFLAGS(PageTail(page), page);
+       VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page);
+       return &page[n].flags;
+}
+
 static unsigned long *folio_flags(struct folio *folio, unsigned n)
 {
        struct page *page = &folio->page;
@@ -328,9 +338,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
  *     for compound page all operations related to the page flag applied to
  *     head page.
  *
- * PF_ONLY_HEAD:
- *     for compound page, callers only ever operate on the head page.
- *
  * PF_NO_TAIL:
  *     modifications of the page flag must be done on small or head pages,
  *     checks can be done on tail pages too.
@@ -346,9 +353,6 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
                page; })
 #define PF_ANY(page, enforce)  PF_POISONED_CHECK(page)
 #define PF_HEAD(page, enforce) PF_POISONED_CHECK(compound_head(page))
-#define PF_ONLY_HEAD(page, enforce) ({                                 \
-               VM_BUG_ON_PGFLAGS(PageTail(page), page);                \
-               PF_POISONED_CHECK(page); })
 #define PF_NO_TAIL(page, enforce) ({                                   \
                VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);     \
                PF_POISONED_CHECK(compound_head(page)); })
@@ -362,59 +366,81 @@ static unsigned long *folio_flags(struct folio *folio, unsigned n)
 /* Which page is the flag stored in */
 #define FOLIO_PF_ANY           0
 #define FOLIO_PF_HEAD          0
-#define FOLIO_PF_ONLY_HEAD     0
 #define FOLIO_PF_NO_TAIL       0
 #define FOLIO_PF_NO_COMPOUND   0
 #define FOLIO_PF_SECOND                1
 
+#define FOLIO_HEAD_PAGE                0
+#define FOLIO_SECOND_PAGE      1
+
 /*
  * Macros to create function definitions for page flags
  */
+#define FOLIO_TEST_FLAG(name, page)                                    \
+static __always_inline bool folio_test_##name(const struct folio *folio) \
+{ return test_bit(PG_##name, const_folio_flags(folio, page)); }
+
+#define FOLIO_SET_FLAG(name, page)                                     \
+static __always_inline void folio_set_##name(struct folio *folio)      \
+{ set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_CLEAR_FLAG(name, page)                                   \
+static __always_inline void folio_clear_##name(struct folio *folio)    \
+{ clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_SET_FLAG(name, page)                                   \
+static __always_inline void __folio_set_##name(struct folio *folio)    \
+{ __set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define __FOLIO_CLEAR_FLAG(name, page)                                 \
+static __always_inline void __folio_clear_##name(struct folio *folio)  \
+{ __clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_SET_FLAG(name, page)                                        \
+static __always_inline bool folio_test_set_##name(struct folio *folio) \
+{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_TEST_CLEAR_FLAG(name, page)                              \
+static __always_inline bool folio_test_clear_##name(struct folio *folio) \
+{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); }
+
+#define FOLIO_FLAG(name, page)                                         \
+FOLIO_TEST_FLAG(name, page)                                            \
+FOLIO_SET_FLAG(name, page)                                             \
+FOLIO_CLEAR_FLAG(name, page)
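
For illustration only (not part of the patch), FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE),
as used later in this diff, expands to:

	static __always_inline bool folio_test_waiters(const struct folio *folio)
	{ return test_bit(PG_waiters, const_folio_flags(folio, FOLIO_HEAD_PAGE)); }
	static __always_inline void folio_set_waiters(struct folio *folio)
	{ set_bit(PG_waiters, folio_flags(folio, FOLIO_HEAD_PAGE)); }
	static __always_inline void folio_clear_waiters(struct folio *folio)
	{ clear_bit(PG_waiters, folio_flags(folio, FOLIO_HEAD_PAGE)); }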
+
 #define TESTPAGEFLAG(uname, lname, policy)                             \
-static __always_inline bool folio_test_##lname(struct folio *folio)    \
-{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }   \
-static __always_inline int Page##uname(struct page *page)              \
+FOLIO_TEST_FLAG(lname, FOLIO_##policy)                                 \
+static __always_inline int Page##uname(const struct page *page)                \
 { return test_bit(PG_##lname, &policy(page, 0)->flags); }
 
 #define SETPAGEFLAG(uname, lname, policy)                              \
-static __always_inline                                                 \
-void folio_set_##lname(struct folio *folio)                            \
-{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }           \
+FOLIO_SET_FLAG(lname, FOLIO_##policy)                                  \
 static __always_inline void SetPage##uname(struct page *page)          \
 { set_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define CLEARPAGEFLAG(uname, lname, policy)                            \
-static __always_inline                                                 \
-void folio_clear_##lname(struct folio *folio)                          \
-{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }         \
+FOLIO_CLEAR_FLAG(lname, FOLIO_##policy)                                        \
 static __always_inline void ClearPage##uname(struct page *page)                \
 { clear_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define __SETPAGEFLAG(uname, lname, policy)                            \
-static __always_inline                                                 \
-void __folio_set_##lname(struct folio *folio)                          \
-{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }         \
+__FOLIO_SET_FLAG(lname, FOLIO_##policy)                                        \
 static __always_inline void __SetPage##uname(struct page *page)                \
 { __set_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define __CLEARPAGEFLAG(uname, lname, policy)                          \
-static __always_inline                                                 \
-void __folio_clear_##lname(struct folio *folio)                                \
-{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }       \
+__FOLIO_CLEAR_FLAG(lname, FOLIO_##policy)                              \
 static __always_inline void __ClearPage##uname(struct page *page)      \
 { __clear_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define TESTSETFLAG(uname, lname, policy)                              \
-static __always_inline                                                 \
-bool folio_test_set_##lname(struct folio *folio)                       \
-{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy)                             \
 static __always_inline int TestSetPage##uname(struct page *page)       \
 { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define TESTCLEARFLAG(uname, lname, policy)                            \
-static __always_inline                                                 \
-bool folio_test_clear_##lname(struct folio *folio)                     \
-{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
+FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy)                           \
 static __always_inline int TestClearPage##uname(struct page *page)     \
 { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
 
@@ -465,7 +491,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
        TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname)
 
 __PAGEFLAG(Locked, locked, PF_NO_TAIL)
-PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
+FOLIO_FLAG(waiters, FOLIO_HEAD_PAGE)
 PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL)
 PAGEFLAG(Referenced, referenced, PF_HEAD)
        TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
@@ -532,13 +558,13 @@ PAGEFLAG_FALSE(HighMem, highmem)
 #endif
 
 #ifdef CONFIG_SWAP
-static __always_inline bool folio_test_swapcache(struct folio *folio)
+static __always_inline bool folio_test_swapcache(const struct folio *folio)
 {
        return folio_test_swapbacked(folio) &&
-                       test_bit(PG_swapcache, folio_flags(folio, 0));
+                       test_bit(PG_swapcache, const_folio_flags(folio, 0));
 }
 
-static __always_inline bool PageSwapCache(struct page *page)
+static __always_inline bool PageSwapCache(const struct page *page)
 {
        return folio_test_swapcache(page_folio(page));
 }
@@ -583,10 +609,10 @@ PAGEFLAG_FALSE(HWPoison, hwpoison)
 #endif
 
 #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
-TESTPAGEFLAG(Young, young, PF_ANY)
-SETPAGEFLAG(Young, young, PF_ANY)
-TESTCLEARFLAG(Young, young, PF_ANY)
-PAGEFLAG(Idle, idle, PF_ANY)
+FOLIO_TEST_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_SET_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_TEST_CLEAR_FLAG(young, FOLIO_HEAD_PAGE)
+FOLIO_FLAG(idle, FOLIO_HEAD_PAGE)
 #endif
 
 /*
@@ -637,22 +663,22 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
  */
 #define PAGE_MAPPING_DAX_SHARED        ((void *)0x1)
 
-static __always_inline bool folio_mapping_flags(struct folio *folio)
+static __always_inline bool folio_mapping_flags(const struct folio *folio)
 {
        return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0;
 }
 
-static __always_inline int PageMappingFlags(struct page *page)
+static __always_inline int PageMappingFlags(const struct page *page)
 {
        return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0;
 }
 
-static __always_inline bool folio_test_anon(struct folio *folio)
+static __always_inline bool folio_test_anon(const struct folio *folio)
 {
        return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0;
 }
 
-static __always_inline bool PageAnon(struct page *page)
+static __always_inline bool PageAnon(const struct page *page)
 {
        return folio_test_anon(page_folio(page));
 }
@@ -663,7 +689,7 @@ static __always_inline bool __folio_test_movable(const struct folio *folio)
                        PAGE_MAPPING_MOVABLE;
 }
 
-static __always_inline int __PageMovable(struct page *page)
+static __always_inline int __PageMovable(const struct page *page)
 {
        return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
                                PAGE_MAPPING_MOVABLE;
@@ -676,13 +702,13 @@ static __always_inline int __PageMovable(struct page *page)
  * is found in VM_MERGEABLE vmas.  It's a PageAnon page, pointing not to any
  * anon_vma, but to that page's node of the stable tree.
  */
-static __always_inline bool folio_test_ksm(struct folio *folio)
+static __always_inline bool folio_test_ksm(const struct folio *folio)
 {
        return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
                                PAGE_MAPPING_KSM;
 }
 
-static __always_inline bool PageKsm(struct page *page)
+static __always_inline bool PageKsm(const struct page *page)
 {
        return folio_test_ksm(page_folio(page));
 }
@@ -721,9 +747,9 @@ static inline bool folio_xor_flags_has_waiters(struct folio *folio,
  * some of the bytes in it may be; see the is_partially_uptodate()
  * address_space operation.
  */
-static inline bool folio_test_uptodate(struct folio *folio)
+static inline bool folio_test_uptodate(const struct folio *folio)
 {
-       bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
+       bool ret = test_bit(PG_uptodate, const_folio_flags(folio, 0));
        /*
         * Must ensure that the data we read out of the folio is loaded
         * _after_ we've loaded folio->flags to check the uptodate bit.
@@ -738,7 +764,7 @@ static inline bool folio_test_uptodate(struct folio *folio)
        return ret;
 }
 
-static inline int PageUptodate(struct page *page)
+static inline int PageUptodate(const struct page *page)
 {
        return folio_test_uptodate(page_folio(page));
 }
@@ -780,12 +806,12 @@ void set_page_writeback(struct page *page);
 #define folio_start_writeback_keepwrite(folio) \
        __folio_start_writeback(folio, true)
 
-static __always_inline bool folio_test_head(struct folio *folio)
+static __always_inline bool folio_test_head(const struct folio *folio)
 {
-       return test_bit(PG_head, folio_flags(folio, FOLIO_PF_ANY));
+       return test_bit(PG_head, const_folio_flags(folio, FOLIO_PF_ANY));
 }
 
-static __always_inline int PageHead(struct page *page)
+static __always_inline int PageHead(const struct page *page)
 {
        PF_POISONED_CHECK(page);
        return test_bit(PG_head, &page->flags) && !page_is_fake_head(page);
@@ -801,7 +827,7 @@ CLEARPAGEFLAG(Head, head, PF_ANY)
  *
  * Return: True if the folio is larger than one page.
  */
-static inline bool folio_test_large(struct folio *folio)
+static inline bool folio_test_large(const struct folio *folio)
 {
        return folio_test_head(folio);
 }
@@ -830,7 +856,7 @@ TESTPAGEFLAG_FALSE(LargeRmappable, large_rmappable)
 #define PG_head_mask ((1UL << PG_head))
 
 #ifdef CONFIG_HUGETLB_PAGE
-int PageHuge(struct page *page);
+int PageHuge(const struct page *page);
 SETPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
 CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
 
@@ -843,10 +869,10 @@ CLEARPAGEFLAG(HugeTLB, hugetlb, PF_SECOND)
  * Return: True for hugetlbfs folios, false for anon folios or folios
  * belonging to other filesystems.
  */
-static inline bool folio_test_hugetlb(struct folio *folio)
+static inline bool folio_test_hugetlb(const struct folio *folio)
 {
        return folio_test_large(folio) &&
-               test_bit(PG_hugetlb, folio_flags(folio, 1));
+               test_bit(PG_hugetlb, const_folio_flags(folio, 1));
 }
 #else
 TESTPAGEFLAG_FALSE(Huge, hugetlb)
@@ -861,7 +887,7 @@ TESTPAGEFLAG_FALSE(Huge, hugetlb)
  * hugetlbfs pages, but not normal pages. PageTransHuge() can only be
  * called in the core VM paths where hugetlbfs pages can't exist.
  */
-static inline int PageTransHuge(struct page *page)
+static inline int PageTransHuge(const struct page *page)
 {
        VM_BUG_ON_PAGE(PageTail(page), page);
        return PageHead(page);
@@ -872,7 +898,7 @@ static inline int PageTransHuge(struct page *page)
  * and hugetlbfs pages, so it should only be called when it's known
  * that hugetlbfs pages aren't involved.
  */
-static inline int PageTransCompound(struct page *page)
+static inline int PageTransCompound(const struct page *page)
 {
        return PageCompound(page);
 }
@@ -882,7 +908,7 @@ static inline int PageTransCompound(struct page *page)
  * and hugetlbfs pages, so it should only be called when it's known
  * that hugetlbfs pages aren't involved.
  */
-static inline int PageTransTail(struct page *page)
+static inline int PageTransTail(const struct page *page)
 {
        return PageTail(page);
 }
@@ -946,7 +972,7 @@ static inline int page_type_has_type(unsigned int page_type)
        return (int)page_type < PAGE_MAPCOUNT_RESERVE;
 }
 
-static inline int page_has_type(struct page *page)
+static inline int page_has_type(const struct page *page)
 {
        return page_type_has_type(page->page_type);
 }
@@ -1030,7 +1056,7 @@ extern bool is_free_buddy_page(struct page *page);
 
 PAGEFLAG(Isolated, isolated, PF_ANY);
 
-static __always_inline int PageAnonExclusive(struct page *page)
+static __always_inline int PageAnonExclusive(const struct page *page)
 {
        VM_BUG_ON_PGFLAGS(!PageAnon(page), page);
        VM_BUG_ON_PGFLAGS(PageHuge(page) && !PageHead(page), page);
@@ -1103,19 +1129,18 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
  * Determine if a page has private stuff, indicating that release routines
  * should be invoked upon it.
  */
-static inline int page_has_private(struct page *page)
+static inline int page_has_private(const struct page *page)
 {
        return !!(page->flags & PAGE_FLAGS_PRIVATE);
 }
 
-static inline bool folio_has_private(struct folio *folio)
+static inline bool folio_has_private(const struct folio *folio)
 {
        return page_has_private(&folio->page);
 }
 
 #undef PF_ANY
 #undef PF_HEAD
-#undef PF_ONLY_HEAD
 #undef PF_NO_TAIL
 #undef PF_NO_COMPOUND
 #undef PF_SECOND
index c141ea9a95ef86c9eea582d1872485e73321914e..8cd858d912c4b93ba7c8d9e9379fbe960d3e33b1 100644 (file)
@@ -4,7 +4,7 @@
 
 #include <linux/atomic.h>
 #include <linux/cache.h>
-#include <linux/kernel.h>
+#include <linux/limits.h>
 #include <asm/page.h>
 
 struct page_counter {
index 119a0c9d2a8b50a97db218d880d4ac869c08de7d..debdc25f08b93559600e5e0b9e4afd5161ad365b 100644 (file)
@@ -11,7 +11,8 @@ extern struct page_ext_operations page_owner_ops;
 extern void __reset_page_owner(struct page *page, unsigned short order);
 extern void __set_page_owner(struct page *page,
                        unsigned short order, gfp_t gfp_mask);
-extern void __split_page_owner(struct page *page, unsigned int nr);
+extern void __split_page_owner(struct page *page, int old_order,
+                       int new_order);
 extern void __folio_copy_owner(struct folio *newfolio, struct folio *old);
 extern void __set_page_owner_migrate_reason(struct page *page, int reason);
 extern void __dump_page_owner(const struct page *page);
@@ -31,10 +32,11 @@ static inline void set_page_owner(struct page *page,
                __set_page_owner(page, order, gfp_mask);
 }
 
-static inline void split_page_owner(struct page *page, unsigned int nr)
+static inline void split_page_owner(struct page *page, int old_order,
+                       int new_order)
 {
        if (static_branch_unlikely(&page_owner_inited))
-               __split_page_owner(page, nr);
+               __split_page_owner(page, old_order, new_order);
 }
 static inline void folio_copy_owner(struct folio *newfolio, struct folio *old)
 {
@@ -56,11 +58,11 @@ static inline void reset_page_owner(struct page *page, unsigned short order)
 {
 }
 static inline void set_page_owner(struct page *page,
-                       unsigned int order, gfp_t gfp_mask)
+                       unsigned short order, gfp_t gfp_mask)
 {
 }
-static inline void split_page_owner(struct page *page,
-                       unsigned short order)
+static inline void split_page_owner(struct page *page, int old_order,
+                       int new_order)
 {
 }
 static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio)
index 87cc678adc850b12f144052fc7b4cf50d32ad107..fcc06c300a72c3797751279dbeaec17b1e3f65aa 100644 (file)
@@ -27,6 +27,7 @@ struct folio;
  */
 struct folio_batch {
        unsigned char nr;
+       unsigned char i;
        bool percpu_pvec_drained;
        struct folio *folios[PAGEVEC_SIZE];
 };
@@ -40,12 +41,14 @@ struct folio_batch {
 static inline void folio_batch_init(struct folio_batch *fbatch)
 {
        fbatch->nr = 0;
+       fbatch->i = 0;
        fbatch->percpu_pvec_drained = false;
 }
 
 static inline void folio_batch_reinit(struct folio_batch *fbatch)
 {
        fbatch->nr = 0;
+       fbatch->i = 0;
 }
 
 static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
@@ -75,6 +78,21 @@ static inline unsigned folio_batch_add(struct folio_batch *fbatch,
        return folio_batch_space(fbatch);
 }
 
+/**
+ * folio_batch_next - Return the next folio to process.
+ * @fbatch: The folio batch being processed.
+ *
+ * Use this function to implement a queue of folios.
+ *
+ * Return: The next folio in the queue, or NULL if the queue is empty.
+ */
+static inline struct folio *folio_batch_next(struct folio_batch *fbatch)
+{
+       if (fbatch->i == fbatch->nr)
+               return NULL;
+       return fbatch->folios[fbatch->i++];
+}
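
A minimal usage sketch (process_one_folio is a hypothetical consumer, not a
kernel function): fill the batch with folio_batch_add(), then drain it as a
queue:

	struct folio_batch fbatch;
	struct folio *folio;

	folio_batch_init(&fbatch);
	/* ... producer fills the batch with folio_batch_add() ... */
	while ((folio = folio_batch_next(&fbatch)) != NULL)
		process_one_folio(folio);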
+
 void __folio_batch_release(struct folio_batch *pvec);
 
 static inline void folio_batch_release(struct folio_batch *fbatch)
index f6d0e3513948ac4567a54a012d2517f4dcea7efe..85fc7554cd52bcc342bab3b469471ca9f1ed46f5 100644 (file)
@@ -212,15 +212,37 @@ static inline int pmd_dirty(pmd_t pmd)
 #define arch_flush_lazy_mmu_mode()     do {} while (0)
 #endif
 
-#ifndef set_ptes
+#ifndef pte_batch_hint
+/**
+ * pte_batch_hint - Number of pages that can be added to batch without scanning.
+ * @ptep: Page table pointer for the entry.
+ * @pte: Page table entry.
+ *
+ * Some architectures know that a set of contiguous ptes all map the same
+ * contiguous memory with the same permissions. In this case, the architecture
+ * can provide a hint to aid pte batching without the core code needing to scan
+ * every pte.
+ *
+ * An architecture implementation may ignore the PTE accessed state. Further,
+ * the dirty state must apply atomically to all the PTEs described by the hint.
+ *
+ * May be overridden by the architecture, else pte_batch_hint is always 1.
+ */
+static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
+{
+       return 1;
+}
+#endif
 
-#ifndef pte_next_pfn
-static inline pte_t pte_next_pfn(pte_t pte)
+#ifndef pte_advance_pfn
+static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
 {
-       return __pte(pte_val(pte) + (1UL << PFN_PTE_SHIFT));
+       return __pte(pte_val(pte) + (nr << PFN_PTE_SHIFT));
 }
 #endif
 
+#define pte_next_pfn(pte) pte_advance_pfn(pte, 1)
+
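
A sketch of how core code might consume the hint when batching (an assumption
modelled on the mm-side users of this series, not code from this patch; it
omits the pte_present()/flag checks a real batching loop needs):

	static unsigned int count_contig_ptes(pte_t *ptep, unsigned int max_nr)
	{
		pte_t expected = ptep_get(ptep);
		unsigned int nr = 0;

		while (nr < max_nr) {
			unsigned int hint;

			if (pte_pfn(ptep_get(ptep + nr)) != pte_pfn(expected))
				break;
			/* Skip ahead over ptes the architecture vouches for. */
			hint = pte_batch_hint(ptep + nr, expected);
			nr += hint;
			expected = pte_advance_pfn(expected, hint);
		}
		return min(nr, max_nr);
	}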
+#ifndef set_ptes
 /**
  * set_ptes - Map consecutive pages to a contiguous range of addresses.
  * @mm: Address space to map the pages into.
@@ -229,6 +251,10 @@ static inline pte_t pte_next_pfn(pte_t pte)
  * @pte: Page table entry for the first page.
  * @nr: Number of pages to map.
  *
+ * When nr==1, the initial state of the pte may be present or not present, and
+ * the new state may be present or not present. When nr>1, the initial state of
+ * all ptes must be not present, and the new state must be present.
+ *
  * May be overridden by the architecture, or the architecture can define
  * set_pte() and PFN_PTE_SHIFT.
  *
@@ -580,6 +606,76 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 }
 #endif
 
+#ifndef get_and_clear_full_ptes
+/**
+ * get_and_clear_full_ptes - Clear present PTEs that map consecutive pages of
+ *                          the same folio, collecting dirty/accessed bits.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ * @full: Whether we are clearing a full mm.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_get_and_clear_full(), merging dirty/accessed bits into the
+ * returned PTE.
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock.  The PTEs map consecutive
+ * pages that belong to the same folio.  The PTEs are all in the same PMD.
+ */
+static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
+               unsigned long addr, pte_t *ptep, unsigned int nr, int full)
+{
+       pte_t pte, tmp_pte;
+
+       pte = ptep_get_and_clear_full(mm, addr, ptep, full);
+       while (--nr) {
+               ptep++;
+               addr += PAGE_SIZE;
+               tmp_pte = ptep_get_and_clear_full(mm, addr, ptep, full);
+               if (pte_dirty(tmp_pte))
+                       pte = pte_mkdirty(pte);
+               if (pte_young(tmp_pte))
+                       pte = pte_mkyoung(pte);
+       }
+       return pte;
+}
+#endif
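
A sketch (assumption, mirroring how a zap/exit path would use this) of folding
the collected bits back into the folio once the batch is cleared:

	pte_t pte = get_and_clear_full_ptes(mm, addr, ptep, nr, full);

	if (pte_dirty(pte))
		folio_mark_dirty(folio);	/* any pte dirty => folio dirty */
	if (pte_young(pte))
		folio_mark_accessed(folio);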
+
+#ifndef clear_full_ptes
+/**
+ * clear_full_ptes - Clear present PTEs that map consecutive pages of the same
+ *                  folio.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to clear.
+ * @full: Whether we are clearing a full mm.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_get_and_clear_full().
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock.  The PTEs map consecutive
+ * pages that belong to the same folio.  The PTEs are all in the same PMD.
+ */
+static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+               pte_t *ptep, unsigned int nr, int full)
+{
+       for (;;) {
+               ptep_get_and_clear_full(mm, addr, ptep, full);
+               if (--nr == 0)
+                       break;
+               ptep++;
+               addr += PAGE_SIZE;
+       }
+}
+#endif
 
 /*
  * If two threads concurrently fault at the same page, the thread that
@@ -650,6 +746,37 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 }
 #endif
 
+#ifndef wrprotect_ptes
+/**
+ * wrprotect_ptes - Write-protect PTEs that map consecutive pages of the same
+ *                 folio.
+ * @mm: Address space the pages are mapped into.
+ * @addr: Address the first page is mapped at.
+ * @ptep: Page table pointer for the first entry.
+ * @nr: Number of entries to write-protect.
+ *
+ * May be overridden by the architecture; otherwise, implemented as a simple
+ * loop over ptep_set_wrprotect().
+ *
+ * Note that PTE bits in the PTE range besides the PFN can differ. For example,
+ * some PTEs might be write-protected.
+ *
+ * Context: The caller holds the page table lock.  The PTEs map consecutive
+ * pages that belong to the same folio.  The PTEs are all in the same PMD.
+ */
+static inline void wrprotect_ptes(struct mm_struct *mm, unsigned long addr,
+               pte_t *ptep, unsigned int nr)
+{
+       for (;;) {
+               ptep_set_wrprotect(mm, addr, ptep);
+               if (--nr == 0)
+                       break;
+               ptep++;
+               addr += PAGE_SIZE;
+       }
+}
+#endif
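
A sketch (assumption, modelled on a fork-style copy path) of protecting a
whole batch in one call instead of nr individual ptep_set_wrprotect() calls:

	if (is_cow_mapping(vm_flags) && pte_write(pte)) {
		wrprotect_ptes(src_mm, addr, src_pte, nr);
		pte = pte_wrprotect(pte);	/* keep the local copy in sync */
	}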
+
 /*
  * On some architectures hardware does not set page access bit when accessing
  * memory page, it is responsibility of software setting this bit. It brings
@@ -1650,16 +1777,16 @@ typedef unsigned int pgtbl_mod_mask;
  * Only meaningful when called on a valid entry.
  */
 #ifndef pgd_leaf
-#define pgd_leaf(x)    0
+#define pgd_leaf(x)    false
 #endif
 #ifndef p4d_leaf
-#define p4d_leaf(x)    0
+#define p4d_leaf(x)    false
 #endif
 #ifndef pud_leaf
-#define pud_leaf(x)    0
+#define pud_leaf(x)    false
 #endif
 #ifndef pmd_leaf
-#define pmd_leaf(x)    0
+#define pmd_leaf(x)    false
 #endif
 
 #ifndef pgd_leaf_size
index f6d607ef0e80146b31962d5842eee1eaa0be20fe..58be86e6fe83ba2a0357d62e00af502b88432b9e 100644 (file)
@@ -176,7 +176,7 @@ struct phy_provider {
        struct module           *owner;
        struct list_head        list;
        struct phy * (*of_xlate)(struct device *dev,
-               struct of_phandle_args *args);
+                                const struct of_phandle_args *args);
 };
 
 /**
@@ -265,7 +265,7 @@ void phy_put(struct device *dev, struct phy *phy);
 void devm_phy_put(struct device *dev, struct phy *phy);
 struct phy *of_phy_get(struct device_node *np, const char *con_id);
 struct phy *of_phy_simple_xlate(struct device *dev,
-       struct of_phandle_args *args);
+                               const struct of_phandle_args *args);
 struct phy *phy_create(struct device *dev, struct device_node *node,
                       const struct phy_ops *ops);
 struct phy *devm_phy_create(struct device *dev, struct device_node *node,
@@ -275,11 +275,11 @@ void devm_phy_destroy(struct device *dev, struct phy *phy);
 struct phy_provider *__of_phy_provider_register(struct device *dev,
        struct device_node *children, struct module *owner,
        struct phy * (*of_xlate)(struct device *dev,
-                                struct of_phandle_args *args));
+                                const struct of_phandle_args *args));
 struct phy_provider *__devm_of_phy_provider_register(struct device *dev,
        struct device_node *children, struct module *owner,
        struct phy * (*of_xlate)(struct device *dev,
-                                struct of_phandle_args *args));
+                                const struct of_phandle_args *args));
 void of_phy_provider_unregister(struct phy_provider *phy_provider);
 void devm_of_phy_provider_unregister(struct device *dev,
        struct phy_provider *phy_provider);
@@ -479,7 +479,7 @@ static inline struct phy *of_phy_get(struct device_node *np, const char *con_id)
 }
 
 static inline struct phy *of_phy_simple_xlate(struct device *dev,
-       struct of_phandle_args *args)
+                                             const struct of_phandle_args *args)
 {
        return ERR_PTR(-ENOSYS);
 }
@@ -509,7 +509,7 @@ static inline void devm_phy_destroy(struct device *dev, struct phy *phy)
 static inline struct phy_provider *__of_phy_provider_register(
        struct device *dev, struct device_node *children, struct module *owner,
        struct phy * (*of_xlate)(struct device *dev,
-                                struct of_phandle_args *args))
+                                const struct of_phandle_args *args))
 {
        return ERR_PTR(-ENOSYS);
 }
@@ -517,7 +517,7 @@ static inline struct phy_provider *__of_phy_provider_register(
 static inline struct phy_provider *__devm_of_phy_provider_register(struct device
        *dev, struct device_node *children, struct module *owner,
        struct phy * (*of_xlate)(struct device *dev,
-                                struct of_phandle_args *args))
+                                const struct of_phandle_args *args))
 {
        return ERR_PTR(-ENOSYS);
 }
index 2a3a955864259a53a95d988991e6fc2c502bb720..8dbd51ea862678b8ec7a4684b986e3e574b921bd 100644 (file)
@@ -18,6 +18,16 @@ struct ptdump_state {
        const struct ptdump_range *range;
 };
 
+bool ptdump_walk_pgd_level_core(struct seq_file *m,
+                               struct mm_struct *mm, pgd_t *pgd,
+                               bool checkwx, bool dmesg);
 void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd);
+bool ptdump_check_wx(void);
+
+static inline void debug_checkwx(void)
+{
+       if (IS_ENABLED(CONFIG_DEBUG_WX))
+               ptdump_check_wx();
+}
 
 #endif /* _LINUX_PTDUMP_H */
index 17cb0761ff658e6838aa4e04eb429d4155e55e81..3c2abbc587b49c308835ad05aac7b81d7a223831 100644 (file)
@@ -1261,6 +1261,7 @@ struct task_struct {
        /* Protected by alloc_lock: */
        struct mempolicy                *mempolicy;
        short                           il_prev;
+       u8                              il_weight;
        short                           pref_node_fork;
 #endif
 #ifdef CONFIG_NUMA_BALANCING
@@ -1625,25 +1626,26 @@ extern struct pid *cad_pid;
 #define PF_SUPERPRIV           0x00000100      /* Used super-user privileges */
 #define PF_DUMPCORE            0x00000200      /* Dumped core */
 #define PF_SIGNALED            0x00000400      /* Killed by a signal */
-#define PF_MEMALLOC            0x00000800      /* Allocating memory */
+#define PF_MEMALLOC            0x00000800      /* Allocating memory to free memory. See memalloc_noreclaim_save() */
 #define PF_NPROC_EXCEEDED      0x00001000      /* set_user() noticed that RLIMIT_NPROC was exceeded */
 #define PF_USED_MATH           0x00002000      /* If unset the fpu must be initialized before use */
 #define PF_USER_WORKER         0x00004000      /* Kernel thread cloned from userspace thread */
 #define PF_NOFREEZE            0x00008000      /* This thread should not be frozen */
 #define PF__HOLE__00010000     0x00010000
 #define PF_KSWAPD              0x00020000      /* I am kswapd */
-#define PF_MEMALLOC_NOFS       0x00040000      /* All allocation requests will inherit GFP_NOFS */
-#define PF_MEMALLOC_NOIO       0x00080000      /* All allocation requests will inherit GFP_NOIO */
+#define PF_MEMALLOC_NOFS       0x00040000      /* All allocations inherit GFP_NOFS. See memalloc_nofs_save() */
+#define PF_MEMALLOC_NOIO       0x00080000      /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */
 #define PF_LOCAL_THROTTLE      0x00100000      /* Throttle writes only against the bdi I write to,
                                                 * I am cleaning dirty pages from some other bdi. */
 #define PF_KTHREAD             0x00200000      /* I am a kernel thread */
 #define PF_RANDOMIZE           0x00400000      /* Randomize virtual address space */
-#define PF__HOLE__00800000     0x00800000
-#define PF__HOLE__01000000     0x01000000
+#define PF_MEMALLOC_NORECLAIM  0x00800000      /* All allocation requests will clear __GFP_DIRECT_RECLAIM */
+#define PF_MEMALLOC_NOWARN     0x01000000      /* All allocation requests will inherit __GFP_NOWARN */
 #define PF__HOLE__02000000     0x02000000
 #define PF_NO_SETAFFINITY      0x04000000      /* Userland is not allowed to meddle with cpus_mask */
 #define PF_MCE_EARLY           0x08000000      /* Early kill for mce process policy */
-#define PF_MEMALLOC_PIN                0x10000000      /* Allocation context constrained to zones which allow long term pinning. */
+#define PF_MEMALLOC_PIN                0x10000000      /* Allocations constrained to zones which allow long term pinning.
+                                                * See memalloc_pin_save() */
 #define PF_BLOCK_TS            0x20000000      /* plug has ts that needs updating */
 #define PF__HOLE__40000000     0x40000000
 #define PF_SUSPEND_TASK                0x80000000      /* This thread called freeze_processes() and should not be frozen */
index 9a19f1b42f64129936dd763e1f9436536e60d47e..b6543f9d78d6b868f30fff4289c317ae365b8ca1 100644 (file)
@@ -236,16 +236,25 @@ static inline gfp_t current_gfp_context(gfp_t flags)
 {
        unsigned int pflags = READ_ONCE(current->flags);
 
-       if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) {
+       if (unlikely(pflags & (PF_MEMALLOC_NOIO |
+                              PF_MEMALLOC_NOFS |
+                              PF_MEMALLOC_NORECLAIM |
+                              PF_MEMALLOC_NOWARN |
+                              PF_MEMALLOC_PIN))) {
                /*
-                * NOIO implies both NOIO and NOFS and it is a weaker context
-                * so always make sure it makes precedence
+                * Stronger flags before weaker flags:
+                * NORECLAIM implies NOIO, which in turn implies NOFS
                 */
-               if (pflags & PF_MEMALLOC_NOIO)
+               if (pflags & PF_MEMALLOC_NORECLAIM)
+                       flags &= ~__GFP_DIRECT_RECLAIM;
+               else if (pflags & PF_MEMALLOC_NOIO)
                        flags &= ~(__GFP_IO | __GFP_FS);
                else if (pflags & PF_MEMALLOC_NOFS)
                        flags &= ~__GFP_FS;
 
+               if (pflags & PF_MEMALLOC_NOWARN)
+                       flags |= __GFP_NOWARN;
+
                if (pflags & PF_MEMALLOC_PIN)
                        flags &= ~__GFP_MOVABLE;
        }
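
For context, a sketch (not part of the patch) of entering a scope that
exercises the new precedence; memalloc_flags_save()/_restore() are the helpers
added later in this file:

	unsigned int pflags;

	pflags = memalloc_flags_save(PF_MEMALLOC_NORECLAIM | PF_MEMALLOC_NOWARN);
	/*
	 * Here current_gfp_context(GFP_KERNEL) yields GFP_KERNEL with
	 * __GFP_DIRECT_RECLAIM cleared (NORECLAIM beats the weaker NOIO/NOFS)
	 * and __GFP_NOWARN added.
	 */
	memalloc_flags_restore(pflags);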
@@ -306,6 +315,24 @@ static inline void might_alloc(gfp_t gfp_mask)
        might_sleep_if(gfpflags_allow_blocking(gfp_mask));
 }
 
+/**
+ * memalloc_flags_save - Add a PF_* flag to current->flags, save old value
+ *
+ * This allows PF_* flags to be conveniently added, irrespective of current
+ * value, and then the old version restored with memalloc_flags_restore().
+ */
+static inline unsigned memalloc_flags_save(unsigned flags)
+{
+       unsigned oldflags = ~current->flags & flags;
+       current->flags |= flags;
+       return oldflags;
+}
+
+static inline void memalloc_flags_restore(unsigned flags)
+{
+       current->flags &= ~flags;
+}
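
Note the pairing semantics, shown as a sketch: the save helper returns only
the bits that were newly set, and restore clears exactly those, so nested
scopes compose (assuming PF_MEMALLOC_NOFS was clear on entry):

	unsigned int outer = memalloc_flags_save(PF_MEMALLOC_NOFS);
	unsigned int inner = memalloc_flags_save(PF_MEMALLOC_NOFS); /* returns 0 */

	memalloc_flags_restore(inner);	/* no-op: PF_MEMALLOC_NOFS stays set */
	memalloc_flags_restore(outer);	/* clears PF_MEMALLOC_NOFS */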
+
 /**
  * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
  *
@@ -315,13 +342,12 @@ static inline void might_alloc(gfp_t gfp_mask)
  * point of view. Use memalloc_noio_restore to end the scope with flags
  * returned by this function.
  *
- * This function is safe to be used from any context.
+ * Context: This function is safe to be used from any context.
+ * Return: The saved flags to be passed to memalloc_noio_restore.
  */
 static inline unsigned int memalloc_noio_save(void)
 {
-       unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
-       current->flags |= PF_MEMALLOC_NOIO;
-       return flags;
+       return memalloc_flags_save(PF_MEMALLOC_NOIO);
 }
 
 /**
@@ -334,7 +360,7 @@ static inline unsigned int memalloc_noio_save(void)
  */
 static inline void memalloc_noio_restore(unsigned int flags)
 {
-       current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
+       memalloc_flags_restore(flags);
 }
 
 /**
@@ -346,13 +372,12 @@ static inline void memalloc_noio_restore(unsigned int flags)
  * point of view. Use memalloc_nofs_restore to end the scope with flags
  * returned by this function.
  *
- * This function is safe to be used from any context.
+ * Context: This function is safe to be used from any context.
+ * Return: The saved flags to be passed to memalloc_nofs_restore.
  */
 static inline unsigned int memalloc_nofs_save(void)
 {
-       unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
-       current->flags |= PF_MEMALLOC_NOFS;
-       return flags;
+       return memalloc_flags_save(PF_MEMALLOC_NOFS);
 }
 
 /**
@@ -365,32 +390,76 @@ static inline unsigned int memalloc_nofs_save(void)
  */
 static inline void memalloc_nofs_restore(unsigned int flags)
 {
-       current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
+       memalloc_flags_restore(flags);
 }
 
+/**
+ * memalloc_noreclaim_save - Marks implicit __GFP_MEMALLOC scope.
+ *
+ * This function marks the beginning of the __GFP_MEMALLOC allocation scope.
+ * All further allocations will implicitly add the __GFP_MEMALLOC flag, which
+ * prevents entering reclaim and allows access to all memory reserves. This
+ * should only be used when the caller guarantees the allocation will allow more
+ * memory to be freed very shortly, i.e. it needs to allocate some memory in
+ * the process of freeing memory, and cannot reclaim due to potential recursion.
+ *
+ * Users of this scope have to be extremely careful to not deplete the reserves
+ * completely and implement a throttling mechanism which controls the
+ * consumption of the reserve based on the amount of freed memory. Usage of a
+ * pre-allocated pool (e.g. mempool) should be always considered before using
+ * this scope.
+ *
+ * Individual allocations under the scope can opt out using __GFP_NOMEMALLOC
+ *
+ * Context: This function should not be used in an interrupt context, as
+ *          interrupt context does not give PF_MEMALLOC access to the reserves.
+ *          See __gfp_pfmemalloc_flags().
+ * Return: The saved flags to be passed to memalloc_noreclaim_restore.
+ */
 static inline unsigned int memalloc_noreclaim_save(void)
 {
-       unsigned int flags = current->flags & PF_MEMALLOC;
-       current->flags |= PF_MEMALLOC;
-       return flags;
+       return memalloc_flags_save(PF_MEMALLOC);
 }
 
+/**
+ * memalloc_noreclaim_restore - Ends the implicit __GFP_MEMALLOC scope.
+ * @flags: Flags to restore.
+ *
+ * Ends the implicit __GFP_MEMALLOC scope started by the
+ * memalloc_noreclaim_save function. Always make sure that the given flags are
+ * the return value from the pairing memalloc_noreclaim_save call.
+ */
 static inline void memalloc_noreclaim_restore(unsigned int flags)
 {
-       current->flags = (current->flags & ~PF_MEMALLOC) | flags;
+       memalloc_flags_restore(flags);
 }
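
A usage sketch (the request structure is made up for the example): a driver on
the memory-freeing path making a small, short-lived allocation from reserves:

	struct my_write_req *req;	/* hypothetical type */
	unsigned int noreclaim_flag;

	noreclaim_flag = memalloc_noreclaim_save();
	req = kmalloc(sizeof(*req), GFP_NOIO);	/* may dip into reserves */
	memalloc_noreclaim_restore(noreclaim_flag);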
 
+/**
+ * memalloc_pin_save - Marks implicit ~__GFP_MOVABLE scope.
+ *
+ * This function marks the beginning of the ~__GFP_MOVABLE allocation scope.
+ * All further allocations will implicitly remove the __GFP_MOVABLE flag, which
+ * will constrain the allocations to zones that allow long term pinning, i.e.
+ * not ZONE_MOVABLE zones.
+ *
+ * Return: The saved flags to be passed to memalloc_pin_restore.
+ */
 static inline unsigned int memalloc_pin_save(void)
 {
-       unsigned int flags = current->flags & PF_MEMALLOC_PIN;
-
-       current->flags |= PF_MEMALLOC_PIN;
-       return flags;
+       return memalloc_flags_save(PF_MEMALLOC_PIN);
 }
 
+/**
+ * memalloc_pin_restore - Ends the implicit ~__GFP_MOVABLE scope.
+ * @flags: Flags to restore.
+ *
+ * Ends the implicit ~__GFP_MOVABLE scope started by the memalloc_pin_save
+ * function. Always make sure that the given flags are the return value from
+ * the pairing memalloc_pin_save call.
+ */
 static inline void memalloc_pin_restore(unsigned int flags)
 {
-       current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags;
+       memalloc_flags_restore(flags);
 }
 
 #ifdef CONFIG_MEMCG
index 6c27d413da921dadcdbad2c0d60f34a654f36c98..7ca41af93b37e3108e58bbf9e51ccc4ef141a7cc 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/mod_devicetable.h>
 #include <uapi/linux/serio.h>
 
-extern struct bus_type serio_bus;
+extern const struct bus_type serio_bus;
 
 struct serio {
        void *port_data;
index d8c27f1e5559cb7018406e8031cc9c6d10033c60..693320b4f5c2775799852a061eb840e7613c8a0f 100644 (file)
@@ -4,9 +4,9 @@
 #ifndef __SOUNDWIRE_TYPES_H
 #define __SOUNDWIRE_TYPES_H
 
-extern struct bus_type sdw_bus_type;
-extern struct device_type sdw_slave_type;
-extern struct device_type sdw_master_type;
+extern const struct bus_type sdw_bus_type;
+extern const struct device_type sdw_slave_type;
+extern const struct device_type sdw_master_type;
 
 static inline int is_sdw_slave(const struct device *dev)
 {
index adcbb8f23600072310e23f6bb5a395683c196fc3..3c6caa5abc7c4262b8d62f9ef46d3fa273e7fb1e 100644 (file)
@@ -30,6 +30,53 @@ typedef u32 depot_stack_handle_t;
  */
 #define STACK_DEPOT_EXTRA_BITS 5
 
+#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8)
+
+#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
+#define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER))
+#define DEPOT_STACK_ALIGN 4
+#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN)
+#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
+                              STACK_DEPOT_EXTRA_BITS)
+
+#ifdef CONFIG_STACKDEPOT
+/* Compact structure that stores a reference to a stack. */
+union handle_parts {
+       depot_stack_handle_t handle;
+       struct {
+               /* pool_index is offset by 1 */
+               u32 pool_index  : DEPOT_POOL_INDEX_BITS;
+               u32 offset      : DEPOT_OFFSET_BITS;
+               u32 extra       : STACK_DEPOT_EXTRA_BITS;
+       };
+};
+
+struct stack_record {
+       struct list_head hash_list;     /* Links in the hash table */
+       u32 hash;                       /* Hash in hash table */
+       u32 size;                       /* Number of stored frames */
+       union handle_parts handle;      /* Constant after initialization */
+       refcount_t count;
+       union {
+               unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES];    /* Frames */
+               struct {
+                       /*
+                        * An important invariant of the implementation is to
+                        * only place a stack record onto the freelist iff its
+                        * refcount is zero. Because stack records with a zero
+                        * refcount are never considered valid, it is safe to
+                        * union @entries and freelist management state below.
+                        * Conversely, as soon as an entry is off the freelist
+                        * and its refcount becomes non-zero, the below must not
+                        * be accessed until being placed back on the freelist.
+                        */
+                       struct list_head free_list;     /* Links in the freelist */
+                       unsigned long rcu_state;        /* RCU cookie */
+               };
+       };
+};
+#endif
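
Decoding a handle with the union above would look like this (illustrative
only; the real lookup lives in lib/stackdepot.c):

	union handle_parts parts = { .handle = handle };
	u32 pool_index = parts.pool_index - 1;	/* stored offset by 1, so 0 is invalid */
	size_t offset = (size_t)parts.offset << DEPOT_STACK_ALIGN;
	/* record = pools[pool_index] + offset */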
+
 typedef u32 depot_flags_t;
 
 /*
@@ -131,6 +178,17 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries,
 depot_stack_handle_t stack_depot_save(unsigned long *entries,
                                      unsigned int nr_entries, gfp_t gfp_flags);
 
+/**
+ * __stack_depot_get_stack_record - Get a pointer to a stack_record struct
+ *
+ * @handle: Stack depot handle
+ *
+ * This function is only for internal purposes.
+ *
+ * Return: A pointer to a stack_record struct
+ */
+struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle);
+
 /**
  * stack_depot_fetch - Fetch a stack trace from stack depot
  *
index a9806a44a605c7d7f8701b8cd4408f24deee76f1..09f994ac87df44f13aa59dca2e34b29bb3766e5e 100644 (file)
@@ -9,7 +9,5 @@
    up something else. */
 
 extern asmlinkage void __init __noreturn start_kernel(void);
-extern void __init __noreturn arch_call_rest_init(void);
-extern void __ref __noreturn rest_init(void);
 
 #endif /* _LINUX_START_KERNEL_H */
index 5e9d1469c6faea4c8de2f408041ae0c18a75dbfd..5321585c778fcc1fef0e0420cb481786c02a7aac 100644 (file)
@@ -139,6 +139,7 @@ struct rpc_create_args {
        const char              *servername;
        const char              *nodename;
        const struct rpc_program *program;
+       struct rpc_stat         *stats;
        u32                     prognumber;     /* overrides program->number */
        u32                     version;
        rpc_authflavor_t        authflavor;
index 2d61987b354564d4b227e2f4f5ad676ca014814a..0c77ba488bbae9431f7e99f95461869cfe696750 100644 (file)
@@ -197,7 +197,7 @@ struct rpc_wait_queue {
        unsigned char           maxpriority;            /* maximum priority (0 if queue is not a priority queue) */
        unsigned char           priority;               /* current priority */
        unsigned char           nr;                     /* # tasks remaining for cookie */
-       unsigned short          qlen;                   /* total # tasks waiting in queue */
+       unsigned int            qlen;                   /* total # tasks waiting in queue */
        struct rpc_timer        timer_list;
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
        const char *            name;
index 464f6a9492ab7e2e3b8e0e5f5edc0d4f456c6636..81b952649d35e3ad4fa8c7e77388ac2ceb44ce60 100644 (file)
@@ -152,6 +152,7 @@ struct rpc_xprt_ops {
        int             (*prepare_request)(struct rpc_rqst *req,
                                           struct xdr_buf *buf);
        int             (*send_request)(struct rpc_rqst *req);
+       void            (*abort_send_request)(struct rpc_rqst *req);
        void            (*wait_for_reply_request)(struct rpc_task *task);
        void            (*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
        void            (*release_request)(struct rpc_task *task);
index 95410ca891a7a252b8d730c1913c3ab208434288..f53d608daa013176fc339366820d4dbd1548e635 100644 (file)
@@ -437,9 +437,9 @@ static inline unsigned long total_swapcache_pages(void)
        return global_node_page_state(NR_SWAPCACHE);
 }
 
-extern void free_swap_cache(struct page *page);
-extern void free_page_and_swap_cache(struct page *);
-extern void free_pages_and_swap_cache(struct encoded_page **, int);
+void free_swap_cache(struct folio *folio);
+void free_page_and_swap_cache(struct page *);
+void free_pages_and_swap_cache(struct encoded_page **, int);
 /* linux/mm/swapfile.c */
 extern atomic_long_t nr_swap_pages;
 extern long total_swap_pages;
@@ -521,7 +521,7 @@ static inline void put_swap_device(struct swap_info_struct *si)
 /* used to sanity check ptes in zap_pte_range when CONFIG_SWAP=0 */
 #define free_swap_and_cache(e) is_pfn_swap_entry(e)
 
-static inline void free_swap_cache(struct page *page)
+static inline void free_swap_cache(struct folio *folio)
 {
 }
 
index bff1e8d97de0e089a70ed3f9aa872bf050955087..48b700ba1d188a798209d4de4693173bfc6b98af 100644 (file)
@@ -468,6 +468,19 @@ static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
        return p;
 }
 
+static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)
+{
+       struct folio *folio = pfn_folio(swp_offset_pfn(entry));
+
+       /*
+        * Any use of migration entries may only occur while the
+        * corresponding folio is locked
+        */
+       BUG_ON(is_migration_entry(entry) && !folio_test_locked(folio));
+
+       return folio;
+}
+
 /*
  * A pfn swap entry is a special type of swap entry that always has a pfn stored
  * in the swap offset. They are used to represent unaddressable device memory
index a60639f37963999ef11c6dff9ac59487a94ca7c8..1638660abf5e05d9a1b76a07d0cbce985a98da19 100644 (file)
@@ -120,7 +120,7 @@ static inline unsigned long tc_get_speed(struct tc_bus *tbus)
 
 #ifdef CONFIG_TC
 
-extern struct bus_type tc_bus_type;
+extern const struct bus_type tc_bus_type;
 
 extern int tc_register_driver(struct tc_driver *tdrv);
 extern void tc_unregister_driver(struct tc_driver *tdrv);
index e4056547fbe6151ab79bfed1609446a2c99ee9e4..05d59f74fc887f1bcd781c550387d12e00d2cac6 100644 (file)
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
+/*
+ * Start with fault_pending_wqh and fault_wqh so they're more likely
+ * to be in the same cacheline.
+ *
+ * Locking order:
+ *     fd_wqh.lock
+ *             fault_pending_wqh.lock
+ *                     fault_wqh.lock
+ *             event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
+ */
+struct userfaultfd_ctx {
+       /* waitqueue head for the pending (i.e. not read) userfaults */
+       wait_queue_head_t fault_pending_wqh;
+       /* waitqueue head for the userfaults */
+       wait_queue_head_t fault_wqh;
+       /* waitqueue head for the pseudo fd to wakeup poll/read */
+       wait_queue_head_t fd_wqh;
+       /* waitqueue head for events */
+       wait_queue_head_t event_wqh;
+       /* a refile sequence protected by fault_pending_wqh lock */
+       seqcount_spinlock_t refile_seq;
+       /* pseudo fd refcounting */
+       refcount_t refcount;
+       /* userfaultfd syscall flags */
+       unsigned int flags;
+       /* features requested from the userspace */
+       unsigned int features;
+       /* released */
+       bool released;
+       /*
+        * Prevents userfaultfd operations (fill/move/wp) from happening while
+        * some non-cooperative event is taking place. Increments are done
+        * in write mode, whereas userfaultfd operations, which include
+        * reading mmap_changing, are done in read mode.
+        */
+       struct rw_semaphore map_changing_lock;
+       /* memory mappings are changing because of non-cooperative event */
+       atomic_t mmap_changing;
+       /* mm with one or more vmas attached to this userfaultfd_ctx */
+       struct mm_struct *mm;
+};
+
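
The documented ordering in practice, as a sketch (matching how the read path
takes these locks; not code from this patch):

	spin_lock_irq(&ctx->fd_wqh.lock);
	spin_lock(&ctx->fault_pending_wqh.lock);
	/* ... move a userfault from fault_pending_wqh to fault_wqh ... */
	spin_unlock(&ctx->fault_pending_wqh.lock);
	spin_unlock_irq(&ctx->fd_wqh.lock);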
 extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
 /* A combined operation mode + behavior flags. */
@@ -74,31 +120,26 @@ extern int mfill_atomic_install_pte(pmd_t *dst_pmd,
                                    unsigned long dst_addr, struct page *page,
                                    bool newly_allocated, uffd_flags_t flags);
 
-extern ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start,
+extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start,
                                 unsigned long src_start, unsigned long len,
-                                atomic_t *mmap_changing, uffd_flags_t flags);
-extern ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm,
+                                uffd_flags_t flags);
+extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx,
                                     unsigned long dst_start,
-                                    unsigned long len,
-                                    atomic_t *mmap_changing);
-extern ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long dst_start,
-                                    unsigned long len, atomic_t *mmap_changing,
-                                    uffd_flags_t flags);
-extern ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start,
-                                  unsigned long len, atomic_t *mmap_changing,
-                                  uffd_flags_t flags);
-extern int mwriteprotect_range(struct mm_struct *dst_mm,
-                              unsigned long start, unsigned long len,
-                              bool enable_wp, atomic_t *mmap_changing);
+                                    unsigned long len);
+extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start,
+                                    unsigned long len, uffd_flags_t flags);
+extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start,
+                                  unsigned long len, uffd_flags_t flags);
+extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
+                              unsigned long len, bool enable_wp);
 extern long uffd_wp_range(struct vm_area_struct *vma,
                          unsigned long start, unsigned long len, bool enable_wp);
 
 /* move_pages */
 void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2);
 void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2);
-ssize_t move_pages(struct userfaultfd_ctx *ctx, struct mm_struct *mm,
-                  unsigned long dst_start, unsigned long src_start,
-                  unsigned long len, __u64 flags);
+ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
+                  unsigned long src_start, unsigned long len, __u64 flags);
 int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval,
                        struct vm_area_struct *dst_vma,
                        struct vm_area_struct *src_vma,
index 89b265bc6ec315bcadadebcc92b5ea4ab283822f..8b1a2982040914052b423c73d7871ed71b563a09 100644 (file)
@@ -356,6 +356,7 @@ struct virqfd {
        wait_queue_entry_t              wait;
        poll_table              pt;
        struct work_struct      shutdown;
+       struct work_struct      flush_inject;
        struct virqfd           **pvirqfd;
 };
 
@@ -363,5 +364,6 @@ int vfio_virqfd_enable(void *opaque, int (*handler)(void *, void *),
                       void (*thread)(void *, void *), void *data,
                       struct virqfd **pvirqfd, int fd);
 void vfio_virqfd_disable(struct virqfd **pvirqfd);
+void vfio_virqfd_flush_thread(struct virqfd **pvirqfd);
 
 #endif /* VFIO_H */
index 85e84b92751b67354ad6671d945cd39069afcdcc..a2c8b8bba71195127c97e32fac58718dca382798 100644 (file)
@@ -130,7 +130,15 @@ void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev);
 int vfio_pci_core_setup_barmap(struct vfio_pci_core_device *vdev, int bar);
 pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
                                                pci_channel_state_t state);
-
+ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
+                              void __iomem *io, char __user *buf,
+                              loff_t off, size_t count, size_t x_start,
+                              size_t x_end, bool iswrite);
+bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt,
+                                        loff_t reg_start, size_t reg_cnt,
+                                        loff_t *buf_offset,
+                                        size_t *intersect_count,
+                                        size_t *register_offset);
 #define VFIO_IOWRITE_DECLATION(size) \
 int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev,     \
                        bool test_mem, u##size val, void __iomem *io);
index 0f72c85a377be9d2c817a989b38da6d7c9c54b36..98ea90e90439d8972ee14ae69aa2e4c731e3a4c1 100644 (file)
@@ -258,7 +258,6 @@ extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count);
 /*
  *     Internals.  Don't use..
  */
-extern struct list_head vmap_area_list;
 extern __init void vm_area_add_early(struct vm_struct *vm);
 extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
 
diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h
new file mode 100644 (file)
index 0000000..e1dec1a
--- /dev/null
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_VMCORE_INFO_H
+#define LINUX_VMCORE_INFO_H
+
+#include <linux/linkage.h>
+#include <linux/elfcore.h>
+#include <linux/elf.h>
+
+#define CRASH_CORE_NOTE_NAME      "CORE"
+#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
+#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4)
+#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4)
+
+/*
+ * The per-cpu notes area is a list of notes terminated by a "NULL"
+ * note header.  For kdump, the code in vmcore.c runs in the context
+ * of the second kernel to combine them into one note.
+ */
+#define CRASH_CORE_NOTE_BYTES     ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +  \
+                                    CRASH_CORE_NOTE_NAME_BYTES +       \
+                                    CRASH_CORE_NOTE_DESC_BYTES)
+
+#define VMCOREINFO_BYTES          PAGE_SIZE
+#define VMCOREINFO_NOTE_NAME      "VMCOREINFO"
+#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
+#define VMCOREINFO_NOTE_SIZE      ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +  \
+                                    VMCOREINFO_NOTE_NAME_BYTES +       \
+                                    VMCOREINFO_BYTES)
+
+typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
+/* Per cpu memory for storing cpu states in case of system crash. */
+extern note_buf_t __percpu *crash_notes;
+
+void crash_update_vmcoreinfo_safecopy(void *ptr);
+void crash_save_vmcoreinfo(void);
+void arch_crash_save_vmcoreinfo(void);
+__printf(1, 2)
+void vmcoreinfo_append_str(const char *fmt, ...);
+phys_addr_t paddr_vmcoreinfo_note(void);
+
+#define VMCOREINFO_OSRELEASE(value) \
+       vmcoreinfo_append_str("OSRELEASE=%s\n", value)
+#define VMCOREINFO_BUILD_ID()                                          \
+       ({                                                              \
+               static_assert(sizeof(vmlinux_build_id) == 20);          \
+               vmcoreinfo_append_str("BUILD-ID=%20phN\n", vmlinux_build_id); \
+       })
+
+#define VMCOREINFO_PAGESIZE(value) \
+       vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
+#define VMCOREINFO_SYMBOL(name) \
+       vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ARRAY(name) \
+       vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
+#define VMCOREINFO_SIZE(name) \
+       vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
+                             (unsigned long)sizeof(name))
+#define VMCOREINFO_STRUCT_SIZE(name) \
+       vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
+                             (unsigned long)sizeof(struct name))
+#define VMCOREINFO_OFFSET(name, field) \
+       vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
+                             (unsigned long)offsetof(struct name, field))
+#define VMCOREINFO_TYPE_OFFSET(name, field) \
+       vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
+                             (unsigned long)offsetof(name, field))
+#define VMCOREINFO_LENGTH(name, value) \
+       vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value)
+#define VMCOREINFO_NUMBER(name) \
+       vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
+#define VMCOREINFO_CONFIG(name) \
+       vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+
+extern unsigned char *vmcoreinfo_data;
+extern size_t vmcoreinfo_size;
+extern u32 *vmcoreinfo_note;
+
+Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
+                         void *data, size_t data_len);
+void final_note(Elf_Word *buf);
+#endif /* LINUX_VMCORE_INFO_H */
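
An architecture's arch_crash_save_vmcoreinfo() would use these helpers along
these lines (illustrative symbols borrowed from arm64; not part of this
patch):

	void arch_crash_save_vmcoreinfo(void)
	{
		VMCOREINFO_NUMBER(VA_BITS);
		vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
				      (unsigned long long)PHYS_OFFSET);
	}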
index 4ca2842d2842d0e321dbbaa0efb7233cc0c36061..6a5bb052fcc27f3820ddfc095f89100f8f5f12cb 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/**
- * lib/minmax.c: windowed min/max tracker by Kathleen Nichols.
+/*
+ * win_minmax.h: windowed min/max tracker by Kathleen Nichols.
  *
  */
 #ifndef MINMAX_H
index 453736fd1d23ce673345833cc13593ce13450ba1..9845cb62e40b2d95cfe4a8c07d41ae2f7aad2783 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/flex_proportions.h>
 #include <linux/backing-dev-defs.h>
 #include <linux/blk_types.h>
+#include <linux/pagevec.h>
 
 struct bio;
 
@@ -40,6 +41,7 @@ enum writeback_sync_modes {
  * in a manner such that unspecified fields are set to zero.
  */
 struct writeback_control {
+       /* public fields that can be set and/or consumed by the caller: */
        long nr_to_write;               /* Write this many pages, and decrement
                                           this for each page written */
        long pages_skipped;             /* Pages which were not written */
@@ -77,6 +79,11 @@ struct writeback_control {
         */
        struct swap_iocb **swap_plug;
 
+       /* internal fields used by the ->writepages implementation: */
+       struct folio_batch fbatch;
+       pgoff_t index;
+       int saved_err;
+
 #ifdef CONFIG_CGROUP_WRITEBACK
        struct bdi_writeback *wb;       /* wb this writeback is issued under */
        struct inode *inode;            /* inode being written out */
@@ -360,11 +367,12 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,
 
 bool wb_over_bg_thresh(struct bdi_writeback *wb);
 
+struct folio *writeback_iter(struct address_space *mapping,
+               struct writeback_control *wbc, struct folio *folio, int *error);
+
 typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc,
                                void *data);
 
-void tag_pages_for_writeback(struct address_space *mapping,
-                            pgoff_t start, pgoff_t end);
 int write_cache_pages(struct address_space *mapping,
                      struct writeback_control *wbc, writepage_t writepage,
                      void *data);
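
writeback_iter() replaces the write_cache_pages()/writepage_t callback pattern with an open-coded loop; the fbatch/index/saved_err fields added to struct writeback_control above are its internal cursor state. A sketch of the intended loop shape, where example_write_folio() is a hypothetical stand-in for a filesystem's per-folio writeout helper (assumed to write back and unlock the folio):

    static int example_writepages(struct address_space *mapping,
    			      struct writeback_control *wbc)
    {
    	struct folio *folio = NULL;
    	int error = 0;

    	/* Passing the previous folio back in advances the iterator. */
    	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
    		error = example_write_folio(folio, wbc);
    	return error;
    }
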
index 0b709f5bc65fac2477df81ee766305dd2aa33200..341aea4900704ce29159e0c2a53a0416b9f1a2ed 100644 (file)
@@ -29,8 +29,8 @@ struct zswap_lruvec_state {
 
 bool zswap_store(struct folio *folio);
 bool zswap_load(struct folio *folio);
-void zswap_invalidate(int type, pgoff_t offset);
-void zswap_swapon(int type);
+void zswap_invalidate(swp_entry_t swp);
+int zswap_swapon(int type, unsigned long nr_pages);
 void zswap_swapoff(int type);
 void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg);
 void zswap_lruvec_state_init(struct lruvec *lruvec);
@@ -50,8 +50,11 @@ static inline bool zswap_load(struct folio *folio)
        return false;
 }
 
-static inline void zswap_invalidate(int type, pgoff_t offset) {}
-static inline void zswap_swapon(int type) {}
+static inline void zswap_invalidate(swp_entry_t swp) {}
+static inline int zswap_swapon(int type, unsigned long nr_pages)
+{
+       return 0;
+}
 static inline void zswap_swapoff(int type) {}
 static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {}
 static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {}
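
zswap_swapon() now allocates per-swap-device state sized by nr_pages and can fail, so activation paths must check its result; invalidation identifies the slot by the full swp_entry_t instead of a (type, offset) pair. A hedged caller sketch, not the literal mm/swapfile.c code:

    	error = zswap_swapon(si->type, maxpages);
    	if (error)
    		goto bad_swap;		/* hypothetical unwind label */

    	/* ... later, when a swap slot is freed: */
    	zswap_invalidate(swp_entry(si->type, offset));
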
index d77982685116e87b39698a8e9fe819bba5c29953..10c9cf6058b7e27258f4af4051cb4d505ad1d189 100644 (file)
@@ -224,8 +224,6 @@ struct cec_adap_ops {
  * @notifier:          CEC notifier
  * @pin:               CEC pin status struct
  * @cec_dir:           debugfs cec directory
- * @status_file:       debugfs cec status file
- * @error_inj_file:    debugfs cec error injection file
  * @sequence:          transmit sequence counter
  * @input_phys:                remote control input_phys name
  *
index 2b6cd343ee9e018c8ddf2e0b04c2bc62c26b01a6..0393b23129eb9f2bc9415f29fc5d75030fc8f2b0 100644 (file)
@@ -225,6 +225,7 @@ enum media_pad_signal_type {
  * @graph_obj: Embedded structure containing the media object common data
  * @entity:    Entity this pad belongs to
  * @index:     Pad index in the entity pads array, numbered from 0 to n
+ * @num_links: Number of links connected to this pad
  * @sig_type:  Type of the signal inside a media pad
  * @flags:     Pad flags, as defined in
  *             :ref:`include/uapi/linux/media.h <media_header>`
@@ -236,6 +237,7 @@ struct media_pad {
        struct media_gobj graph_obj;    /* must be first field in struct */
        struct media_entity *entity;
        u16 index;
+       u16 num_links;
        enum media_pad_signal_type sig_type;
        unsigned long flags;
 
@@ -337,10 +339,6 @@ enum media_entity_type {
  * @info.dev:  Contains device major and minor info.
  * @info.dev.major: device node major, if the device is a devnode.
  * @info.dev.minor: device node minor, if the device is a devnode.
- * @major:     Devnode major number (zero if not applicable). Kept just
- *             for backward compatibility.
- * @minor:     Devnode minor number (zero if not applicable). Kept just
- *             for backward compatibility.
  *
  * .. note::
  *
index 739b0f0fc1a0f4a90951dd1fb20ded8d2c565d54..63ad36f04f724cf13e1c2928e7eb3602f98c3294 100644 (file)
@@ -536,9 +536,10 @@ int v4l2_fill_pixfmt_mp(struct v4l2_pix_format_mplane *pixfmt, u32 pixelformat,
  * V4L2_CID_LINK_FREQ control implemented by the transmitter, or value
  * calculated based on the V4L2_CID_PIXEL_RATE implemented by the transmitter.
  *
- * Returns link frequency on success, otherwise a negative error code:
- *     -ENOENT: Link frequency or pixel rate control not found
- *     -EINVAL: Invalid link frequency value
+ * Return:
+ * * >0: Link frequency
+ * * %-ENOENT: Link frequency or pixel rate control not found
+ * * %-EINVAL: Invalid link frequency value
  */
 s64 v4l2_get_link_freq(struct v4l2_ctrl_handler *handler, unsigned int mul,
                       unsigned int div);
@@ -547,6 +548,31 @@ void v4l2_simplify_fraction(u32 *numerator, u32 *denominator,
                unsigned int n_terms, unsigned int threshold);
 u32 v4l2_fraction_to_interval(u32 numerator, u32 denominator);
 
+/**
+ * v4l2_link_freq_to_bitmap - Figure out platform-supported link frequencies
+ * @dev: The struct device
+ * @fw_link_freqs: Array of link frequencies from firmware
+ * @num_of_fw_link_freqs: Number of entries in @fw_link_freqs
+ * @driver_link_freqs: Array of link frequencies supported by the driver
+ * @num_of_driver_link_freqs: Number of entries in @driver_link_freqs
+ * @bitmap: Bitmap of driver-supported link frequencies found in @fw_link_freqs
+ *
+ * This function checks which driver-supported link frequencies are enabled in
+ * system firmware and sets the corresponding bits in @bitmap (after first
+ * zeroing it).
+ *
+ * Return:
+ * * %0: Success
+ * * %-ENOENT: No match found between driver-supported link frequencies and
+ *   those available in firmware.
+ * * %-ENODATA: No link frequencies were specified in firmware.
+ */
+int v4l2_link_freq_to_bitmap(struct device *dev, const u64 *fw_link_freqs,
+                            unsigned int num_of_fw_link_freqs,
+                            const s64 *driver_link_freqs,
+                            unsigned int num_of_driver_link_freqs,
+                            unsigned long *bitmap);
+
 static inline u64 v4l2_buffer_get_timestamp(const struct v4l2_buffer *buf)
 {
        /*
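
A hedged probe-time sketch for v4l2_link_freq_to_bitmap() in a sensor driver; example_link_freqs and bus_cfg (a struct v4l2_fwnode_endpoint parsed earlier) are assumptions of this sketch:

    	static const s64 example_link_freqs[] = { 400000000, 800000000 };
    	unsigned long freq_bitmap;
    	int ret;

    	ret = v4l2_link_freq_to_bitmap(dev, bus_cfg.link_frequencies,
    				       bus_cfg.nr_of_link_frequencies,
    				       example_link_freqs,
    				       ARRAY_SIZE(example_link_freqs),
    				       &freq_bitmap);
    	if (ret)
    		return ret;	/* -ENOENT or -ENODATA, per the kernel-doc */

Each set bit in freq_bitmap then indexes a usable entry of example_link_freqs[].
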
index 56719a26a46c3d13103d1b8c24b93d0ecf9b4ef8..8b86996b2719309f18dd87c436a500c222c34751 100644 (file)
@@ -72,6 +72,10 @@ struct vb2_buffer;
  *              argument to other ops in this structure.
  * @put_userptr: inform the allocator that a USERPTR buffer will no longer
  *              be used.
+ * @prepare:   called every time the buffer is passed from userspace to the
+ *             driver, useful for cache synchronisation, optional.
+ * @finish:    called every time the buffer is passed back from the driver
+ *             to userspace, also optional.
  * @attach_dmabuf: attach a shared &struct dma_buf for a hardware operation;
  *                used for DMABUF memory types; dev is the alloc device
  *                dbuf is the shared dma_buf; returns ERR_PTR() on failure;
@@ -86,10 +90,6 @@ struct vb2_buffer;
  *             dmabuf.
  * @unmap_dmabuf: releases access control to the dmabuf - allocator is notified
  *               that this driver is done using the dmabuf for now.
- * @prepare:   called every time the buffer is passed from userspace to the
- *             driver, useful for cache synchronisation, optional.
- * @finish:    called every time the buffer is passed back from the driver
- *             to the userspace, also optional.
  * @vaddr:     return a kernel virtual address to a given memory buffer
  *             associated with the passed private structure or NULL if no
  *             such mapping exists.
@@ -271,11 +271,11 @@ struct vb2_buffer {
         *                      skips cache sync/invalidation.
         * skip_cache_sync_on_finish: when set buffer's ->finish() function
         *                      skips cache sync/invalidation.
+        * planes:              per-plane information; do not change
         * queued_entry:        entry on the queued buffers list, which holds
         *                      all buffers queued from userspace
         * done_entry:          entry on the list that stores all buffers ready
         *                      to be dequeued to userspace
-        * vb2_plane:           per-plane information; do not change
         */
        enum vb2_buffer_state   state;
        unsigned int            synced:1;
@@ -484,7 +484,6 @@ struct vb2_buf_ops {
  *             caller. For example, for V4L2, it should match
  *             the types defined on &enum v4l2_buf_type.
  * @io_modes:  supported io methods (see &enum vb2_io_modes).
- * @alloc_devs:        &struct device memory type/allocator-specific per-plane device
  * @dev:       device to use for the default allocation context if the driver
  *             doesn't fill in the @alloc_devs array.
  * @dma_attrs: DMA attributes to use for the DMA.
@@ -553,6 +552,7 @@ struct vb2_buf_ops {
  *             VIDIOC_REQBUFS will ensure at least @min_queued_buffers
  *             buffers will be allocated. Note that VIDIOC_CREATE_BUFS will not
  *             modify the requested buffer count.
+ * @alloc_devs:        &struct device memory type/allocator-specific per-plane device
  */
 /*
  * Private elements (won't appear at the uAPI book):
@@ -577,6 +577,9 @@ struct vb2_buf_ops {
  * @waiting_for_buffers: used in poll() to check if vb2 is still waiting for
  *             buffers. Only set for capture queues if qbuf has not yet been
  *             called since poll() needs to return %EPOLLERR in that situation.
+ * @waiting_in_dqbuf: set by the core for the duration of a blocking DQBUF, when
+ *             it has to wait for a buffer to become available with vb2_queue->lock
+ *             released. Used to prevent other threads from destroying the queue.
  * @is_multiplanar: set if buffer type is multiplanar
  * @is_output: set if buffer type is output
  * @copy_timestamp: set if vb2-core should set timestamps
index c38f4fe5e64cf4f14b668328ab0cfac76ea5d496..fddc767c47871f9191cf7927c5ee037ed09aee58 100644 (file)
@@ -486,6 +486,52 @@ extern int scsi_is_sdev_device(const struct device *);
 extern int scsi_is_target_device(const struct device *);
 extern void scsi_sanitize_inquiry_string(unsigned char *s, int len);
 
+/*
+ * scsi_execute_cmd users can set scsi_failure.result to have
+ * scsi_check_passthrough fail/retry a command. scsi_failure.result can be a
+ * specific host byte or message code, or SCMD_FAILURE_RESULT_ANY can be used
+ * to match any host or message code.
+ */
+#define SCMD_FAILURE_RESULT_ANY        0x7fffffff
+/*
+ * Set scsi_failure.result to SCMD_FAILURE_STAT_ANY to fail/retry any failure
+ * for which scsi_status_is_good() returns false.
+ */
+#define SCMD_FAILURE_STAT_ANY  0xff
+/*
+ * The following can be set to the scsi_failure sense, asc and ascq fields to
+ * match on any sense, ASC, or ASCQ value.
+ */
+#define SCMD_FAILURE_SENSE_ANY 0xff
+#define SCMD_FAILURE_ASC_ANY   0xff
+#define SCMD_FAILURE_ASCQ_ANY  0xff
+/* Always retry a matching failure. */
+#define SCMD_FAILURE_NO_LIMIT  -1
+
+struct scsi_failure {
+       int result;
+       u8 sense;
+       u8 asc;
+       u8 ascq;
+       /*
+        * Number of times scsi_execute_cmd will retry the failure. These
+        * retries do not count towards the total_allowed limit.
+        */
+       s8 allowed;
+       /* Number of times the failure has been retried. */
+       s8 retries;
+};
+
+struct scsi_failures {
+       /*
+        * If a scsi_failure does not have its own retry limit set, this limit
+        * will be used instead.
+        */
+       int total_allowed;
+       int total_retries;
+       struct scsi_failure *failure_definitions;
+};
+
 /* Optional arguments to scsi_execute_cmd */
 struct scsi_exec_args {
        unsigned char *sense;           /* sense buffer */
@@ -494,12 +540,14 @@ struct scsi_exec_args {
        blk_mq_req_flags_t req_flags;   /* BLK_MQ_REQ flags */
        int scmd_flags;                 /* SCMD flags */
        int *resid;                     /* residual length */
+       struct scsi_failures *failures; /* failures to retry */
 };
 
 int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
                     blk_opf_t opf, void *buffer, unsigned int bufflen,
                     int timeout, int retries,
                     const struct scsi_exec_args *args);
+void scsi_failures_reset_retries(struct scsi_failures *failures);
 
 extern void sdev_disable_disk_events(struct scsi_device *sdev);
 extern void sdev_enable_disk_events(struct scsi_device *sdev);
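
Putting the new definitions together, a hedged sketch of a scsi_execute_cmd() caller that retries Unit Attention up to three times; the table is terminated by an empty entry, and each entry's 'allowed' limit applies on top of total_allowed:

    	struct scsi_failure failure_defs[] = {
    		{
    			.sense = UNIT_ATTENTION,
    			.asc = SCMD_FAILURE_ASC_ANY,
    			.ascq = SCMD_FAILURE_ASCQ_ANY,
    			.allowed = 3,
    			.result = SAM_STAT_CHECK_CONDITION,
    		},
    		{}	/* terminating entry */
    	};
    	struct scsi_failures failures = {
    		.failure_definitions = failure_defs,
    	};
    	const struct scsi_exec_args exec_args = {
    		.failures = &failures,
    	};

    	ret = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, buf, buf_len,
    			       30 * HZ, 3, &exec_args);
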
index 3b907fc2ef08fe7e668b92d60236e11ece0c6df5..b259d42a1e1affd9d82be3975782b1b526e97437 100644 (file)
@@ -497,9 +497,6 @@ struct scsi_host_template {
         *   scsi_netlink.h
         */
        u64 vendor_id;
-
-       /* Delay for runtime autosuspend */
-       int rpm_autosuspend_delay;
 };
 
 /*
@@ -713,6 +710,9 @@ struct Scsi_Host {
         */
        struct device *dma_dev;
 
+       /* Delay for runtime autosuspend */
+       int rpm_autosuspend_delay;
+
        /*
         * We should ensure that this is aligned, both for better performance
         * and also because some compilers (m68k) don't automatically force
index 2b2a975efd20774e178057680f8fc3e36d0a3639..d05759d1853896d60b563a7ccb57282a16750877 100644 (file)
@@ -78,10 +78,10 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_fast_isolate_freepage
 #ifdef CONFIG_COMPACTION
 TRACE_EVENT(mm_compaction_migratepages,
 
-       TP_PROTO(struct compact_control *cc,
+       TP_PROTO(unsigned int nr_migratepages,
                unsigned int nr_succeeded),
 
-       TP_ARGS(cc, nr_succeeded),
+       TP_ARGS(nr_migratepages, nr_succeeded),
 
        TP_STRUCT__entry(
                __field(unsigned long, nr_migrated)
@@ -90,7 +90,7 @@ TRACE_EVENT(mm_compaction_migratepages,
 
        TP_fast_assign(
                __entry->nr_migrated = nr_succeeded;
-               __entry->nr_failed = cc->nr_migratepages - nr_succeeded;
+               __entry->nr_failed = nr_migratepages - nr_succeeded;
        ),
 
        TP_printk("nr_migrated=%lu nr_failed=%lu",
index 58688768ef0f033b0202125b8eedb29afce0d2a4..6e62cc64cd92a647ad42aa109a5578c668d35617 100644 (file)
@@ -304,6 +304,44 @@ TRACE_EVENT(mm_page_alloc_extfrag,
                __entry->change_ownership)
 );
 
+TRACE_EVENT(mm_alloc_contig_migrate_range_info,
+
+       TP_PROTO(unsigned long start,
+                unsigned long end,
+                unsigned long nr_migrated,
+                unsigned long nr_reclaimed,
+                unsigned long nr_mapped,
+                int migratetype),
+
+       TP_ARGS(start, end, nr_migrated, nr_reclaimed, nr_mapped, migratetype),
+
+       TP_STRUCT__entry(
+               __field(unsigned long, start)
+               __field(unsigned long, end)
+               __field(unsigned long, nr_migrated)
+               __field(unsigned long, nr_reclaimed)
+               __field(unsigned long, nr_mapped)
+               __field(int, migratetype)
+       ),
+
+       TP_fast_assign(
+               __entry->start = start;
+               __entry->end = end;
+               __entry->nr_migrated = nr_migrated;
+               __entry->nr_reclaimed = nr_reclaimed;
+               __entry->nr_mapped = nr_mapped;
+               __entry->migratetype = migratetype;
+       ),
+
+       TP_printk("start=0x%lx end=0x%lx migratetype=%d nr_migrated=%lu nr_reclaimed=%lu nr_mapped=%lu",
+                 __entry->start,
+                 __entry->end,
+                 __entry->migratetype,
+                 __entry->nr_migrated,
+                 __entry->nr_reclaimed,
+                 __entry->nr_mapped)
+);
+
 /*
  * Required for uniquely and securely identifying mm in rss_stat tracepoint.
  */
index 26a11e4a2c361d21e550ea500df01ec3e10d791a..b799f3bcba8233e7e693e3363b4e576c07d3eefe 100644 (file)
@@ -7,6 +7,8 @@
 #include <linux/tracepoint.h>
 #include <trace/events/mmflags.h>
 
+#define PG_COUNT_TO_KB(x) ((x) << (PAGE_SHIFT - 10))
+
 TRACE_EVENT(oom_score_adj_update,
 
        TP_PROTO(struct task_struct *task),
@@ -72,19 +74,45 @@ TRACE_EVENT(reclaim_retry_zone,
 );
 
 TRACE_EVENT(mark_victim,
-       TP_PROTO(int pid),
+       TP_PROTO(struct task_struct *task, uid_t uid),
 
-       TP_ARGS(pid),
+       TP_ARGS(task, uid),
 
        TP_STRUCT__entry(
                __field(int, pid)
+               __string(comm, task->comm)
+               __field(unsigned long, total_vm)
+               __field(unsigned long, anon_rss)
+               __field(unsigned long, file_rss)
+               __field(unsigned long, shmem_rss)
+               __field(uid_t, uid)
+               __field(unsigned long, pgtables)
+               __field(short, oom_score_adj)
        ),
 
        TP_fast_assign(
-               __entry->pid = pid;
+               __entry->pid = task->pid;
+               __assign_str(comm, task->comm);
+               __entry->total_vm = PG_COUNT_TO_KB(task->mm->total_vm);
+               __entry->anon_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_ANONPAGES));
+               __entry->file_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_FILEPAGES));
+               __entry->shmem_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_SHMEMPAGES));
+               __entry->uid = uid;
+               __entry->pgtables = mm_pgtables_bytes(task->mm) >> 10;
+               __entry->oom_score_adj = task->signal->oom_score_adj;
        ),
 
-       TP_printk("pid=%d", __entry->pid)
+       TP_printk("pid=%d comm=%s total-vm=%lukB anon-rss=%lukB file-rss=%lukB shmem-rss=%lukB uid=%u pgtables=%lukB oom_score_adj=%hd",
+               __entry->pid,
+               __get_str(comm),
+               __entry->total_vm,
+               __entry->anon_rss,
+               __entry->file_rss,
+               __entry->shmem_rss,
+               __entry->uid,
+               __entry->pgtables,
+               __entry->oom_score_adj
+       )
 );
 
 TRACE_EVENT(wake_reaper,
index ce6a85b82afa0cb5811db1c7a35094c8c96d6bb0..ac05ed06a0714eff56e5d01e612917286369f020 100644 (file)
@@ -639,6 +639,7 @@ TRACE_EVENT(rpc_stats_latency,
                __field(unsigned long, backlog)
                __field(unsigned long, rtt)
                __field(unsigned long, execute)
+               __field(u32, xprt_id)
        ),
 
        TP_fast_assign(
@@ -651,13 +652,16 @@ TRACE_EVENT(rpc_stats_latency,
                __entry->backlog = ktime_to_us(backlog);
                __entry->rtt = ktime_to_us(rtt);
                __entry->execute = ktime_to_us(execute);
+               __entry->xprt_id = task->tk_xprt->id;
        ),
 
        TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
-                 " xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu",
+                 " xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu"
+                 " xprt_id=%d",
                __entry->task_id, __entry->client_id, __entry->xid,
                __get_str(progname), __entry->version, __get_str(procname),
-               __entry->backlog, __entry->rtt, __entry->execute)
+               __entry->backlog, __entry->rtt, __entry->execute,
+               __entry->xprt_id)
 );
 
 TRACE_EVENT(rpc_xdr_overflow,
index 64ab5dac59ce0c7ea6834e308b307e17e649dc4b..e43e745915618e53762915436cb248cba1a71005 100644 (file)
@@ -239,6 +239,7 @@ TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
                { EHOSTDOWN,                    "EHOSTDOWN" }, \
                { EPIPE,                        "EPIPE" }, \
                { EPFNOSUPPORT,                 "EPFNOSUPPORT" }, \
+               { EINVAL,                       "EINVAL" }, \
                { EPROTONOSUPPORT,              "EPROTONOSUPPORT" }, \
                { NFS4ERR_ACCESS,               "ACCESS" }, \
                { NFS4ERR_ATTRNOTSUPP,          "ATTRNOTSUPP" }, \
index 352cb81947b87697960486e4af4e48313e891d3d..fadb3f857f2855db40dca88333d977e6a51b4c11 100644 (file)
@@ -24,4 +24,8 @@
 #endif
 #endif
 
+#ifndef __BITS_PER_LONG_LONG
+#define __BITS_PER_LONG_LONG 64
+#endif
+
 #endif /* _UAPI__ASM_GENERIC_BITS_PER_LONG */
index 6991c4b8ab189d25f2cda92661f7da4a8a558a94..cc61cb9b3e9af86a41b50d5221a1b94f4d6591bb 100644 (file)
@@ -32,6 +32,8 @@
 #define AT_HWCAP2 26   /* extension of AT_HWCAP */
 #define AT_RSEQ_FEATURE_SIZE   27      /* rseq supported feature size */
 #define AT_RSEQ_ALIGN          28      /* rseq allocation alignment */
+#define AT_HWCAP3 29   /* extension of AT_HWCAP */
+#define AT_HWCAP4 30   /* extension of AT_HWCAP */
 
 #define AT_EXECFN  31  /* filename of program */
 
diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h
new file mode 100644 (file)
index 0000000..3c2a101
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* bits.h: Macros for dealing with bitmasks. */
+
+#ifndef _UAPI_LINUX_BITS_H
+#define _UAPI_LINUX_BITS_H
+
+#define __GENMASK(h, l) \
+        (((~_UL(0)) - (_UL(1) << (l)) + 1) & \
+         (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
+
+#define __GENMASK_ULL(h, l) \
+        (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \
+         (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+
+#endif /* _UAPI_LINUX_BITS_H */
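
These mirror the in-kernel GENMASK()/GENMASK_ULL() but rely only on the UAPI-safe _UL()/_ULL() wrappers and the __BITS_PER_LONG* constants, so exported headers can use them. Worked expansions, assuming a 64-bit __BITS_PER_LONG:

    	__GENMASK(15, 8)	/* == 0x000000000000ff00UL, bits 8..15 set */
    	__GENMASK_ULL(39, 21)	/* == 0x000000ffffe00000ULL, bits 21..39 set */
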
index e7418d15fe3906507827025545dcd5348202ff30..d08b99d60f6fd6d0d072d01ad6bcc1b48da0a242 100644 (file)
  *  7.39
  *  - add FUSE_DIRECT_IO_ALLOW_MMAP
  *  - add FUSE_STATX and related structures
+ *
+ *  7.40
+ *  - add max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag
+ *  - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag
+ *  - add FUSE_NO_EXPORT_SUPPORT init flag
+ *  - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag
  */
 
 #ifndef _LINUX_FUSE_H
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 39
+#define FUSE_KERNEL_MINOR_VERSION 40
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -353,6 +359,7 @@ struct fuse_file_lock {
  * FOPEN_STREAM: the file is stream-like (no file position at all)
  * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
  * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode
+ * FOPEN_PASSTHROUGH: passthrough read/write io for this open file
  */
 #define FOPEN_DIRECT_IO                (1 << 0)
 #define FOPEN_KEEP_CACHE       (1 << 1)
@@ -361,6 +368,7 @@ struct fuse_file_lock {
 #define FOPEN_STREAM           (1 << 4)
 #define FOPEN_NOFLUSH          (1 << 5)
 #define FOPEN_PARALLEL_DIRECT_WRITES   (1 << 6)
+#define FOPEN_PASSTHROUGH      (1 << 7)
 
 /**
  * INIT request/reply flags
@@ -410,6 +418,9 @@ struct fuse_file_lock {
  *                     symlink and mknod (single group that matches parent)
  * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
  * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode.
+ * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support
+ * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit
+ *                 of the request ID indicates resend requests
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -449,6 +460,9 @@ struct fuse_file_lock {
 #define FUSE_CREATE_SUPP_GROUP (1ULL << 34)
 #define FUSE_HAS_EXPIRE_ONLY   (1ULL << 35)
 #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36)
+#define FUSE_PASSTHROUGH       (1ULL << 37)
+#define FUSE_NO_EXPORT_SUPPORT (1ULL << 38)
+#define FUSE_HAS_RESEND                (1ULL << 39)
 
 /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
 #define FUSE_DIRECT_IO_RELAX   FUSE_DIRECT_IO_ALLOW_MMAP
@@ -635,6 +649,7 @@ enum fuse_notify_code {
        FUSE_NOTIFY_STORE = 4,
        FUSE_NOTIFY_RETRIEVE = 5,
        FUSE_NOTIFY_DELETE = 6,
+       FUSE_NOTIFY_RESEND = 7,
        FUSE_NOTIFY_CODE_MAX,
 };
 
@@ -761,7 +776,7 @@ struct fuse_create_in {
 struct fuse_open_out {
        uint64_t        fh;
        uint32_t        open_flags;
-       uint32_t        padding;
+       int32_t         backing_id;
 };
 
 struct fuse_release_in {
@@ -877,7 +892,8 @@ struct fuse_init_out {
        uint16_t        max_pages;
        uint16_t        map_alignment;
        uint32_t        flags2;
-       uint32_t        unused[7];
+       uint32_t        max_stack_depth;
+       uint32_t        unused[6];
 };
 
 #define CUSE_INIT_INFO_MAX 4096
@@ -960,6 +976,14 @@ struct fuse_fallocate_in {
        uint32_t        padding;
 };
 
+/**
+ * FUSE request unique ID flag
+ *
+ * Set in the high bit of the request ID to mark a resend request. The
+ * receiver must be prepared to handle it as a possible duplicate of an
+ * earlier request.
+ */
+#define FUSE_UNIQUE_RESEND (1ULL << 63)
+
 struct fuse_in_header {
        uint32_t        len;
        uint32_t        opcode;
@@ -1049,9 +1073,18 @@ struct fuse_notify_retrieve_in {
        uint64_t        dummy4;
 };
 
+struct fuse_backing_map {
+       int32_t         fd;
+       uint32_t        flags;
+       uint64_t        padding;
+};
+
 /* Device ioctls: */
 #define FUSE_DEV_IOC_MAGIC             229
 #define FUSE_DEV_IOC_CLONE             _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
+#define FUSE_DEV_IOC_BACKING_OPEN      _IOW(FUSE_DEV_IOC_MAGIC, 1, \
+                                            struct fuse_backing_map)
+#define FUSE_DEV_IOC_BACKING_CLOSE     _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t)
 
 struct fuse_lseek_in {
        uint64_t        fh;
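
For the passthrough additions, a hedged userspace server sketch (fuse_dev_fd and the backing path are assumptions; error handling omitted): the daemon registers a backing file with FUSE_DEV_IOC_BACKING_OPEN, which returns a backing_id on success, replies to FUSE_OPEN with FOPEN_PASSTHROUGH and that id, and later drops its reference with FUSE_DEV_IOC_BACKING_CLOSE.

    	struct fuse_backing_map map = {
    		.fd = open("/srv/backing/file", O_RDWR),
    	};
    	int backing_id = ioctl(fuse_dev_fd, FUSE_DEV_IOC_BACKING_OPEN, &map);

    	struct fuse_open_out out = {
    		.open_flags = FOPEN_PASSTHROUGH,
    		.backing_id = backing_id,
    	};
    	/* ... send 'out' as the reply to the FUSE_OPEN request ... */

    	ioctl(fuse_dev_fd, FUSE_DEV_IOC_BACKING_CLOSE, &backing_id);
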
index c3308536482bdb2bfb1279279325faf5430a3356..2190adbe30027cec3bd88bdf7c4366cd7c424b82 100644 (file)
 
 #define KVM_API_VERSION 12
 
+/*
+ * Backwards-compatible definitions.
+ */
+#define __KVM_HAVE_GUEST_DEBUG
+
 /* for KVM_SET_USER_MEMORY_REGION */
 struct kvm_userspace_memory_region {
        __u32 slot;
@@ -85,43 +90,6 @@ struct kvm_pit_config {
 
 #define KVM_PIT_SPEAKER_DUMMY     1
 
-struct kvm_s390_skeys {
-       __u64 start_gfn;
-       __u64 count;
-       __u64 skeydata_addr;
-       __u32 flags;
-       __u32 reserved[9];
-};
-
-#define KVM_S390_CMMA_PEEK (1 << 0)
-
-/**
- * kvm_s390_cmma_log - Used for CMMA migration.
- *
- * Used both for input and output.
- *
- * @start_gfn: Guest page number to start from.
- * @count: Size of the result buffer.
- * @flags: Control operation mode via KVM_S390_CMMA_* flags
- * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
- *             pages are still remaining.
- * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
- *        in the PGSTE.
- * @values: Pointer to the values buffer.
- *
- * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
- */
-struct kvm_s390_cmma_log {
-       __u64 start_gfn;
-       __u32 count;
-       __u32 flags;
-       union {
-               __u64 remaining;
-               __u64 mask;
-       };
-       __u64 values;
-};
-
 struct kvm_hyperv_exit {
 #define KVM_EXIT_HYPERV_SYNIC          1
 #define KVM_EXIT_HYPERV_HCALL          2
@@ -315,11 +283,6 @@ struct kvm_run {
                        __u32 ipb;
                } s390_sieic;
                /* KVM_EXIT_S390_RESET */
-#define KVM_S390_RESET_POR       1
-#define KVM_S390_RESET_CLEAR     2
-#define KVM_S390_RESET_SUBSYSTEM 4
-#define KVM_S390_RESET_CPU_INIT  8
-#define KVM_S390_RESET_IPL       16
                __u64 s390_reset_flags;
                /* KVM_EXIT_S390_UCONTROL */
                struct {
@@ -536,43 +499,6 @@ struct kvm_translation {
        __u8  pad[5];
 };
 
-/* for KVM_S390_MEM_OP */
-struct kvm_s390_mem_op {
-       /* in */
-       __u64 gaddr;            /* the guest address */
-       __u64 flags;            /* flags */
-       __u32 size;             /* amount of bytes */
-       __u32 op;               /* type of operation */
-       __u64 buf;              /* buffer in userspace */
-       union {
-               struct {
-                       __u8 ar;        /* the access register number */
-                       __u8 key;       /* access key, ignored if flag unset */
-                       __u8 pad1[6];   /* ignored */
-                       __u64 old_addr; /* ignored if cmpxchg flag unset */
-               };
-               __u32 sida_offset; /* offset into the sida */
-               __u8 reserved[32]; /* ignored */
-       };
-};
-/* types for kvm_s390_mem_op->op */
-#define KVM_S390_MEMOP_LOGICAL_READ    0
-#define KVM_S390_MEMOP_LOGICAL_WRITE   1
-#define KVM_S390_MEMOP_SIDA_READ       2
-#define KVM_S390_MEMOP_SIDA_WRITE      3
-#define KVM_S390_MEMOP_ABSOLUTE_READ   4
-#define KVM_S390_MEMOP_ABSOLUTE_WRITE  5
-#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG        6
-
-/* flags for kvm_s390_mem_op->flags */
-#define KVM_S390_MEMOP_F_CHECK_ONLY            (1ULL << 0)
-#define KVM_S390_MEMOP_F_INJECT_EXCEPTION      (1ULL << 1)
-#define KVM_S390_MEMOP_F_SKEY_PROTECTION       (1ULL << 2)
-
-/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */
-#define KVM_S390_MEMOP_EXTENSION_CAP_BASE      (1 << 0)
-#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG   (1 << 1)
-
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
        /* in */
@@ -637,124 +563,6 @@ struct kvm_mp_state {
        __u32 mp_state;
 };
 
-struct kvm_s390_psw {
-       __u64 mask;
-       __u64 addr;
-};
-
-/* valid values for type in kvm_s390_interrupt */
-#define KVM_S390_SIGP_STOP             0xfffe0000u
-#define KVM_S390_PROGRAM_INT           0xfffe0001u
-#define KVM_S390_SIGP_SET_PREFIX       0xfffe0002u
-#define KVM_S390_RESTART               0xfffe0003u
-#define KVM_S390_INT_PFAULT_INIT       0xfffe0004u
-#define KVM_S390_INT_PFAULT_DONE       0xfffe0005u
-#define KVM_S390_MCHK                  0xfffe1000u
-#define KVM_S390_INT_CLOCK_COMP                0xffff1004u
-#define KVM_S390_INT_CPU_TIMER         0xffff1005u
-#define KVM_S390_INT_VIRTIO            0xffff2603u
-#define KVM_S390_INT_SERVICE           0xffff2401u
-#define KVM_S390_INT_EMERGENCY         0xffff1201u
-#define KVM_S390_INT_EXTERNAL_CALL     0xffff1202u
-/* Anything below 0xfffe0000u is taken by INT_IO */
-#define KVM_S390_INT_IO(ai,cssid,ssid,schid)   \
-       (((schid)) |                           \
-        ((ssid) << 16) |                      \
-        ((cssid) << 18) |                     \
-        ((ai) << 26))
-#define KVM_S390_INT_IO_MIN            0x00000000u
-#define KVM_S390_INT_IO_MAX            0xfffdffffu
-#define KVM_S390_INT_IO_AI_MASK                0x04000000u
-
-
-struct kvm_s390_interrupt {
-       __u32 type;
-       __u32 parm;
-       __u64 parm64;
-};
-
-struct kvm_s390_io_info {
-       __u16 subchannel_id;
-       __u16 subchannel_nr;
-       __u32 io_int_parm;
-       __u32 io_int_word;
-};
-
-struct kvm_s390_ext_info {
-       __u32 ext_params;
-       __u32 pad;
-       __u64 ext_params2;
-};
-
-struct kvm_s390_pgm_info {
-       __u64 trans_exc_code;
-       __u64 mon_code;
-       __u64 per_address;
-       __u32 data_exc_code;
-       __u16 code;
-       __u16 mon_class_nr;
-       __u8 per_code;
-       __u8 per_atmid;
-       __u8 exc_access_id;
-       __u8 per_access_id;
-       __u8 op_access_id;
-#define KVM_S390_PGM_FLAGS_ILC_VALID   0x01
-#define KVM_S390_PGM_FLAGS_ILC_0       0x02
-#define KVM_S390_PGM_FLAGS_ILC_1       0x04
-#define KVM_S390_PGM_FLAGS_ILC_MASK    0x06
-#define KVM_S390_PGM_FLAGS_NO_REWIND   0x08
-       __u8 flags;
-       __u8 pad[2];
-};
-
-struct kvm_s390_prefix_info {
-       __u32 address;
-};
-
-struct kvm_s390_extcall_info {
-       __u16 code;
-};
-
-struct kvm_s390_emerg_info {
-       __u16 code;
-};
-
-#define KVM_S390_STOP_FLAG_STORE_STATUS        0x01
-struct kvm_s390_stop_info {
-       __u32 flags;
-};
-
-struct kvm_s390_mchk_info {
-       __u64 cr14;
-       __u64 mcic;
-       __u64 failing_storage_address;
-       __u32 ext_damage_code;
-       __u32 pad;
-       __u8 fixed_logout[16];
-};
-
-struct kvm_s390_irq {
-       __u64 type;
-       union {
-               struct kvm_s390_io_info io;
-               struct kvm_s390_ext_info ext;
-               struct kvm_s390_pgm_info pgm;
-               struct kvm_s390_emerg_info emerg;
-               struct kvm_s390_extcall_info extcall;
-               struct kvm_s390_prefix_info prefix;
-               struct kvm_s390_stop_info stop;
-               struct kvm_s390_mchk_info mchk;
-               char reserved[64];
-       } u;
-};
-
-struct kvm_s390_irq_state {
-       __u64 buf;
-       __u32 flags;        /* will stay unused for compatibility reasons */
-       __u32 len;
-       __u32 reserved[4];  /* will stay unused for compatibility reasons */
-};
-
 /* for KVM_SET_GUEST_DEBUG */
 
 #define KVM_GUESTDBG_ENABLE            0x00000001
@@ -810,50 +618,6 @@ struct kvm_enable_cap {
        __u8  pad[64];
 };
 
-/* for KVM_PPC_GET_PVINFO */
-
-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
-
-struct kvm_ppc_pvinfo {
-       /* out */
-       __u32 flags;
-       __u32 hcall[4];
-       __u8  pad[108];
-};
-
-/* for KVM_PPC_GET_SMMU_INFO */
-#define KVM_PPC_PAGE_SIZES_MAX_SZ      8
-
-struct kvm_ppc_one_page_size {
-       __u32 page_shift;       /* Page shift (or 0) */
-       __u32 pte_enc;          /* Encoding in the HPTE (>>12) */
-};
-
-struct kvm_ppc_one_seg_page_size {
-       __u32 page_shift;       /* Base page shift of segment (or 0) */
-       __u32 slb_enc;          /* SLB encoding for BookS */
-       struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-#define KVM_PPC_PAGE_SIZES_REAL                0x00000001
-#define KVM_PPC_1T_SEGMENTS            0x00000002
-#define KVM_PPC_NO_HASH                        0x00000004
-
-struct kvm_ppc_smmu_info {
-       __u64 flags;
-       __u32 slb_size;
-       __u16 data_keys;        /* # storage keys supported for data */
-       __u16 instr_keys;       /* # storage keys supported for instructions */
-       struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
-};
-
-/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
-struct kvm_ppc_resize_hpt {
-       __u64 flags;
-       __u32 shift;
-       __u32 pad;
-};
-
 #define KVMIO 0xAE
 
 /* machine type bits, to be used as argument to KVM_CREATE_VM */
@@ -923,9 +687,7 @@ struct kvm_ppc_resize_hpt {
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
 #define KVM_CAP_USER_NMI 22
-#ifdef __KVM_HAVE_GUEST_DEBUG
 #define KVM_CAP_SET_GUEST_DEBUG 23
-#endif
 #ifdef __KVM_HAVE_PIT
 #define KVM_CAP_REINJECT_CONTROL 24
 #endif
@@ -1156,8 +918,6 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_GUEST_MEMFD 234
 #define KVM_CAP_VM_TYPES 235
 
-#ifdef KVM_CAP_IRQ_ROUTING
-
 struct kvm_irq_routing_irqchip {
        __u32 irqchip;
        __u32 pin;
@@ -1222,42 +982,6 @@ struct kvm_irq_routing {
        struct kvm_irq_routing_entry entries[];
 };
 
-#endif
-
-#ifdef KVM_CAP_MCE
-/* x86 MCE */
-struct kvm_x86_mce {
-       __u64 status;
-       __u64 addr;
-       __u64 misc;
-       __u64 mcg_status;
-       __u8 bank;
-       __u8 pad1[7];
-       __u64 pad2[3];
-};
-#endif
-
-#ifdef KVM_CAP_XEN_HVM
-#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR       (1 << 0)
-#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL     (1 << 1)
-#define KVM_XEN_HVM_CONFIG_SHARED_INFO         (1 << 2)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE            (1 << 3)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL       (1 << 4)
-#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND         (1 << 5)
-#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG        (1 << 6)
-#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE        (1 << 7)
-
-struct kvm_xen_hvm_config {
-       __u32 flags;
-       __u32 msr;
-       __u64 blob_addr_32;
-       __u64 blob_addr_64;
-       __u8 blob_size_32;
-       __u8 blob_size_64;
-       __u8 pad2[30];
-};
-#endif
-
 #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
 /*
  * Available with KVM_CAP_IRQFD_RESAMPLE
@@ -1442,11 +1166,6 @@ struct kvm_vfio_spapr_tce {
                                         struct kvm_userspace_memory_region2)
 
 /* enable ucontrol for s390 */
-struct kvm_s390_ucas_mapping {
-       __u64 user_addr;
-       __u64 vcpu_addr;
-       __u64 length;
-};
 #define KVM_S390_UCAS_MAP        _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping)
 #define KVM_S390_UCAS_UNMAP      _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping)
 #define KVM_S390_VCPU_FAULT     _IOW(KVMIO, 0x52, unsigned long)
@@ -1641,89 +1360,6 @@ struct kvm_enc_region {
 #define KVM_S390_NORMAL_RESET  _IO(KVMIO,   0xc3)
 #define KVM_S390_CLEAR_RESET   _IO(KVMIO,   0xc4)
 
-struct kvm_s390_pv_sec_parm {
-       __u64 origin;
-       __u64 length;
-};
-
-struct kvm_s390_pv_unp {
-       __u64 addr;
-       __u64 size;
-       __u64 tweak;
-};
-
-enum pv_cmd_dmp_id {
-       KVM_PV_DUMP_INIT,
-       KVM_PV_DUMP_CONFIG_STOR_STATE,
-       KVM_PV_DUMP_COMPLETE,
-       KVM_PV_DUMP_CPU,
-};
-
-struct kvm_s390_pv_dmp {
-       __u64 subcmd;
-       __u64 buff_addr;
-       __u64 buff_len;
-       __u64 gaddr;            /* For dump storage state */
-       __u64 reserved[4];
-};
-
-enum pv_cmd_info_id {
-       KVM_PV_INFO_VM,
-       KVM_PV_INFO_DUMP,
-};
-
-struct kvm_s390_pv_info_dump {
-       __u64 dump_cpu_buffer_len;
-       __u64 dump_config_mem_buffer_per_1m;
-       __u64 dump_config_finalize_len;
-};
-
-struct kvm_s390_pv_info_vm {
-       __u64 inst_calls_list[4];
-       __u64 max_cpus;
-       __u64 max_guests;
-       __u64 max_guest_addr;
-       __u64 feature_indication;
-};
-
-struct kvm_s390_pv_info_header {
-       __u32 id;
-       __u32 len_max;
-       __u32 len_written;
-       __u32 reserved;
-};
-
-struct kvm_s390_pv_info {
-       struct kvm_s390_pv_info_header header;
-       union {
-               struct kvm_s390_pv_info_dump dump;
-               struct kvm_s390_pv_info_vm vm;
-       };
-};
-
-enum pv_cmd_id {
-       KVM_PV_ENABLE,
-       KVM_PV_DISABLE,
-       KVM_PV_SET_SEC_PARMS,
-       KVM_PV_UNPACK,
-       KVM_PV_VERIFY,
-       KVM_PV_PREP_RESET,
-       KVM_PV_UNSHARE_ALL,
-       KVM_PV_INFO,
-       KVM_PV_DUMP,
-       KVM_PV_ASYNC_CLEANUP_PREPARE,
-       KVM_PV_ASYNC_CLEANUP_PERFORM,
-};
-
-struct kvm_pv_cmd {
-       __u32 cmd;      /* Command to be executed */
-       __u16 rc;       /* Ultravisor return code */
-       __u16 rrc;      /* Ultravisor return reason code */
-       __u64 data;     /* Data or address */
-       __u32 flags;    /* flags for future extensions. Must be 0 for now */
-       __u32 reserved[3];
-};
-
 /* Available with KVM_CAP_S390_PROTECTED */
 #define KVM_S390_PV_COMMAND            _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
 
@@ -1737,58 +1373,6 @@ struct kvm_pv_cmd {
 #define KVM_XEN_HVM_GET_ATTR   _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr)
 #define KVM_XEN_HVM_SET_ATTR   _IOW(KVMIO,  0xc9, struct kvm_xen_hvm_attr)
 
-struct kvm_xen_hvm_attr {
-       __u16 type;
-       __u16 pad[3];
-       union {
-               __u8 long_mode;
-               __u8 vector;
-               __u8 runstate_update_flag;
-               struct {
-                       __u64 gfn;
-#define KVM_XEN_INVALID_GFN ((__u64)-1)
-               } shared_info;
-               struct {
-                       __u32 send_port;
-                       __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
-                       __u32 flags;
-#define KVM_XEN_EVTCHN_DEASSIGN                (1 << 0)
-#define KVM_XEN_EVTCHN_UPDATE          (1 << 1)
-#define KVM_XEN_EVTCHN_RESET           (1 << 2)
-                       /*
-                        * Events sent by the guest are either looped back to
-                        * the guest itself (potentially on a different port#)
-                        * or signalled via an eventfd.
-                        */
-                       union {
-                               struct {
-                                       __u32 port;
-                                       __u32 vcpu;
-                                       __u32 priority;
-                               } port;
-                               struct {
-                                       __u32 port; /* Zero for eventfd */
-                                       __s32 fd;
-                               } eventfd;
-                               __u32 padding[4];
-                       } deliver;
-               } evtchn;
-               __u32 xen_version;
-               __u64 pad[8];
-       } u;
-};
-
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_ATTR_TYPE_LONG_MODE            0x0
-#define KVM_XEN_ATTR_TYPE_SHARED_INFO          0x1
-#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR                0x2
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_ATTR_TYPE_EVTCHN               0x3
-#define KVM_XEN_ATTR_TYPE_XEN_VERSION          0x4
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
-#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
-
 /* Per-vCPU Xen attributes */
 #define KVM_XEN_VCPU_GET_ATTR  _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
 #define KVM_XEN_VCPU_SET_ATTR  _IOW(KVMIO,  0xcb, struct kvm_xen_vcpu_attr)
@@ -1799,242 +1383,6 @@ struct kvm_xen_hvm_attr {
 #define KVM_GET_SREGS2             _IOR(KVMIO,  0xcc, struct kvm_sregs2)
 #define KVM_SET_SREGS2             _IOW(KVMIO,  0xcd, struct kvm_sregs2)
 
-struct kvm_xen_vcpu_attr {
-       __u16 type;
-       __u16 pad[3];
-       union {
-               __u64 gpa;
-#define KVM_XEN_INVALID_GPA ((__u64)-1)
-               __u64 pad[8];
-               struct {
-                       __u64 state;
-                       __u64 state_entry_time;
-                       __u64 time_running;
-                       __u64 time_runnable;
-                       __u64 time_blocked;
-                       __u64 time_offline;
-               } runstate;
-               __u32 vcpu_id;
-               struct {
-                       __u32 port;
-                       __u32 priority;
-                       __u64 expires_ns;
-               } timer;
-               __u8 vector;
-       } u;
-};
-
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO       0x0
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO  0x1
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR   0x2
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT        0x3
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA   0x4
-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID         0x6
-#define KVM_XEN_VCPU_ATTR_TYPE_TIMER           0x7
-#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR   0x8
-
-/* Secure Encrypted Virtualization command */
-enum sev_cmd_id {
-       /* Guest initialization commands */
-       KVM_SEV_INIT = 0,
-       KVM_SEV_ES_INIT,
-       /* Guest launch commands */
-       KVM_SEV_LAUNCH_START,
-       KVM_SEV_LAUNCH_UPDATE_DATA,
-       KVM_SEV_LAUNCH_UPDATE_VMSA,
-       KVM_SEV_LAUNCH_SECRET,
-       KVM_SEV_LAUNCH_MEASURE,
-       KVM_SEV_LAUNCH_FINISH,
-       /* Guest migration commands (outgoing) */
-       KVM_SEV_SEND_START,
-       KVM_SEV_SEND_UPDATE_DATA,
-       KVM_SEV_SEND_UPDATE_VMSA,
-       KVM_SEV_SEND_FINISH,
-       /* Guest migration commands (incoming) */
-       KVM_SEV_RECEIVE_START,
-       KVM_SEV_RECEIVE_UPDATE_DATA,
-       KVM_SEV_RECEIVE_UPDATE_VMSA,
-       KVM_SEV_RECEIVE_FINISH,
-       /* Guest status and debug commands */
-       KVM_SEV_GUEST_STATUS,
-       KVM_SEV_DBG_DECRYPT,
-       KVM_SEV_DBG_ENCRYPT,
-       /* Guest certificates commands */
-       KVM_SEV_CERT_EXPORT,
-       /* Attestation report */
-       KVM_SEV_GET_ATTESTATION_REPORT,
-       /* Guest Migration Extension */
-       KVM_SEV_SEND_CANCEL,
-
-       KVM_SEV_NR_MAX,
-};
-
-struct kvm_sev_cmd {
-       __u32 id;
-       __u64 data;
-       __u32 error;
-       __u32 sev_fd;
-};
-
-struct kvm_sev_launch_start {
-       __u32 handle;
-       __u32 policy;
-       __u64 dh_uaddr;
-       __u32 dh_len;
-       __u64 session_uaddr;
-       __u32 session_len;
-};
-
-struct kvm_sev_launch_update_data {
-       __u64 uaddr;
-       __u32 len;
-};
-
-
-struct kvm_sev_launch_secret {
-       __u64 hdr_uaddr;
-       __u32 hdr_len;
-       __u64 guest_uaddr;
-       __u32 guest_len;
-       __u64 trans_uaddr;
-       __u32 trans_len;
-};
-
-struct kvm_sev_launch_measure {
-       __u64 uaddr;
-       __u32 len;
-};
-
-struct kvm_sev_guest_status {
-       __u32 handle;
-       __u32 policy;
-       __u32 state;
-};
-
-struct kvm_sev_dbg {
-       __u64 src_uaddr;
-       __u64 dst_uaddr;
-       __u32 len;
-};
-
-struct kvm_sev_attestation_report {
-       __u8 mnonce[16];
-       __u64 uaddr;
-       __u32 len;
-};
-
-struct kvm_sev_send_start {
-       __u32 policy;
-       __u64 pdh_cert_uaddr;
-       __u32 pdh_cert_len;
-       __u64 plat_certs_uaddr;
-       __u32 plat_certs_len;
-       __u64 amd_certs_uaddr;
-       __u32 amd_certs_len;
-       __u64 session_uaddr;
-       __u32 session_len;
-};
-
-struct kvm_sev_send_update_data {
-       __u64 hdr_uaddr;
-       __u32 hdr_len;
-       __u64 guest_uaddr;
-       __u32 guest_len;
-       __u64 trans_uaddr;
-       __u32 trans_len;
-};
-
-struct kvm_sev_receive_start {
-       __u32 handle;
-       __u32 policy;
-       __u64 pdh_uaddr;
-       __u32 pdh_len;
-       __u64 session_uaddr;
-       __u32 session_len;
-};
-
-struct kvm_sev_receive_update_data {
-       __u64 hdr_uaddr;
-       __u32 hdr_len;
-       __u64 guest_uaddr;
-       __u32 guest_len;
-       __u64 trans_uaddr;
-       __u32 trans_len;
-};
-
-#define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
-#define KVM_DEV_ASSIGN_PCI_2_3         (1 << 1)
-#define KVM_DEV_ASSIGN_MASK_INTX       (1 << 2)
-
-struct kvm_assigned_pci_dev {
-       __u32 assigned_dev_id;
-       __u32 busnr;
-       __u32 devfn;
-       __u32 flags;
-       __u32 segnr;
-       union {
-               __u32 reserved[11];
-       };
-};
-
-#define KVM_DEV_IRQ_HOST_INTX    (1 << 0)
-#define KVM_DEV_IRQ_HOST_MSI     (1 << 1)
-#define KVM_DEV_IRQ_HOST_MSIX    (1 << 2)
-
-#define KVM_DEV_IRQ_GUEST_INTX   (1 << 8)
-#define KVM_DEV_IRQ_GUEST_MSI    (1 << 9)
-#define KVM_DEV_IRQ_GUEST_MSIX   (1 << 10)
-
-#define KVM_DEV_IRQ_HOST_MASK   0x00ff
-#define KVM_DEV_IRQ_GUEST_MASK   0xff00
-
-struct kvm_assigned_irq {
-       __u32 assigned_dev_id;
-       __u32 host_irq; /* ignored (legacy field) */
-       __u32 guest_irq;
-       __u32 flags;
-       union {
-               __u32 reserved[12];
-       };
-};
-
-struct kvm_assigned_msix_nr {
-       __u32 assigned_dev_id;
-       __u16 entry_nr;
-       __u16 padding;
-};
-
-#define KVM_MAX_MSIX_PER_DEV           256
-struct kvm_assigned_msix_entry {
-       __u32 assigned_dev_id;
-       __u32 gsi;
-       __u16 entry; /* The index of entry in the MSI-X table */
-       __u16 padding[3];
-};
-
-#define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK  (1ULL << 1)
-
-/* Available with KVM_CAP_ARM_USER_IRQ */
-
-/* Bits for run->s.regs.device_irq_level */
-#define KVM_ARM_DEV_EL1_VTIMER         (1 << 0)
-#define KVM_ARM_DEV_EL1_PTIMER         (1 << 1)
-#define KVM_ARM_DEV_PMU                        (1 << 2)
-
-struct kvm_hyperv_eventfd {
-       __u32 conn_id;
-       __s32 fd;
-       __u32 flags;
-       __u32 padding[3];
-};
-
-#define KVM_HYPERV_CONN_ID_MASK                0x00ffffff
-#define KVM_HYPERV_EVENTFD_DEASSIGN    (1 << 0)
-
 #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE    (1 << 0)
 #define KVM_DIRTY_LOG_INITIALLY_SET            (1 << 1)
 
@@ -2180,33 +1528,6 @@ struct kvm_stats_desc {
 /* Available with KVM_CAP_S390_ZPCI_OP */
 #define KVM_S390_ZPCI_OP         _IOW(KVMIO,  0xd1, struct kvm_s390_zpci_op)
 
-struct kvm_s390_zpci_op {
-       /* in */
-       __u32 fh;               /* target device */
-       __u8  op;               /* operation to perform */
-       __u8  pad[3];
-       union {
-               /* for KVM_S390_ZPCIOP_REG_AEN */
-               struct {
-                       __u64 ibv;      /* Guest addr of interrupt bit vector */
-                       __u64 sb;       /* Guest addr of summary bit */
-                       __u32 flags;
-                       __u32 noi;      /* Number of interrupts */
-                       __u8 isc;       /* Guest interrupt subclass */
-                       __u8 sbo;       /* Offset of guest summary bit vector */
-                       __u16 pad;
-               } reg_aen;
-               __u64 reserved[8];
-       } u;
-};
-
-/* types for kvm_s390_zpci_op->op */
-#define KVM_S390_ZPCIOP_REG_AEN                0
-#define KVM_S390_ZPCIOP_DEREG_AEN      1
-
-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */
-#define KVM_S390_ZPCIOP_REGAEN_HOST    (1 << 0)
-
 /* Available with KVM_CAP_MEMORY_ATTRIBUTES */
 #define KVM_SET_MEMORY_ATTRIBUTES              _IOW(KVMIO,  0xd2, struct kvm_memory_attributes)
 
index a8963f7ef4c279de55b68930a4f67a93da96044b..1f9bb10d1a473f553f328d5b5a6747c687b7931f 100644 (file)
@@ -23,6 +23,7 @@ enum {
        MPOL_INTERLEAVE,
        MPOL_LOCAL,
        MPOL_PREFERRED_MANY,
+       MPOL_WEIGHTED_INTERLEAVE,
        MPOL_MAX,       /* always last member of enum */
 };
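
MPOL_WEIGHTED_INTERLEAVE interleaves allocations across the nodemask in proportion to per-node weights configured under /sys/kernel/mm/mempolicy/weighted_interleave/nodeN. A hedged userspace sketch; the constant may not be in libc's numaif.h yet, in which case the raw enum value has to be supplied:

    	unsigned long nodemask = (1UL << 0) | (1UL << 1);	/* nodes 0 and 1 */

    	if (set_mempolicy(MPOL_WEIGHTED_INTERLEAVE, &nodemask,
    			  sizeof(nodemask) * 8))
    		perror("set_mempolicy");
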
 
index 730673ecc63d0a5ca3f7d5b4db1a93ce93737374..6eeaf8bf236209fce3ccd1123e78d562b75c4a9c 100644 (file)
 /**
  * enum rkisp1_cif_isp_version - ISP variants
  *
- * @RKISP1_V10: used at least in rk3288 and rk3399
- * @RKISP1_V11: declared in the original vendor code, but not used
- * @RKISP1_V12: used at least in rk3326 and px30
- * @RKISP1_V13: used at least in rk1808
+ * @RKISP1_V10: Used at least in RK3288 and RK3399.
+ * @RKISP1_V11: Declared in the original vendor code, but not used. Same number
+ *     of entries in grids and histogram as v10.
+ * @RKISP1_V12: Used at least in RK3326 and PX30.
+ * @RKISP1_V13: Used at least in RK1808. Same number of entries in grids and
+ *     histogram as v12.
+ * @RKISP1_V_IMX8MP: Used in at least i.MX8MP. Same number of entries in grids
+ *     and histogram as v10.
  */
 enum rkisp1_cif_isp_version {
        RKISP1_V10 = 10,
        RKISP1_V11,
        RKISP1_V12,
        RKISP1_V13,
+       RKISP1_V_IMX8MP,
 };
 
 enum rkisp1_cif_isp_histogram_mode {
@@ -584,10 +589,9 @@ enum rkisp1_cif_isp_goc_mode {
  * as is reported by the hw_revision field of the struct media_device_info
  * that is returned by ioctl MEDIA_IOC_DEVICE_INFO.
  *
- * Versions <= V11 have RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V10
- * entries, versions >= V12 have RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V12
- * entries. RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES is equal to the maximum
- * of the two.
+ * V10 has RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V10 entries, V12 has
+ * RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V12 entries.
+ * RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES is equal to the maximum of the two.
  */
 struct rkisp1_cif_isp_goc_config {
        __u32 mode;
@@ -607,10 +611,10 @@ struct rkisp1_cif_isp_goc_config {
  * as is reported by the hw_revision field of the struct media_device_info
  * that is returned by ioctl MEDIA_IOC_DEVICE_INFO.
  *
- * Versions <= V11 have RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V10
- * entries, versions >= V12 have RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V12
- * entries. RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE is equal to the maximum
- * of the two.
+ * V10 has RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V10 entries, V12 has
+ * RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V12 entries.
+ * RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE is equal to the maximum of the
+ * two.
  */
 struct rkisp1_cif_isp_hst_config {
        __u32 mode;
@@ -902,9 +906,9 @@ struct rkisp1_cif_isp_bls_meas_val {
  * as is reported by the hw_revision field of the struct media_device_info
  * that is returned by ioctl MEDIA_IOC_DEVICE_INFO.
  *
- * Versions <= V11 have RKISP1_CIF_ISP_AE_MEAN_MAX_V10 entries,
- * versions >= V12 have RKISP1_CIF_ISP_AE_MEAN_MAX_V12 entries.
- * RKISP1_CIF_ISP_AE_MEAN_MAX is equal to the maximum of the two.
+ * V10 has RKISP1_CIF_ISP_AE_MEAN_MAX_V10 entries, V12 has
+ * RKISP1_CIF_ISP_AE_MEAN_MAX_V12 entries. RKISP1_CIF_ISP_AE_MEAN_MAX is equal
+ * to the maximum of the two.
  *
  * Image is divided into 5x5 blocks on V10 and 9x9 blocks on V12.
  */
@@ -944,21 +948,21 @@ struct rkisp1_cif_isp_af_stat {
  *            integer part.
  *
  * The window of the measurements area is divided to 5x5 sub-windows for
- * V10/V11 and to 9x9 sub-windows for V12. The histogram is then computed for
- * each sub-window independently and the final result is a weighted average of
- * the histogram measurements on all sub-windows. The window of the
- * measurements area and the weight of each sub-window are configurable using
+ * V10 and to 9x9 sub-windows for V12. The histogram is then computed for each
+ * sub-window independently and the final result is a weighted average of the
+ * histogram measurements on all sub-windows. The window of the measurements
+ * area and the weight of each sub-window are configurable using
  * struct @rkisp1_cif_isp_hst_config.
  *
- * The histogram contains 16 bins in V10/V11 and 32 bins in V12/V13.
+ * The histogram contains 16 bins in V10 and 32 bins in V12.
  *
  * The number of entries of @hist_bins depends on the hardware revision
  * as is reported by the hw_revision field of the struct media_device_info
  * that is returned by ioctl MEDIA_IOC_DEVICE_INFO.
  *
- * Versions <= V11 have RKISP1_CIF_ISP_HIST_BIN_N_MAX_V10 entries,
- * versions >= V12 have RKISP1_CIF_ISP_HIST_BIN_N_MAX_V12 entries.
- * RKISP1_CIF_ISP_HIST_BIN_N_MAX is equal to the maximum of the two.
+ * V10 has RKISP1_CIF_ISP_HIST_BIN_N_MAX_V10 entries, V12 has
+ * RKISP1_CIF_ISP_HIST_BIN_N_MAX_V12 entries. RKISP1_CIF_ISP_HIST_BIN_N_MAX is
+ * equal to the maximum of the two.
  */
 struct rkisp1_cif_isp_hist_stat {
        __u32 hist_bins[RKISP1_CIF_ISP_HIST_BIN_N_MAX];
index 68e7ac178cc21e5d49c46ba9c16d62d0bf02604b..a8015e5e7fa4daaeb6cb63477e431029632e4d1f 100644 (file)
@@ -1041,13 +1041,13 @@ struct v4l2_requestbuffers {
  * struct v4l2_plane - plane info for multi-planar buffers
  * @bytesused:         number of bytes occupied by data in the plane (payload)
  * @length:            size of this plane (NOT the payload) in bytes
- * @mem_offset:                when memory in the associated struct v4l2_buffer is
+ * @m.mem_offset:      when memory in the associated struct v4l2_buffer is
  *                     V4L2_MEMORY_MMAP, equals the offset from the start of
  *                     the device memory for this plane (or is a "cookie" that
  *                     should be passed to mmap() called on the video node)
- * @userptr:           when memory is V4L2_MEMORY_USERPTR, a userspace pointer
+ * @m.userptr:         when memory is V4L2_MEMORY_USERPTR, a userspace pointer
  *                     pointing to this plane
- * @fd:                        when memory is V4L2_MEMORY_DMABUF, a userspace file
+ * @m.fd:              when memory is V4L2_MEMORY_DMABUF, a userspace file
  *                     descriptor associated with this plane
  * @m:                 union of @mem_offset, @userptr and @fd
  * @data_offset:       offset in the plane to the start of data; usually 0,
@@ -1085,14 +1085,14 @@ struct v4l2_plane {
  * @sequence:  sequence count of this frame
  * @memory:    enum v4l2_memory; the method, in which the actual video data is
  *             passed
- * @offset:    for non-multiplanar buffers with memory == V4L2_MEMORY_MMAP;
+ * @m.offset:  for non-multiplanar buffers with memory == V4L2_MEMORY_MMAP;
  *             offset from the start of the device memory for this plane,
  *             (or a "cookie" that should be passed to mmap() as offset)
- * @userptr:   for non-multiplanar buffers with memory == V4L2_MEMORY_USERPTR;
+ * @m.userptr: for non-multiplanar buffers with memory == V4L2_MEMORY_USERPTR;
  *             a userspace pointer pointing to this buffer
- * @fd:                for non-multiplanar buffers with memory == V4L2_MEMORY_DMABUF;
+ * @m.fd:              for non-multiplanar buffers with memory == V4L2_MEMORY_DMABUF;
  *             a userspace file descriptor associated with this buffer
- * @planes:    for multiplanar buffers; userspace pointer to the array of plane
+ * @m.planes:  for multiplanar buffers; userspace pointer to the array of plane
  *             info structs for this buffer
  * @m:         union of @offset, @userptr, @planes and @fd
  * @length:    size in bytes of the buffer (NOT its payload) for single-plane
@@ -2423,15 +2423,15 @@ struct v4l2_meta_format {
 
 /**
  * struct v4l2_format - stream data format
- * @type:      enum v4l2_buf_type; type of the data stream
- * @pix:       definition of an image format
- * @pix_mp:    definition of a multiplanar image format
- * @win:       definition of an overlaid image
- * @vbi:       raw VBI capture or output parameters
- * @sliced:    sliced VBI capture or output parameters
- * @raw_data:  placeholder for future extensions and custom formats
- * @fmt:       union of @pix, @pix_mp, @win, @vbi, @sliced, @sdr, @meta
- *             and @raw_data
+ * @type:              enum v4l2_buf_type; type of the data stream
+ * @fmt.pix:           definition of an image format
+ * @fmt.pix_mp:                definition of a multiplanar image format
+ * @fmt.win:           definition of an overlaid image
+ * @fmt.vbi:           raw VBI capture or output parameters
+ * @fmt.sliced:                sliced VBI capture or output parameters
+ * @fmt.raw_data:      placeholder for future extensions and custom formats
+ * @fmt:               union of @pix, @pix_mp, @win, @vbi, @sliced, @sdr,
+ *                     @meta and @raw_data
  */
 struct v4l2_format {
        __u32    type;
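
A hedged userspace fragment showing which union members the updated kerneldoc refers to (video_fd and dmabuf_fd are assumed to exist; error handling omitted):

    #include <sys/ioctl.h>
    #include <linux/videodev2.h>

    struct v4l2_format fmt = { .type = V4L2_BUF_TYPE_VIDEO_CAPTURE };

    /* @fmt.pix is the valid member for single-planar video capture */
    fmt.fmt.pix.width = 640;
    fmt.fmt.pix.height = 480;
    fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
    ioctl(video_fd, VIDIOC_S_FMT, &fmt);

    struct v4l2_buffer buf = {
        .type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
        .memory = V4L2_MEMORY_DMABUF,
    };
    buf.m.fd = dmabuf_fd;    /* @m.fd is the member selected by @memory */
    ioctl(video_fd, VIDIOC_QBUF, &buf);
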
index 8e2bce9a4f211de61b70cf204ee13b034cefbea8..cb2afcebbdf5147c9f5f6c36f25f13084d3102a9 100644 (file)
@@ -914,6 +914,8 @@ enum ufshcd_mcq_opr {
  * @dev_cmd_queue: Queue for issuing device management commands
  * @mcq_opr: MCQ operation and runtime registers
  * @ufs_rtc_update_work: A work for UFS RTC periodic update
+ * @pm_qos_req: PM QoS request handle
+ * @pm_qos_enabled: flag to check if PM QoS is enabled
  */
 struct ufs_hba {
        void __iomem *mmio_base;
@@ -1080,6 +1082,8 @@ struct ufs_hba {
        struct ufshcd_mcq_opr_info_t mcq_opr[OPR_MAX];
 
        struct delayed_work ufs_rtc_update_work;
+       struct pm_qos_request pm_qos_req;
+       bool pm_qos_enabled;
 };
 
 /**
@@ -1263,6 +1267,7 @@ unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
                                         struct ufs_hw_queue *hwq);
 void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba);
 void ufshcd_mcq_enable_esi(struct ufs_hba *hba);
+void ufshcd_mcq_enable(struct ufs_hba *hba);
 void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg);
 
 int ufshcd_opp_config_clks(struct device *dev, struct opp_table *opp_table,
@@ -1400,6 +1405,8 @@ int ufshcd_suspend_prepare(struct device *dev);
 int __ufshcd_suspend_prepare(struct device *dev, bool rpm_ok_for_spm);
 void ufshcd_resume_complete(struct device *dev);
 bool ufshcd_is_hba_active(struct ufs_hba *hba);
+void ufshcd_pm_qos_init(struct ufs_hba *hba);
+void ufshcd_pm_qos_exit(struct ufs_hba *hba);
 
 /* Wrapper functions for safely calling variant operations */
 static inline int ufshcd_vops_init(struct ufs_hba *hba)
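
A minimal sketch of how the new pm_qos_req/pm_qos_enabled pair likely hooks into the CPU latency QoS API (the actual ufshcd implementation may differ in details such as locking and sysfs control):

    void ufshcd_pm_qos_init(struct ufs_hba *hba)
    {
            if (hba->pm_qos_enabled)
                    return;
            cpu_latency_qos_add_request(&hba->pm_qos_req, PM_QOS_DEFAULT_VALUE);
            hba->pm_qos_enabled = true;
    }

    void ufshcd_pm_qos_exit(struct ufs_hba *hba)
    {
            if (!hba->pm_qos_enabled)
                    return;
            cpu_latency_qos_remove_request(&hba->pm_qos_req);
            hba->pm_qos_enabled = false;
    }
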
index d5accacae6bca758d75aa2d13a79053c569fef73..a196e1c4c3bb0596b6d3d5070d32aa9e84c3b870 100644 (file)
@@ -282,6 +282,9 @@ enum {
 /* UTMRLRSR - UTP Task Management Request Run-Stop Register 80h */
 #define UTP_TASK_REQ_LIST_RUN_STOP_BIT         0x1
 
+/* REG_UFS_MEM_CFG - Global Config Registers 300h */
+#define MCQ_MODE_SELECT        BIT(0)
+
 /* CQISy - CQ y Interrupt Status Register  */
 #define UFSHCD_MCQ_CQIS_TAIL_ENT_PUSH_STS      0x1
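
The ufshcd_mcq_enable() declared in ufshcd.h above presumably just sets this bit; a sketch using the driver's existing ufshcd_rmwl() read-modify-write helper:

    void ufshcd_mcq_enable(struct ufs_hba *hba)
    {
            ufshcd_rmwl(hba, MCQ_MODE_SELECT, MCQ_MODE_SELECT, REG_UFS_MEM_CFG);
    }
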
 
index 01dbd0e8150180c2dd50d449adf7ad5d3ac0c8d0..da79760b8be3a67378deb49a8ae8588bc19ddce0 100644 (file)
@@ -643,7 +643,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
                        "initrd");
 }
 
-#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_CRASH_RESERVE
 static bool __init kexec_free_initrd(void)
 {
        unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
index d002f30f7f24c99e124149cea43f613c15818fbe..2ca52474d0c3032e44ae410add7b2430218f9c41 100644 (file)
@@ -88,6 +88,7 @@
 #include <linux/sched/task_stack.h>
 #include <linux/context_tracking.h>
 #include <linux/random.h>
+#include <linux/moduleloader.h>
 #include <linux/list.h>
 #include <linux/integrity.h>
 #include <linux/proc_ns.h>
 #include <linux/stackdepot.h>
 #include <linux/randomize_kstack.h>
 #include <linux/pidfs.h>
+#include <linux/ptdump.h>
 #include <net/net_namespace.h>
 
 #include <asm/io.h>
@@ -681,7 +683,7 @@ static void __init setup_command_line(char *command_line)
 
 static __initdata DECLARE_COMPLETION(kthreadd_done);
 
-noinline void __ref __noreturn rest_init(void)
+static noinline void __ref __noreturn rest_init(void)
 {
        struct task_struct *tsk;
        int pid;
@@ -826,11 +828,6 @@ static int __init early_randomize_kstack_offset(char *buf)
 early_param("randomize_kstack_offset", early_randomize_kstack_offset);
 #endif
 
-void __init __weak __noreturn arch_call_rest_init(void)
-{
-       rest_init();
-}
-
 static void __init print_unknown_bootoptions(void)
 {
        char *unknown_options;
@@ -1074,7 +1071,7 @@ void start_kernel(void)
        kcsan_init();
 
        /* Do the rest non-__init'ed, we're now alive */
-       arch_call_rest_init();
+       rest_init();
 
        /*
         * Avoid stack canaries in callers of boot_init_stack_canary for gcc-10
@@ -1406,12 +1403,13 @@ static void mark_readonly(void)
        if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && rodata_enabled) {
                /*
                 * load_module() results in W+X mappings, which are cleaned
-                * up with call_rcu().  Let's make sure that queued work is
+                * up with init_free_wq. Let's make sure that queued work is
                 * flushed so that we don't hit false positives looking for
                 * insecure pages which are W+X.
                 */
-               rcu_barrier();
+               flush_module_init_free_work();
                mark_rodata_ro();
+               debug_checkwx();
                rodata_test();
        } else if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
                pr_info("Kernel memory protection disabled.\n");
index 8c62e443f78b3cede057e03f13b953c6b0288030..45cb1dabce29ae0ab553c6d8f04808fe8e7f3232 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/ipc_namespace.h>
 #include <linux/msg.h>
 #include <linux/slab.h>
+#include <linux/cred.h>
 #include "util.h"
 
 static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
@@ -190,25 +191,57 @@ static int set_is_seen(struct ctl_table_set *set)
        return &current->nsproxy->ipc_ns->ipc_set == set;
 }
 
+static void ipc_set_ownership(struct ctl_table_header *head,
+                             struct ctl_table *table,
+                             kuid_t *uid, kgid_t *gid)
+{
+       struct ipc_namespace *ns =
+               container_of(head->set, struct ipc_namespace, ipc_set);
+
+       kuid_t ns_root_uid = make_kuid(ns->user_ns, 0);
+       kgid_t ns_root_gid = make_kgid(ns->user_ns, 0);
+
+       *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID;
+       *gid = gid_valid(ns_root_gid) ? ns_root_gid : GLOBAL_ROOT_GID;
+}
+
 static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table)
 {
        int mode = table->mode;
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
-       struct ipc_namespace *ns = current->nsproxy->ipc_ns;
+       struct ipc_namespace *ns =
+               container_of(head->set, struct ipc_namespace, ipc_set);
 
        if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) ||
             (table->data == &ns->ids[IPC_MSG_IDS].next_id) ||
             (table->data == &ns->ids[IPC_SHM_IDS].next_id)) &&
            checkpoint_restore_ns_capable(ns->user_ns))
                mode = 0666;
+       else
 #endif
-       return mode;
+       {
+               kuid_t ns_root_uid;
+               kgid_t ns_root_gid;
+
+               ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid);
+
+               if (uid_eq(current_euid(), ns_root_uid))
+                       mode >>= 6;
+
+               else if (in_egroup_p(ns_root_gid))
+                       mode >>= 3;
+       }
+
+       mode &= 7;
+
+       return (mode << 6) | (mode << 3) | mode;
 }
 
 static struct ctl_table_root set_root = {
        .lookup = set_lookup,
        .permissions = ipc_permissions,
+       .set_ownership = ipc_set_ownership,
 };
 
 bool setup_ipc_sysctls(struct ipc_namespace *ns)
@@ -259,8 +292,7 @@ bool setup_ipc_sysctls(struct ipc_namespace *ns)
                                tbl[i].data = NULL;
                }
 
-               ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set,
-                                                         "kernel", tbl,
+               ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl,
                                                          ARRAY_SIZE(ipc_sysctls));
        }
        if (!ns->ipc_sysctls) {
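
Worked example of the permission arithmetic above: for a table whose mode is 0644, a caller whose euid maps to the namespace's root uid takes mode >>= 6, leaving 6, and (6 << 6) | (6 << 3) | 6 yields 0666. A caller that only matches the namespace root group gets 0644 >> 3 & 7 = 4, i.e. 0444. Replicating the caller's own class into all three classes means the generic sysctl permission check reaches the same verdict no matter which class it ends up testing the caller against.
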
index ebb5ed81c151a8471f63df3fe61f94de51d30b79..21fba3a6edaf7a0c2e96c31ff34b761cff6b79e0 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/stat.h>
 #include <linux/capability.h>
 #include <linux/slab.h>
+#include <linux/cred.h>
 
 static int msg_max_limit_min = MIN_MSGMAX;
 static int msg_max_limit_max = HARD_MSGMAX;
@@ -76,8 +77,43 @@ static int set_is_seen(struct ctl_table_set *set)
        return &current->nsproxy->ipc_ns->mq_set == set;
 }
 
+static void mq_set_ownership(struct ctl_table_header *head,
+                            struct ctl_table *table,
+                            kuid_t *uid, kgid_t *gid)
+{
+       struct ipc_namespace *ns =
+               container_of(head->set, struct ipc_namespace, mq_set);
+
+       kuid_t ns_root_uid = make_kuid(ns->user_ns, 0);
+       kgid_t ns_root_gid = make_kgid(ns->user_ns, 0);
+
+       *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID;
+       *gid = gid_valid(ns_root_gid) ? ns_root_gid : GLOBAL_ROOT_GID;
+}
+
+static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table)
+{
+       int mode = table->mode;
+       kuid_t ns_root_uid;
+       kgid_t ns_root_gid;
+
+       mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid);
+
+       if (uid_eq(current_euid(), ns_root_uid))
+               mode >>= 6;
+
+       else if (in_egroup_p(ns_root_gid))
+               mode >>= 3;
+
+       mode &= 7;
+
+       return (mode << 6) | (mode << 3) | mode;
+}
+
 static struct ctl_table_root set_root = {
        .lookup = set_lookup,
+       .permissions = mq_permissions,
+       .set_ownership = mq_set_ownership,
 };
 
 bool setup_mq_sysctls(struct ipc_namespace *ns)
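
The user-visible effect, as a hedged fragment (run by a task that has entered a new user+ipc namespace and is mapped to uid 0 there; error handling omitted):

    #include <fcntl.h>
    #include <unistd.h>

    /* now permitted: mq_set_ownership reports the sysctl as owned by the
     * namespace's root, and mq_permissions grants that uid the rw bits */
    int fd = open("/proc/sys/fs/mqueue/msg_max", O_WRONLY);
    write(fd, "32", 2);
    close(fd);
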
index 946dffa048b74cdcf42ecfd1b0eb1ea6ec2e1fda..6c34e63c88ff4c1e23fb0ba14c4002fcfb7783ed 100644 (file)
@@ -2,11 +2,13 @@
 
 menu "Kexec and crash features"
 
-config CRASH_CORE
+config CRASH_RESERVE
+       bool
+
+config VMCORE_INFO
        bool
 
 config KEXEC_CORE
-       select CRASH_CORE
        bool
 
 config KEXEC_ELF
@@ -95,9 +97,11 @@ config KEXEC_JUMP
 
 config CRASH_DUMP
        bool "kernel crash dumps"
+       default y
        depends on ARCH_SUPPORTS_CRASH_DUMP
-       select CRASH_CORE
-       select KEXEC_CORE
+       depends on KEXEC_CORE
+       select VMCORE_INFO
+       select CRASH_RESERVE
        help
          Generate crash dump after being started by kexec.
          This should be normally only set in special crash dump kernels
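
The net effect on a .config, as a sketch: a kdump kernel now needs both options explicitly, since CRASH_DUMP depends on (rather than selects) KEXEC_CORE, and the split-out helpers come in via select:

    # kdump-capable kernel
    CONFIG_KEXEC_CORE=y
    CONFIG_CRASH_DUMP=y          # selects VMCORE_INFO and CRASH_RESERVE

    # kexec-only kernel: no vmcoreinfo or crashkernel reservation code built
    CONFIG_KEXEC_CORE=y
    # CONFIG_CRASH_DUMP is not set
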
index ce105a5558fcfadafdc04aeefeefa6231f18008f..3c13240dfc9f09dac98bb5802a66b8e646a36127 100644 (file)
@@ -68,8 +68,10 @@ obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_KALLSYMS_SELFTEST) += kallsyms_selftest.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
-obj-$(CONFIG_CRASH_CORE) += crash_core.o
+obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o elfcorehdr.o
+obj-$(CONFIG_CRASH_RESERVE) += crash_reserve.o
 obj-$(CONFIG_KEXEC_CORE) += kexec_core.o
+obj-$(CONFIG_CRASH_DUMP) += crash_core.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
 obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
@@ -120,7 +122,6 @@ obj-$(CONFIG_PERF_EVENTS) += events/
 
 obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
 obj-$(CONFIG_PADATA) += padata.o
-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
index b529182e8b04fc5e53f106b5b7d0fdeea6c877aa..c5a9fcd2d622819cc1f5b0f5cfd772514ee5f423 100644 (file)
@@ -19,7 +19,7 @@ int main(void)
        DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
        DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
 #ifdef CONFIG_SMP
-       DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
+       DEFINE(NR_CPUS_BITS, bits_per(CONFIG_NR_CPUS));
 #endif
        DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
 #ifdef CONFIG_LRU_GEN
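
Worked example of the off-by-one this fixes: ilog2() rounds down, so with a non-power-of-two CONFIG_NR_CPUS such as 6, ilog2(6) = 2 gives a field that can only encode CPU ids 0-3, while bits_per(6) = 3 covers the full 0-5 range. For exact powers of two, bits_per() yields one bit more than ilog2() (bits_per(256) = 9), which wastes a bit but is always sufficient.
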
index 75cd6a736d030656d468e344f4035c25f6c8630e..78b5dc7cee3ab761314a9328e1ac9ae7afc0fa95 100644 (file)
 #include <linux/sizes.h>
 #include <linux/kexec.h>
 #include <linux/memory.h>
+#include <linux/mm.h>
 #include <linux/cpuhotplug.h>
 #include <linux/memblock.h>
 #include <linux/kmemleak.h>
+#include <linux/crash_core.h>
+#include <linux/reboot.h>
+#include <linux/btf.h>
+#include <linux/objtool.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
 /* Per cpu memory for storing cpu states in case of system crash. */
 note_buf_t __percpu *crash_notes;
 
-/* vmcoreinfo stuff */
-unsigned char *vmcoreinfo_data;
-size_t vmcoreinfo_size;
-u32 *vmcoreinfo_note;
-
-/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
-static unsigned char *vmcoreinfo_data_safecopy;
-
-/* Location of the reserved area for the crash kernel */
-struct resource crashk_res = {
-       .name  = "Crash kernel",
-       .start = 0,
-       .end   = 0,
-       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
-       .desc  = IORES_DESC_CRASH_KERNEL
-};
-struct resource crashk_low_res = {
-       .name  = "Crash kernel",
-       .start = 0,
-       .end   = 0,
-       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
-       .desc  = IORES_DESC_CRASH_KERNEL
-};
-
-/*
- * parsing the "crashkernel" commandline
- *
- * this code is intended to be called from architecture specific code
- */
+#ifdef CONFIG_CRASH_DUMP
 
-
-/*
- * This function parses command lines in the format
- *
- *   crashkernel=ramsize-range:size[,...][@offset]
- *
- * The function returns 0 on success and -EINVAL on failure.
- */
-static int __init parse_crashkernel_mem(char *cmdline,
-                                       unsigned long long system_ram,
-                                       unsigned long long *crash_size,
-                                       unsigned long long *crash_base)
+int kimage_crash_copy_vmcoreinfo(struct kimage *image)
 {
-       char *cur = cmdline, *tmp;
-       unsigned long long total_mem = system_ram;
+       struct page *vmcoreinfo_page;
+       void *safecopy;
+
+       if (!IS_ENABLED(CONFIG_CRASH_DUMP))
+               return 0;
+       if (image->type != KEXEC_TYPE_CRASH)
+               return 0;
 
        /*
-        * Firmware sometimes reserves some memory regions for its own use,
-        * so the system memory size is less than the actual physical memory
-        * size. Work around this by rounding up the total size to 128M,
-        * which is enough for most test cases.
+        * For kdump, allocate one vmcoreinfo safe copy from the
+        * crash memory. Since arch_kexec_protect_crashkres() runs
+        * after the kexec syscall, the copy is naturally protected
+        * from write (and even read) access under the kernel direct
+        * mapping. But we still need to access it when a crash
+        * happens in order to generate the vmcoreinfo note, hence
+        * we rely on vmap for this purpose.
         */
-       total_mem = roundup(total_mem, SZ_128M);
-
-       /* for each entry of the comma-separated list */
-       do {
-               unsigned long long start, end = ULLONG_MAX, size;
-
-               /* get the start of the range */
-               start = memparse(cur, &tmp);
-               if (cur == tmp) {
-                       pr_warn("crashkernel: Memory value expected\n");
-                       return -EINVAL;
-               }
-               cur = tmp;
-               if (*cur != '-') {
-                       pr_warn("crashkernel: '-' expected\n");
-                       return -EINVAL;
-               }
-               cur++;
-
-               /* if no ':' is here, than we read the end */
-               if (*cur != ':') {
-                       end = memparse(cur, &tmp);
-                       if (cur == tmp) {
-                               pr_warn("crashkernel: Memory value expected\n");
-                               return -EINVAL;
-                       }
-                       cur = tmp;
-                       if (end <= start) {
-                               pr_warn("crashkernel: end <= start\n");
-                               return -EINVAL;
-                       }
-               }
-
-               if (*cur != ':') {
-                       pr_warn("crashkernel: ':' expected\n");
-                       return -EINVAL;
-               }
-               cur++;
-
-               size = memparse(cur, &tmp);
-               if (cur == tmp) {
-                       pr_warn("Memory value expected\n");
-                       return -EINVAL;
-               }
-               cur = tmp;
-               if (size >= total_mem) {
-                       pr_warn("crashkernel: invalid size\n");
-                       return -EINVAL;
-               }
-
-               /* match ? */
-               if (total_mem >= start && total_mem < end) {
-                       *crash_size = size;
-                       break;
-               }
-       } while (*cur++ == ',');
-
-       if (*crash_size > 0) {
-               while (*cur && *cur != ' ' && *cur != '@')
-                       cur++;
-               if (*cur == '@') {
-                       cur++;
-                       *crash_base = memparse(cur, &tmp);
-                       if (cur == tmp) {
-                               pr_warn("Memory value expected after '@'\n");
-                               return -EINVAL;
-                       }
-               }
-       } else
-               pr_info("crashkernel size resulted in zero bytes\n");
-
-       return 0;
-}
-
-/*
- * That function parses "simple" (old) crashkernel command lines like
- *
- *     crashkernel=size[@offset]
- *
- * It returns 0 on success and -EINVAL on failure.
- */
-static int __init parse_crashkernel_simple(char *cmdline,
-                                          unsigned long long *crash_size,
-                                          unsigned long long *crash_base)
-{
-       char *cur = cmdline;
-
-       *crash_size = memparse(cmdline, &cur);
-       if (cmdline == cur) {
-               pr_warn("crashkernel: memory value expected\n");
-               return -EINVAL;
-       }
-
-       if (*cur == '@')
-               *crash_base = memparse(cur+1, &cur);
-       else if (*cur != ' ' && *cur != '\0') {
-               pr_warn("crashkernel: unrecognized char: %c\n", *cur);
-               return -EINVAL;
+       vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
+       if (!vmcoreinfo_page) {
+               pr_warn("Could not allocate vmcoreinfo buffer\n");
+               return -ENOMEM;
        }
-
-       return 0;
-}
-
-#define SUFFIX_HIGH 0
-#define SUFFIX_LOW  1
-#define SUFFIX_NULL 2
-static __initdata char *suffix_tbl[] = {
-       [SUFFIX_HIGH] = ",high",
-       [SUFFIX_LOW]  = ",low",
-       [SUFFIX_NULL] = NULL,
-};
-
-/*
- * That function parses "suffix"  crashkernel command lines like
- *
- *     crashkernel=size,[high|low]
- *
- * It returns 0 on success and -EINVAL on failure.
- */
-static int __init parse_crashkernel_suffix(char *cmdline,
-                                          unsigned long long *crash_size,
-                                          const char *suffix)
-{
-       char *cur = cmdline;
-
-       *crash_size = memparse(cmdline, &cur);
-       if (cmdline == cur) {
-               pr_warn("crashkernel: memory value expected\n");
-               return -EINVAL;
+       safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
+       if (!safecopy) {
+               pr_warn("Could not vmap vmcoreinfo buffer\n");
+               return -ENOMEM;
        }
 
-       /* check with suffix */
-       if (strncmp(cur, suffix, strlen(suffix))) {
-               pr_warn("crashkernel: unrecognized char: %c\n", *cur);
-               return -EINVAL;
-       }
-       cur += strlen(suffix);
-       if (*cur != ' ' && *cur != '\0') {
-               pr_warn("crashkernel: unrecognized char: %c\n", *cur);
-               return -EINVAL;
-       }
+       image->vmcoreinfo_data_copy = safecopy;
+       crash_update_vmcoreinfo_safecopy(safecopy);
 
        return 0;
 }
 
-static __init char *get_last_crashkernel(char *cmdline,
-                            const char *name,
-                            const char *suffix)
-{
-       char *p = cmdline, *ck_cmdline = NULL;
-
-       /* find crashkernel and use the last one if there are more */
-       p = strstr(p, name);
-       while (p) {
-               char *end_p = strchr(p, ' ');
-               char *q;
-
-               if (!end_p)
-                       end_p = p + strlen(p);
-
-               if (!suffix) {
-                       int i;
-
-                       /* skip the one with any known suffix */
-                       for (i = 0; suffix_tbl[i]; i++) {
-                               q = end_p - strlen(suffix_tbl[i]);
-                               if (!strncmp(q, suffix_tbl[i],
-                                            strlen(suffix_tbl[i])))
-                                       goto next;
-                       }
-                       ck_cmdline = p;
-               } else {
-                       q = end_p - strlen(suffix);
-                       if (!strncmp(q, suffix, strlen(suffix)))
-                               ck_cmdline = p;
-               }
-next:
-               p = strstr(p+1, name);
-       }
 
-       return ck_cmdline;
-}
 
-static int __init __parse_crashkernel(char *cmdline,
-                            unsigned long long system_ram,
-                            unsigned long long *crash_size,
-                            unsigned long long *crash_base,
-                            const char *suffix)
+int kexec_should_crash(struct task_struct *p)
 {
-       char *first_colon, *first_space;
-       char *ck_cmdline;
-       char *name = "crashkernel=";
-
-       BUG_ON(!crash_size || !crash_base);
-       *crash_size = 0;
-       *crash_base = 0;
-
-       ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
-       if (!ck_cmdline)
-               return -ENOENT;
-
-       ck_cmdline += strlen(name);
-
-       if (suffix)
-               return parse_crashkernel_suffix(ck_cmdline, crash_size,
-                               suffix);
        /*
-        * if the commandline contains a ':', then that's the extended
-        * syntax -- if not, it must be the classic syntax
+        * If crash_kexec_post_notifiers is enabled, don't run
+        * crash_kexec() here yet, which must be run after panic
+        * notifiers in panic().
         */
-       first_colon = strchr(ck_cmdline, ':');
-       first_space = strchr(ck_cmdline, ' ');
-       if (first_colon && (!first_space || first_colon < first_space))
-               return parse_crashkernel_mem(ck_cmdline, system_ram,
-                               crash_size, crash_base);
-
-       return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
-}
-
-/*
- * That function is the entry point for command line parsing and should be
- * called from the arch-specific code.
- *
- * If crashkernel=,high|low is supported on architecture, non-NULL values
- * should be passed to parameters 'low_size' and 'high'.
- */
-int __init parse_crashkernel(char *cmdline,
-                            unsigned long long system_ram,
-                            unsigned long long *crash_size,
-                            unsigned long long *crash_base,
-                            unsigned long long *low_size,
-                            bool *high)
-{
-       int ret;
-
-       /* crashkernel=X[@offset] */
-       ret = __parse_crashkernel(cmdline, system_ram, crash_size,
-                               crash_base, NULL);
-#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+       if (crash_kexec_post_notifiers)
+               return 0;
        /*
-        * If non-NULL 'high' passed in and no normal crashkernel
-        * setting detected, try parsing crashkernel=,high|low.
+        * There are 4 panic() calls in make_task_dead() path, each of which
+        * corresponds to each of these 4 conditions.
         */
-       if (high && ret == -ENOENT) {
-               ret = __parse_crashkernel(cmdline, 0, crash_size,
-                               crash_base, suffix_tbl[SUFFIX_HIGH]);
-               if (ret || !*crash_size)
-                       return -EINVAL;
-
-               /*
-                * crashkernel=Y,low can be specified or not, but invalid value
-                * is not allowed.
-                */
-               ret = __parse_crashkernel(cmdline, 0, low_size,
-                               crash_base, suffix_tbl[SUFFIX_LOW]);
-               if (ret == -ENOENT) {
-                       *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
-                       ret = 0;
-               } else if (ret) {
-                       return ret;
-               }
-
-               *high = true;
-       }
-#endif
-       if (!*crash_size)
-               ret = -EINVAL;
-
-       return ret;
+       if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
+               return 1;
+       return 0;
 }
 
-/*
- * Add a dummy early_param handler to mark crashkernel= as a known command line
- * parameter and suppress incorrect warnings in init/main.c.
- */
-static int __init parse_crashkernel_dummy(char *arg)
+int kexec_crash_loaded(void)
 {
-       return 0;
+       return !!kexec_crash_image;
 }
-early_param("crashkernel", parse_crashkernel_dummy);
+EXPORT_SYMBOL_GPL(kexec_crash_loaded);
 
-#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
-static int __init reserve_crashkernel_low(unsigned long long low_size)
+/*
+ * No panic_cpu check version of crash_kexec().  This function is called
+ * only when panic_cpu holds the current CPU number; this is the only CPU
+ * which processes crash_kexec routines.
+ */
+void __noclone __crash_kexec(struct pt_regs *regs)
 {
-#ifdef CONFIG_64BIT
-       unsigned long long low_base;
-
-       low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
-       if (!low_base) {
-               pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
-               return -ENOMEM;
+       /* Take the kexec_lock here to prevent sys_kexec_load
+        * running on one cpu from replacing the crash kernel
+        * we are using after a panic on a different cpu.
+        *
+        * If the crash kernel was not located in a fixed area
+        * of memory the xchg(&kexec_crash_image) would be
+        * sufficient.  But since I reuse the memory...
+        */
+       if (kexec_trylock()) {
+               if (kexec_crash_image) {
+                       struct pt_regs fixed_regs;
+
+                       crash_setup_regs(&fixed_regs, regs);
+                       crash_save_vmcoreinfo();
+                       machine_crash_shutdown(&fixed_regs);
+                       machine_kexec(kexec_crash_image);
+               }
+               kexec_unlock();
        }
-
-       pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
-               low_base, low_base + low_size, low_size >> 20);
-
-       crashk_low_res.start = low_base;
-       crashk_low_res.end   = low_base + low_size - 1;
-#endif
-       return 0;
 }
+STACK_FRAME_NON_STANDARD(__crash_kexec);
 
-void __init reserve_crashkernel_generic(char *cmdline,
-                            unsigned long long crash_size,
-                            unsigned long long crash_base,
-                            unsigned long long crash_low_size,
-                            bool high)
+__bpf_kfunc void crash_kexec(struct pt_regs *regs)
 {
-       unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0;
-       bool fixed_base = false;
-
-       /* User specifies base address explicitly. */
-       if (crash_base) {
-               fixed_base = true;
-               search_base = crash_base;
-               search_end = crash_base + crash_size;
-       } else if (high) {
-               search_base = CRASH_ADDR_LOW_MAX;
-               search_end = CRASH_ADDR_HIGH_MAX;
-       }
+       int old_cpu, this_cpu;
 
-retry:
-       crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
-                                              search_base, search_end);
-       if (!crash_base) {
-               /*
-                * For crashkernel=size[KMG]@offset[KMG], print out failure
-                * message if can't reserve the specified region.
-                */
-               if (fixed_base) {
-                       pr_warn("crashkernel reservation failed - memory is in use.\n");
-                       return;
-               }
+       /*
+        * Only one CPU is allowed to execute the crash_kexec() code as with
+        * panic().  Otherwise parallel calls of panic() and crash_kexec()
+        * may stop each other.  To exclude them, we use panic_cpu here too.
+        */
+       old_cpu = PANIC_CPU_INVALID;
+       this_cpu = raw_smp_processor_id();
 
-               /*
-                * For crashkernel=size[KMG], if the first attempt was for
-                * low memory, fall back to high memory, the minimum required
-                * low memory will be reserved later.
-                */
-               if (!high && search_end == CRASH_ADDR_LOW_MAX) {
-                       search_end = CRASH_ADDR_HIGH_MAX;
-                       search_base = CRASH_ADDR_LOW_MAX;
-                       crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
-                       goto retry;
-               }
+       if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) {
+               /* This is the 1st CPU which comes here, so go ahead. */
+               __crash_kexec(regs);
 
                /*
-                * For crashkernel=size[KMG],high, if the first attempt was
-                * for high memory, fall back to low memory.
+                * Reset panic_cpu to allow another panic()/crash_kexec()
+                * call.
                 */
-               if (high && search_end == CRASH_ADDR_HIGH_MAX) {
-                       search_end = CRASH_ADDR_LOW_MAX;
-                       search_base = 0;
-                       goto retry;
-               }
-               pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
-                       crash_size);
-               return;
-       }
-
-       if ((crash_base >= CRASH_ADDR_LOW_MAX) &&
-            crash_low_size && reserve_crashkernel_low(crash_low_size)) {
-               memblock_phys_free(crash_base, crash_size);
-               return;
+               atomic_set(&panic_cpu, PANIC_CPU_INVALID);
        }
-
-       pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
-               crash_base, crash_base + crash_size, crash_size >> 20);
-
-       /*
-        * The crashkernel memory will be removed from the kernel linear
-        * map. Inform kmemleak so that it won't try to access it.
-        */
-       kmemleak_ignore_phys(crash_base);
-       if (crashk_low_res.end)
-               kmemleak_ignore_phys(crashk_low_res.start);
-
-       crashk_res.start = crash_base;
-       crashk_res.end = crash_base + crash_size - 1;
 }
 
-static __init int insert_crashkernel_resources(void)
+static inline resource_size_t crash_resource_size(const struct resource *res)
 {
-       if (crashk_res.start < crashk_res.end)
-               insert_resource(&iomem_resource, &crashk_res);
+       return !res->end ? 0 : resource_size(res);
+}
+
 
-       if (crashk_low_res.start < crashk_low_res.end)
-               insert_resource(&iomem_resource, &crashk_low_res);
 
-       return 0;
-}
-early_initcall(insert_crashkernel_resources);
-#endif
 
 int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
                          void **addr, unsigned long *sz)
@@ -633,205 +317,129 @@ int crash_exclude_mem_range(struct crash_mem *mem,
        return 0;
 }
 
-Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
-                         void *data, size_t data_len)
+ssize_t crash_get_memory_size(void)
 {
-       struct elf_note *note = (struct elf_note *)buf;
-
-       note->n_namesz = strlen(name) + 1;
-       note->n_descsz = data_len;
-       note->n_type   = type;
-       buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
-       memcpy(buf, name, note->n_namesz);
-       buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
-       memcpy(buf, data, data_len);
-       buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));
-
-       return buf;
-}
+       ssize_t size = 0;
 
-void final_note(Elf_Word *buf)
-{
-       memset(buf, 0, sizeof(struct elf_note));
-}
+       if (!kexec_trylock())
+               return -EBUSY;
 
-static void update_vmcoreinfo_note(void)
-{
-       u32 *buf = vmcoreinfo_note;
+       size += crash_resource_size(&crashk_res);
+       size += crash_resource_size(&crashk_low_res);
 
-       if (!vmcoreinfo_size)
-               return;
-       buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
-                             vmcoreinfo_size);
-       final_note(buf);
+       kexec_unlock();
+       return size;
 }
 
-void crash_update_vmcoreinfo_safecopy(void *ptr)
+static int __crash_shrink_memory(struct resource *old_res,
+                                unsigned long new_size)
 {
-       if (ptr)
-               memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
+       struct resource *ram_res;
 
-       vmcoreinfo_data_safecopy = ptr;
-}
+       ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
+       if (!ram_res)
+               return -ENOMEM;
 
-void crash_save_vmcoreinfo(void)
-{
-       if (!vmcoreinfo_note)
-               return;
+       ram_res->start = old_res->start + new_size;
+       ram_res->end   = old_res->end;
+       ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
+       ram_res->name  = "System RAM";
+
+       if (!new_size) {
+               release_resource(old_res);
+               old_res->start = 0;
+               old_res->end   = 0;
+       } else {
+               crashk_res.end = ram_res->start - 1;
+       }
 
-       /* Use the safe copy to generate vmcoreinfo note if have */
-       if (vmcoreinfo_data_safecopy)
-               vmcoreinfo_data = vmcoreinfo_data_safecopy;
+       crash_free_reserved_phys_range(ram_res->start, ram_res->end);
+       insert_resource(&iomem_resource, ram_res);
 
-       vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
-       update_vmcoreinfo_note();
+       return 0;
 }
 
-void vmcoreinfo_append_str(const char *fmt, ...)
+int crash_shrink_memory(unsigned long new_size)
 {
-       va_list args;
-       char buf[0x50];
-       size_t r;
-
-       va_start(args, fmt);
-       r = vscnprintf(buf, sizeof(buf), fmt, args);
-       va_end(args);
-
-       r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
-
-       memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
-
-       vmcoreinfo_size += r;
-
-       WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES,
-                 "vmcoreinfo data exceeds allocated size, truncating");
-}
+       int ret = 0;
+       unsigned long old_size, low_size;
 
-/*
- * provide an empty default implementation here -- architecture
- * code may override this
- */
-void __weak arch_crash_save_vmcoreinfo(void)
-{}
+       if (!kexec_trylock())
+               return -EBUSY;
 
-phys_addr_t __weak paddr_vmcoreinfo_note(void)
-{
-       return __pa(vmcoreinfo_note);
-}
-EXPORT_SYMBOL(paddr_vmcoreinfo_note);
+       if (kexec_crash_image) {
+               ret = -ENOENT;
+               goto unlock;
+       }
 
-static int __init crash_save_vmcoreinfo_init(void)
-{
-       vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
-       if (!vmcoreinfo_data) {
-               pr_warn("Memory allocation for vmcoreinfo_data failed\n");
-               return -ENOMEM;
+       low_size = crash_resource_size(&crashk_low_res);
+       old_size = crash_resource_size(&crashk_res) + low_size;
+       new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN);
+       if (new_size >= old_size) {
+               ret = (new_size == old_size) ? 0 : -EINVAL;
+               goto unlock;
        }
 
-       vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
-                                               GFP_KERNEL | __GFP_ZERO);
-       if (!vmcoreinfo_note) {
-               free_page((unsigned long)vmcoreinfo_data);
-               vmcoreinfo_data = NULL;
-               pr_warn("Memory allocation for vmcoreinfo_note failed\n");
-               return -ENOMEM;
+       /*
+        * (low_size > new_size) implies that low_size is greater than zero.
+        * This also means that if low_size is zero, the else branch is taken.
+        *
+        * If low_size is greater than 0, (low_size > new_size) indicates that
+        * crashk_low_res also needs to be shrunk. Otherwise, only crashk_res
+        * needs to be shrunk.
+        */
+       if (low_size > new_size) {
+               ret = __crash_shrink_memory(&crashk_res, 0);
+               if (ret)
+                       goto unlock;
+
+               ret = __crash_shrink_memory(&crashk_low_res, new_size);
+       } else {
+               ret = __crash_shrink_memory(&crashk_res, new_size - low_size);
        }
 
-       VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
-       VMCOREINFO_BUILD_ID();
-       VMCOREINFO_PAGESIZE(PAGE_SIZE);
+       /* Swap crashk_res and crashk_low_res if needed */
+       if (!crashk_res.end && crashk_low_res.end) {
+               crashk_res.start = crashk_low_res.start;
+               crashk_res.end   = crashk_low_res.end;
+               release_resource(&crashk_low_res);
+               crashk_low_res.start = 0;
+               crashk_low_res.end   = 0;
+               insert_resource(&iomem_resource, &crashk_res);
+       }
 
-       VMCOREINFO_SYMBOL(init_uts_ns);
-       VMCOREINFO_OFFSET(uts_namespace, name);
-       VMCOREINFO_SYMBOL(node_online_map);
-#ifdef CONFIG_MMU
-       VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
-#endif
-       VMCOREINFO_SYMBOL(_stext);
-       VMCOREINFO_SYMBOL(vmap_area_list);
+unlock:
+       kexec_unlock();
+       return ret;
+}
 
-#ifndef CONFIG_NUMA
-       VMCOREINFO_SYMBOL(mem_map);
-       VMCOREINFO_SYMBOL(contig_page_data);
-#endif
-#ifdef CONFIG_SPARSEMEM
-       VMCOREINFO_SYMBOL_ARRAY(mem_section);
-       VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
-       VMCOREINFO_STRUCT_SIZE(mem_section);
-       VMCOREINFO_OFFSET(mem_section, section_mem_map);
-       VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
-       VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
-#endif
-       VMCOREINFO_STRUCT_SIZE(page);
-       VMCOREINFO_STRUCT_SIZE(pglist_data);
-       VMCOREINFO_STRUCT_SIZE(zone);
-       VMCOREINFO_STRUCT_SIZE(free_area);
-       VMCOREINFO_STRUCT_SIZE(list_head);
-       VMCOREINFO_SIZE(nodemask_t);
-       VMCOREINFO_OFFSET(page, flags);
-       VMCOREINFO_OFFSET(page, _refcount);
-       VMCOREINFO_OFFSET(page, mapping);
-       VMCOREINFO_OFFSET(page, lru);
-       VMCOREINFO_OFFSET(page, _mapcount);
-       VMCOREINFO_OFFSET(page, private);
-       VMCOREINFO_OFFSET(page, compound_head);
-       VMCOREINFO_OFFSET(pglist_data, node_zones);
-       VMCOREINFO_OFFSET(pglist_data, nr_zones);
-#ifdef CONFIG_FLATMEM
-       VMCOREINFO_OFFSET(pglist_data, node_mem_map);
-#endif
-       VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
-       VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
-       VMCOREINFO_OFFSET(pglist_data, node_id);
-       VMCOREINFO_OFFSET(zone, free_area);
-       VMCOREINFO_OFFSET(zone, vm_stat);
-       VMCOREINFO_OFFSET(zone, spanned_pages);
-       VMCOREINFO_OFFSET(free_area, free_list);
-       VMCOREINFO_OFFSET(list_head, next);
-       VMCOREINFO_OFFSET(list_head, prev);
-       VMCOREINFO_OFFSET(vmap_area, va_start);
-       VMCOREINFO_OFFSET(vmap_area, list);
-       VMCOREINFO_LENGTH(zone.free_area, NR_PAGE_ORDERS);
-       log_buf_vmcoreinfo_setup();
-       VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
-       VMCOREINFO_NUMBER(NR_FREE_PAGES);
-       VMCOREINFO_NUMBER(PG_lru);
-       VMCOREINFO_NUMBER(PG_private);
-       VMCOREINFO_NUMBER(PG_swapcache);
-       VMCOREINFO_NUMBER(PG_swapbacked);
-       VMCOREINFO_NUMBER(PG_slab);
-#ifdef CONFIG_MEMORY_FAILURE
-       VMCOREINFO_NUMBER(PG_hwpoison);
-#endif
-       VMCOREINFO_NUMBER(PG_head_mask);
-#define PAGE_BUDDY_MAPCOUNT_VALUE      (~PG_buddy)
-       VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
-#ifdef CONFIG_HUGETLB_PAGE
-       VMCOREINFO_NUMBER(PG_hugetlb);
-#define PAGE_OFFLINE_MAPCOUNT_VALUE    (~PG_offline)
-       VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
-#endif
+void crash_save_cpu(struct pt_regs *regs, int cpu)
+{
+       struct elf_prstatus prstatus;
+       u32 *buf;
 
-#ifdef CONFIG_KALLSYMS
-       VMCOREINFO_SYMBOL(kallsyms_names);
-       VMCOREINFO_SYMBOL(kallsyms_num_syms);
-       VMCOREINFO_SYMBOL(kallsyms_token_table);
-       VMCOREINFO_SYMBOL(kallsyms_token_index);
-#ifdef CONFIG_KALLSYMS_BASE_RELATIVE
-       VMCOREINFO_SYMBOL(kallsyms_offsets);
-       VMCOREINFO_SYMBOL(kallsyms_relative_base);
-#else
-       VMCOREINFO_SYMBOL(kallsyms_addresses);
-#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */
-#endif /* CONFIG_KALLSYMS */
-
-       arch_crash_save_vmcoreinfo();
-       update_vmcoreinfo_note();
+       if ((cpu < 0) || (cpu >= nr_cpu_ids))
+               return;
 
-       return 0;
+       /* Using ELF notes here is opportunistic.
+        * I need a well defined structure format
+        * for the data I pass, and I need tags
+        * on the data to indicate what information I have
+        * squirrelled away.  ELF notes happen to provide
+        * all of that, so there is no need to invent something new.
+        */
+       buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
+       if (!buf)
+               return;
+       memset(&prstatus, 0, sizeof(prstatus));
+       prstatus.common.pr_pid = current->pid;
+       elf_core_copy_regs(&prstatus.pr_reg, regs);
+       buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
+                             &prstatus, sizeof(prstatus));
+       final_note(buf);
 }
 
-subsys_initcall(crash_save_vmcoreinfo_init);
+
 
 static int __init crash_notes_memory_init(void)
 {
@@ -866,6 +474,8 @@ static int __init crash_notes_memory_init(void)
 }
 subsys_initcall(crash_notes_memory_init);
 
+#endif /*CONFIG_CRASH_DUMP*/
+
 #ifdef CONFIG_CRASH_HOTPLUG
 #undef pr_fmt
 #define pr_fmt(fmt) "crash hp: " fmt
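
As a worked example of the shrink logic above (driven by writing to /sys/kernel/kexec_crash_size): with crashk_res = 512M and crashk_low_res = 256M, old_size is 768M. A request of 600M takes the else branch and shrinks crashk_res to 600M - 256M = 344M. A request of 128M has low_size > new_size, so crashk_res is released entirely, crashk_low_res is shrunk to 128M, and the final swap leaves the remaining 128M in crashk_res.
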
diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c
new file mode 100644 (file)
index 0000000..bbb6c3c
--- /dev/null
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * crash_reserve.c - crashkernel memory reservation support code.
+ * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
+ */
+
+#include <linux/buildid.h>
+#include <linux/init.h>
+#include <linux/utsname.h>
+#include <linux/vmalloc.h>
+#include <linux/sizes.h>
+#include <linux/kexec.h>
+#include <linux/memory.h>
+#include <linux/cpuhotplug.h>
+#include <linux/memblock.h>
+#include <linux/kmemleak.h>
+
+#include <asm/page.h>
+#include <asm/sections.h>
+
+#include <crypto/sha1.h>
+
+#include "kallsyms_internal.h"
+#include "kexec_internal.h"
+
+/* Location of the reserved area for the crash kernel */
+struct resource crashk_res = {
+       .name  = "Crash kernel",
+       .start = 0,
+       .end   = 0,
+       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+       .desc  = IORES_DESC_CRASH_KERNEL
+};
+struct resource crashk_low_res = {
+       .name  = "Crash kernel",
+       .start = 0,
+       .end   = 0,
+       .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
+       .desc  = IORES_DESC_CRASH_KERNEL
+};
+
+/*
+ * parsing the "crashkernel" commandline
+ *
+ * this code is intended to be called from architecture specific code
+ */
+
+
+/*
+ * This function parses command lines in the format
+ *
+ *   crashkernel=ramsize-range:size[,...][@offset]
+ *
+ * The function returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_mem(char *cmdline,
+                                       unsigned long long system_ram,
+                                       unsigned long long *crash_size,
+                                       unsigned long long *crash_base)
+{
+       char *cur = cmdline, *tmp;
+       unsigned long long total_mem = system_ram;
+
+       /*
+        * Firmware sometimes reserves some memory regions for its own use,
+        * so the system memory size is less than the actual physical memory
+        * size. Work around this by rounding up the total size to 128M,
+        * which is enough for most test cases.
+        */
+       total_mem = roundup(total_mem, SZ_128M);
+
+       /* for each entry of the comma-separated list */
+       do {
+               unsigned long long start, end = ULLONG_MAX, size;
+
+               /* get the start of the range */
+               start = memparse(cur, &tmp);
+               if (cur == tmp) {
+                       pr_warn("crashkernel: Memory value expected\n");
+                       return -EINVAL;
+               }
+               cur = tmp;
+               if (*cur != '-') {
+                       pr_warn("crashkernel: '-' expected\n");
+                       return -EINVAL;
+               }
+               cur++;
+
+               /* if no ':' is here, then we read the end */
+               if (*cur != ':') {
+                       end = memparse(cur, &tmp);
+                       if (cur == tmp) {
+                               pr_warn("crashkernel: Memory value expected\n");
+                               return -EINVAL;
+                       }
+                       cur = tmp;
+                       if (end <= start) {
+                               pr_warn("crashkernel: end <= start\n");
+                               return -EINVAL;
+                       }
+               }
+
+               if (*cur != ':') {
+                       pr_warn("crashkernel: ':' expected\n");
+                       return -EINVAL;
+               }
+               cur++;
+
+               size = memparse(cur, &tmp);
+               if (cur == tmp) {
+                       pr_warn("Memory value expected\n");
+                       return -EINVAL;
+               }
+               cur = tmp;
+               if (size >= total_mem) {
+                       pr_warn("crashkernel: invalid size\n");
+                       return -EINVAL;
+               }
+
+               /* match ? */
+               if (total_mem >= start && total_mem < end) {
+                       *crash_size = size;
+                       break;
+               }
+       } while (*cur++ == ',');
+
+       if (*crash_size > 0) {
+               while (*cur && *cur != ' ' && *cur != '@')
+                       cur++;
+               if (*cur == '@') {
+                       cur++;
+                       *crash_base = memparse(cur, &tmp);
+                       if (cur == tmp) {
+                               pr_warn("Memory value expected after '@'\n");
+                               return -EINVAL;
+                       }
+               }
+       } else
+               pr_info("crashkernel size resulted in zero bytes\n");
+
+       return 0;
+}
+
+/*
+ * This function parses "simple" (old) crashkernel command lines like
+ *
+ *     crashkernel=size[@offset]
+ *
+ * It returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_simple(char *cmdline,
+                                          unsigned long long *crash_size,
+                                          unsigned long long *crash_base)
+{
+       char *cur = cmdline;
+
+       *crash_size = memparse(cmdline, &cur);
+       if (cmdline == cur) {
+               pr_warn("crashkernel: memory value expected\n");
+               return -EINVAL;
+       }
+
+       if (*cur == '@')
+               *crash_base = memparse(cur+1, &cur);
+       else if (*cur != ' ' && *cur != '\0') {
+               pr_warn("crashkernel: unrecognized char: %c\n", *cur);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+#define SUFFIX_HIGH 0
+#define SUFFIX_LOW  1
+#define SUFFIX_NULL 2
+static __initdata char *suffix_tbl[] = {
+       [SUFFIX_HIGH] = ",high",
+       [SUFFIX_LOW]  = ",low",
+       [SUFFIX_NULL] = NULL,
+};
+
+/*
+ * This function parses "suffix" crashkernel command lines like
+ *
+ *     crashkernel=size,[high|low]
+ *
+ * It returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_suffix(char *cmdline,
+                                          unsigned long long *crash_size,
+                                          const char *suffix)
+{
+       char *cur = cmdline;
+
+       *crash_size = memparse(cmdline, &cur);
+       if (cmdline == cur) {
+               pr_warn("crashkernel: memory value expected\n");
+               return -EINVAL;
+       }
+
+       /* check with suffix */
+       if (strncmp(cur, suffix, strlen(suffix))) {
+               pr_warn("crashkernel: unrecognized char: %c\n", *cur);
+               return -EINVAL;
+       }
+       cur += strlen(suffix);
+       if (*cur != ' ' && *cur != '\0') {
+               pr_warn("crashkernel: unrecognized char: %c\n", *cur);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static __init char *get_last_crashkernel(char *cmdline,
+                            const char *name,
+                            const char *suffix)
+{
+       char *p = cmdline, *ck_cmdline = NULL;
+
+       /* find crashkernel and use the last one if there are more */
+       p = strstr(p, name);
+       while (p) {
+               char *end_p = strchr(p, ' ');
+               char *q;
+
+               if (!end_p)
+                       end_p = p + strlen(p);
+
+               if (!suffix) {
+                       int i;
+
+                       /* skip the one with any known suffix */
+                       for (i = 0; suffix_tbl[i]; i++) {
+                               q = end_p - strlen(suffix_tbl[i]);
+                               if (!strncmp(q, suffix_tbl[i],
+                                            strlen(suffix_tbl[i])))
+                                       goto next;
+                       }
+                       ck_cmdline = p;
+               } else {
+                       q = end_p - strlen(suffix);
+                       if (!strncmp(q, suffix, strlen(suffix)))
+                               ck_cmdline = p;
+               }
+next:
+               p = strstr(p+1, name);
+       }
+
+       return ck_cmdline;
+}
+
+static int __init __parse_crashkernel(char *cmdline,
+                            unsigned long long system_ram,
+                            unsigned long long *crash_size,
+                            unsigned long long *crash_base,
+                            const char *suffix)
+{
+       char *first_colon, *first_space;
+       char *ck_cmdline;
+       char *name = "crashkernel=";
+
+       BUG_ON(!crash_size || !crash_base);
+       *crash_size = 0;
+       *crash_base = 0;
+
+       ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
+       if (!ck_cmdline)
+               return -ENOENT;
+
+       ck_cmdline += strlen(name);
+
+       if (suffix)
+               return parse_crashkernel_suffix(ck_cmdline, crash_size,
+                               suffix);
+       /*
+        * if the commandline contains a ':', then that's the extended
+        * syntax -- if not, it must be the classic syntax
+        */
+       first_colon = strchr(ck_cmdline, ':');
+       first_space = strchr(ck_cmdline, ' ');
+       if (first_colon && (!first_space || first_colon < first_space))
+               return parse_crashkernel_mem(ck_cmdline, system_ram,
+                               crash_size, crash_base);
+
+       return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
+}
+
+/*
+ * This function is the entry point for command line parsing and should be
+ * called from the arch-specific code.
+ *
+ * If crashkernel=,high|low is supported on architecture, non-NULL values
+ * should be passed to parameters 'low_size' and 'high'.
+ */
+int __init parse_crashkernel(char *cmdline,
+                            unsigned long long system_ram,
+                            unsigned long long *crash_size,
+                            unsigned long long *crash_base,
+                            unsigned long long *low_size,
+                            bool *high)
+{
+       int ret;
+
+       /* crashkernel=X[@offset] */
+       ret = __parse_crashkernel(cmdline, system_ram, crash_size,
+                               crash_base, NULL);
+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+       /*
+        * If non-NULL 'high' passed in and no normal crashkernel
+        * setting detected, try parsing crashkernel=,high|low.
+        */
+       if (high && ret == -ENOENT) {
+               ret = __parse_crashkernel(cmdline, 0, crash_size,
+                               crash_base, suffix_tbl[SUFFIX_HIGH]);
+               if (ret || !*crash_size)
+                       return -EINVAL;
+
+               /*
+                * crashkernel=Y,low can be specified or not, but invalid value
+                * is not allowed.
+                */
+               ret = __parse_crashkernel(cmdline, 0, low_size,
+                               crash_base, suffix_tbl[SUFFIX_LOW]);
+               if (ret == -ENOENT) {
+                       *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+                       ret = 0;
+               } else if (ret) {
+                       return ret;
+               }
+
+               *high = true;
+       }
+#endif
+       if (!*crash_size)
+               ret = -EINVAL;
+
+       return ret;
+}
+
+/*
+ * Add a dummy early_param handler to mark crashkernel= as a known command line
+ * parameter and suppress incorrect warnings in init/main.c.
+ */
+static int __init parse_crashkernel_dummy(char *arg)
+{
+       return 0;
+}
+early_param("crashkernel", parse_crashkernel_dummy);
+
+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+static int __init reserve_crashkernel_low(unsigned long long low_size)
+{
+#ifdef CONFIG_64BIT
+       unsigned long long low_base;
+
+       low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
+       if (!low_base) {
+               pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
+               return -ENOMEM;
+       }
+
+       pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
+               low_base, low_base + low_size, low_size >> 20);
+
+       crashk_low_res.start = low_base;
+       crashk_low_res.end   = low_base + low_size - 1;
+       insert_resource(&iomem_resource, &crashk_low_res);
+#endif
+       return 0;
+}
+
+void __init reserve_crashkernel_generic(char *cmdline,
+                            unsigned long long crash_size,
+                            unsigned long long crash_base,
+                            unsigned long long crash_low_size,
+                            bool high)
+{
+       unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0;
+       bool fixed_base = false;
+
+       /* User specifies base address explicitly. */
+       if (crash_base) {
+               fixed_base = true;
+               search_base = crash_base;
+               search_end = crash_base + crash_size;
+       } else if (high) {
+               search_base = CRASH_ADDR_LOW_MAX;
+               search_end = CRASH_ADDR_HIGH_MAX;
+       }
+
+retry:
+       crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
+                                              search_base, search_end);
+       if (!crash_base) {
+               /*
+                * For crashkernel=size[KMG]@offset[KMG], print out failure
+                * message if can't reserve the specified region.
+                */
+               if (fixed_base) {
+                       pr_warn("crashkernel reservation failed - memory is in use.\n");
+                       return;
+               }
+
+               /*
+                * For crashkernel=size[KMG], if the first attempt was for
+                * low memory, fall back to high memory, the minimum required
+                * low memory will be reserved later.
+                */
+               if (!high && search_end == CRASH_ADDR_LOW_MAX) {
+                       search_end = CRASH_ADDR_HIGH_MAX;
+                       search_base = CRASH_ADDR_LOW_MAX;
+                       crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+                       goto retry;
+               }
+
+               /*
+                * For crashkernel=size[KMG],high, if the first attempt was
+                * for high memory, fall back to low memory.
+                */
+               if (high && search_end == CRASH_ADDR_HIGH_MAX) {
+                       search_end = CRASH_ADDR_LOW_MAX;
+                       search_base = 0;
+                       goto retry;
+               }
+               pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
+                       crash_size);
+               return;
+       }
+
+       if ((crash_base >= CRASH_ADDR_LOW_MAX) &&
+            crash_low_size && reserve_crashkernel_low(crash_low_size)) {
+               memblock_phys_free(crash_base, crash_size);
+               return;
+       }
+
+       pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+               crash_base, crash_base + crash_size, crash_size >> 20);
+
+       /*
+        * The crashkernel memory will be removed from the kernel linear
+        * map. Inform kmemleak so that it won't try to access it.
+        */
+       kmemleak_ignore_phys(crash_base);
+       if (crashk_low_res.end)
+               kmemleak_ignore_phys(crashk_low_res.start);
+
+       crashk_res.start = crash_base;
+       crashk_res.end = crash_base + crash_size - 1;
+}
+
+static __init int insert_crashkernel_resources(void)
+{
+       if (crashk_res.start < crashk_res.end)
+               insert_resource(&iomem_resource, &crashk_res);
+
+       if (crashk_low_res.start < crashk_low_res.end)
+               insert_resource(&iomem_resource, &crashk_low_res);
+
+       return 0;
+}
+early_initcall(insert_crashkernel_resources);
+#endif
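
An architecture opting into CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
would typically parse the command line first and then hand the result to the
generic helper above. A minimal sketch, assuming the six-argument
parse_crashkernel() prototype used by this series (the arch function name is
illustrative):

    static void __init arch_reserve_crashkernel(void)
    {
            unsigned long long crash_size, crash_base, low_size = 0;
            char *cmdline = boot_command_line;
            bool high = false;
            int ret;

            ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
                                    &crash_size, &crash_base,
                                    &low_size, &high);
            if (ret)
                    return;

            reserve_crashkernel_generic(cmdline, crash_size, crash_base,
                                        low_size, high);
    }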
index f005c66f378c32e9bae2d84b0ca80d1c126f0c15..055da410ac71d6b8a3d7100fa8000c53daa3f44e 100644 (file)
 
 #define pr_fmt(fmt) "cma: " fmt
 
-#ifdef CONFIG_CMA_DEBUG
-#ifndef DEBUG
-#  define DEBUG
-#endif
-#endif
-
 #include <asm/page.h>
 
 #include <linux/memblock.h>
similarity index 100%
rename from kernel/crash_dump.c
rename to kernel/elfcorehdr.c
index 929e98c629652a0fef1b71e6c002cca41936c4b4..e4834d23e1d1a209dd6f7ff34c7c9e30a56334fe 100644 (file)
@@ -188,7 +188,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
                dec_mm_counter(mm, MM_ANONPAGES);
 
        if (!folio_test_anon(old_folio)) {
-               dec_mm_counter(mm, mm_counter_file(old_page));
+               dec_mm_counter(mm, mm_counter_file(old_folio));
                inc_mm_counter(mm, MM_ANONPAGES);
        }
 
index 9a24574988d23020b8fab1b483542521bf55a71a..b2fc2727d65441a3876c72c6d8aa374fda155b8f 100644 (file)
@@ -43,6 +43,7 @@ static int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
  * Zero means infinite timeout - no checking done:
  */
 unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
+EXPORT_SYMBOL_GPL(sysctl_hung_task_timeout_secs);
 
 /*
  * Zero (default value) means use sysctl_hung_task_timeout_secs:
index b4cac76ea5e989544c9a18f40eda820dc3cef6d0..8a689b4ff4f98223158e6700bcb9f579d8518802 100644 (file)
@@ -89,7 +89,6 @@ static struct test_item test_items[] = {
        ITEM_DATA(kallsyms_test_var_data_static),
        ITEM_DATA(kallsyms_test_var_bss),
        ITEM_DATA(kallsyms_test_var_data),
-       ITEM_DATA(vmap_area_list),
 #endif
 };
 
index 8f35a5a42af852c300d24935c11d340fd48c6afb..bab542fc1463d24bda47efd22b6f60a8b066fe9e 100644 (file)
@@ -28,12 +28,14 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
        struct kimage *image;
        bool kexec_on_panic = flags & KEXEC_ON_CRASH;
 
+#ifdef CONFIG_CRASH_DUMP
        if (kexec_on_panic) {
                /* Verify we have a valid entry point */
                if ((entry < phys_to_boot_phys(crashk_res.start)) ||
                    (entry > phys_to_boot_phys(crashk_res.end)))
                        return -EADDRNOTAVAIL;
        }
+#endif
 
        /* Allocate and initialize a controlling structure */
        image = do_kimage_alloc_init();
@@ -44,11 +46,13 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
        image->nr_segments = nr_segments;
        memcpy(image->segment, segments, nr_segments * sizeof(*segments));
 
+#ifdef CONFIG_CRASH_DUMP
        if (kexec_on_panic) {
                /* Enable special crash kernel control page alloc policy. */
                image->control_page = crashk_res.start;
                image->type = KEXEC_TYPE_CRASH;
        }
+#endif
 
        ret = sanity_check_segment_list(image);
        if (ret)
@@ -99,13 +103,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
        if (!kexec_trylock())
                return -EBUSY;
 
+#ifdef CONFIG_CRASH_DUMP
        if (flags & KEXEC_ON_CRASH) {
                dest_image = &kexec_crash_image;
                if (kexec_crash_image)
                        arch_kexec_unprotect_crashkres();
-       } else {
+       } else
+#endif
                dest_image = &kexec_image;
-       }
 
        if (nr_segments == 0) {
                /* Uninstall image */
@@ -162,8 +167,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
        image = xchg(dest_image, image);
 
 out:
+#ifdef CONFIG_CRASH_DUMP
        if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
                arch_kexec_protect_crashkres();
+#endif
 
        kimage_free(image);
 out_unlock:
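
The unbraced "} else" directly before "#endif" in the hunks above is
deliberate: it keeps the statement well-formed in both configurations.
Roughly what the compiler sees after preprocessing:

    /* CONFIG_CRASH_DUMP=y: */
    if (flags & KEXEC_ON_CRASH) {
            dest_image = &kexec_crash_image;
            if (kexec_crash_image)
                    arch_kexec_unprotect_crashkres();
    } else
            dest_image = &kexec_image;

    /* CONFIG_CRASH_DUMP=n: only the unconditional assignment remains. */
    dest_image = &kexec_image;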
index d08fc7b5db97905b3728b54aa37aa1492b60b1ee..0e96f6b243447368674fd343b6fb90343e79df43 100644 (file)
@@ -54,30 +54,6 @@ bool kexec_in_progress = false;
 
 bool kexec_file_dbg_print;
 
-int kexec_should_crash(struct task_struct *p)
-{
-       /*
-        * If crash_kexec_post_notifiers is enabled, don't run
-        * crash_kexec() here yet, which must be run after panic
-        * notifiers in panic().
-        */
-       if (crash_kexec_post_notifiers)
-               return 0;
-       /*
-        * There are 4 panic() calls in make_task_dead() path, each of which
-        * corresponds to each of these 4 conditions.
-        */
-       if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
-               return 1;
-       return 0;
-}
-
-int kexec_crash_loaded(void)
-{
-       return !!kexec_crash_image;
-}
-EXPORT_SYMBOL_GPL(kexec_crash_loaded);
-
 /*
  * When kexec transitions to the new kernel there is a one-to-one
  * mapping between physical and virtual addresses.  On processors
@@ -209,6 +185,7 @@ int sanity_check_segment_list(struct kimage *image)
        if (total_pages > nr_pages / 2)
                return -EINVAL;
 
+#ifdef CONFIG_CRASH_DUMP
        /*
         * Verify we have good destination addresses.  Normally
         * the caller is responsible for making certain we don't
@@ -231,6 +208,7 @@ int sanity_check_segment_list(struct kimage *image)
                                return -EADDRNOTAVAIL;
                }
        }
+#endif
 
        return 0;
 }
@@ -403,6 +381,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
        return pages;
 }
 
+#ifdef CONFIG_CRASH_DUMP
 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
                                                      unsigned int order)
 {
@@ -468,6 +447,7 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
 
        return pages;
 }
+#endif
 
 
 struct page *kimage_alloc_control_pages(struct kimage *image,
@@ -479,48 +459,16 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
        case KEXEC_TYPE_DEFAULT:
                pages = kimage_alloc_normal_control_pages(image, order);
                break;
+#ifdef CONFIG_CRASH_DUMP
        case KEXEC_TYPE_CRASH:
                pages = kimage_alloc_crash_control_pages(image, order);
                break;
+#endif
        }
 
        return pages;
 }
 
-int kimage_crash_copy_vmcoreinfo(struct kimage *image)
-{
-       struct page *vmcoreinfo_page;
-       void *safecopy;
-
-       if (image->type != KEXEC_TYPE_CRASH)
-               return 0;
-
-       /*
-        * For kdump, allocate one vmcoreinfo safe copy from the
-        * crash memory. as we have arch_kexec_protect_crashkres()
-        * after kexec syscall, we naturally protect it from write
-        * (even read) access under kernel direct mapping. But on
-        * the other hand, we still need to operate it when crash
-        * happens to generate vmcoreinfo note, hereby we rely on
-        * vmap for this purpose.
-        */
-       vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
-       if (!vmcoreinfo_page) {
-               pr_warn("Could not allocate vmcoreinfo buffer\n");
-               return -ENOMEM;
-       }
-       safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
-       if (!safecopy) {
-               pr_warn("Could not vmap vmcoreinfo buffer\n");
-               return -ENOMEM;
-       }
-
-       image->vmcoreinfo_data_copy = safecopy;
-       crash_update_vmcoreinfo_safecopy(safecopy);
-
-       return 0;
-}
-
 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
 {
        if (*image->entry != 0)
@@ -603,10 +551,12 @@ void kimage_free(struct kimage *image)
        if (!image)
                return;
 
+#ifdef CONFIG_CRASH_DUMP
        if (image->vmcoreinfo_data_copy) {
                crash_update_vmcoreinfo_safecopy(NULL);
                vunmap(image->vmcoreinfo_data_copy);
        }
+#endif
 
        kimage_free_extra_pages(image);
        for_each_kimage_entry(image, ptr, entry) {
@@ -800,22 +750,24 @@ static int kimage_load_normal_segment(struct kimage *image,
                                PAGE_SIZE - (maddr & ~PAGE_MASK));
                uchunk = min(ubytes, mchunk);
 
-               /* For file based kexec, source pages are in kernel memory */
-               if (image->file_mode)
-                       memcpy(ptr, kbuf, uchunk);
-               else
-                       result = copy_from_user(ptr, buf, uchunk);
+               if (uchunk) {
+                       /* For file based kexec, source pages are in kernel memory */
+                       if (image->file_mode)
+                               memcpy(ptr, kbuf, uchunk);
+                       else
+                               result = copy_from_user(ptr, buf, uchunk);
+                       ubytes -= uchunk;
+                       if (image->file_mode)
+                               kbuf += uchunk;
+                       else
+                               buf += uchunk;
+               }
                kunmap_local(ptr);
                if (result) {
                        result = -EFAULT;
                        goto out;
                }
-               ubytes -= uchunk;
                maddr  += mchunk;
-               if (image->file_mode)
-                       kbuf += mchunk;
-               else
-                       buf += mchunk;
                mbytes -= mchunk;
 
                cond_resched();
@@ -824,6 +776,7 @@ out:
        return result;
 }
 
+#ifdef CONFIG_CRASH_DUMP
 static int kimage_load_crash_segment(struct kimage *image,
                                        struct kexec_segment *segment)
 {
@@ -866,11 +819,18 @@ static int kimage_load_crash_segment(struct kimage *image,
                        memset(ptr + uchunk, 0, mchunk - uchunk);
                }
 
-               /* For file based kexec, source pages are in kernel memory */
-               if (image->file_mode)
-                       memcpy(ptr, kbuf, uchunk);
-               else
-                       result = copy_from_user(ptr, buf, uchunk);
+               if (uchunk) {
+                       /* For file based kexec, source pages are in kernel memory */
+                       if (image->file_mode)
+                               memcpy(ptr, kbuf, uchunk);
+                       else
+                               result = copy_from_user(ptr, buf, uchunk);
+                       ubytes -= uchunk;
+                       if (image->file_mode)
+                               kbuf += uchunk;
+                       else
+                               buf += uchunk;
+               }
                kexec_flush_icache_page(page);
                kunmap_local(ptr);
                arch_kexec_pre_free_pages(page_address(page), 1);
@@ -878,12 +838,7 @@ static int kimage_load_crash_segment(struct kimage *image,
                        result = -EFAULT;
                        goto out;
                }
-               ubytes -= uchunk;
                maddr  += mchunk;
-               if (image->file_mode)
-                       kbuf += mchunk;
-               else
-                       buf += mchunk;
                mbytes -= mchunk;
 
                cond_resched();
@@ -891,6 +846,7 @@ static int kimage_load_crash_segment(struct kimage *image,
 out:
        return result;
 }
+#endif
 
 int kimage_load_segment(struct kimage *image,
                                struct kexec_segment *segment)
@@ -901,9 +857,11 @@ int kimage_load_segment(struct kimage *image,
        case KEXEC_TYPE_DEFAULT:
                result = kimage_load_normal_segment(image, segment);
                break;
+#ifdef CONFIG_CRASH_DUMP
        case KEXEC_TYPE_CRASH:
                result = kimage_load_crash_segment(image, segment);
                break;
+#endif
        }
 
        return result;
@@ -1027,186 +985,6 @@ bool kexec_load_permitted(int kexec_image_type)
        return true;
 }
 
-/*
- * No panic_cpu check version of crash_kexec().  This function is called
- * only when panic_cpu holds the current CPU number; this is the only CPU
- * which processes crash_kexec routines.
- */
-void __noclone __crash_kexec(struct pt_regs *regs)
-{
-       /* Take the kexec_lock here to prevent sys_kexec_load
-        * running on one cpu from replacing the crash kernel
-        * we are using after a panic on a different cpu.
-        *
-        * If the crash kernel was not located in a fixed area
-        * of memory the xchg(&kexec_crash_image) would be
-        * sufficient.  But since I reuse the memory...
-        */
-       if (kexec_trylock()) {
-               if (kexec_crash_image) {
-                       struct pt_regs fixed_regs;
-
-                       crash_setup_regs(&fixed_regs, regs);
-                       crash_save_vmcoreinfo();
-                       machine_crash_shutdown(&fixed_regs);
-                       machine_kexec(kexec_crash_image);
-               }
-               kexec_unlock();
-       }
-}
-STACK_FRAME_NON_STANDARD(__crash_kexec);
-
-__bpf_kfunc void crash_kexec(struct pt_regs *regs)
-{
-       int old_cpu, this_cpu;
-
-       /*
-        * Only one CPU is allowed to execute the crash_kexec() code as with
-        * panic().  Otherwise parallel calls of panic() and crash_kexec()
-        * may stop each other.  To exclude them, we use panic_cpu here too.
-        */
-       old_cpu = PANIC_CPU_INVALID;
-       this_cpu = raw_smp_processor_id();
-
-       if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) {
-               /* This is the 1st CPU which comes here, so go ahead. */
-               __crash_kexec(regs);
-
-               /*
-                * Reset panic_cpu to allow another panic()/crash_kexec()
-                * call.
-                */
-               atomic_set(&panic_cpu, PANIC_CPU_INVALID);
-       }
-}
-
-static inline resource_size_t crash_resource_size(const struct resource *res)
-{
-       return !res->end ? 0 : resource_size(res);
-}
-
-ssize_t crash_get_memory_size(void)
-{
-       ssize_t size = 0;
-
-       if (!kexec_trylock())
-               return -EBUSY;
-
-       size += crash_resource_size(&crashk_res);
-       size += crash_resource_size(&crashk_low_res);
-
-       kexec_unlock();
-       return size;
-}
-
-static int __crash_shrink_memory(struct resource *old_res,
-                                unsigned long new_size)
-{
-       struct resource *ram_res;
-
-       ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
-       if (!ram_res)
-               return -ENOMEM;
-
-       ram_res->start = old_res->start + new_size;
-       ram_res->end   = old_res->end;
-       ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
-       ram_res->name  = "System RAM";
-
-       if (!new_size) {
-               release_resource(old_res);
-               old_res->start = 0;
-               old_res->end   = 0;
-       } else {
-               crashk_res.end = ram_res->start - 1;
-       }
-
-       crash_free_reserved_phys_range(ram_res->start, ram_res->end);
-       insert_resource(&iomem_resource, ram_res);
-
-       return 0;
-}
-
-int crash_shrink_memory(unsigned long new_size)
-{
-       int ret = 0;
-       unsigned long old_size, low_size;
-
-       if (!kexec_trylock())
-               return -EBUSY;
-
-       if (kexec_crash_image) {
-               ret = -ENOENT;
-               goto unlock;
-       }
-
-       low_size = crash_resource_size(&crashk_low_res);
-       old_size = crash_resource_size(&crashk_res) + low_size;
-       new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN);
-       if (new_size >= old_size) {
-               ret = (new_size == old_size) ? 0 : -EINVAL;
-               goto unlock;
-       }
-
-       /*
-        * (low_size > new_size) implies that low_size is greater than zero.
-        * This also means that if low_size is zero, the else branch is taken.
-        *
-        * If low_size is greater than 0, (low_size > new_size) indicates that
-        * crashk_low_res also needs to be shrunken. Otherwise, only crashk_res
-        * needs to be shrunken.
-        */
-       if (low_size > new_size) {
-               ret = __crash_shrink_memory(&crashk_res, 0);
-               if (ret)
-                       goto unlock;
-
-               ret = __crash_shrink_memory(&crashk_low_res, new_size);
-       } else {
-               ret = __crash_shrink_memory(&crashk_res, new_size - low_size);
-       }
-
-       /* Swap crashk_res and crashk_low_res if needed */
-       if (!crashk_res.end && crashk_low_res.end) {
-               crashk_res.start = crashk_low_res.start;
-               crashk_res.end   = crashk_low_res.end;
-               release_resource(&crashk_low_res);
-               crashk_low_res.start = 0;
-               crashk_low_res.end   = 0;
-               insert_resource(&iomem_resource, &crashk_res);
-       }
-
-unlock:
-       kexec_unlock();
-       return ret;
-}
-
-void crash_save_cpu(struct pt_regs *regs, int cpu)
-{
-       struct elf_prstatus prstatus;
-       u32 *buf;
-
-       if ((cpu < 0) || (cpu >= nr_cpu_ids))
-               return;
-
-       /* Using ELF notes here is opportunistic.
-        * I need a well defined structure format
-        * for the data I pass, and I need tags
-        * on the data to indicate what information I have
-        * squirrelled away.  ELF notes happen to provide
-        * all of that, so there is no need to invent something new.
-        */
-       buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
-       if (!buf)
-               return;
-       memset(&prstatus, 0, sizeof(prstatus));
-       prstatus.common.pr_pid = current->pid;
-       elf_core_copy_regs(&prstatus.pr_reg, regs);
-       buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
-                             &prstatus, sizeof(prstatus));
-       final_note(buf);
-}
-
 /*
  * Move into place and start executing a preloaded standalone
  * executable.  If nothing was preloaded return an error.
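
The crash_kexec() code removed above guards the handover with an
atomic_try_cmpxchg() on panic_cpu so that only the first CPU to arrive
proceeds. A standalone C11 sketch of that "first caller wins" gate (names
and values illustrative, not kernel code):

    #include <stdatomic.h>
    #include <stdio.h>

    #define PANIC_CPU_INVALID -1

    static atomic_int panic_cpu = PANIC_CPU_INVALID;

    static void crash_once(int this_cpu)
    {
            int old_cpu = PANIC_CPU_INVALID;

            /* Succeeds only while panic_cpu still holds PANIC_CPU_INVALID. */
            if (atomic_compare_exchange_strong(&panic_cpu, &old_cpu, this_cpu)) {
                    printf("cpu %d performs the crash handover\n", this_cpu);
                    /* The kernel resets the gate to allow a later panic(). */
                    atomic_store(&panic_cpu, PANIC_CPU_INVALID);
            }
    }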
index bef2f6f2571b42203cab4f7ba497ce34c41c3794..2d1db05fbf04f35d20bf941314ea9b591e0c220d 100644 (file)
@@ -285,11 +285,13 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
        kexec_file_dbg_print = !!(flags & KEXEC_FILE_DEBUG);
        image->file_mode = 1;
 
+#ifdef CONFIG_CRASH_DUMP
        if (kexec_on_panic) {
                /* Enable special crash kernel control page alloc policy. */
                image->control_page = crashk_res.start;
                image->type = KEXEC_TYPE_CRASH;
        }
+#endif
 
        ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
                                           cmdline_ptr, cmdline_len, flags);
@@ -349,13 +351,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
        if (!kexec_trylock())
                return -EBUSY;
 
+#ifdef CONFIG_CRASH_DUMP
        if (image_type == KEXEC_TYPE_CRASH) {
                dest_image = &kexec_crash_image;
                if (kexec_crash_image)
                        arch_kexec_unprotect_crashkres();
-       } else {
+       } else
+#endif
                dest_image = &kexec_image;
-       }
 
        if (flags & KEXEC_FILE_UNLOAD)
                goto exchange;
@@ -419,8 +422,10 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 exchange:
        image = xchg(dest_image, image);
 out:
+#ifdef CONFIG_CRASH_DUMP
        if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
                arch_kexec_protect_crashkres();
+#endif
 
        kexec_unlock();
        kimage_free(image);
@@ -535,8 +540,10 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf,
        phys_addr_t mstart, mend;
        struct resource res = { };
 
+#ifdef CONFIG_CRASH_DUMP
        if (kbuf->image->type == KEXEC_TYPE_CRASH)
                return func(&crashk_res, kbuf);
+#endif
 
        /*
         * Using MEMBLOCK_NONE will properly skip MEMBLOCK_DRIVER_MANAGED. See
@@ -595,12 +602,14 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf,
 static int kexec_walk_resources(struct kexec_buf *kbuf,
                                int (*func)(struct resource *, void *))
 {
+#ifdef CONFIG_CRASH_DUMP
        if (kbuf->image->type == KEXEC_TYPE_CRASH)
                return walk_iomem_res_desc(crashk_res.desc,
                                           IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
                                           crashk_res.start, crashk_res.end,
                                           kbuf, func);
-       else if (kbuf->top_down)
+#endif
+       if (kbuf->top_down)
                return walk_system_ram_res_rev(0, ULONG_MAX, kbuf, func);
        else
                return walk_system_ram_res(0, ULONG_MAX, kbuf, func);
index 74da1409cd14b5db50c60832eb7445a747ae0447..2595defe8c0d92dd3ec395da082b3c0909ff06eb 100644 (file)
@@ -4,6 +4,8 @@
 
 #include <linux/kexec.h>
 
+struct kexec_segment;
+
 struct kimage *do_kimage_alloc_init(void);
 int sanity_check_segment_list(struct kimage *image);
 void kimage_free_page_list(struct list_head *list);
index 1d4bc493b2f4b2e94133cec75e569bef3f3ead25..fe7a517fc4abbfd62570692bafb6bb88e5a19da1 100644 (file)
@@ -120,6 +120,7 @@ static ssize_t kexec_loaded_show(struct kobject *kobj,
 }
 KERNEL_ATTR_RO(kexec_loaded);
 
+#ifdef CONFIG_CRASH_DUMP
 static ssize_t kexec_crash_loaded_show(struct kobject *kobj,
                                       struct kobj_attribute *attr, char *buf)
 {
@@ -152,9 +153,10 @@ static ssize_t kexec_crash_size_store(struct kobject *kobj,
 }
 KERNEL_ATTR_RW(kexec_crash_size);
 
+#endif /* CONFIG_CRASH_DUMP */
 #endif /* CONFIG_KEXEC_CORE */
 
-#ifdef CONFIG_CRASH_CORE
+#ifdef CONFIG_VMCORE_INFO
 
 static ssize_t vmcoreinfo_show(struct kobject *kobj,
                               struct kobj_attribute *attr, char *buf)
@@ -177,7 +179,7 @@ KERNEL_ATTR_RO(crash_elfcorehdr_size);
 
 #endif
 
-#endif /* CONFIG_CRASH_CORE */
+#endif /* CONFIG_VMCORE_INFO */
 
 /* whether file capabilities are enabled */
 static ssize_t fscaps_show(struct kobject *kobj,
@@ -262,10 +264,12 @@ static struct attribute * kernel_attrs[] = {
 #endif
 #ifdef CONFIG_KEXEC_CORE
        &kexec_loaded_attr.attr,
+#ifdef CONFIG_CRASH_DUMP
        &kexec_crash_loaded_attr.attr,
        &kexec_crash_size_attr.attr,
 #endif
-#ifdef CONFIG_CRASH_CORE
+#endif
+#ifdef CONFIG_VMCORE_INFO
        &vmcoreinfo_attr.attr,
 #ifdef CONFIG_CRASH_HOTPLUG
        &crash_elfcorehdr_size_attr.attr,
index 689def7676c4c957b38db9104a9976a4f22b15cc..e1e8a7a9d6c193c092b02239446750c3ec4e8f10 100644 (file)
@@ -2489,6 +2489,11 @@ static void do_free_init(struct work_struct *w)
        }
 }
 
+void flush_module_init_free_work(void)
+{
+       flush_work(&init_free_wq);
+}
+
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "module."
 /* Default value for module->async_probe_requested */
@@ -2595,8 +2600,8 @@ static noinline int do_init_module(struct module *mod)
         * Note that module_alloc() on most architectures creates W+X page
         * mappings which won't be cleaned up until do_free_init() runs.  Any
         * code such as mark_rodata_ro() which depends on those mappings to
-        * be cleaned up needs to sync with the queued work - ie
-        * rcu_barrier()
+        * be cleaned up needs to sync with the queued work by invoking
+        * flush_module_init_free_work().
         */
        if (llist_add(&freeinit->node, &init_free_list))
                schedule_work(&init_free_wq);
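
A sketch of the intended caller, following the updated comment (the call
site itself is outside this hunk, so the surrounding details are
assumptions):

    static void mark_readonly(void)
    {
            if (rodata_enabled) {
                    /* Wait for queued do_free_init() work before sealing. */
                    flush_module_init_free_work();
                    mark_rodata_ro();
            }
    }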
index 179fb1518070c21f028e201ab32ae3dd53e23357..e3f639ff16707a98e5c5a98d23432cd3cc28f684 100644 (file)
@@ -485,7 +485,8 @@ void __init padata_do_multithreaded(struct padata_mt_job *job)
        struct padata_work my_work, *pw;
        struct padata_mt_job_state ps;
        LIST_HEAD(works);
-       int nworks;
+       int nworks, nid;
+       static atomic_t last_used_nid __initdata;
 
        if (job->size == 0)
                return;
@@ -517,7 +518,16 @@ void __init padata_do_multithreaded(struct padata_mt_job *job)
        ps.chunk_size = roundup(ps.chunk_size, job->align);
 
        list_for_each_entry(pw, &works, pw_list)
-               queue_work(system_unbound_wq, &pw->pw_work);
+               if (job->numa_aware) {
+                       int old_node = atomic_read(&last_used_nid);
+
+                       do {
+                               nid = next_node_in(old_node, node_states[N_CPU]);
+                       } while (!atomic_try_cmpxchg(&last_used_nid, &old_node, nid));
+                       queue_work_node(nid, system_unbound_wq, &pw->pw_work);
+               } else {
+                       queue_work(system_unbound_wq, &pw->pw_work);
+               }
 
        /* Use the current thread, which saves starting a workqueue worker. */
        padata_work_init(&my_work, padata_mt_helper, &ps, PADATA_WORK_ONSTACK);
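
The queueing change above spreads workers across NUMA nodes round-robin with
a lock-free cmpxchg loop. A standalone C11 sketch of just the node selection
(next_node() stands in for the kernel's next_node_in() over
node_states[N_CPU]):

    #include <stdatomic.h>

    #define NR_NODES 4

    static atomic_int last_used_nid;

    static int next_node(int nid)
    {
            return (nid + 1) % NR_NODES;    /* wraps around like next_node_in() */
    }

    static int pick_nid(void)
    {
            int old_node = atomic_load(&last_used_nid);
            int nid;

            /*
             * On failure the compare-exchange reloads old_node, so the loop
             * retries against the value another thread just published.
             */
            do {
                    nid = next_node(old_node);
            } while (!atomic_compare_exchange_weak(&last_used_nid, &old_node, nid));

            return nid;
    }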
index f22d8f33ea14752819044715e1dfa3234fc36198..747c3f3d289a234832e6cf5774d92a20951e3cb3 100644 (file)
@@ -73,6 +73,7 @@ EXPORT_SYMBOL_GPL(panic_timeout);
 #define PANIC_PRINT_FTRACE_INFO                0x00000010
 #define PANIC_PRINT_ALL_PRINTK_MSG     0x00000020
 #define PANIC_PRINT_ALL_CPU_BT         0x00000040
+#define PANIC_PRINT_BLOCKED_TASKS      0x00000080
 unsigned long panic_print;
 
 ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
@@ -227,6 +228,9 @@ static void panic_print_sys_info(bool console_flush)
 
        if (panic_print & PANIC_PRINT_FTRACE_INFO)
                ftrace_dump(DUMP_ALL);
+
+       if (panic_print & PANIC_PRINT_BLOCKED_TASKS)
+               show_state_filter(TASK_UNINTERRUPTIBLE);
 }
 
 void check_panic_on_warn(const char *origin)
@@ -674,8 +678,13 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
                pr_warn("WARNING: CPU: %d PID: %d at %pS\n",
                        raw_smp_processor_id(), current->pid, caller);
 
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wsuggest-attribute=format"
+#endif
        if (args)
                vprintk(args->fmt, args->args);
+#pragma GCC diagnostic pop
 
        print_modules();
 
index b06f63e276c1f6680975ba26790f8044704d9543..a000bef511772b4a10613d16c5ea135686f90033 100644 (file)
@@ -34,7 +34,7 @@
 #include <linux/security.h>
 #include <linux/memblock.h>
 #include <linux/syscalls.h>
-#include <linux/crash_core.h>
+#include <linux/vmcore_info.h>
 #include <linux/ratelimit.h>
 #include <linux/kmsg_dump.h>
 #include <linux/syslog.h>
@@ -957,7 +957,7 @@ const struct file_operations kmsg_fops = {
        .release = devkmsg_release,
 };
 
-#ifdef CONFIG_CRASH_CORE
+#ifdef CONFIG_VMCORE_INFO
 /*
  * This appends the listed symbols to /proc/vmcore
  *
index 2fabd497d6598831d1144ecb8696e85ca4fb3f3d..d5f89f9ef29f65f137ef8a6978e6dbe75564056b 100644 (file)
@@ -375,10 +375,13 @@ static int check_ptrace_options(unsigned long data)
        return 0;
 }
 
-static inline void ptrace_set_stopped(struct task_struct *task)
+static inline void ptrace_set_stopped(struct task_struct *task, bool seize)
 {
        guard(spinlock)(&task->sighand->siglock);
 
+       /* SEIZE doesn't trap tracee on attach */
+       if (!seize)
+               send_signal_locked(SIGSTOP, SEND_SIG_PRIV, task, PIDTYPE_PID);
        /*
         * If the task is already STOPPED, set JOBCTL_TRAP_STOP and
         * TRAPPING, and kick it so that it transits to TRACED.  TRAPPING
@@ -457,14 +460,8 @@ static int ptrace_attach(struct task_struct *task, long request,
                                return -EPERM;
 
                        task->ptrace = flags;
-
                        ptrace_link(task, current);
-
-                       /* SEIZE doesn't trap tracee on attach */
-                       if (!seize)
-                               send_sig_info(SIGSTOP, SEND_SIG_PRIV, task);
-
-                       ptrace_set_stopped(task);
+                       ptrace_set_stopped(task, seize);
                }
        }
 
index 6a16129f9a5c0a90655bb37277c52f4dc1137721..03be0d1330a6b22336c91cd4cfeae7fd82838692 100644 (file)
@@ -1830,6 +1830,12 @@ bool should_numa_migrate_memory(struct task_struct *p, struct folio *folio,
        int dst_nid = cpu_to_node(dst_cpu);
        int last_cpupid, this_cpupid;
 
+       /*
+        * Cannot migrate to memoryless nodes.
+        */
+       if (!node_state(dst_nid, N_MEMORY))
+               return false;
+
        /*
         * The pages in slow memory node should be migrated according
         * to hot/cold instead of private/shared.
index bdca529f0f7b7aa23e377afca0bf9cc6d04a6473..7bdbcf1b78d0fa0d9f3fbde4b61e31ca5595f453 100644 (file)
@@ -2741,12 +2741,15 @@ relock:
                /* Has this task already been marked for death? */
                if ((signal->flags & SIGNAL_GROUP_EXIT) ||
                     signal->group_exec_task) {
-                       clear_siginfo(&ksig->info);
-                       ksig->info.si_signo = signr = SIGKILL;
+                       signr = SIGKILL;
                        sigdelset(&current->pending.signal, SIGKILL);
                        trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
-                               &sighand->action[SIGKILL - 1]);
+                                            &sighand->action[SIGKILL-1]);
                        recalc_sigpending();
+                       /*
+                        * This implies do_group_exit() or a return to
+                        * PF_USER_WORKER; there is no need to initialize
+                        * ksig->info etc.
+                        */
                        goto fatal;
                }
 
@@ -2856,7 +2859,7 @@ relock:
                                spin_lock_irq(&sighand->siglock);
                        }
 
-                       if (likely(do_signal_stop(ksig->info.si_signo))) {
+                       if (likely(do_signal_stop(signr))) {
                                /* It released the siglock.  */
                                goto relock;
                        }
@@ -2880,7 +2883,7 @@ relock:
 
                if (sig_kernel_coredump(signr)) {
                        if (print_fatal_signals)
-                               print_fatal_signal(ksig->info.si_signo);
+                               print_fatal_signal(signr);
                        proc_coredump_connector(current);
                        /*
                         * If it was able to dump core, this kills all
@@ -2895,8 +2898,9 @@ relock:
 
                /*
                 * PF_USER_WORKER threads will catch and exit on fatal signals
-                * themselves. They have cleanup that must be performed, so
-                * we cannot call do_exit() on their behalf.
+                * themselves. They have cleanup that must be performed, so we
+                * cannot call do_exit() on their behalf. Note that ksig won't
+                * be properly initialized; PF_USER_WORKERs shouldn't use it.
                 */
                if (current->flags & PF_USER_WORKER)
                        goto out;
@@ -2904,17 +2908,17 @@ relock:
                /*
                 * Death signals, no core dump.
                 */
-               do_group_exit(ksig->info.si_signo);
+               do_group_exit(signr);
                /* NOTREACHED */
        }
        spin_unlock_irq(&sighand->siglock);
-out:
+
        ksig->sig = signr;
 
-       if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS))
+       if (signr && !(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS))
                hide_si_addr_tag_bits(ksig);
-
-       return ksig->sig > 0;
+out:
+       return signr > 0;
 }
 
 /**
index 8f49b6b96dfd097d3172fe4d35d0b1ba5dc933c5..611cd904f0357e08fffca607ba1de0b51a109479 100644 (file)
@@ -751,26 +751,6 @@ bool tmigr_update_events(struct tmigr_group *group, struct tmigr_group *child,
 
                first_childevt = evt = data->evt;
 
-               /*
-                * Walking the hierarchy is required in any case when a
-                * remote expiry was done before. This ensures to not lose
-                * already queued events in non active groups (see section
-                * "Required event and timerqueue update after a remote
-                * expiry" in the documentation at the top).
-                *
-                * The two call sites which are executed without a remote expiry
-                * before, are not prevented from propagating changes through
-                * the hierarchy by the return:
-                *  - When entering this path by tmigr_new_timer(), @evt->ignore
-                *    is never set.
-                *  - tmigr_inactive_up() takes care of the propagation by
-                *    itself and ignores the return value. But an immediate
-                *    return is required because nothing has to be done in this
-                *    level as the event could be ignored.
-                */
-               if (evt->ignore && !remote)
-                       return true;
-
                raw_spin_lock(&group->lock);
 
                childstate.state = 0;
index ce4d99df5f0eb4621ef32455eadfc57be314edee..0b0b95418b16a7aa69078c6db3f59f7a15f388c2 100644 (file)
@@ -931,7 +931,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
        struct uid_gid_map new_map;
        unsigned idx;
        struct uid_gid_extent extent;
-       char *kbuf = NULL, *pos, *next_line;
+       char *kbuf, *pos, *next_line;
        ssize_t ret;
 
        /* Only allow < page size writes at the beginning of the file */
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c
new file mode 100644 (file)
index 0000000..f95516c
--- /dev/null
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmcore_info.c - crash vmcoreinfo support code.
+ * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
+ */
+
+#include <linux/buildid.h>
+#include <linux/init.h>
+#include <linux/utsname.h>
+#include <linux/vmalloc.h>
+#include <linux/sizes.h>
+#include <linux/kexec.h>
+#include <linux/memory.h>
+#include <linux/cpuhotplug.h>
+#include <linux/memblock.h>
+#include <linux/kmemleak.h>
+
+#include <asm/page.h>
+#include <asm/sections.h>
+
+#include <crypto/sha1.h>
+
+#include "kallsyms_internal.h"
+#include "kexec_internal.h"
+
+/* vmcoreinfo stuff */
+unsigned char *vmcoreinfo_data;
+size_t vmcoreinfo_size;
+u32 *vmcoreinfo_note;
+
+/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
+static unsigned char *vmcoreinfo_data_safecopy;
+
+Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
+                         void *data, size_t data_len)
+{
+       struct elf_note *note = (struct elf_note *)buf;
+
+       note->n_namesz = strlen(name) + 1;
+       note->n_descsz = data_len;
+       note->n_type   = type;
+       buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
+       memcpy(buf, name, note->n_namesz);
+       buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
+       memcpy(buf, data, data_len);
+       buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));
+
+       return buf;
+}
+
+void final_note(Elf_Word *buf)
+{
+       memset(buf, 0, sizeof(struct elf_note));
+}
+
+static void update_vmcoreinfo_note(void)
+{
+       u32 *buf = vmcoreinfo_note;
+
+       if (!vmcoreinfo_size)
+               return;
+       buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
+                             vmcoreinfo_size);
+       final_note(buf);
+}
+
+void crash_update_vmcoreinfo_safecopy(void *ptr)
+{
+       if (ptr)
+               memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
+
+       vmcoreinfo_data_safecopy = ptr;
+}
+
+void crash_save_vmcoreinfo(void)
+{
+       if (!vmcoreinfo_note)
+               return;
+
+       /* Use the safe copy to generate the vmcoreinfo note if available */
+       if (vmcoreinfo_data_safecopy)
+               vmcoreinfo_data = vmcoreinfo_data_safecopy;
+
+       vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
+       update_vmcoreinfo_note();
+}
+
+void vmcoreinfo_append_str(const char *fmt, ...)
+{
+       va_list args;
+       char buf[0x50];
+       size_t r;
+
+       va_start(args, fmt);
+       r = vscnprintf(buf, sizeof(buf), fmt, args);
+       va_end(args);
+
+       r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
+
+       memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
+
+       vmcoreinfo_size += r;
+
+       WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES,
+                 "vmcoreinfo data exceeds allocated size, truncating");
+}
+
+/*
+ * provide an empty default implementation here -- architecture
+ * code may override this
+ */
+void __weak arch_crash_save_vmcoreinfo(void)
+{}
+
+phys_addr_t __weak paddr_vmcoreinfo_note(void)
+{
+       return __pa(vmcoreinfo_note);
+}
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
+
+static int __init crash_save_vmcoreinfo_init(void)
+{
+       vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
+       if (!vmcoreinfo_data) {
+               pr_warn("Memory allocation for vmcoreinfo_data failed\n");
+               return -ENOMEM;
+       }
+
+       vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
+                                               GFP_KERNEL | __GFP_ZERO);
+       if (!vmcoreinfo_note) {
+               free_page((unsigned long)vmcoreinfo_data);
+               vmcoreinfo_data = NULL;
+               pr_warn("Memory allocation for vmcoreinfo_note failed\n");
+               return -ENOMEM;
+       }
+
+       VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
+       VMCOREINFO_BUILD_ID();
+       VMCOREINFO_PAGESIZE(PAGE_SIZE);
+
+       VMCOREINFO_SYMBOL(init_uts_ns);
+       VMCOREINFO_OFFSET(uts_namespace, name);
+       VMCOREINFO_SYMBOL(node_online_map);
+#ifdef CONFIG_MMU
+       VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
+#endif
+       VMCOREINFO_SYMBOL(_stext);
+       vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", (unsigned long) VMALLOC_START);
+
+#ifndef CONFIG_NUMA
+       VMCOREINFO_SYMBOL(mem_map);
+       VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+       VMCOREINFO_SYMBOL_ARRAY(vmemmap);
+#endif
+#ifdef CONFIG_SPARSEMEM
+       VMCOREINFO_SYMBOL_ARRAY(mem_section);
+       VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
+       VMCOREINFO_STRUCT_SIZE(mem_section);
+       VMCOREINFO_OFFSET(mem_section, section_mem_map);
+       VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
+       VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
+#endif
+       VMCOREINFO_STRUCT_SIZE(page);
+       VMCOREINFO_STRUCT_SIZE(pglist_data);
+       VMCOREINFO_STRUCT_SIZE(zone);
+       VMCOREINFO_STRUCT_SIZE(free_area);
+       VMCOREINFO_STRUCT_SIZE(list_head);
+       VMCOREINFO_SIZE(nodemask_t);
+       VMCOREINFO_OFFSET(page, flags);
+       VMCOREINFO_OFFSET(page, _refcount);
+       VMCOREINFO_OFFSET(page, mapping);
+       VMCOREINFO_OFFSET(page, lru);
+       VMCOREINFO_OFFSET(page, _mapcount);
+       VMCOREINFO_OFFSET(page, private);
+       VMCOREINFO_OFFSET(page, compound_head);
+       VMCOREINFO_OFFSET(pglist_data, node_zones);
+       VMCOREINFO_OFFSET(pglist_data, nr_zones);
+#ifdef CONFIG_FLATMEM
+       VMCOREINFO_OFFSET(pglist_data, node_mem_map);
+#endif
+       VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
+       VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
+       VMCOREINFO_OFFSET(pglist_data, node_id);
+       VMCOREINFO_OFFSET(zone, free_area);
+       VMCOREINFO_OFFSET(zone, vm_stat);
+       VMCOREINFO_OFFSET(zone, spanned_pages);
+       VMCOREINFO_OFFSET(free_area, free_list);
+       VMCOREINFO_OFFSET(list_head, next);
+       VMCOREINFO_OFFSET(list_head, prev);
+       VMCOREINFO_LENGTH(zone.free_area, NR_PAGE_ORDERS);
+       log_buf_vmcoreinfo_setup();
+       VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
+       VMCOREINFO_NUMBER(NR_FREE_PAGES);
+       VMCOREINFO_NUMBER(PG_lru);
+       VMCOREINFO_NUMBER(PG_private);
+       VMCOREINFO_NUMBER(PG_swapcache);
+       VMCOREINFO_NUMBER(PG_swapbacked);
+       VMCOREINFO_NUMBER(PG_slab);
+#ifdef CONFIG_MEMORY_FAILURE
+       VMCOREINFO_NUMBER(PG_hwpoison);
+#endif
+       VMCOREINFO_NUMBER(PG_head_mask);
+#define PAGE_BUDDY_MAPCOUNT_VALUE      (~PG_buddy)
+       VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
+#ifdef CONFIG_HUGETLB_PAGE
+       VMCOREINFO_NUMBER(PG_hugetlb);
+#define PAGE_OFFLINE_MAPCOUNT_VALUE    (~PG_offline)
+       VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
+#endif
+
+#ifdef CONFIG_KALLSYMS
+       VMCOREINFO_SYMBOL(kallsyms_names);
+       VMCOREINFO_SYMBOL(kallsyms_num_syms);
+       VMCOREINFO_SYMBOL(kallsyms_token_table);
+       VMCOREINFO_SYMBOL(kallsyms_token_index);
+#ifdef CONFIG_KALLSYMS_BASE_RELATIVE
+       VMCOREINFO_SYMBOL(kallsyms_offsets);
+       VMCOREINFO_SYMBOL(kallsyms_relative_base);
+#else
+       VMCOREINFO_SYMBOL(kallsyms_addresses);
+#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */
+#endif /* CONFIG_KALLSYMS */
+
+       arch_crash_save_vmcoreinfo();
+       update_vmcoreinfo_note();
+
+       return 0;
+}
+
+subsys_initcall(crash_save_vmcoreinfo_init);
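
append_elf_note() above lays the note out as a fixed header followed by the
name and the descriptor, each padded to Elf_Word granularity. A standalone
sketch of the size arithmetic (assuming the usual 4-byte Elf_Word):

    #include <stdio.h>
    #include <string.h>

    #define WORD 4
    #define WORD_ALIGN(x)   (((x) + WORD - 1) / WORD * WORD)

    int main(void)
    {
            const char *name = "VMCOREINFO";
            size_t data_len = 123;          /* arbitrary payload size */
            size_t hdr = 3 * WORD;          /* n_namesz, n_descsz, n_type */
            size_t total = hdr + WORD_ALIGN(strlen(name) + 1)
                               + WORD_ALIGN(data_len);

            printf("note occupies %zu bytes\n", total);  /* 12 + 12 + 124 */
            return 0;
    }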
index 81a8862295d610d9914b69cbaa8329ffacdc53ae..d7b2125503af3dd6f9d4b03eee38c14f8d9b3212 100644 (file)
@@ -796,8 +796,8 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
 /*
  * /proc/sys/kernel/watchdog
  */
-int proc_watchdog(struct ctl_table *table, int write,
-                 void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_watchdog(struct ctl_table *table, int write,
+                        void *buffer, size_t *lenp, loff_t *ppos)
 {
        return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED |
                                    WATCHDOG_SOFTOCKUP_ENABLED,
@@ -807,8 +807,8 @@ int proc_watchdog(struct ctl_table *table, int write,
 /*
  * /proc/sys/kernel/nmi_watchdog
  */
-int proc_nmi_watchdog(struct ctl_table *table, int write,
-                     void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_nmi_watchdog(struct ctl_table *table, int write,
+                            void *buffer, size_t *lenp, loff_t *ppos)
 {
        if (!watchdog_hardlockup_available && write)
                return -ENOTSUPP;
@@ -816,21 +816,23 @@ int proc_nmi_watchdog(struct ctl_table *table, int write,
                                    table, write, buffer, lenp, ppos);
 }
 
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
 /*
  * /proc/sys/kernel/soft_watchdog
  */
-int proc_soft_watchdog(struct ctl_table *table, int write,
-                       void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_soft_watchdog(struct ctl_table *table, int write,
+                             void *buffer, size_t *lenp, loff_t *ppos)
 {
        return proc_watchdog_common(WATCHDOG_SOFTOCKUP_ENABLED,
                                    table, write, buffer, lenp, ppos);
 }
+#endif
 
 /*
  * /proc/sys/kernel/watchdog_thresh
  */
-int proc_watchdog_thresh(struct ctl_table *table, int write,
-                        void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_watchdog_thresh(struct ctl_table *table, int write,
+                               void *buffer, size_t *lenp, loff_t *ppos)
 {
        int err, old;
 
@@ -852,8 +854,8 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
  * user to specify a mask that will include cpus that have not yet
  * been brought online, if desired.
  */
-int proc_watchdog_cpumask(struct ctl_table *table, int write,
-                         void *buffer, size_t *lenp, loff_t *ppos)
+static int proc_watchdog_cpumask(struct ctl_table *table, int write,
+                                void *buffer, size_t *lenp, loff_t *ppos)
 {
        int err;
 
index 2164f066e7b69571e2f44e0ffbf3e95125893a40..733ee2ac0138e34b8da1f97956758cc9e1cba030 100644 (file)
@@ -2085,7 +2085,7 @@ config KCOV
        depends on ARCH_HAS_KCOV
        depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS
        depends on !ARCH_WANTS_NO_INSTR || HAVE_NOINSTR_HACK || \
-                  GCC_VERSION >= 120000 || CLANG_VERSION >= 130000
+                  GCC_VERSION >= 120000 || CC_IS_CLANG
        select DEBUG_FS
        select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC
        select OBJTOOL if HAVE_NOINSTR_HACK
@@ -2142,7 +2142,7 @@ config TEST_DHRY
 
          To run the benchmark, it needs to be enabled explicitly, either from
          the kernel command line (when built-in), or from userspace (when
-         built-in or modular.
+         built-in or modular).
 
          Run once during kernel boot:
 
index e6eda054ab275ffa7b886c2eb4e49e97705f6b54..98016e137b7f09f82b168f840565e8121b28ce87 100644 (file)
@@ -158,7 +158,7 @@ config KASAN_STACK
          out-of-bounds bugs in stack variables.
 
          With Clang, stack instrumentation has a problem that causes excessive
-         stack usage, see https://bugs.llvm.org/show_bug.cgi?id=38809. Thus,
+         stack usage, see https://llvm.org/pr38809. Thus,
          with Clang, this option is deemed unsafe.
 
          This option is always disabled when compile-testing with Clang to
index ca0b4f360c1a0be47fc7194b43599afa0f828342..388e656ac9743dca9913b1dc09f88040e206228c 100644 (file)
@@ -938,7 +938,7 @@ static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
                edit->leaf_p = &new_n0->slots[0];
 
        pr_devel("<--%s() = ok [split shortcut]\n", __func__);
-       return edit;
+       return true;
 }
 
 /**
index e3a7acdeef0ed4485c875f9cb176434bf10f8e7a..898301b49eb644ccddbb1fd71df22d66b2622198 100644 (file)
@@ -140,7 +140,7 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
                return -EFAULT; /* page not mapped */
 
        ret = -EINVAL;
-       page_addr = kmap_atomic(page);
+       page_addr = kmap_local_page(page);
        ehdr = (Elf32_Ehdr *)page_addr;
 
        /* compare magic x7f "ELF" */
@@ -156,7 +156,7 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
        else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
                ret = get_build_id_64(page_addr, build_id, size);
 out:
-       kunmap_atomic(page_addr);
+       kunmap_local(page_addr);
        put_page(page);
        return ret;
 }
@@ -174,7 +174,7 @@ int build_id_parse_buf(const void *buf, unsigned char *build_id, u32 buf_size)
        return parse_build_id_buf(build_id, NULL, buf, buf_size);
 }
 
-#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_CRASH_CORE)
+#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID) || IS_ENABLED(CONFIG_VMCORE_INFO)
 unsigned char vmlinux_build_id[BUILD_ID_SIZE_MAX] __ro_after_init;
 
 /**
index 08edbbb19f573ff73f3fb339591533f8c707716b..ca6c87232c5809544a03ad0e1e6c22ab78104ccb 100644 (file)
@@ -277,7 +277,7 @@ int dhry(int n)
        dhry_assert_string_eq(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");
        dhry_assert_string_eq(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
 
-       User_Time = ktime_to_ms(ktime_sub(End_Time, Begin_Time));
+       User_Time = ktime_ms_delta(End_Time, Begin_Time);
 
        kfree(Ptr_Glob);
        kfree(Next_Ptr_Glob);
index f15ac666e9d38bd23bb0cb4ed08beb626540a0c4..e6a279dabf848eb3d60be99927812a5a817a43ee 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/mutex.h>
 #include <linux/smp.h>
 
 #define DHRY_VAX       1757
index 6fba6423cc10b512acdfac38cc44887a626c8096..c78f335fa98137664745e6593c98b707a528bf34 100644 (file)
@@ -640,10 +640,9 @@ static int param_set_dyndbg_classnames(const char *instr, const struct kernel_pa
        int cls_id, totct = 0;
        bool wanted;
 
-       cl_str = tmp = kstrdup(instr, GFP_KERNEL);
-       p = strchr(cl_str, '\n');
-       if (p)
-               *p = '\0';
+       cl_str = tmp = kstrdup_and_replace(instr, '\n', '\0', GFP_KERNEL);
+       if (!tmp)
+               return -ENOMEM;
 
        /* start with previously set state-bits, then modify */
        curr_bits = old_bits = *dcp->bits;
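
kstrdup_and_replace() duplicates the string and replaces every occurrence of
one character with another in the copy, so replacing '\n' with '\0'
truncates at the first newline; the new NULL check also adds the -ENOMEM
handling the open-coded version lacked. A rough userspace equivalent of the
helper:

    #include <stdlib.h>
    #include <string.h>

    /* Duplicate src, replacing every 'old' character with 'new' in the copy. */
    static char *dup_and_replace(const char *src, char old, char new)
    {
            char *copy = strdup(src);

            if (copy) {
                    for (char *p = copy; *p; p++)
                            if (*p == old)
                                    *p = new;
            }
            return copy;
    }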
index 83332fefa6f42e5e57434d214594e7c2306b1a0d..84ecccddc77182debc51ca7876866989a21b9fda 100644 (file)
@@ -83,83 +83,6 @@ bool fprop_new_period(struct fprop_global *p, int periods)
        return true;
 }
 
-/*
- * ---- SINGLE ----
- */
-
-int fprop_local_init_single(struct fprop_local_single *pl)
-{
-       pl->events = 0;
-       pl->period = 0;
-       raw_spin_lock_init(&pl->lock);
-       return 0;
-}
-
-void fprop_local_destroy_single(struct fprop_local_single *pl)
-{
-}
-
-static void fprop_reflect_period_single(struct fprop_global *p,
-                                       struct fprop_local_single *pl)
-{
-       unsigned int period = p->period;
-       unsigned long flags;
-
-       /* Fast path - period didn't change */
-       if (pl->period == period)
-               return;
-       raw_spin_lock_irqsave(&pl->lock, flags);
-       /* Someone updated pl->period while we were spinning? */
-       if (pl->period >= period) {
-               raw_spin_unlock_irqrestore(&pl->lock, flags);
-               return;
-       }
-       /* Aging zeroed our fraction? */
-       if (period - pl->period < BITS_PER_LONG)
-               pl->events >>= period - pl->period;
-       else
-               pl->events = 0;
-       pl->period = period;
-       raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
-
-/* Event of type pl happened */
-void __fprop_inc_single(struct fprop_global *p, struct fprop_local_single *pl)
-{
-       fprop_reflect_period_single(p, pl);
-       pl->events++;
-       percpu_counter_add(&p->events, 1);
-}
-
-/* Return fraction of events of type pl */
-void fprop_fraction_single(struct fprop_global *p,
-                          struct fprop_local_single *pl,
-                          unsigned long *numerator, unsigned long *denominator)
-{
-       unsigned int seq;
-       s64 num, den;
-
-       do {
-               seq = read_seqcount_begin(&p->sequence);
-               fprop_reflect_period_single(p, pl);
-               num = pl->events;
-               den = percpu_counter_read_positive(&p->events);
-       } while (read_seqcount_retry(&p->sequence, seq));
-
-       /*
-        * Make fraction <= 1 and denominator > 0 even in presence of percpu
-        * counter errors
-        */
-       if (den <= num) {
-               if (num)
-                       den = num;
-               else
-                       den = 1;
-       }
-       *denominator = den;
-       *numerator = num;
-}
-
 /*
  * ---- PERCPU ----
  */
index 7ee468ef21ec612d8787755cfd8eeaac50b36ef5..7e945fdcbf11586e5dfa60b2f1a8580d8df60e15 100644 (file)
@@ -98,7 +98,8 @@ config FONT_10x18
 
 config FONT_SUN8x16
        bool "Sparc console 8x16 font"
-       depends on (FRAMEBUFFER_CONSOLE && (FONTS || SPARC)) || BOOTX_TEXT
+       depends on (FRAMEBUFFER_CONSOLE && (FONTS || SPARC)) || \
+                  BOOTX_TEXT || EARLYFB
        help
          This is the high resolution console font for Sun machines. Say Y.
 
index c3569d2ba503f4d24282a649bba4d147f358859b..16291814450ee39a2ad656f93c316bdc8f998939 100644 (file)
@@ -127,6 +127,7 @@ static __init_or_fwtbl_lib int call_handler(struct acpi_subtable_proc *proc,
  *
  * @id: table id (for debugging purposes)
  * @table_size: size of the root table
+ * @max_length: maximum size of the table (ignore if 0)
  * @table_header: where does the table start?
  * @proc: array of acpi_subtable_proc struct containing entry id
  *        and associated handler with it
@@ -148,18 +149,21 @@ static __init_or_fwtbl_lib int call_handler(struct acpi_subtable_proc *proc,
 int __init_or_fwtbl_lib
 acpi_parse_entries_array(char *id, unsigned long table_size,
                         union fw_table_header *table_header,
+                        unsigned long max_length,
                         struct acpi_subtable_proc *proc,
                         int proc_num, unsigned int max_entries)
 {
-       unsigned long table_end, subtable_len, entry_len;
+       unsigned long table_len, table_end, subtable_len, entry_len;
        struct acpi_subtable_entry entry;
        enum acpi_subtable_type type;
        int count = 0;
        int i;
 
        type = acpi_get_subtable_type(id);
-       table_end = (unsigned long)table_header +
-                   acpi_table_get_length(type, table_header);
+       table_len = acpi_table_get_length(type, table_header);
+       if (max_length && max_length < table_len)
+               table_len = max_length;
+       table_end = (unsigned long)table_header + table_len;
 
        /* Parse all entries looking for a match. */
 
@@ -208,7 +212,8 @@ int __init_or_fwtbl_lib
 cdat_table_parse(enum acpi_cdat_type type,
                 acpi_tbl_entry_handler_arg handler_arg,
                 void *arg,
-                struct acpi_table_cdat *table_header)
+                struct acpi_table_cdat *table_header,
+                unsigned long length)
 {
        struct acpi_subtable_proc proc = {
                .id             = type,
@@ -222,6 +227,6 @@ cdat_table_parse(enum acpi_cdat_type type,
        return acpi_parse_entries_array(ACPI_SIG_CDAT,
                                        sizeof(struct acpi_table_cdat),
                                        (union fw_table_header *)table_header,
-                                       &proc, 1, 0);
+                                       length, &proc, 1, 0);
 }
 EXPORT_SYMBOL_FWTBL_LIB(cdat_table_parse);
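
Callers now pass the size of the buffer they actually mapped, so a corrupt
CDAT header cannot walk the parser past the mapping. A hypothetical caller
(only cdat_table_parse() and ACPI_CDAT_TYPE_DSMAS come from this hunk; the
handler and buffer names are illustrative):

    ret = cdat_table_parse(ACPI_CDAT_TYPE_DSMAS, handle_dsmas, port,
                           (struct acpi_table_cdat *)cdat_buf, cdat_buf_len);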
index 41f1bcdc44886c1cef8f25c080c006d22a45339d..aaefb9b678c8e4e1277411a27c554604686d3ddc 100644 (file)
@@ -5,7 +5,7 @@
 #include <linux/gfp.h>
 #include <linux/kmemleak.h>
 
-#define GENRADIX_ARY           (PAGE_SIZE / sizeof(struct genradix_node *))
+#define GENRADIX_ARY           (GENRADIX_NODE_SIZE / sizeof(struct genradix_node *))
 #define GENRADIX_ARY_SHIFT     ilog2(GENRADIX_ARY)
 
 struct genradix_node {
@@ -14,13 +14,13 @@ struct genradix_node {
                struct genradix_node    *children[GENRADIX_ARY];
 
                /* Leaf: */
-               u8                      data[PAGE_SIZE];
+               u8                      data[GENRADIX_NODE_SIZE];
        };
 };
 
 static inline int genradix_depth_shift(unsigned depth)
 {
-       return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth;
+       return GENRADIX_NODE_SHIFT + GENRADIX_ARY_SHIFT * depth;
 }
 
 /*
@@ -33,7 +33,7 @@ static inline size_t genradix_depth_size(unsigned depth)
 
 /* depth that's needed for a genradix that can address up to ULONG_MAX: */
 #define GENRADIX_MAX_DEPTH     \
-       DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT)
+       DIV_ROUND_UP(BITS_PER_LONG - GENRADIX_NODE_SHIFT, GENRADIX_ARY_SHIFT)
 
 #define GENRADIX_DEPTH_MASK                            \
        ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
@@ -79,23 +79,12 @@ EXPORT_SYMBOL(__genradix_ptr);
 
 static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
 {
-       struct genradix_node *node;
-
-       node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO);
-
-       /*
-        * We're using pages (not slab allocations) directly for kernel data
-        * structures, so we need to explicitly inform kmemleak of them in order
-        * to avoid false positive memory leak reports.
-        */
-       kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
-       return node;
+       return kzalloc(GENRADIX_NODE_SIZE, gfp_mask);
 }
 
 static inline void genradix_free_node(struct genradix_node *node)
 {
-       kmemleak_free(node);
-       free_page((unsigned long)node);
+       kfree(node);
 }
 
 /*
@@ -200,7 +189,7 @@ restart:
                        i++;
                        iter->offset = round_down(iter->offset + objs_per_ptr,
                                                  objs_per_ptr);
-                       iter->pos = (iter->offset >> PAGE_SHIFT) *
+                       iter->pos = (iter->offset >> GENRADIX_NODE_SHIFT) *
                                objs_per_page;
                        if (i == GENRADIX_ARY)
                                goto restart;
@@ -209,7 +198,7 @@ restart:
                n = n->children[i];
        }
 
-       return &n->data[iter->offset & (PAGE_SIZE - 1)];
+       return &n->data[iter->offset & (GENRADIX_NODE_SIZE - 1)];
 }
 EXPORT_SYMBOL(__genradix_iter_peek);
 
@@ -235,7 +224,7 @@ restart:
 
        if (ilog2(iter->offset) >= genradix_depth_shift(level)) {
                iter->offset = genradix_depth_size(level);
-               iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page;
+               iter->pos = (iter->offset >> GENRADIX_NODE_SHIFT) * objs_per_page;
 
                iter->offset -= obj_size_plus_page_remainder;
                iter->pos--;
@@ -251,7 +240,7 @@ restart:
                        size_t objs_per_ptr = genradix_depth_size(level);
 
                        iter->offset = round_down(iter->offset, objs_per_ptr);
-                       iter->pos = (iter->offset >> PAGE_SHIFT) * objs_per_page;
+                       iter->pos = (iter->offset >> GENRADIX_NODE_SHIFT) * objs_per_page;
 
                        if (!iter->offset)
                                return NULL;
@@ -267,7 +256,7 @@ restart:
                n = n->children[i];
        }
 
-       return &n->data[iter->offset & (PAGE_SIZE - 1)];
+       return &n->data[iter->offset & (GENRADIX_NODE_SIZE - 1)];
 }
 EXPORT_SYMBOL(__genradix_iter_peek_prev);
 
@@ -289,7 +278,7 @@ int __genradix_prealloc(struct __genradix *radix, size_t size,
 {
        size_t offset;
 
-       for (offset = 0; offset < size; offset += PAGE_SIZE)
+       for (offset = 0; offset < size; offset += GENRADIX_NODE_SIZE)
                if (!__genradix_ptr_alloc(radix, offset, gfp_mask))
                        return -ENOMEM;
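With the node size decoupled from PAGE_SIZE, the tree geometry follows
directly from GENRADIX_NODE_SHIFT. A standalone sketch of the depth
arithmetic; the shift value of 12 here is illustrative, not necessarily
what the kernel header defines:

    #include <stdio.h>

    #define NODE_SHIFT      12      /* illustrative; 4 KiB nodes */
    #define NODE_SIZE       (1UL << NODE_SHIFT)
    #define ARY             (NODE_SIZE / sizeof(void *))   /* 512 */
    #define ARY_SHIFT       9       /* ilog2(ARY) with 8-byte pointers */

    /* Mirrors genradix_depth_shift(): log2 of bytes covered at a depth. */
    static unsigned long depth_shift(unsigned int depth)
    {
            return NODE_SHIFT + ARY_SHIFT * depth;
    }

    int main(void)
    {
            for (unsigned int d = 0; d < 3; d++)
                    printf("depth %u covers %lu bytes\n",
                           d, 1UL << depth_shift(d));
            /* depth 0: 4 KiB, depth 1: 2 MiB, depth 2: 1 GiB */
            return 0;
    }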
 
index af097028872722f4e3a0225f5ca4adb9097c3f39..55e1b35bf877ea529d4f278692c57f46a259d568 100644 (file)
@@ -1307,8 +1307,8 @@ static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
 }
 
 /*
- * mas_node_count() - Check if enough nodes are allocated and request more if
- * there is not enough nodes.
+ * mas_node_count_gfp() - Check if enough nodes are allocated and request more
+ * if there is not enough nodes.
  * @mas: The maple state
  * @count: The number of nodes needed
  * @gfp: the gfp flags
@@ -2271,8 +2271,6 @@ bool mast_spanning_rebalance(struct maple_subtree_state *mast)
        struct ma_state l_tmp = *mast->orig_l;
        unsigned char depth = 0;
 
-       r_tmp = *mast->orig_r;
-       l_tmp = *mast->orig_l;
        do {
                mas_ascend(mast->orig_r);
                mas_ascend(mast->orig_l);
index 55a81782e2717f7bca4f8d53834458db960952a1..191761b1b623cc52c977c96a8b9a14cee1f03707 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/export.h>
 #include <linux/math.h>
 #include <linux/math64.h>
+#include <linux/minmax.h>
 #include <linux/log2.h>
 
 /* Not needed on 64bit architectures */
@@ -190,6 +191,20 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
 
        /* can a * b overflow ? */
        if (ilog2(a) + ilog2(b) > 62) {
+               /*
+                * Note that the algorithm after the if block below might lose
+                * some precision and the result is more exact for b > a. So
+                * exchange a and b if a is bigger than b.
+                *
+                * For example with a = 43980465100800, b = 100000000, c = 1000000000
+                * the below calculation doesn't modify b at all because div == 0
+                * and then shift becomes 45 + 26 - 62 = 9 and so the result
+                * becomes 4398035251080. However with a and b swapped the exact
+                * result is calculated (i.e. 4398046510080).
+                */
+               if (a > b)
+                       swap(a, b);
+
                /*
                 * (b * a) / c is equal to
                 *
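The numbers in the comment are easy to check in userspace on a compiler
with __int128 support (this is a verification sketch, not kernel code):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long a = 43980465100800ULL;   /* from the comment */
            unsigned long long b = 100000000ULL;
            unsigned long long c = 1000000000ULL;
            unsigned __int128 exact = (unsigned __int128)a * b / c;

            /* Prints 4398046510080, the exact quotient the swapped path
             * now returns, versus the 4398035251080 produced before. */
            printf("%llu\n", (unsigned long long)exact);
            return 0;
    }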
index 1c5420ff254e8475f786437544a954ac1a87ea5a..385a94aa0b999b434e07287658363d0c518b734a 100644 (file)
@@ -21,7 +21,7 @@ altivec_flags += -isystem $(shell $(CC) -print-file-name=include)
 ifdef CONFIG_CC_IS_CLANG
 # clang ppc port does not yet support -maltivec when -msoft-float is
 # enabled. A future release of clang will resolve this
-# https://bugs.llvm.org/show_bug.cgi?id=31177
+# https://llvm.org/pr31177
 CFLAGS_REMOVE_altivec1.o  += -msoft-float
 CFLAGS_REMOVE_altivec2.o  += -msoft-float
 CFLAGS_REMOVE_altivec4.o  += -msoft-float
index b399bf10d6759b47c5ed28a1ff0bd3cd4d26b00c..a0509088f82aa57cad934d650960a12e55144a64 100644 (file)
@@ -215,6 +215,7 @@ void sort_r(void *base, size_t num, size_t size,
        /* pre-scale counters for performance */
        size_t n = num * size, a = (num/2) * size;
        const unsigned int lsbit = size & -size;  /* Used to find parent */
+       size_t shift = 0;
 
        if (!a)         /* num < 2 || size == 0 */
                return;
@@ -242,12 +243,21 @@ void sort_r(void *base, size_t num, size_t size,
        for (;;) {
                size_t b, c, d;
 
-               if (a)                  /* Building heap: sift down --a */
-                       a -= size;
-               else if (n -= size)     /* Sorting: Extract root to --n */
+               if (a)                  /* Building heap: sift down a */
+                       a -= size << shift;
+               else if (n > 3 * size) { /* Sorting: Extract two largest elements */
+                       n -= size;
                        do_swap(base, base + n, size, swap_func, priv);
-               else                    /* Sort complete */
+                       shift = do_cmp(base + size, base + 2 * size, cmp_func, priv) <= 0;
+                       a = size << shift;
+                       n -= size;
+                       do_swap(base + a, base + n, size, swap_func, priv);
+               } else if (n > size) {  /* Sorting: Extract root */
+                       n -= size;
+                       do_swap(base, base + n, size, swap_func, priv);
+               } else  {               /* Sort complete */
                        break;
+               }
 
                /*
                 * Sift element at "a" down into heap.  This is the
@@ -262,7 +272,7 @@ void sort_r(void *base, size_t num, size_t size,
                 * average, 3/4 worst-case.)
                 */
                for (b = a; c = 2*b + size, (d = c + size) < n;)
-                       b = do_cmp(base + c, base + d, cmp_func, priv) >= 0 ? c : d;
+                       b = do_cmp(base + c, base + d, cmp_func, priv) > 0 ? c : d;
                if (d == n)     /* Special case last leaf with no sibling */
                        b = c;
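The interface of sort_r() is unchanged by the optimization above; it still
takes a comparison callback that receives the caller's priv cookie. A usage
sketch (the struct and key are made up for illustration):

    #include <linux/sort.h>
    #include <linux/types.h>

    struct item {
            u32 key;
    };

    /* priv selects ascending (+1) or descending (-1) at the call site. */
    static int item_cmp(const void *a, const void *b, const void *priv)
    {
            const struct item *ia = a, *ib = b;
            int dir = *(const int *)priv;

            if (ia->key == ib->key)
                    return 0;
            return ia->key < ib->key ? -dir : dir;
    }

    static void sort_items(struct item *items, size_t n, int dir)
    {
            /* A NULL swap_func makes sort_r() use its built-in swap. */
            sort_r(items, n, sizeof(*items), item_cmp, NULL, &dir);
    }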
 
index 4a7055a63d9f8a8a6723563fd8a30115653eea83..af6cc19a200331aa0c37cf2e497384f0b19d8db0 100644 (file)
 #include <linux/memblock.h>
 #include <linux/kasan-enabled.h>
 
-#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8)
-
-#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */
-#define DEPOT_POOL_SIZE (1LL << (PAGE_SHIFT + DEPOT_POOL_ORDER))
-#define DEPOT_STACK_ALIGN 4
-#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN)
-#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \
-                              STACK_DEPOT_EXTRA_BITS)
 #define DEPOT_POOLS_CAP 8192
+/* The pool_index is offset by 1 so the first record does not have a 0 handle. */
 #define DEPOT_MAX_POOLS \
-       (((1LL << (DEPOT_POOL_INDEX_BITS)) < DEPOT_POOLS_CAP) ? \
-        (1LL << (DEPOT_POOL_INDEX_BITS)) : DEPOT_POOLS_CAP)
-
-/* Compact structure that stores a reference to a stack. */
-union handle_parts {
-       depot_stack_handle_t handle;
-       struct {
-               u32 pool_index  : DEPOT_POOL_INDEX_BITS;
-               u32 offset      : DEPOT_OFFSET_BITS;
-               u32 extra       : STACK_DEPOT_EXTRA_BITS;
-       };
-};
-
-struct stack_record {
-       struct list_head hash_list;     /* Links in the hash table */
-       u32 hash;                       /* Hash in hash table */
-       u32 size;                       /* Number of stored frames */
-       union handle_parts handle;      /* Constant after initialization */
-       refcount_t count;
-       union {
-               unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES];    /* Frames */
-               struct {
-                       /*
-                        * An important invariant of the implementation is to
-                        * only place a stack record onto the freelist iff its
-                        * refcount is zero. Because stack records with a zero
-                        * refcount are never considered as valid, it is safe to
-                        * union @entries and freelist management state below.
-                        * Conversely, as soon as an entry is off the freelist
-                        * and its refcount becomes non-zero, the below must not
-                        * be accessed until being placed back on the freelist.
-                        */
-                       struct list_head free_list;     /* Links in the freelist */
-                       unsigned long rcu_state;        /* RCU cookie */
-               };
-       };
-};
+       (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \
+        (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP)
 
 static bool stack_depot_disabled;
 static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
@@ -372,7 +330,7 @@ static struct stack_record *depot_pop_free_pool(void **prealloc, size_t size)
        stack = current_pool + pool_offset;
 
        /* Pre-initialize handle once. */
-       stack->handle.pool_index = pool_index;
+       stack->handle.pool_index = pool_index + 1;
        stack->handle.offset = pool_offset >> DEPOT_STACK_ALIGN;
        stack->handle.extra = 0;
        INIT_LIST_HEAD(&stack->hash_list);
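The effect of the +1 bias is that no real record ever encodes to handle 0,
which stays reserved as the "no stack" sentinel. A standalone model of the
round trip; the bit widths are illustrative, the real ones come from
DEPOT_POOL_INDEX_BITS and friends:

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative layout; the kernel derives the widths from config. */
    union handle_parts {
            uint32_t handle;
            struct {
                    uint32_t pool_index : 17;   /* stored biased by +1 */
                    uint32_t offset     : 10;
                    uint32_t extra      : 5;
            };
    };

    static uint32_t encode(uint32_t pool_index, uint32_t offset)
    {
            union handle_parts p = { .handle = 0 };

            p.pool_index = pool_index + 1;  /* bias: record 0 != handle 0 */
            p.offset = offset;
            return p.handle;
    }

    static uint32_t decode_pool(uint32_t handle)
    {
            union handle_parts p = { .handle = handle };

            return p.pool_index - 1;        /* undo the bias on lookup */
    }

    int main(void)
    {
            assert(encode(0, 0) != 0);      /* first record is not 0 */
            assert(decode_pool(encode(5, 3)) == 5);
            return 0;
    }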
@@ -483,18 +441,19 @@ static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
        const int pools_num_cached = READ_ONCE(pools_num);
        union handle_parts parts = { .handle = handle };
        void *pool;
+       u32 pool_index = parts.pool_index - 1;
        size_t offset = parts.offset << DEPOT_STACK_ALIGN;
        struct stack_record *stack;
 
        lockdep_assert_not_held(&pool_lock);
 
-       if (parts.pool_index > pools_num_cached) {
+       if (pool_index >= pools_num_cached) {
                WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
-                    parts.pool_index, pools_num_cached, handle);
+                    pool_index, pools_num_cached, handle);
                return NULL;
        }
 
-       pool = stack_pools[parts.pool_index];
+       pool = stack_pools[pool_index];
        if (WARN_ON(!pool))
                return NULL;
 
@@ -728,6 +687,14 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 }
 EXPORT_SYMBOL_GPL(stack_depot_save);
 
+struct stack_record *__stack_depot_get_stack_record(depot_stack_handle_t handle)
+{
+       if (!handle)
+               return NULL;
+
+       return depot_fetch_stack(handle);
+}
+
 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
                               unsigned long **entries)
 {
index dc3c68f46f0abe9fff6466c51ba81aed2ec9627c..3bc14d1ee81630d42c1648241ad0fb89129bb246 100644 (file)
@@ -417,7 +417,7 @@ static noinline int leaf_switch_2_none(unsigned long sp, bool fill,
  * These are expected to fail for most configurations because neither
  * GCC nor Clang have a way to perform initialization of variables in
  * non-code areas (i.e. in a switch statement before the first "case").
- * https://bugs.llvm.org/show_bug.cgi?id=44916
+ * https://llvm.org/pr44916
  */
 DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, ALWAYS_FAIL);
 DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, ALWAYS_FAIL);
index 3718d9886407d6a9d7fafb8a90eebc92f2211178..4ddf769861ff769fd891b58b2547691ec9c82cc3 100644 (file)
@@ -117,7 +117,7 @@ static int align_shift_alloc_test(void)
        int i;
 
        for (i = 0; i < BITS_PER_LONG; i++) {
-               align = ((unsigned long) 1) << i;
+               align = 1UL << i;
 
                ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
                                __builtin_return_address(0));
@@ -501,7 +501,7 @@ static int test_func(void *private)
 }
 
 static int
-init_test_configurtion(void)
+init_test_configuration(void)
 {
        /*
         * A maximum number of workers is defined as hard-coded
@@ -531,7 +531,7 @@ static void do_concurrent_test(void)
        /*
         * Set some basic configurations plus sanity check.
         */
-       ret = init_test_configurtion();
+       ret = init_test_configuration();
        if (ret < 0)
                return;
 
@@ -600,12 +600,7 @@ static int vmalloc_test_init(void)
        return -EAGAIN; /* Fail will directly unload the module */
 }
 
-static void vmalloc_test_exit(void)
-{
-}
-
 module_init(vmalloc_test_init)
-module_exit(vmalloc_test_exit)
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Uladzislau Rezki");
index e77d4856442c3f750434e37819e13688d210048e..ebe2af2e072db390b2c65e149969c65544476e16 100644 (file)
@@ -423,6 +423,59 @@ static noinline void check_cmpxchg(struct xarray *xa)
        XA_BUG_ON(xa, !xa_empty(xa));
 }
 
+static noinline void check_cmpxchg_order(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+       void *FIVE = xa_mk_value(5);
+       unsigned int i, order = 3;
+
+       XA_BUG_ON(xa, xa_store_order(xa, 0, order, FIVE, GFP_KERNEL));
+
+       /* Check entry FIVE has the order saved */
+       XA_BUG_ON(xa, xa_get_order(xa, xa_to_value(FIVE)) != order);
+
+       /* Check all the tied indexes have the same entry and order */
+       for (i = 0; i < (1 << order); i++) {
+               XA_BUG_ON(xa, xa_load(xa, i) != FIVE);
+               XA_BUG_ON(xa, xa_get_order(xa, i) != order);
+       }
+
+       /* Ensure that nothing is stored at index '1 << order' */
+       XA_BUG_ON(xa, xa_load(xa, 1 << order) != NULL);
+
+       /*
+        * Additionally, store an entry at index '1 << order' so the node
+        * information and the order are kept there.
+        */
+       XA_BUG_ON(xa, xa_store_order(xa, 1 << order, order, FIVE, GFP_KERNEL));
+       for (i = (1 << order); i < (1 << order) + (1 << order) - 1; i++) {
+               XA_BUG_ON(xa, xa_load(xa, i) != FIVE);
+               XA_BUG_ON(xa, xa_get_order(xa, i) != order);
+       }
+
+       /* Conditionally replace FIVE entry at index '0' with NULL */
+       XA_BUG_ON(xa, xa_cmpxchg(xa, 0, FIVE, NULL, GFP_KERNEL) != FIVE);
+
+       /* Verify the order is lost at the old FIVE entry's index */
+       XA_BUG_ON(xa, xa_get_order(xa, xa_to_value(FIVE)) != 0);
+
+       /* Verify the order and entries are lost in all the tied indexes */
+       for (i = 0; i < (1 << order); i++) {
+               XA_BUG_ON(xa, xa_load(xa, i) != NULL);
+               XA_BUG_ON(xa, xa_get_order(xa, i) != 0);
+       }
+
+       /* Verify node and order are kept at '1 << order' */
+       for (i = (1 << order); i < (1 << order) + (1 << order) - 1; i++) {
+               XA_BUG_ON(xa, xa_load(xa, i) != FIVE);
+               XA_BUG_ON(xa, xa_get_order(xa, i) != order);
+       }
+
+       xa_store_order(xa, 0, BITS_PER_LONG - 1, NULL, GFP_KERNEL);
+       XA_BUG_ON(xa, !xa_empty(xa));
+#endif
+}
+
 static noinline void check_reserve(struct xarray *xa)
 {
        void *entry;
@@ -674,6 +727,181 @@ static noinline void check_multi_store(struct xarray *xa)
 #endif
 }
 
+#ifdef CONFIG_XARRAY_MULTI
+/* mimics page cache __filemap_add_folio() */
+static noinline void check_xa_multi_store_adv_add(struct xarray *xa,
+                                                 unsigned long index,
+                                                 unsigned int order,
+                                                 void *p)
+{
+       XA_STATE(xas, xa, index);
+       unsigned int nrpages = 1UL << order;
+
+       /* users are responsible for index alignment to the order when adding */
+       XA_BUG_ON(xa, index & (nrpages - 1));
+
+       xas_set_order(&xas, index, order);
+
+       do {
+               xas_lock_irq(&xas);
+
+               xas_store(&xas, p);
+               XA_BUG_ON(xa, xas_error(&xas));
+               XA_BUG_ON(xa, xa_load(xa, index) != p);
+
+               xas_unlock_irq(&xas);
+       } while (xas_nomem(&xas, GFP_KERNEL));
+
+       XA_BUG_ON(xa, xas_error(&xas));
+}
+
+/* mimics page_cache_delete() */
+static noinline void check_xa_multi_store_adv_del_entry(struct xarray *xa,
+                                                       unsigned long index,
+                                                       unsigned int order)
+{
+       XA_STATE(xas, xa, index);
+
+       xas_set_order(&xas, index, order);
+       xas_store(&xas, NULL);
+       xas_init_marks(&xas);
+}
+
+static noinline void check_xa_multi_store_adv_delete(struct xarray *xa,
+                                                    unsigned long index,
+                                                    unsigned int order)
+{
+       xa_lock_irq(xa);
+       check_xa_multi_store_adv_del_entry(xa, index, order);
+       xa_unlock_irq(xa);
+}
+
+/* mimics page cache filemap_get_entry() */
+static noinline void *test_get_entry(struct xarray *xa, unsigned long index)
+{
+       XA_STATE(xas, xa, index);
+       void *p;
+       static unsigned int loops = 0;
+
+       rcu_read_lock();
+repeat:
+       xas_reset(&xas);
+       p = xas_load(&xas);
+       if (xas_retry(&xas, p))
+               goto repeat;
+       rcu_read_unlock();
+
+       /*
+        * This is not part of the page cache; this selftest is pretty
+        * aggressive and does not want to trust the xarray API but rather
+        * test it, and for order 20 (4 GiB block size) we can loop over
+        * more than a million entries, which can cause a soft lockup.
+        * Proper page cache APIs are not that naive: they iterate with
+        * the entry's order and so skip the shared entries.
+        */
+       if (++loops % XA_CHECK_SCHED == 0)
+               schedule();
+
+       return p;
+}
+
+static unsigned long some_val = 0xdeadbeef;
+static unsigned long some_val_2 = 0xdeaddead;
+
+/* mimics the page cache usage */
+static noinline void check_xa_multi_store_adv(struct xarray *xa,
+                                             unsigned long pos,
+                                             unsigned int order)
+{
+       unsigned int nrpages = 1UL << order;
+       unsigned long index, base, next_index, next_next_index;
+       unsigned int i;
+
+       index = pos >> PAGE_SHIFT;
+       base = round_down(index, nrpages);
+       next_index = round_down(base + nrpages, nrpages);
+       next_next_index = round_down(next_index + nrpages, nrpages);
+
+       check_xa_multi_store_adv_add(xa, base, order, &some_val);
+
+       for (i = 0; i < nrpages; i++)
+               XA_BUG_ON(xa, test_get_entry(xa, base + i) != &some_val);
+
+       XA_BUG_ON(xa, test_get_entry(xa, next_index) != NULL);
+
+       /* Use order 0 for the next item */
+       check_xa_multi_store_adv_add(xa, next_index, 0, &some_val_2);
+       XA_BUG_ON(xa, test_get_entry(xa, next_index) != &some_val_2);
+
+       /* Remove the next item */
+       check_xa_multi_store_adv_delete(xa, next_index, 0);
+
+       /* Now use order for a new pointer */
+       check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2);
+
+       for (i = 0; i < nrpages; i++)
+               XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != &some_val_2);
+
+       check_xa_multi_store_adv_delete(xa, next_index, order);
+       check_xa_multi_store_adv_delete(xa, base, order);
+       XA_BUG_ON(xa, !xa_empty(xa));
+
+       /* starting fresh again */
+
+       /* let's test some holes now */
+
+       /* hole at base and next_next */
+       check_xa_multi_store_adv_add(xa, next_index, order, &some_val_2);
+
+       for (i = 0; i < nrpages; i++)
+               XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL);
+
+       for (i = 0; i < nrpages; i++)
+               XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != &some_val_2);
+
+       for (i = 0; i < nrpages; i++)
+               XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != NULL);
+
+       check_xa_multi_store_adv_delete(xa, next_index, order);
+       XA_BUG_ON(xa, !xa_empty(xa));
+
+       /* hole at base and next */
+
+       check_xa_multi_store_adv_add(xa, next_next_index, order, &some_val_2);
+
+       for (i = 0; i < nrpages; i++)
+               XA_BUG_ON(xa, test_get_entry(xa, base + i) != NULL);
+
+       for (i = 0; i < nrpages; i++)
+               XA_BUG_ON(xa, test_get_entry(xa, next_index + i) != NULL);
+
+       for (i = 0; i < nrpages; i++)
+               XA_BUG_ON(xa, test_get_entry(xa, next_next_index + i) != &some_val_2);
+
+       check_xa_multi_store_adv_delete(xa, next_next_index, order);
+       XA_BUG_ON(xa, !xa_empty(xa));
+}
+#endif
+
+static noinline void check_multi_store_advanced(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+       unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+       unsigned long end = ULONG_MAX/2;
+       unsigned long pos, i;
+
+       /*
+        * About 117 million tests below.
+        */
+       for (pos = 7; pos < end; pos = (pos * pos) + 564) {
+               for (i = 0; i < max_order; i++) {
+                       check_xa_multi_store_adv(xa, pos, i);
+                       check_xa_multi_store_adv(xa, pos + 157, i);
+               }
+       }
+#endif
+}
+
 static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base)
 {
        int i;
@@ -1801,9 +2029,11 @@ static int xarray_checks(void)
        check_xas_erase(&array);
        check_insert(&array);
        check_cmpxchg(&array);
+       check_cmpxchg_order(&array);
        check_reserve(&array);
        check_reserve(&xa0);
        check_multi_store(&array);
+       check_multi_store_advanced(&array);
        check_get_order(&array);
        check_xa_alloc();
        check_find(&array);
index ffc3a2ba3a8cd85e2e6d95606bcab1510ce0d679..b924f4a5a3ef8a7bf71498abe4392a350191e618 100644 (file)
@@ -45,22 +45,6 @@ config ZSWAP_DEFAULT_ON
          The selection made here can be overridden by using the kernel
          command line 'zswap.enabled=' option.
 
-config ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON
-       bool "Invalidate zswap entries when pages are loaded"
-       depends on ZSWAP
-       help
-         If selected, exclusive loads for zswap will be enabled at boot,
-         otherwise it will be disabled.
-
-         If exclusive loads are enabled, when a page is loaded from zswap,
-         the zswap entry is invalidated at once, as opposed to leaving it
-         in zswap until the swap entry is freed.
-
-         This avoids having two copies of the same page in memory
-         (compressed and uncompressed) after faulting in a page from zswap.
-         The cost is that if the page was never dirtied and needs to be
-         swapped out again, it will be re-compressed.
-
 config ZSWAP_SHRINKER_DEFAULT_ON
        bool "Shrink the zswap pool on memory pressure"
        depends on ZSWAP
@@ -901,15 +885,6 @@ config CMA
 
          If unsure, say "n".
 
-config CMA_DEBUG
-       bool "CMA debug messages (DEVELOPMENT)"
-       depends on DEBUG_KERNEL && CMA
-       help
-         Turns on debug messages in CMA.  This produces KERN_DEBUG
-         messages for every CMA call as well as various messages while
-         processing calls such as dma_alloc_from_contiguous().
-         This option does not affect warning and error messages.
-
 config CMA_DEBUGFS
        bool "CMA debugfs interface"
        depends on CMA && DEBUG_FS
@@ -926,14 +901,14 @@ config CMA_SYSFS
 config CMA_AREAS
        int "Maximum count of the CMA areas"
        depends on CMA
-       default 19 if NUMA
-       default 7
+       default 20 if NUMA
+       default 8
        help
          CMA allows the creation of CMA areas for a particular purpose,
          mainly used as device private areas. This parameter sets the
          maximum number of CMA areas in the system.
 
-         If unsure, leave the default value "7" in UMA and "19" in NUMA.
+         If unsure, leave the default value "8" in UMA and "20" in NUMA.
 
 config MEM_SOFT_DIRTY
        bool "Track memory changes"
@@ -998,6 +973,12 @@ config IDLE_PAGE_TRACKING
          See Documentation/admin-guide/mm/idle_page_tracking.rst for
          more details.
 
+# Architectures which implement cpu_dcache_is_aliasing() to query
+# whether the data caches are aliased (VIVT or VIPT with dcache
+# aliasing) need to select this.
+config ARCH_HAS_CPU_CACHE_ALIASING
+       bool
+
 config ARCH_HAS_CACHE_LINE_SIZE
        bool
 
index 7c09c47e530bf624fd05cfec3456fbf2346446e3..01f5a8f71ddfa7dc98ca3919f45ce61682d9bf11 100644 (file)
--- a/mm/cma.c
+++ b/mm/cma.c
 
 #define pr_fmt(fmt) "cma: " fmt
 
-#ifdef CONFIG_CMA_DEBUG
-#ifndef DEBUG
-#  define DEBUG
-#endif
-#endif
 #define CREATE_TRACE_POINTS
 
 #include <linux/memblock.h>
@@ -387,7 +382,6 @@ err:
        return ret;
 }
 
-#ifdef CONFIG_CMA_DEBUG
 static void cma_debug_show_areas(struct cma *cma)
 {
        unsigned long next_zero_bit, next_set_bit, nr_zero;
@@ -412,9 +406,6 @@ static void cma_debug_show_areas(struct cma *cma)
        pr_cont("=> %lu free of %lu total pages\n", nr_total, cma->count);
        spin_unlock_irq(&cma->lock);
 }
-#else
-static inline void cma_debug_show_areas(struct cma *cma) { }
-#endif
 
 /**
  * cma_alloc() - allocate pages from contiguous area
@@ -436,17 +427,18 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
        unsigned long i;
        struct page *page = NULL;
        int ret = -ENOMEM;
+       const char *name = cma ? cma->name : NULL;
+
+       trace_cma_alloc_start(name, count, align);
 
        if (!cma || !cma->count || !cma->bitmap)
-               goto out;
+               return page;
 
        pr_debug("%s(cma %p, name: %s, count %lu, align %d)\n", __func__,
                (void *)cma, cma->name, count, align);
 
        if (!count)
-               goto out;
-
-       trace_cma_alloc_start(cma->name, count, align);
+               return page;
 
        mask = cma_bitmap_aligned_mask(cma, align);
        offset = cma_bitmap_aligned_offset(cma, align);
@@ -454,7 +446,7 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
        bitmap_count = cma_bitmap_pages_to_bits(cma, count);
 
        if (bitmap_count > bitmap_maxno)
-               goto out;
+               return page;
 
        for (;;) {
                spin_lock_irq(&cma->lock);
@@ -496,8 +488,6 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
                start = bitmap_no + mask + 1;
        }
 
-       trace_cma_alloc_finish(cma->name, pfn, page, count, align, ret);
-
        /*
         * CMA can allocate multiple page blocks, which results in different
         * blocks being marked with different tags. Reset the tags to ignore
@@ -515,14 +505,13 @@ struct page *cma_alloc(struct cma *cma, unsigned long count,
        }
 
        pr_debug("%s(): returned %p\n", __func__, page);
-out:
+       trace_cma_alloc_finish(name, pfn, page, count, align, ret);
        if (page) {
                count_vm_event(CMA_ALLOC_SUCCESS);
                cma_sysfs_account_success_pages(cma, count);
        } else {
                count_vm_event(CMA_ALLOC_FAIL);
-               if (cma)
-                       cma_sysfs_account_fail_pages(cma, count);
+               cma_sysfs_account_fail_pages(cma, count);
        }
 
        return page;
@@ -573,6 +562,7 @@ bool cma_release(struct cma *cma, const struct page *pages,
 
        free_contig_range(pfn, count);
        cma_clear_bitmap(cma, pfn, count);
+       cma_sysfs_account_release_pages(cma, count);
        trace_cma_release(cma->name, pfn, pages, count);
 
        return true;
index 88a0595670b766cd9780476f7dacb71fd9979e34..ad61cc6dd4396f861553f5acf44a9d0b5bdd9576 100644 (file)
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -27,6 +27,8 @@ struct cma {
        atomic64_t nr_pages_succeeded;
        /* the number of CMA page allocation failures */
        atomic64_t nr_pages_failed;
+       /* the number of CMA pages released */
+       atomic64_t nr_pages_released;
        /* kobject requires dynamic object */
        struct cma_kobject *cma_kobj;
 #endif
@@ -44,10 +46,13 @@ static inline unsigned long cma_bitmap_maxno(struct cma *cma)
 #ifdef CONFIG_CMA_SYSFS
 void cma_sysfs_account_success_pages(struct cma *cma, unsigned long nr_pages);
 void cma_sysfs_account_fail_pages(struct cma *cma, unsigned long nr_pages);
+void cma_sysfs_account_release_pages(struct cma *cma, unsigned long nr_pages);
 #else
 static inline void cma_sysfs_account_success_pages(struct cma *cma,
                                                   unsigned long nr_pages) {};
 static inline void cma_sysfs_account_fail_pages(struct cma *cma,
                                                unsigned long nr_pages) {};
+static inline void cma_sysfs_account_release_pages(struct cma *cma,
+                                                  unsigned long nr_pages) {};
 #endif
 #endif
index 56347d15b7e8b514dcd58928dc1fc16e3eddc3ea..f50db397317182f5ae4f230f0abe28c8cfbcf6eb 100644 (file)
@@ -24,6 +24,11 @@ void cma_sysfs_account_fail_pages(struct cma *cma, unsigned long nr_pages)
        atomic64_add(nr_pages, &cma->nr_pages_failed);
 }
 
+void cma_sysfs_account_release_pages(struct cma *cma, unsigned long nr_pages)
+{
+       atomic64_add(nr_pages, &cma->nr_pages_released);
+}
+
 static inline struct cma *cma_from_kobj(struct kobject *kobj)
 {
        return container_of(kobj, struct cma_kobject, kobj)->cma;
@@ -48,6 +53,15 @@ static ssize_t alloc_pages_fail_show(struct kobject *kobj,
 }
 CMA_ATTR_RO(alloc_pages_fail);
 
+static ssize_t release_pages_success_show(struct kobject *kobj,
+                                         struct kobj_attribute *attr, char *buf)
+{
+       struct cma *cma = cma_from_kobj(kobj);
+
+       return sysfs_emit(buf, "%llu\n", atomic64_read(&cma->nr_pages_released));
+}
+CMA_ATTR_RO(release_pages_success);
+
 static void cma_kobj_release(struct kobject *kobj)
 {
        struct cma *cma = cma_from_kobj(kobj);
@@ -60,6 +74,7 @@ static void cma_kobj_release(struct kobject *kobj)
 static struct attribute *cma_attrs[] = {
        &alloc_pages_success_attr.attr,
        &alloc_pages_fail_attr.attr,
+       &release_pages_success_attr.attr,
        NULL,
 };
 ATTRIBUTE_GROUPS(cma);
index b961db601df4194f4cc69535bf154bef4a2624f0..807b58e6eb68b3e2a2730d5b85beac3edd0fb2c7 100644 (file)
@@ -40,9 +40,22 @@ static inline void count_compact_events(enum vm_event_item item, long delta)
 {
        count_vm_events(item, delta);
 }
+
+/*
+ * order == -1 is expected when compacting proactively via
+ * 1. /proc/sys/vm/compact_memory
+ * 2. /sys/devices/system/node/nodex/compact
+ * 3. /proc/sys/vm/compaction_proactiveness
+ */
+static inline bool is_via_compact_memory(int order)
+{
+       return order == -1;
+}
+
 #else
 #define count_compact_event(item) do { } while (0)
 #define count_compact_events(item, delta) do { } while (0)
+static inline bool is_via_compact_memory(int order) { return false; }
 #endif
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
@@ -66,45 +79,56 @@ static inline void count_compact_events(enum vm_event_item item, long delta)
 #define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT)
 #endif
 
-static unsigned long release_freepages(struct list_head *freelist)
+static void split_map_pages(struct list_head *freepages)
 {
+       unsigned int i, order;
        struct page *page, *next;
-       unsigned long high_pfn = 0;
+       LIST_HEAD(tmp_list);
 
-       list_for_each_entry_safe(page, next, freelist, lru) {
-               unsigned long pfn = page_to_pfn(page);
-               list_del(&page->lru);
-               __free_page(page);
-               if (pfn > high_pfn)
-                       high_pfn = pfn;
-       }
+       for (order = 0; order < NR_PAGE_ORDERS; order++) {
+               list_for_each_entry_safe(page, next, &freepages[order], lru) {
+                       unsigned int nr_pages;
 
-       return high_pfn;
+                       list_del(&page->lru);
+
+                       nr_pages = 1 << order;
+
+                       post_alloc_hook(page, order, __GFP_MOVABLE);
+                       if (order)
+                               split_page(page, order);
+
+                       for (i = 0; i < nr_pages; i++) {
+                               list_add(&page->lru, &tmp_list);
+                               page++;
+                       }
+               }
+               list_splice_init(&tmp_list, &freepages[0]);
+       }
 }
 
-static void split_map_pages(struct list_head *list)
+static unsigned long release_free_list(struct list_head *freepages)
 {
-       unsigned int i, order, nr_pages;
-       struct page *page, *next;
-       LIST_HEAD(tmp_list);
-
-       list_for_each_entry_safe(page, next, list, lru) {
-               list_del(&page->lru);
+       int order;
+       unsigned long high_pfn = 0;
 
-               order = page_private(page);
-               nr_pages = 1 << order;
+       for (order = 0; order < NR_PAGE_ORDERS; order++) {
+               struct page *page, *next;
 
-               post_alloc_hook(page, order, __GFP_MOVABLE);
-               if (order)
-                       split_page(page, order);
+               list_for_each_entry_safe(page, next, &freepages[order], lru) {
+                       unsigned long pfn = page_to_pfn(page);
 
-               for (i = 0; i < nr_pages; i++) {
-                       list_add(&page->lru, &tmp_list);
-                       page++;
+                       list_del(&page->lru);
+                       /*
+                        * Convert free pages into post allocation pages, so
+                        * that we can free them via __free_pages().
+                        */
+                       post_alloc_hook(page, order, __GFP_MOVABLE);
+                       __free_pages(page, order);
+                       if (pfn > high_pfn)
+                               high_pfn = pfn;
                }
        }
-
-       list_splice(&tmp_list, list);
+       return high_pfn;
 }
 
 #ifdef CONFIG_COMPACTION
@@ -657,7 +681,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
                nr_scanned += isolated - 1;
                total_isolated += isolated;
                cc->nr_freepages += isolated;
-               list_add_tail(&page->lru, freelist);
+               list_add_tail(&page->lru, &freelist[order]);
 
                if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
                        blockpfn += isolated;
@@ -722,7 +746,11 @@ isolate_freepages_range(struct compact_control *cc,
                        unsigned long start_pfn, unsigned long end_pfn)
 {
        unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
-       LIST_HEAD(freelist);
+       int order;
+       struct list_head tmp_freepages[NR_PAGE_ORDERS];
+
+       for (order = 0; order < NR_PAGE_ORDERS; order++)
+               INIT_LIST_HEAD(&tmp_freepages[order]);
 
        pfn = start_pfn;
        block_start_pfn = pageblock_start_pfn(pfn);
@@ -753,7 +781,7 @@ isolate_freepages_range(struct compact_control *cc,
                        break;
 
                isolated = isolate_freepages_block(cc, &isolate_start_pfn,
-                                       block_end_pfn, &freelist, 0, true);
+                                       block_end_pfn, tmp_freepages, 0, true);
 
                /*
                 * In strict mode, isolate_freepages_block() returns 0 if
@@ -770,15 +798,15 @@ isolate_freepages_range(struct compact_control *cc,
                 */
        }
 
-       /* __isolate_free_page() does not map the pages */
-       split_map_pages(&freelist);
-
        if (pfn < end_pfn) {
                /* Loop terminated early, cleanup. */
-               release_freepages(&freelist);
+               release_free_list(tmp_freepages);
                return 0;
        }
 
+       /* __isolate_free_page() does not map the pages */
+       split_map_pages(tmp_freepages);
+
        /* We don't use freelists for anything. */
        return pfn;
 }
@@ -816,6 +844,32 @@ static bool too_many_isolated(struct compact_control *cc)
        return too_many;
 }
 
+/**
+ * skip_isolation_on_order() - determine when to skip folio isolation based on
+ *                            folio order and compaction target order
+ * @order:             to-be-isolated folio order
+ * @target_order:      compaction target order
+ *
+ * This avoids unnecessary folio isolations during compaction.
+ */
+static bool skip_isolation_on_order(int order, int target_order)
+{
+       /*
+        * Unless we are performing global compaction (i.e.,
+        * is_via_compact_memory), skip any folios that are larger than the
+        * target order: we wouldn't be here if we'd have a free folio with
+        * the desired target_order, so migrating this folio would likely fail
+        * later.
+        */
+       if (!is_via_compact_memory(target_order) && order >= target_order)
+               return true;
+       /*
+        * We limit memory compaction to pageblocks and won't try
+        * creating free blocks of memory that are larger than that.
+        */
+       return order >= pageblock_order;
+}
+
 /**
  * isolate_migratepages_block() - isolate all migrate-able pages within
  *                               a single pageblock
@@ -947,7 +1001,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        valid_page = page;
                }
 
-               if (PageHuge(page) && cc->alloc_contig) {
+               if (PageHuge(page)) {
+                       /*
+                        * Skip hugetlbfs pages if we are not compacting for
+                        * a range bigger than their order. THPs and other
+                        * compound pages are handled below.
+                        */
+                       if (!cc->alloc_contig) {
+                               const unsigned int order = compound_order(page);
+
+                               if (order <= MAX_PAGE_ORDER) {
+                                       low_pfn += (1UL << order) - 1;
+                                       nr_scanned += (1UL << order) - 1;
+                               }
+                               goto isolate_fail;
+                       }
+                       /* for alloc_contig case */
                        if (locked) {
                                unlock_page_lruvec_irqrestore(locked, flags);
                                locked = NULL;
@@ -1008,21 +1077,24 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                }
 
                /*
-                * Regardless of being on LRU, compound pages such as THP and
-                * hugetlbfs are not to be compacted unless we are attempting
-                * an allocation much larger than the huge page size (eg CMA).
-                * We can potentially save a lot of iterations if we skip them
-                * at once. The check is racy, but we can consider only valid
-                * values and the only danger is skipping too much.
+                * Regardless of being on LRU, compound pages such as THP
+                * (hugetlbfs is handled above) are not to be compacted unless
+                * we are attempting an allocation larger than the compound
+                * page size. We can potentially save a lot of iterations if we
+                * skip them at once. The check is racy, but we can consider
+                * only valid values and the only danger is skipping too much.
                 */
                if (PageCompound(page) && !cc->alloc_contig) {
                        const unsigned int order = compound_order(page);
 
-                       if (likely(order <= MAX_PAGE_ORDER)) {
-                               low_pfn += (1UL << order) - 1;
-                               nr_scanned += (1UL << order) - 1;
+                       /* Skip based on page order and compaction target order. */
+                       if (skip_isolation_on_order(order, cc->order)) {
+                               if (order <= MAX_PAGE_ORDER) {
+                                       low_pfn += (1UL << order) - 1;
+                                       nr_scanned += (1UL << order) - 1;
+                               }
+                               goto isolate_fail;
                        }
-                       goto isolate_fail;
                }
 
                /*
@@ -1165,10 +1237,11 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        }
 
                        /*
-                        * folio become large since the non-locked check,
-                        * and it's on LRU.
+                        * Check LRU folio order under the lock
                         */
-                       if (unlikely(folio_test_large(folio) && !cc->alloc_contig)) {
+                       if (unlikely(skip_isolation_on_order(folio_order(folio),
+                                                            cc->order) &&
+                                    !cc->alloc_contig)) {
                                low_pfn += folio_nr_pages(folio) - 1;
                                nr_scanned += folio_nr_pages(folio) - 1;
                                folio_set_lru(folio);
@@ -1365,12 +1438,14 @@ static bool suitable_migration_target(struct compact_control *cc,
 {
        /* If the page is a large free page, then disallow migration */
        if (PageBuddy(page)) {
+               int order = cc->order > 0 ? cc->order : pageblock_order;
+
                /*
                 * We are checking page_order without zone->lock taken. But
                 * the only small danger is that we skip a potentially suitable
                 * pageblock, so it's not worth to check order for valid range.
                 */
-               if (buddy_order_unsafe(page) >= pageblock_order)
+               if (buddy_order_unsafe(page) >= order)
                        return false;
        }
 
@@ -1458,7 +1533,7 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn)
        if (!page)
                return;
 
-       isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
+       isolate_freepages_block(cc, &start_pfn, end_pfn, cc->freepages, 1, false);
 
        /* Skip this pageblock in the future as it's full or nearly full */
        if (start_pfn == end_pfn && !cc->no_set_skip_hint)
@@ -1587,7 +1662,7 @@ static void fast_isolate_freepages(struct compact_control *cc)
                                nr_scanned += nr_isolated - 1;
                                total_isolated += nr_isolated;
                                cc->nr_freepages += nr_isolated;
-                               list_add_tail(&page->lru, &cc->freepages);
+                               list_add_tail(&page->lru, &cc->freepages[order]);
                                count_compact_events(COMPACTISOLATED, nr_isolated);
                        } else {
                                /* If isolation fails, abort the search */
@@ -1664,13 +1739,12 @@ static void isolate_freepages(struct compact_control *cc)
        unsigned long isolate_start_pfn; /* exact pfn we start at */
        unsigned long block_end_pfn;    /* end of current pageblock */
        unsigned long low_pfn;       /* lowest pfn scanner is able to scan */
-       struct list_head *freelist = &cc->freepages;
        unsigned int stride;
 
        /* Try a small search of the free lists for a candidate */
        fast_isolate_freepages(cc);
        if (cc->nr_freepages)
-               goto splitmap;
+               return;
 
        /*
         * Initialise the free scanner. The starting point is where we last
@@ -1730,7 +1804,7 @@ static void isolate_freepages(struct compact_control *cc)
 
                /* Found a block suitable for isolating free pages from. */
                nr_isolated = isolate_freepages_block(cc, &isolate_start_pfn,
-                                       block_end_pfn, freelist, stride, false);
+                                       block_end_pfn, cc->freepages, stride, false);
 
                /* Update the skip hint if the full pageblock was scanned */
                if (isolate_start_pfn == block_end_pfn)
@@ -1771,10 +1845,6 @@ static void isolate_freepages(struct compact_control *cc)
         * and the loop terminated due to isolate_start_pfn < low_pfn
         */
        cc->free_pfn = isolate_start_pfn;
-
-splitmap:
-       /* __isolate_free_page() does not map the pages */
-       split_map_pages(freelist);
 }
 
 /*
@@ -1785,19 +1855,47 @@ static struct folio *compaction_alloc(struct folio *src, unsigned long data)
 {
        struct compact_control *cc = (struct compact_control *)data;
        struct folio *dst;
+       int order = folio_order(src);
+       bool has_isolated_pages = false;
+       int start_order;
+       struct page *freepage;
+       unsigned long size;
+
+again:
+       for (start_order = order; start_order < NR_PAGE_ORDERS; start_order++)
+               if (!list_empty(&cc->freepages[start_order]))
+                       break;
 
-       if (list_empty(&cc->freepages)) {
-               isolate_freepages(cc);
-
-               if (list_empty(&cc->freepages))
+       /* no free pages in the list */
+       if (start_order == NR_PAGE_ORDERS) {
+               if (has_isolated_pages)
                        return NULL;
+               isolate_freepages(cc);
+               has_isolated_pages = true;
+               goto again;
        }
 
-       dst = list_entry(cc->freepages.next, struct folio, lru);
-       list_del(&dst->lru);
-       cc->nr_freepages--;
+       freepage = list_first_entry(&cc->freepages[start_order], struct page,
+                               lru);
+       size = 1 << start_order;
+
+       list_del(&freepage->lru);
+
+       while (start_order > order) {
+               start_order--;
+               size >>= 1;
+
+               list_add(&freepage[size].lru, &cc->freepages[start_order]);
+               set_page_private(&freepage[size], start_order);
+       }
+       dst = (struct folio *)freepage;
 
-       return dst;
+       post_alloc_hook(&dst->page, order, __GFP_MOVABLE);
+       if (order)
+               prep_compound_page(&dst->page, order);
+       cc->nr_freepages -= 1 << order;
+       cc->nr_migratepages -= 1 << order;
+       return page_rmappable_folio(&dst->page);
 }
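The loop above is a classic buddy-style split: take the smallest free block
whose order is at least the requested one, and hand the upper half back to
the next-lower list at each halving until the block fits. A standalone
model of just that step, with list contents reduced to per-order counters:

    #include <stdio.h>

    #define NR_ORDERS 11

    /* free_blocks[o] mimics cc->freepages[o], reduced to a count. */
    static unsigned int free_blocks[NR_ORDERS];

    /* Take one block of 'order', splitting a larger one if needed.
     * Returns 0 on success, -1 if nothing big enough is free. */
    static int take_block(unsigned int order)
    {
            unsigned int start;

            for (start = order; start < NR_ORDERS; start++)
                    if (free_blocks[start])
                            break;
            if (start == NR_ORDERS)
                    return -1;

            free_blocks[start]--;
            /* Each halving returns one buddy to the next-lower list. */
            while (start > order) {
                    start--;
                    free_blocks[start]++;
            }
            return 0;
    }

    int main(void)
    {
            free_blocks[4] = 1;     /* one order-4 (16-page) block */
            take_block(0);          /* need a single page */
            for (unsigned int o = 0; o < 5; o++)
                    printf("order %u: %u free\n", o, free_blocks[o]);
            /* -> one block free at each of orders 0..3, order 4 empty */
            return 0;
    }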
 
 /*
@@ -1808,9 +1906,19 @@ static struct folio *compaction_alloc(struct folio *src, unsigned long data)
 static void compaction_free(struct folio *dst, unsigned long data)
 {
        struct compact_control *cc = (struct compact_control *)data;
+       int order = folio_order(dst);
+       struct page *page = &dst->page;
 
-       list_add(&dst->lru, &cc->freepages);
-       cc->nr_freepages++;
+       if (folio_put_testzero(dst)) {
+               free_pages_prepare(page, order);
+               list_add(&dst->lru, &cc->freepages[order]);
+               cc->nr_freepages += 1 << order;
+       }
+       cc->nr_migratepages += 1 << order;
+       /*
+        * If someone else still holds a reference to the page, we cannot
+        * take it back onto our free list.
+        */
 }
 
 /* possible outcome of isolate_migratepages */
@@ -2086,17 +2194,6 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
        return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
 }
 
-/*
- * order == -1 is expected when compacting proactively via
- * 1. /proc/sys/vm/compact_memory
- * 2. /sys/devices/system/node/nodex/compact
- * 3. /proc/sys/vm/compaction_proactiveness
- */
-static inline bool is_via_compact_memory(int order)
-{
-       return order == -1;
-}
-
 /*
  * Determine whether kswapd is (or recently was!) running on this node.
  *
@@ -2433,7 +2530,8 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
        unsigned long last_migrated_pfn;
        const bool sync = cc->mode != MIGRATE_ASYNC;
        bool update_cached;
-       unsigned int nr_succeeded = 0;
+       unsigned int nr_succeeded = 0, nr_migratepages;
+       int order;
 
        /*
         * These counters track activities during zone compaction.  Initialize
@@ -2443,7 +2541,8 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
        cc->total_free_scanned = 0;
        cc->nr_migratepages = 0;
        cc->nr_freepages = 0;
-       INIT_LIST_HEAD(&cc->freepages);
+       for (order = 0; order < NR_PAGE_ORDERS; order++)
+               INIT_LIST_HEAD(&cc->freepages[order]);
        INIT_LIST_HEAD(&cc->migratepages);
 
        cc->migratetype = gfp_migratetype(cc->gfp_mask);
@@ -2551,11 +2650,17 @@ rescan:
                                pageblock_start_pfn(cc->migrate_pfn - 1));
                }
 
+               /*
+                * Record the number of pages to migrate, since
+                * compaction_alloc()/compaction_free() will update
+                * cc->nr_migratepages as they run.
+                */
+               nr_migratepages = cc->nr_migratepages;
                err = migrate_pages(&cc->migratepages, compaction_alloc,
                                compaction_free, (unsigned long)cc, cc->mode,
                                MR_COMPACTION, &nr_succeeded);
 
-               trace_mm_compaction_migratepages(cc, nr_succeeded);
+               trace_mm_compaction_migratepages(nr_migratepages, nr_succeeded);
 
                /* All pages were either migrated or will be released */
                cc->nr_migratepages = 0;
@@ -2629,7 +2734,7 @@ out:
         * so we don't leave any returned pages behind in the next attempt.
         */
        if (cc->nr_freepages > 0) {
-               unsigned long free_pfn = release_freepages(&cc->freepages);
+               unsigned long free_pfn = release_free_list(cc->freepages);
 
                cc->nr_freepages = 0;
                VM_BUG_ON(free_pfn == 0);
@@ -2648,7 +2753,6 @@ out:
 
        trace_mm_compaction_end(cc, start_pfn, end_pfn, sync, ret);
 
-       VM_BUG_ON(!list_empty(&cc->freepages));
        VM_BUG_ON(!list_empty(&cc->migratepages));
 
        return ret;
@@ -2783,25 +2887,27 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 }
 
 /*
- * Compact all zones within a node till each zone's fragmentation score
- * reaches within proactive compaction thresholds (as determined by the
- * proactiveness tunable).
+ * compact_node() - compact all zones within a node
+ * @pgdat: The node page data
+ * @proactive: Whether the compaction is proactive
  *
- * It is possible that the function returns before reaching score targets
- * due to various back-off conditions, such as, contention on per-node or
- * per-zone locks.
+ * For proactive compaction, compact until each zone's fragmentation score
+ * reaches within the proactive compaction thresholds (as determined by the
+ * proactiveness tunable). It is possible that the function returns before
+ * reaching the score targets due to various back-off conditions, such as
+ * contention on per-node or per-zone locks.
  */
-static void proactive_compact_node(pg_data_t *pgdat)
+static int compact_node(pg_data_t *pgdat, bool proactive)
 {
        int zoneid;
        struct zone *zone;
        struct compact_control cc = {
                .order = -1,
-               .mode = MIGRATE_SYNC_LIGHT,
+               .mode = proactive ? MIGRATE_SYNC_LIGHT : MIGRATE_SYNC,
                .ignore_skip_hint = true,
                .whole_zone = true,
                .gfp_mask = GFP_KERNEL,
-               .proactive_compaction = true,
+               .proactive_compaction = proactive,
        };
 
        for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
@@ -2809,54 +2915,39 @@ static void proactive_compact_node(pg_data_t *pgdat)
                if (!populated_zone(zone))
                        continue;
 
+               if (fatal_signal_pending(current))
+                       return -EINTR;
+
                cc.zone = zone;
 
                compact_zone(&cc, NULL);
 
-               count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
-                                    cc.total_migrate_scanned);
-               count_compact_events(KCOMPACTD_FREE_SCANNED,
-                                    cc.total_free_scanned);
+               if (proactive) {
+                       count_compact_events(KCOMPACTD_MIGRATE_SCANNED,
+                                            cc.total_migrate_scanned);
+                       count_compact_events(KCOMPACTD_FREE_SCANNED,
+                                            cc.total_free_scanned);
+               }
        }
-}
-
-/* Compact all zones within a node */
-static void compact_node(int nid)
-{
-       pg_data_t *pgdat = NODE_DATA(nid);
-       int zoneid;
-       struct zone *zone;
-       struct compact_control cc = {
-               .order = -1,
-               .mode = MIGRATE_SYNC,
-               .ignore_skip_hint = true,
-               .whole_zone = true,
-               .gfp_mask = GFP_KERNEL,
-       };
-
-
-       for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
-
-               zone = &pgdat->node_zones[zoneid];
-               if (!populated_zone(zone))
-                       continue;
-
-               cc.zone = zone;
 
-               compact_zone(&cc, NULL);
-       }
+       return 0;
 }
 
-/* Compact all nodes in the system */
-static void compact_nodes(void)
+/* Compact all zones of all nodes in the system */
+static int compact_nodes(void)
 {
-       int nid;
+       int ret, nid;
 
        /* Flush pending updates to the LRU lists */
        lru_add_drain_all();
 
-       for_each_online_node(nid)
-               compact_node(nid);
+       for_each_online_node(nid) {
+               ret = compact_node(NODE_DATA(nid), false);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
 }
 
 static int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
@@ -2902,9 +2993,9 @@ static int sysctl_compaction_handler(struct ctl_table *table, int write,
                return -EINVAL;
 
        if (write)
-               compact_nodes();
+               ret = compact_nodes();
 
-       return 0;
+       return ret;
 }
 
 #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
@@ -2918,7 +3009,7 @@ static ssize_t compact_store(struct device *dev,
                /* Flush pending updates to the LRU lists */
                lru_add_drain_all();
 
-               compact_node(nid);
+               compact_node(NODE_DATA(nid), false);
        }
 
        return count;
@@ -3127,7 +3218,7 @@ static int kcompactd(void *p)
                        unsigned int prev_score, score;
 
                        prev_score = fragmentation_score_node(pgdat);
-                       proactive_compact_node(pgdat);
+                       compact_node(pgdat, true);
                        score = fragmentation_score_node(pgdat);
                        /*
                         * Defer proactive compaction if the fragmentation
index 29f43fbc2eff13db0c7cc84ea1bbf0ca3700edad..fecb8172410c54ba9b85dbcd5c88bdb9844121b8 100644 (file)
@@ -71,7 +71,7 @@ config DAMON_SYSFS_KUNIT_TEST
 
          If unsure, say N.
 
-config DAMON_DBGFS
+config DAMON_DBGFS_DEPRECATED
        bool "DAMON debugfs interface (DEPRECATED!)"
        depends on DAMON_VADDR && DAMON_PADDR && DEBUG_FS
        help
@@ -84,6 +84,11 @@ config DAMON_DBGFS
          (DAMON_SYSFS).  If you depend on this and cannot move, please report
          your usecase to damon@lists.linux.dev and linux-mm@kvack.org.
 
+config DAMON_DBGFS
+       bool
+       default y
+       depends on DAMON_DBGFS_DEPRECATED
+
 config DAMON_DBGFS_KUNIT_TEST
        bool "Test for damon debugfs interface" if !KUNIT_ALL_TESTS
        depends on DAMON_DBGFS && KUNIT=y
index 5b325749fc12597ddd273ae605bdb1c04a93f99e..6d503c1c125ef0e2c84162ff6d17787ef089a7f4 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/mm.h>
+#include <linux/psi.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 
@@ -299,12 +300,48 @@ void damos_destroy_filter(struct damos_filter *f)
        damos_free_filter(f);
 }
 
-/* initialize private fields of damos_quota and return the pointer */
-static struct damos_quota *damos_quota_init_priv(struct damos_quota *quota)
+struct damos_quota_goal *damos_new_quota_goal(
+               enum damos_quota_goal_metric metric,
+               unsigned long target_value)
 {
+       struct damos_quota_goal *goal;
+
+       goal = kmalloc(sizeof(*goal), GFP_KERNEL);
+       if (!goal)
+               return NULL;
+       goal->metric = metric;
+       goal->target_value = target_value;
+       INIT_LIST_HEAD(&goal->list);
+       return goal;
+}
+
+void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g)
+{
+       list_add_tail(&g->list, &q->goals);
+}
+
+static void damos_del_quota_goal(struct damos_quota_goal *g)
+{
+       list_del(&g->list);
+}
+
+static void damos_free_quota_goal(struct damos_quota_goal *g)
+{
+       kfree(g);
+}
+
+void damos_destroy_quota_goal(struct damos_quota_goal *g)
+{
+       damos_del_quota_goal(g);
+       damos_free_quota_goal(g);
+}
+
+/* initialize fields of @quota that normally API users wouldn't set */
+static struct damos_quota *damos_quota_init(struct damos_quota *quota)
+{
+       quota->esz = 0;
        quota->total_charged_sz = 0;
        quota->total_charged_ns = 0;
-       quota->esz = 0;
        quota->charged_sz = 0;
        quota->charged_from = 0;
        quota->charge_target_from = NULL;
@@ -336,7 +373,9 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
        scheme->stat = (struct damos_stat){};
        INIT_LIST_HEAD(&scheme->list);
 
-       scheme->quota = *(damos_quota_init_priv(quota));
+       scheme->quota = *(damos_quota_init(quota));
+       /* quota.goals should be separately set by caller */
+       INIT_LIST_HEAD(&scheme->quota.goals);
 
        scheme->wmarks = *wmarks;
        scheme->wmarks.activated = true;
@@ -373,8 +412,12 @@ static void damon_free_scheme(struct damos *s)
 
 void damon_destroy_scheme(struct damos *s)
 {
+       struct damos_quota_goal *g, *g_next;
        struct damos_filter *f, *next;
 
+       damos_for_each_quota_goal_safe(g, g_next, &s->quota)
+               damos_destroy_quota_goal(g);
+
        damos_for_each_filter_safe(f, next, s)
                damos_destroy_filter(f);
        damon_del_scheme(s);
@@ -1083,21 +1126,78 @@ static unsigned long damon_feed_loop_next_input(unsigned long last_input,
        return min_input;
 }
 
-/* Shouldn't be called if quota->ms, quota->sz, and quota->get_score unset */
+#ifdef CONFIG_PSI
+
+static u64 damos_get_some_mem_psi_total(void)
+{
+       if (static_branch_likely(&psi_disabled))
+               return 0;
+       return div_u64(psi_system.total[PSI_AVGS][PSI_MEM * 2],
+                       NSEC_PER_USEC);
+}
+
+#else  /* CONFIG_PSI */
+
+static inline u64 damos_get_some_mem_psi_total(void)
+{
+       return 0;
+}
+
+#endif /* CONFIG_PSI */
+
+static void damos_set_quota_goal_current_value(struct damos_quota_goal *goal)
+{
+       u64 now_psi_total;
+
+       switch (goal->metric) {
+       case DAMOS_QUOTA_USER_INPUT:
+               /* User should already set goal->current_value */
+               break;
+       case DAMOS_QUOTA_SOME_MEM_PSI_US:
+               now_psi_total = damos_get_some_mem_psi_total();
+               goal->current_value = now_psi_total - goal->last_psi_total;
+               goal->last_psi_total = now_psi_total;
+               break;
+       default:
+               break;
+       }
+}
+
+/* Return the highest score since it makes schemes least aggressive */
+static unsigned long damos_quota_score(struct damos_quota *quota)
+{
+       struct damos_quota_goal *goal;
+       unsigned long highest_score = 0;
+
+       damos_for_each_quota_goal(goal, quota) {
+               damos_set_quota_goal_current_value(goal);
+               highest_score = max(highest_score,
+                               goal->current_value * 10000 /
+                               goal->target_value);
+       }
+
+       return highest_score;
+}
+
+/*
+ * Called only if quota->ms or quota->sz is set, or quota->goals is not empty
+ */
 static void damos_set_effective_quota(struct damos_quota *quota)
 {
        unsigned long throughput;
        unsigned long esz;
 
-       if (!quota->ms && !quota->get_score) {
+       if (!quota->ms && list_empty(&quota->goals)) {
                quota->esz = quota->sz;
                return;
        }
 
-       if (quota->get_score) {
+       if (!list_empty(&quota->goals)) {
+               unsigned long score = damos_quota_score(quota);
+
                quota->esz_bp = damon_feed_loop_next_input(
                                max(quota->esz_bp, 10000UL),
-                               quota->get_score(quota->get_score_arg));
+                               score);
                esz = quota->esz_bp / 10000;
        }
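
The score fed into damon_feed_loop_next_input() is in basis points: 10000
means the goal is exactly met, and a larger value means the measured metric
overshoots the target, so the effective quota should shrink. A standalone
sketch of just that arithmetic (the numbers are made up; only the
current * 10000 / target rule and the PSI-delta idea come from the code above):

#include <stdio.h>

/* Score in basis points: 10000 == exactly on target. */
static unsigned long goal_score(unsigned long current_value,
				unsigned long target_value)
{
	return current_value * 10000 / target_value;
}

int main(void)
{
	/*
	 * For DAMOS_QUOTA_SOME_MEM_PSI_US, current_value is the growth of
	 * the system "some" memory PSI total since the last check, e.g.
	 * 300us measured against a 200us goal.
	 */
	unsigned long score = goal_score(300, 200);

	printf("score: %lu bp (%s target)\n", score,
	       score > 10000 ? "over" : "at/under");	/* 15000 bp, over */
	return 0;
}
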
 
@@ -1107,7 +1207,7 @@ static void damos_set_effective_quota(struct damos_quota *quota)
                                quota->total_charged_ns;
                else
                        throughput = PAGE_SIZE * 1024;
-               if (quota->get_score)
+               if (!list_empty(&quota->goals))
                        esz = min(throughput * quota->ms, esz);
                else
                        esz = throughput * quota->ms;
@@ -1127,7 +1227,7 @@ static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
        unsigned long cumulated_sz;
        unsigned int score, max_score = 0;
 
-       if (!quota->ms && !quota->sz && !quota->get_score)
+       if (!quota->ms && !quota->sz && list_empty(&quota->goals))
                return;
 
        /* New charge window starts */
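
Taken together, the new calls form a small ownership protocol: a goal is
allocated with damos_new_quota_goal(), handed to a quota with
damos_add_quota_goal(), and reclaimed through the list walk that
damon_destroy_scheme() now performs. A sketch of the intended call pattern,
mirroring what DAMON_RECLAIM does further down (the wrapper function is
hypothetical; the declarations are assumed to come from <linux/damon.h>):

#include <linux/damon.h>

/* Illustrative only: attach a PSI goal to a scheme, then tear it down. */
static int example_attach_psi_goal(struct damos *scheme)
{
	struct damos_quota_goal *goal, *next;

	/* aim for 200us of system "some" memory PSI per reset interval */
	goal = damos_new_quota_goal(DAMOS_QUOTA_SOME_MEM_PSI_US, 200);
	if (!goal)
		return -ENOMEM;
	damos_add_quota_goal(&scheme->quota, goal);

	/* teardown walks the list, as damon_destroy_scheme() now does */
	damos_for_each_quota_goal_safe(goal, next, &scheme->quota)
		damos_destroy_quota_goal(goal);
	return 0;
}
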
index 7dac24e69e3b95087edbaf236e755b64dc24e43e..2461cfe2e968833dd67103687ac6bc51a935a0b5 100644 (file)
 #include <linux/page_idle.h>
 #include <linux/slab.h>
 
+#define DAMON_DBGFS_DEPRECATION_NOTICE                                 \
+       "DAMON debugfs interface is deprecated, so users should move "  \
+       "to DAMON_SYSFS. If you cannot, please report your usecase to " \
+       "damon@lists.linux.dev and linux-mm@kvack.org.\n"
+
 static struct damon_ctx **dbgfs_ctxs;
 static int dbgfs_nr_ctxs;
 static struct dentry **dbgfs_dirs;
@@ -22,10 +27,7 @@ static DEFINE_MUTEX(damon_dbgfs_lock);
 
 static void damon_dbgfs_warn_deprecation(void)
 {
-       pr_warn_once("DAMON debugfs interface is deprecated, "
-                    "so users should move to DAMON_SYSFS. If you cannot, "
-                    "please report your usecase to damon@lists.linux.dev and "
-                    "linux-mm@kvack.org.\n");
+       pr_warn_once(DAMON_DBGFS_DEPRECATION_NOTICE);
 }
 
 /*
@@ -805,6 +807,14 @@ static void dbgfs_destroy_ctx(struct damon_ctx *ctx)
        damon_destroy_ctx(ctx);
 }
 
+static ssize_t damon_dbgfs_deprecated_read(struct file *file,
+               char __user *buf, size_t count, loff_t *ppos)
+{
+       static const char kbuf[512] = DAMON_DBGFS_DEPRECATION_NOTICE;
+
+       return simple_read_from_buffer(buf, count, ppos, kbuf, strlen(kbuf));
+}
+
 /*
  * Make a context of @name and create a debugfs directory for it.
  *
@@ -1056,6 +1066,10 @@ static int damon_dbgfs_static_file_open(struct inode *inode, struct file *file)
        return nonseekable_open(inode, file);
 }
 
+static const struct file_operations deprecated_fops = {
+       .read = damon_dbgfs_deprecated_read,
+};
+
 static const struct file_operations mk_contexts_fops = {
        .open = damon_dbgfs_static_file_open,
        .write = dbgfs_mk_context_write,
@@ -1076,9 +1090,9 @@ static int __init __damon_dbgfs_init(void)
 {
        struct dentry *dbgfs_root;
        const char * const file_names[] = {"mk_contexts", "rm_contexts",
-               "monitor_on"};
+               "monitor_on_DEPRECATED", "DEPRECATED"};
        const struct file_operations *fops[] = {&mk_contexts_fops,
-               &rm_contexts_fops, &monitor_on_fops};
+               &rm_contexts_fops, &monitor_on_fops, &deprecated_fops};
        int i;
 
        dbgfs_root = debugfs_create_dir("damon", NULL);
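
The new "DEPRECATED" file is about the smallest read-only debugfs file
possible: a bare .read handler backed by simple_read_from_buffer(), which
takes care of *ppos and partial reads, with debugfs supplying the default
open. For reference, the same shape as a self-contained module sketch (all
names here are illustrative, not part of the patch):

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/string.h>

static const char notice[] = "this interface is deprecated\n";

static ssize_t notice_read(struct file *file, char __user *buf,
			   size_t count, loff_t *ppos)
{
	return simple_read_from_buffer(buf, count, ppos, notice,
				       strlen(notice));
}

static const struct file_operations notice_fops = {
	.read = notice_read,
};

static struct dentry *dir;

static int __init notice_init(void)
{
	dir = debugfs_create_dir("notice_demo", NULL);
	debugfs_create_file("DEPRECATED", 0400, dir, NULL, &notice_fops);
	return 0;
}

static void __exit notice_exit(void)
{
	debugfs_remove_recursive(dir);
}

module_init(notice_init);
module_exit(notice_exit);
MODULE_LICENSE("GPL");
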
index 081e2a3257789201d643a2a338075f1519b04687..5e6dc312072cd01ac30d58be07ae74a204550c41 100644 (file)
@@ -249,7 +249,7 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s)
 put_folio:
                folio_put(folio);
        }
-       applied = reclaim_pages(&folio_list);
+       applied = reclaim_pages(&folio_list, false);
        cond_resched();
        return applied * PAGE_SIZE;
 }
index 66e190f0374ac84b47100b8ba21fe4c32e104891..9bd341d62b4c756b22cbe4d0c171400321c0f7e5 100644 (file)
@@ -62,6 +62,36 @@ static struct damos_quota damon_reclaim_quota = {
 };
 DEFINE_DAMON_MODULES_DAMOS_QUOTAS(damon_reclaim_quota);
 
+/*
+ * Desired level of memory pressure-stall time in microseconds.
+ *
+ * While keeping the caps that are set by other quotas, DAMON_RECLAIM
+ * automatically raises and lowers the effective level of the quota, aiming to
+ * incur this level of memory pressure.  System-wide ``some`` memory PSI in
+ * microseconds per quota reset interval (``quota_reset_interval_ms``) is
+ * collected and compared to this value to see if the aim is met.  A value of
+ * zero disables this auto-tuning feature.
+ *
+ * Disabled by default.
+ */
+static unsigned long quota_mem_pressure_us __read_mostly;
+module_param(quota_mem_pressure_us, ulong, 0600);
+
+/*
+ * User-specifiable feedback for auto-tuning of the effective quota.
+ *
+ * While keeping the caps that are set by other quotas, DAMON_RECLAIM
+ * automatically raises and lowers the effective level of the quota, aiming to
+ * receive a feedback value of ``10,000`` from the user.  DAMON_RECLAIM
+ * assumes the feedback value and the quota are positively proportional.  A
+ * value of zero disables this auto-tuning feature.
+ *
+ * Disabled by default.
+ */
+static unsigned long quota_autotune_feedback __read_mostly;
+module_param(quota_autotune_feedback, ulong, 0600);
+
 static struct damos_watermarks damon_reclaim_wmarks = {
        .metric = DAMOS_WMARK_FREE_MEM_RATE,
        .interval = 5000000,    /* 5 seconds */
@@ -159,11 +189,13 @@ static void damon_reclaim_copy_quota_status(struct damos_quota *dst,
        dst->charged_from = src->charged_from;
        dst->charge_target_from = src->charge_target_from;
        dst->charge_addr_from = src->charge_addr_from;
+       dst->esz_bp = src->esz_bp;
 }
 
 static int damon_reclaim_apply_parameters(void)
 {
        struct damos *scheme, *old_scheme;
+       struct damos_quota_goal *goal;
        struct damos_filter *filter;
        int err = 0;
 
@@ -180,6 +212,27 @@ static int damon_reclaim_apply_parameters(void)
                        damon_reclaim_copy_quota_status(&scheme->quota,
                                        &old_scheme->quota);
        }
+
+       if (quota_mem_pressure_us) {
+               goal = damos_new_quota_goal(DAMOS_QUOTA_SOME_MEM_PSI_US,
+                               quota_mem_pressure_us);
+               if (!goal) {
+                       damon_destroy_scheme(scheme);
+                       return -ENOMEM;
+               }
+               damos_add_quota_goal(&scheme->quota, goal);
+       }
+
+       if (quota_autotune_feedback) {
+               goal = damos_new_quota_goal(DAMOS_QUOTA_USER_INPUT, 10000);
+               if (!goal) {
+                       damon_destroy_scheme(scheme);
+                       return -ENOMEM;
+               }
+               goal->current_value = quota_autotune_feedback;
+               damos_add_quota_goal(&scheme->quota, goal);
+       }
+
        if (skip_anon) {
                filter = damos_new_filter(DAMOS_FILTER_TYPE_ANON, true);
                if (!filter) {
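
Both knobs are plain module parameters, so they can be changed at runtime
through sysfs and get picked up the next time DAMON_RECLAIM applies its
parameters. A small sketch, assuming the usual /sys/module layout for a
module named damon_reclaim:

#include <stdio.h>

int main(void)
{
	/* Ask DAMON_RECLAIM to aim for 100us of "some" memory PSI per
	 * quota reset interval; writing 0 turns the auto-tuning off. */
	FILE *f = fopen("/sys/module/damon_reclaim/parameters/"
			"quota_mem_pressure_us", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "100\n");
	return fclose(f) ? 1 : 0;
}
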
index 4c37a166eb8180ff4709a3f5c666e7619b701ac1..a63f51577cffdbd2e46a165438f1585442ed1f7c 100644 (file)
@@ -49,6 +49,8 @@ int damon_sysfs_schemes_update_regions_start(
                struct damon_sysfs_schemes *sysfs_schemes,
                struct damon_ctx *ctx, bool total_bytes_only);
 
+void damos_sysfs_mark_finished_regions_updates(struct damon_ctx *ctx);
+
 bool damos_sysfs_regions_upd_done(void);
 
 int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx);
@@ -57,5 +59,9 @@ int damon_sysfs_schemes_clear_regions(
                struct damon_sysfs_schemes *sysfs_schemes,
                struct damon_ctx *ctx);
 
-void damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes,
+int damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes,
+               struct damon_ctx *ctx);
+
+void damos_sysfs_update_effective_quotas(
+               struct damon_sysfs_schemes *sysfs_schemes,
                struct damon_ctx *ctx);
index ae0f0b314f3a9a5ec251021d0fb68d423fa53cd7..53a90ac678fb98c2b9c77bf8baecf62fcb0ff334 100644 (file)
@@ -127,17 +127,17 @@ static const struct kobj_type damon_sysfs_scheme_region_ktype = {
  *
  * Once the tried regions update request is received, the request handling
  * start function (damon_sysfs_scheme_update_regions_start()) sets the status
- * of all schemes as 'idle' again, and register ->before_damos_apply() and
- * ->after_sampling() callbacks.
+ * of all schemes as 'idle' again, and registers the ->before_damos_apply()
+ * callback.
  *
  * Then, the first followup ->before_damos_apply() callback
  * (damon_sysfs_before_damos_apply()) sets the status 'started'.  The first
- * ->after_sampling() callback (damon_sysfs_after_sampling()) after the call
- * is called only after the scheme is completely applied
- * to the given snapshot.  Hence the callback knows the situation by showing
- * 'started' status, and sets the status as 'finished'.  Then,
- * damon_sysfs_before_damos_apply() understands the situation by showing the
- * 'finished' status and do nothing.
+ * ->after_sampling() or ->after_aggregation() callback
+ * (damon_sysfs_cmd_request_callback()) after that is invoked only once the
+ * scheme has been completely applied to the given snapshot.  Hence the
+ * callback knows the situation from the 'started' status, and sets the status
+ * to 'finished'.  Then, damon_sysfs_before_damos_apply() recognizes the
+ * 'finished' status and does nothing.
  *
  * If DAMOS is not applied to any region due to any reasons including the
  * access pattern, the watermarks, the quotas, and the filters,
@@ -826,15 +826,48 @@ static const struct kobj_type damon_sysfs_watermarks_ktype = {
 
 struct damos_sysfs_quota_goal {
        struct kobject kobj;
+       enum damos_quota_goal_metric metric;
        unsigned long target_value;
        unsigned long current_value;
 };
 
+/* This should match with enum damos_quota_goal_metric */
+static const char * const damos_sysfs_quota_goal_metric_strs[] = {
+       "user_input",
+       "some_mem_psi_us",
+};
+
 static struct damos_sysfs_quota_goal *damos_sysfs_quota_goal_alloc(void)
 {
        return kzalloc(sizeof(struct damos_sysfs_quota_goal), GFP_KERNEL);
 }
 
+static ssize_t target_metric_show(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       struct damos_sysfs_quota_goal *goal = container_of(kobj,
+                       struct damos_sysfs_quota_goal, kobj);
+
+       return sysfs_emit(buf, "%s\n",
+                       damos_sysfs_quota_goal_metric_strs[goal->metric]);
+}
+
+static ssize_t target_metric_store(struct kobject *kobj,
+               struct kobj_attribute *attr, const char *buf, size_t count)
+{
+       struct damos_sysfs_quota_goal *goal = container_of(kobj,
+                       struct damos_sysfs_quota_goal, kobj);
+       enum damos_quota_goal_metric m;
+
+       for (m = 0; m < NR_DAMOS_QUOTA_GOAL_METRICS; m++) {
+               if (sysfs_streq(buf, damos_sysfs_quota_goal_metric_strs[m])) {
+                       goal->metric = m;
+                       return count;
+               }
+       }
+       return -EINVAL;
+}
+
 static ssize_t target_value_show(struct kobject *kobj,
                struct kobj_attribute *attr, char *buf)
 {
@@ -880,6 +913,9 @@ static void damos_sysfs_quota_goal_release(struct kobject *kobj)
        kfree(container_of(kobj, struct damos_sysfs_quota_goal, kobj));
 }
 
+static struct kobj_attribute damos_sysfs_quota_goal_target_metric_attr =
+               __ATTR_RW_MODE(target_metric, 0600);
+
 static struct kobj_attribute damos_sysfs_quota_goal_target_value_attr =
                __ATTR_RW_MODE(target_value, 0600);
 
@@ -887,6 +923,7 @@ static struct kobj_attribute damos_sysfs_quota_goal_current_value_attr =
                __ATTR_RW_MODE(current_value, 0600);
 
 static struct attribute *damos_sysfs_quota_goal_attrs[] = {
+       &damos_sysfs_quota_goal_target_metric_attr.attr,
        &damos_sysfs_quota_goal_target_value_attr.attr,
        &damos_sysfs_quota_goal_current_value_attr.attr,
        NULL,
@@ -1139,6 +1176,7 @@ struct damon_sysfs_quotas {
        unsigned long ms;
        unsigned long sz;
        unsigned long reset_interval_ms;
+       unsigned long effective_sz;     /* Effective size quota in bytes */
 };
 
 static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void)
@@ -1252,6 +1290,15 @@ static ssize_t reset_interval_ms_store(struct kobject *kobj,
        return count;
 }
 
+static ssize_t effective_bytes_show(struct kobject *kobj,
+               struct kobj_attribute *attr, char *buf)
+{
+       struct damon_sysfs_quotas *quotas = container_of(kobj,
+                       struct damon_sysfs_quotas, kobj);
+
+       return sysfs_emit(buf, "%lu\n", quotas->effective_sz);
+}
+
 static void damon_sysfs_quotas_release(struct kobject *kobj)
 {
        kfree(container_of(kobj, struct damon_sysfs_quotas, kobj));
@@ -1266,10 +1313,14 @@ static struct kobj_attribute damon_sysfs_quotas_sz_attr =
 static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr =
                __ATTR_RW_MODE(reset_interval_ms, 0600);
 
+static struct kobj_attribute damon_sysfs_quotas_effective_bytes_attr =
+               __ATTR_RO_MODE(effective_bytes, 0400);
+
 static struct attribute *damon_sysfs_quotas_attrs[] = {
        &damon_sysfs_quotas_ms_attr.attr,
        &damon_sysfs_quotas_sz_attr.attr,
        &damon_sysfs_quotas_reset_interval_ms_attr.attr,
+       &damon_sysfs_quotas_effective_bytes_attr.attr,
        NULL,
 };
 ATTRIBUTE_GROUPS(damon_sysfs_quotas);
@@ -1868,35 +1919,35 @@ static int damon_sysfs_set_scheme_filters(struct damos *scheme,
        return 0;
 }
 
-static unsigned long damos_sysfs_get_quota_score(void *arg)
-{
-       return (unsigned long)arg;
-}
-
-static void damos_sysfs_set_quota_score(
+static int damos_sysfs_set_quota_score(
                struct damos_sysfs_quota_goals *sysfs_goals,
                struct damos_quota *quota)
 {
-       struct damos_sysfs_quota_goal *sysfs_goal;
+       struct damos_quota_goal *goal, *next;
        int i;
 
-       quota->get_score = NULL;
-       quota->get_score_arg = (void *)0;
+       damos_for_each_quota_goal_safe(goal, next, quota)
+               damos_destroy_quota_goal(goal);
+
        for (i = 0; i < sysfs_goals->nr; i++) {
-               sysfs_goal = sysfs_goals->goals_arr[i];
+               struct damos_sysfs_quota_goal *sysfs_goal =
+                       sysfs_goals->goals_arr[i];
+
                if (!sysfs_goal->target_value)
                        continue;
 
-               /* Higher score makes scheme less aggressive */
-               quota->get_score_arg = (void *)max(
-                               (unsigned long)quota->get_score_arg,
-                               sysfs_goal->current_value * 10000 /
+               goal = damos_new_quota_goal(sysfs_goal->metric,
                                sysfs_goal->target_value);
-               quota->get_score = damos_sysfs_get_quota_score;
+               if (!goal)
+                       return -ENOMEM;
+               if (sysfs_goal->metric == DAMOS_QUOTA_USER_INPUT)
+                       goal->current_value = sysfs_goal->current_value;
+               damos_add_quota_goal(quota, goal);
        }
+       return 0;
 }
 
-void damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes,
+int damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes,
                struct damon_ctx *ctx)
 {
        struct damos *scheme;
@@ -1904,16 +1955,41 @@ void damos_sysfs_set_quota_scores(struct damon_sysfs_schemes *sysfs_schemes,
 
        damon_for_each_scheme(scheme, ctx) {
                struct damon_sysfs_scheme *sysfs_scheme;
+               int err;
 
                /* user could have removed the scheme sysfs dir */
                if (i >= sysfs_schemes->nr)
                        break;
 
                sysfs_scheme = sysfs_schemes->schemes_arr[i];
-               damos_sysfs_set_quota_score(sysfs_scheme->quotas->goals,
+               err = damos_sysfs_set_quota_score(sysfs_scheme->quotas->goals,
                                &scheme->quota);
+               if (err)
+                       /* kdamond will clean up schemes and terminate */
+                       return err;
                i++;
        }
+       return 0;
+}
+
+void damos_sysfs_update_effective_quotas(
+               struct damon_sysfs_schemes *sysfs_schemes,
+               struct damon_ctx *ctx)
+{
+       struct damos *scheme;
+       int schemes_idx = 0;
+
+       damon_for_each_scheme(scheme, ctx) {
+               struct damon_sysfs_quotas *sysfs_quotas;
+
+               /* user could have removed the scheme sysfs dir */
+               if (schemes_idx >= sysfs_schemes->nr)
+                       break;
+
+               sysfs_quotas =
+                       sysfs_schemes->schemes_arr[schemes_idx++]->quotas;
+               sysfs_quotas->effective_sz = scheme->quota.esz;
+       }
 }
 
 static struct damos *damon_sysfs_mk_scheme(
@@ -1953,13 +2029,17 @@ static struct damos *damon_sysfs_mk_scheme(
                .low = sysfs_wmarks->low,
        };
 
-       damos_sysfs_set_quota_score(sysfs_quotas->goals, &quota);
-
        scheme = damon_new_scheme(&pattern, sysfs_scheme->action,
                        sysfs_scheme->apply_interval_us, &quota, &wmarks);
        if (!scheme)
                return NULL;
 
+       err = damos_sysfs_set_quota_score(sysfs_quotas->goals, &scheme->quota);
+       if (err) {
+               damon_destroy_scheme(scheme);
+               return NULL;
+       }
+
        err = damon_sysfs_set_scheme_filters(scheme, sysfs_filters);
        if (err) {
                damon_destroy_scheme(scheme);
@@ -1995,7 +2075,11 @@ static void damon_sysfs_update_scheme(struct damos *scheme,
        scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses;
        scheme->quota.weight_age = sysfs_weights->age;
 
-       damos_sysfs_set_quota_score(sysfs_quotas->goals, &scheme->quota);
+       err = damos_sysfs_set_quota_score(sysfs_quotas->goals, &scheme->quota);
+       if (err) {
+               damon_destroy_scheme(scheme);
+               return;
+       }
 
        scheme->wmarks.metric = sysfs_wmarks->metric;
        scheme->wmarks.interval = sysfs_wmarks->interval_us;
@@ -2122,7 +2206,7 @@ static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx,
  * callback is registered, damon_sysfs_lock should be held to ensure the
  * regions directories exist.
  */
-static int damon_sysfs_after_sampling(struct damon_ctx *ctx)
+void damos_sysfs_mark_finished_regions_updates(struct damon_ctx *ctx)
 {
        struct damon_sysfs_schemes *sysfs_schemes =
                damon_sysfs_schemes_for_damos_callback;
@@ -2138,8 +2222,6 @@ static int damon_sysfs_after_sampling(struct damon_ctx *ctx)
                        sysfs_regions->upd_status =
                                DAMOS_TRIED_REGIONS_UPD_FINISHED;
        }
-
-       return 0;
 }
 
 /* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */
@@ -2212,7 +2294,6 @@ int damon_sysfs_schemes_update_regions_start(
        damos_tried_regions_init_upd_status(sysfs_schemes, ctx);
        damos_regions_upd_total_bytes_only = total_bytes_only;
        ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply;
-       ctx->callback.after_sampling = damon_sysfs_after_sampling;
        return 0;
 }
 
@@ -2241,7 +2322,6 @@ int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx)
 {
        damon_sysfs_schemes_for_damos_callback = NULL;
        ctx->callback.before_damos_apply = NULL;
-       ctx->callback.after_sampling = NULL;
        damon_sysfs_schemes_region_idx = 0;
        return 0;
 }
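
target_metric_store() above is the standard sysfs idiom for parsing an enum:
scan a string table with sysfs_streq(), which tolerates the trailing newline
that echo(1) appends. A userspace analogue of the matcher (streq_nl() is a
stand-in for sysfs_streq(), not a kernel export):

#include <stdio.h>
#include <string.h>

static const char * const metric_strs[] = {
	"user_input",
	"some_mem_psi_us",
};

/* Stand-in for sysfs_streq(): equal, ignoring one trailing newline. */
static int streq_nl(const char *buf, const char *str)
{
	size_t n = strlen(str);

	if (strncmp(buf, str, n))
		return 0;
	return buf[n] == '\0' || (buf[n] == '\n' && buf[n + 1] == '\0');
}

int main(void)
{
	const char *input = "some_mem_psi_us\n";	/* as echo(1) writes it */
	size_t m;

	for (m = 0; m < sizeof(metric_strs) / sizeof(*metric_strs); m++)
		if (streq_nl(input, metric_strs[m]))
			printf("metric index %zu\n", m);	/* prints 1 */
	return 0;
}
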
index 1f891e18b4ee1068f5f88ee28bff9365f9940347..6fee383bc0c54cc98e53d404c5262fd254c1e7f0 100644 (file)
@@ -1019,6 +1019,11 @@ enum damon_sysfs_cmd {
         * regions
         */
        DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS,
+       /*
+        * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_EFFECTIVE_QUOTAS: Update the
+        * effective size quota of the scheme in bytes.
+        * effective size quota of each scheme in bytes.
+       DAMON_SYSFS_CMD_UPDATE_SCHEMES_EFFECTIVE_QUOTAS,
        /*
         * @NR_DAMON_SYSFS_CMDS: Total number of DAMON sysfs commands.
         */
@@ -1035,6 +1040,7 @@ static const char * const damon_sysfs_cmd_strs[] = {
        "update_schemes_tried_bytes",
        "update_schemes_tried_regions",
        "clear_schemes_tried_regions",
+       "update_schemes_effective_quotas",
 };
 
 /*
@@ -1371,19 +1377,43 @@ static int damon_sysfs_commit_schemes_quota_goals(
 
        ctx = sysfs_kdamond->damon_ctx;
        sysfs_ctx = sysfs_kdamond->contexts->contexts_arr[0];
-       damos_sysfs_set_quota_scores(sysfs_ctx->schemes, ctx);
+       return damos_sysfs_set_quota_scores(sysfs_ctx->schemes, ctx);
+}
+
+/*
+ * damon_sysfs_upd_schemes_effective_quotas() - Update the schemes' effective
+ * quotas sysfs files.
+ * @kdamond:   The kobject wrapper that is associated with the kdamond thread.
+ *
+ * This function reads the schemes' effective quotas of the specific kdamond
+ * and updates the related values of the sysfs files.  This function should be
+ * called from DAMON callbacks while holding ``damon_sysfs_lock``, to safely
+ * access DAMON context-internal data and DAMON sysfs variables.
+ */
+static int damon_sysfs_upd_schemes_effective_quotas(
+               struct damon_sysfs_kdamond *kdamond)
+{
+       struct damon_ctx *ctx = kdamond->damon_ctx;
+
+       if (!ctx)
+               return -EINVAL;
+       damos_sysfs_update_effective_quotas(
+                       kdamond->contexts->contexts_arr[0]->schemes, ctx);
        return 0;
 }
 
 /*
  * damon_sysfs_cmd_request_callback() - DAMON callback for handling requests.
  * @c:         The DAMON context of the callback.
  * @active:    Whether @c is not deactivated due to watermarks.
+ * @after_aggregation:        Whether this is called from the after_aggregation()
+ *                     callback.
  *
  * This function is periodically called back from the kdamond thread for @c.
  * Then, it checks if there is a waiting DAMON sysfs request and handles it.
  */
-static int damon_sysfs_cmd_request_callback(struct damon_ctx *c, bool active)
+static int damon_sysfs_cmd_request_callback(struct damon_ctx *c, bool active,
+               bool after_aggregation)
 {
        struct damon_sysfs_kdamond *kdamond;
        bool total_bytes_only = false;
@@ -1401,6 +1431,8 @@ static int damon_sysfs_cmd_request_callback(struct damon_ctx *c, bool active)
                err = damon_sysfs_upd_schemes_stats(kdamond);
                break;
        case DAMON_SYSFS_CMD_COMMIT:
+               if (!after_aggregation)
+                       goto out;
                err = damon_sysfs_commit_input(kdamond);
                break;
        case DAMON_SYSFS_CMD_COMMIT_SCHEMES_QUOTA_GOALS:
@@ -1418,6 +1450,7 @@ static int damon_sysfs_cmd_request_callback(struct damon_ctx *c, bool active)
                                goto keep_lock_out;
                        }
                } else {
+                       damos_sysfs_mark_finished_regions_updates(c);
                        /*
                         * Continue regions updating if DAMON is still
                         * active and the update for all schemes is not
@@ -1432,6 +1465,9 @@ static int damon_sysfs_cmd_request_callback(struct damon_ctx *c, bool active)
        case DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS:
                err = damon_sysfs_clear_schemes_regions(kdamond);
                break;
+       case DAMON_SYSFS_CMD_UPDATE_SCHEMES_EFFECTIVE_QUOTAS:
+               err = damon_sysfs_upd_schemes_effective_quotas(kdamond);
+               break;
        default:
                break;
        }
@@ -1450,7 +1486,16 @@ static int damon_sysfs_after_wmarks_check(struct damon_ctx *c)
         * after_wmarks_check() is called back while the context is deactivated
         * by watermarks.
         */
-       return damon_sysfs_cmd_request_callback(c, false);
+       return damon_sysfs_cmd_request_callback(c, false, false);
+}
+
+static int damon_sysfs_after_sampling(struct damon_ctx *c)
+{
+       /*
+        * after_sampling() is called back only while the context is not
+        * deactivated by watermarks.
+        */
+       return damon_sysfs_cmd_request_callback(c, true, false);
 }
 
 static int damon_sysfs_after_aggregation(struct damon_ctx *c)
@@ -1459,7 +1504,7 @@ static int damon_sysfs_after_aggregation(struct damon_ctx *c)
         * after_aggregation() is called back only while the context is not
         * deactivated by watermarks.
         */
-       return damon_sysfs_cmd_request_callback(c, true);
+       return damon_sysfs_cmd_request_callback(c, true, true);
 }
 
 static struct damon_ctx *damon_sysfs_build_ctx(
@@ -1478,6 +1523,7 @@ static struct damon_ctx *damon_sysfs_build_ctx(
        }
 
        ctx->callback.after_wmarks_check = damon_sysfs_after_wmarks_check;
+       ctx->callback.after_sampling = damon_sysfs_after_sampling;
        ctx->callback.after_aggregation = damon_sysfs_after_aggregation;
        ctx->callback.before_terminate = damon_sysfs_before_terminate;
        return ctx;
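
The one request callback is now entered from three DAMON callback sites,
distinguished only by the two booleans, and DAMON_SYSFS_CMD_COMMIT is the
single command gated on the aggregation boundary. A compressed sketch of that
dispatch (the printout is illustrative; the flag pairs are taken from the
wrappers above):

#include <stdbool.h>
#include <stdio.h>

static void request_callback(bool active, bool after_aggregation)
{
	printf("active=%d after_aggregation=%d -> COMMIT %s\n",
	       active, after_aggregation,
	       after_aggregation ? "allowed" : "deferred");
}

int main(void)
{
	request_callback(false, false);	/* after_wmarks_check() */
	request_callback(true, false);	/* after_sampling() */
	request_callback(true, true);	/* after_aggregation() */
	return 0;
}
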
index ee533a5ceb79d3335e28d4c17530208dbe17fa0c..c1c1a6a484e4c07606129ef2731a895fd4ddce41 100644 (file)
@@ -51,87 +51,105 @@ const struct trace_print_flags vmaflag_names[] = {
        {0, NULL}
 };
 
-static void __dump_page(struct page *page)
+static void __dump_folio(struct folio *folio, struct page *page,
+               unsigned long pfn, unsigned long idx)
 {
-       struct folio *folio = page_folio(page);
-       struct page *head = &folio->page;
-       struct address_space *mapping;
-       bool compound = PageCompound(page);
-       /*
-        * Accessing the pageblock without the zone lock. It could change to
-        * "isolate" again in the meantime, but since we are just dumping the
-        * state for debugging, it should be fine to accept a bit of
-        * inaccuracy here due to racing.
-        */
-       bool page_cma = is_migrate_cma_page(page);
-       int mapcount;
+       struct address_space *mapping = folio_mapping(folio);
+       int mapcount = 0;
        char *type = "";
 
-       if (page < head || (page >= head + MAX_ORDER_NR_PAGES)) {
-               /*
-                * Corrupt page, so we cannot call page_mapping. Instead, do a
-                * safe subset of the steps that page_mapping() does. Caution:
-                * this will be misleading for tail pages, PageSwapCache pages,
-                * and potentially other situations. (See the page_mapping()
-                * implementation for what's missing here.)
-                */
-               unsigned long tmp = (unsigned long)page->mapping;
-
-               if (tmp & PAGE_MAPPING_ANON)
-                       mapping = NULL;
-               else
-                       mapping = (void *)(tmp & ~PAGE_MAPPING_FLAGS);
-               head = page;
-               folio = (struct folio *)page;
-               compound = false;
-       } else {
-               mapping = page_mapping(page);
-       }
-
        /*
-        * Avoid VM_BUG_ON() in page_mapcount().
-        * page->_mapcount space in struct page is used by sl[aou]b pages to
-        * encode own info.
+        * page->_mapcount space in struct page is used by slab pages to
+        * encode their own info, and we must avoid calling page_folio() again.
         */
-       mapcount = PageSlab(head) ? 0 : page_mapcount(page);
-
-       pr_warn("page:%p refcount:%d mapcount:%d mapping:%p index:%#lx pfn:%#lx\n",
-                       page, page_ref_count(head), mapcount, mapping,
-                       page_to_pgoff(page), page_to_pfn(page));
-       if (compound) {
-               pr_warn("head:%p order:%u entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n",
-                               head, compound_order(head),
+       if (!folio_test_slab(folio)) {
+               mapcount = atomic_read(&page->_mapcount) + 1;
+               if (folio_test_large(folio))
+                       mapcount += folio_entire_mapcount(folio);
+       }
+
+       pr_warn("page: refcount:%d mapcount:%d mapping:%p index:%#lx pfn:%#lx\n",
+                       folio_ref_count(folio), mapcount, mapping,
+                       folio->index + idx, pfn);
+       if (folio_test_large(folio)) {
+               pr_warn("head: order:%u entire_mapcount:%d nr_pages_mapped:%d pincount:%d\n",
+                               folio_order(folio),
                                folio_entire_mapcount(folio),
                                folio_nr_pages_mapped(folio),
                                atomic_read(&folio->_pincount));
        }
 
 #ifdef CONFIG_MEMCG
-       if (head->memcg_data)
-               pr_warn("memcg:%lx\n", head->memcg_data);
+       if (folio->memcg_data)
+               pr_warn("memcg:%lx\n", folio->memcg_data);
 #endif
-       if (PageKsm(page))
+       if (folio_test_ksm(folio))
                type = "ksm ";
-       else if (PageAnon(page))
+       else if (folio_test_anon(folio))
                type = "anon ";
        else if (mapping)
                dump_mapping(mapping);
        BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
 
-       pr_warn("%sflags: %pGp%s\n", type, &head->flags,
-               page_cma ? " CMA" : "");
-       pr_warn("page_type: %pGt\n", &head->page_type);
+       /*
+        * Accessing the pageblock without the zone lock. It could change to
+        * "isolate" again in the meantime, but since we are just dumping the
+        * state for debugging, it should be fine to accept a bit of
+        * inaccuracy here due to racing.
+        */
+       pr_warn("%sflags: %pGp%s\n", type, &folio->flags,
+               is_migrate_cma_folio(folio, pfn) ? " CMA" : "");
+       pr_warn("page_type: %pGt\n", &folio->page.page_type);
 
        print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
                        sizeof(unsigned long), page,
                        sizeof(struct page), false);
-       if (head != page)
+       if (folio_test_large(folio))
                print_hex_dump(KERN_WARNING, "head: ", DUMP_PREFIX_NONE, 32,
-                       sizeof(unsigned long), head,
-                       sizeof(struct page), false);
+                       sizeof(unsigned long), folio,
+                       2 * sizeof(struct page), false);
+}
+
+static void __dump_page(const struct page *page)
+{
+       struct folio *foliop, folio;
+       struct page precise;
+       unsigned long pfn = page_to_pfn(page);
+       unsigned long idx, nr_pages = 1;
+       int loops = 5;
+
+again:
+       memcpy(&precise, page, sizeof(*page));
+       foliop = page_folio(&precise);
+       if (foliop == (struct folio *)&precise) {
+               idx = 0;
+               if (!folio_test_large(foliop))
+                       goto dump;
+               foliop = (struct folio *)page;
+       } else {
+               idx = folio_page_idx(foliop, page);
+       }
+
+       if (idx < MAX_FOLIO_NR_PAGES) {
+               memcpy(&folio, foliop, 2 * sizeof(struct page));
+               nr_pages = folio_nr_pages(&folio);
+               foliop = &folio;
+       }
+
+       if (idx > nr_pages) {
+               if (loops-- > 0)
+                       goto again;
+               pr_warn("page does not match folio\n");
+               precise.compound_head &= ~1UL;
+               foliop = (struct folio *)&precise;
+               idx = 0;
+       }
+
+dump:
+       __dump_folio(foliop, &precise, pfn, idx);
 }
 
-void dump_page(struct page *page, const char *reason)
+void dump_page(const struct page *page, const char *reason)
 {
        if (PagePoisoned(page))
                pr_warn("page:%p is uninitialized and poisoned", page);
index fef125d9508b8bb7ece2aa1689899484d07851cf..7437b2bd75c1ab48b093d390017e809af963f1e2 100644 (file)
@@ -852,7 +852,7 @@ noinline int __filemap_add_folio(struct address_space *mapping,
                struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
 {
        XA_STATE(xas, &mapping->i_pages, index);
-       int huge = folio_test_hugetlb(folio);
+       bool huge = folio_test_hugetlb(folio);
        bool charged = false;
        long nr = 1;
 
@@ -1363,7 +1363,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl)
        unsigned long pflags;
        bool in_thrashing;
        wait_queue_head_t *q;
-       struct folio *folio = page_folio(pfn_swap_entry_to_page(entry));
+       struct folio *folio = pfn_swap_entry_folio(entry);
 
        q = folio_waitqueue(folio);
        if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) {
@@ -1921,8 +1921,6 @@ no_page:
                        gfp_t alloc_gfp = gfp;
 
                        err = -ENOMEM;
-                       if (order == 1)
-                               order = 0;
                        if (order > 0)
                                alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
                        folio = filemap_alloc_folio(alloc_gfp, order);
@@ -3183,6 +3181,48 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
        return fpin;
 }
 
+static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       vm_fault_t ret = 0;
+       pte_t *ptep;
+
+       /*
+        * We might have COW'ed a pagecache folio and might now have an mlocked
+        * anon folio mapped. The original pagecache folio is not mlocked and
+        * might have been evicted. During a read+clear/modify/write update of
+        * the PTE, such as done in do_numa_page()/change_pte_range(), we
+        * temporarily clear the PTE under PT lock and might detect it here as
+        * "none" when not holding the PT lock.
+        *
+        * Not rechecking the PTE under PT lock could result in an unexpected
+        * major fault in an mlock'ed region. Recheck only for this special
+        * scenario while holding the PT lock, to not degrade non-mlocked
+        * scenarios. Recheck the PTE without PT lock firstly, thereby reducing
+        * the number of times we hold PT lock.
+        */
+       if (!(vma->vm_flags & VM_LOCKED))
+               return 0;
+
+       if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
+               return 0;
+
+       ptep = pte_offset_map(vmf->pmd, vmf->address);
+       if (unlikely(!ptep))
+               return VM_FAULT_NOPAGE;
+
+       if (unlikely(!pte_none(ptep_get_lockless(ptep)))) {
+               ret = VM_FAULT_NOPAGE;
+       } else {
+               spin_lock(vmf->ptl);
+               if (unlikely(!pte_none(ptep_get(ptep))))
+                       ret = VM_FAULT_NOPAGE;
+               spin_unlock(vmf->ptl);
+       }
+       pte_unmap(ptep);
+       return ret;
+}
+
 /**
  * filemap_fault - read in file data for page fault handling
  * @vmf:       struct vm_fault containing details of the fault
@@ -3238,6 +3278,10 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
                        mapping_locked = true;
                }
        } else {
+               ret = filemap_fault_recheck_pte_none(vmf);
+               if (unlikely(ret))
+                       return ret;
+
                /* No page in the page cache at all */
                count_vm_event(PGMAJFAULT);
                count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
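
filemap_fault_recheck_pte_none() is a textbook double-check: a racy lockless
read filters out the common case, and only a still-suspicious result pays for
the page-table lock. The same structure in a userspace sketch (the mutex and
the atomic word are illustrative stand-ins for the PT lock and the PTE):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic unsigned long entry;	/* stand-in for the PTE word */

/* Return true only if the entry is still "none" when checked under lock. */
static bool recheck_none(void)
{
	bool none;

	/* cheap lockless look first: if something is visible already,
	 * skip taking the lock at all */
	if (atomic_load_explicit(&entry, memory_order_relaxed) != 0)
		return false;

	pthread_mutex_lock(&lock);	/* slow path: confirm under the lock */
	none = atomic_load_explicit(&entry, memory_order_relaxed) == 0;
	pthread_mutex_unlock(&lock);
	return none;
}

int main(void)
{
	return recheck_none() ? 0 : 1;
}
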
index 94c958f7ebb50dd925070157c0d0b2432dfc0483..9859aa4f755380a88013c70791e6e6df90ce861f 100644 (file)
@@ -790,8 +790,10 @@ struct deferred_split *get_deferred_split_queue(struct folio *folio)
 
 void folio_prep_large_rmappable(struct folio *folio)
 {
-       VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio);
-       INIT_LIST_HEAD(&folio->_deferred_list);
+       if (!folio || !folio_test_large(folio))
+               return;
+       if (folio_order(folio) > 1)
+               INIT_LIST_HEAD(&folio->_deferred_list);
        folio_set_large_rmappable(folio);
 }
 
@@ -1905,12 +1907,14 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                zap_deposited_table(tlb->mm, pmd);
                spin_unlock(ptl);
        } else {
-               struct page *page = NULL;
+               struct folio *folio = NULL;
                int flush_needed = 1;
 
                if (pmd_present(orig_pmd)) {
-                       page = pmd_page(orig_pmd);
-                       folio_remove_rmap_pmd(page_folio(page), page, vma);
+                       struct page *page = pmd_page(orig_pmd);
+
+                       folio = page_folio(page);
+                       folio_remove_rmap_pmd(folio, page, vma);
                        VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
                        VM_BUG_ON_PAGE(!PageHead(page), page);
                } else if (thp_migration_supported()) {
@@ -1918,23 +1922,24 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
                        VM_BUG_ON(!is_pmd_migration_entry(orig_pmd));
                        entry = pmd_to_swp_entry(orig_pmd);
-                       page = pfn_swap_entry_to_page(entry);
+                       folio = pfn_swap_entry_folio(entry);
                        flush_needed = 0;
                } else
                        WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
 
-               if (PageAnon(page)) {
+               if (folio_test_anon(folio)) {
                        zap_deposited_table(tlb->mm, pmd);
                        add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
                } else {
                        if (arch_needs_pgtable_deposit())
                                zap_deposited_table(tlb->mm, pmd);
-                       add_mm_counter(tlb->mm, mm_counter_file(page), -HPAGE_PMD_NR);
+                       add_mm_counter(tlb->mm, mm_counter_file(folio),
+                                      -HPAGE_PMD_NR);
                }
 
                spin_unlock(ptl);
                if (flush_needed)
-                       tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);
+                       tlb_remove_page_size(tlb, &folio->page, HPAGE_PMD_SIZE);
        }
        return 1;
 }
@@ -2045,7 +2050,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
        if (is_swap_pmd(*pmd)) {
                swp_entry_t entry = pmd_to_swp_entry(*pmd);
-               struct folio *folio = page_folio(pfn_swap_entry_to_page(entry));
+               struct folio *folio = pfn_swap_entry_folio(entry);
                pmd_t newpmd;
 
                VM_BUG_ON(!is_pmd_migration_entry(*pmd));
@@ -2155,7 +2160,7 @@ unlock:
 
 #ifdef CONFIG_USERFAULTFD
 /*
- * The PT lock for src_pmd and the mmap_lock for reading are held by
+ * The PT lock for src_pmd and dst_vma/src_vma (for reading) are locked by
  * the caller, but it must return after releasing the page_table_lock.
  * Just move the page from src_pmd to dst_pmd if possible.
  * Return zero if succeeded in moving the page, -EAGAIN if it needs to be
@@ -2178,7 +2183,8 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
        src_ptl = pmd_lockptr(mm, src_pmd);
 
        lockdep_assert_held(src_ptl);
-       mmap_assert_locked(mm);
+       vma_assert_locked(src_vma);
+       vma_assert_locked(dst_vma);
 
        /* Sanity checks before the operation */
        if (WARN_ON_ONCE(!pmd_none(dst_pmdval)) || WARN_ON_ONCE(src_addr & ~HPAGE_PMD_MASK) ||
@@ -2197,13 +2203,18 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
        }
 
        src_page = pmd_page(src_pmdval);
-       if (unlikely(!PageAnonExclusive(src_page))) {
-               spin_unlock(src_ptl);
-               return -EBUSY;
-       }
 
-       src_folio = page_folio(src_page);
-       folio_get(src_folio);
+       if (!is_huge_zero_pmd(src_pmdval)) {
+               if (unlikely(!PageAnonExclusive(src_page))) {
+                       spin_unlock(src_ptl);
+                       return -EBUSY;
+               }
+
+               src_folio = page_folio(src_page);
+               folio_get(src_folio);
+       } else {
+               src_folio = NULL;
+       }
+
        spin_unlock(src_ptl);
 
        flush_cache_range(src_vma, src_addr, src_addr + HPAGE_PMD_SIZE);
@@ -2211,19 +2222,22 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
                                src_addr + HPAGE_PMD_SIZE);
        mmu_notifier_invalidate_range_start(&range);
 
-       folio_lock(src_folio);
+       if (src_folio) {
+               folio_lock(src_folio);
 
-       /*
-        * split_huge_page walks the anon_vma chain without the page
-        * lock. Serialize against it with the anon_vma lock, the page
-        * lock is not enough.
-        */
-       src_anon_vma = folio_get_anon_vma(src_folio);
-       if (!src_anon_vma) {
-               err = -EAGAIN;
-               goto unlock_folio;
-       }
-       anon_vma_lock_write(src_anon_vma);
+               /*
+                * split_huge_page walks the anon_vma chain without the page
+                * lock. Serialize against it with the anon_vma lock, the page
+                * lock is not enough.
+                */
+               src_anon_vma = folio_get_anon_vma(src_folio);
+               if (!src_anon_vma) {
+                       err = -EAGAIN;
+                       goto unlock_folio;
+               }
+               anon_vma_lock_write(src_anon_vma);
+       } else {
+               src_anon_vma = NULL;
+       }
 
        dst_ptl = pmd_lockptr(mm, dst_pmd);
        double_pt_lock(src_ptl, dst_ptl);
@@ -2232,45 +2246,54 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
                err = -EAGAIN;
                goto unlock_ptls;
        }
-       if (folio_maybe_dma_pinned(src_folio) ||
-           !PageAnonExclusive(&src_folio->page)) {
-               err = -EBUSY;
-               goto unlock_ptls;
-       }
+       if (src_folio) {
+               if (folio_maybe_dma_pinned(src_folio) ||
+                   !PageAnonExclusive(&src_folio->page)) {
+                       err = -EBUSY;
+                       goto unlock_ptls;
+               }
 
-       if (WARN_ON_ONCE(!folio_test_head(src_folio)) ||
-           WARN_ON_ONCE(!folio_test_anon(src_folio))) {
-               err = -EBUSY;
-               goto unlock_ptls;
-       }
+               if (WARN_ON_ONCE(!folio_test_head(src_folio)) ||
+                   WARN_ON_ONCE(!folio_test_anon(src_folio))) {
+                       err = -EBUSY;
+                       goto unlock_ptls;
+               }
 
-       folio_move_anon_rmap(src_folio, dst_vma);
-       WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+               folio_move_anon_rmap(src_folio, dst_vma);
+               WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
 
-       src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
-       /* Folio got pinned from under us. Put it back and fail the move. */
-       if (folio_maybe_dma_pinned(src_folio)) {
-               set_pmd_at(mm, src_addr, src_pmd, src_pmdval);
-               err = -EBUSY;
-               goto unlock_ptls;
-       }
+               src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
+               /* Folio got pinned from under us. Put it back and fail the move. */
+               if (folio_maybe_dma_pinned(src_folio)) {
+                       set_pmd_at(mm, src_addr, src_pmd, src_pmdval);
+                       err = -EBUSY;
+                       goto unlock_ptls;
+               }
 
-       _dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
-       /* Follow mremap() behavior and treat the entry dirty after the move */
-       _dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
+               _dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
+               /* Follow mremap() behavior and treat the entry dirty after the move */
+               _dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
+       } else {
+               src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
+               _dst_pmd = mk_huge_pmd(src_page, dst_vma->vm_page_prot);
+       }
        set_pmd_at(mm, dst_addr, dst_pmd, _dst_pmd);
 
        src_pgtable = pgtable_trans_huge_withdraw(mm, src_pmd);
        pgtable_trans_huge_deposit(mm, dst_pmd, src_pgtable);
 unlock_ptls:
        double_pt_unlock(src_ptl, dst_ptl);
-       anon_vma_unlock_write(src_anon_vma);
-       put_anon_vma(src_anon_vma);
+       if (src_anon_vma) {
+               anon_vma_unlock_write(src_anon_vma);
+               put_anon_vma(src_anon_vma);
+       }
 unlock_folio:
        /* unblock rmap walks */
-       folio_unlock(src_folio);
+       if (src_folio)
+               folio_unlock(src_folio);
        mmu_notifier_invalidate_range_end(&range);
-       folio_put(src_folio);
+       if (src_folio)
+               folio_put(src_folio);
        return err;
 }
 #endif /* CONFIG_USERFAULTFD */
@@ -2442,7 +2465,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                        swp_entry_t entry;
 
                        entry = pmd_to_swp_entry(old_pmd);
-                       page = pfn_swap_entry_to_page(entry);
+                       folio = pfn_swap_entry_folio(entry);
                } else {
                        page = pmd_page(old_pmd);
                        folio = page_folio(page);
@@ -2453,7 +2476,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                        folio_remove_rmap_pmd(folio, page, vma);
                        folio_put(folio);
                }
-               add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
+               add_mm_counter(mm, mm_counter_file(folio), -HPAGE_PMD_NR);
                return;
        }
 
@@ -2559,15 +2582,16 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 
        pte = pte_offset_map(&_pmd, haddr);
        VM_BUG_ON(!pte);
-       for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
-               pte_t entry;
-               /*
-                * Note that NUMA hinting access restrictions are not
-                * transferred to avoid any possibility of altering
-                * permissions across VMAs.
-                */
-               if (freeze || pmd_migration) {
+
+       /*
+        * Note that NUMA hinting access restrictions are not transferred to
+        * avoid any possibility of altering permissions across VMAs.
+        */
+       if (freeze || pmd_migration) {
+               for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
+                       pte_t entry;
                        swp_entry_t swp_entry;
+
                        if (write)
                                swp_entry = make_writable_migration_entry(
                                                        page_to_pfn(page + i));
@@ -2586,25 +2610,32 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                                entry = pte_swp_mksoft_dirty(entry);
                        if (uffd_wp)
                                entry = pte_swp_mkuffd_wp(entry);
-               } else {
-                       entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
-                       if (write)
-                               entry = pte_mkwrite(entry, vma);
-                       if (!young)
-                               entry = pte_mkold(entry);
-                       /* NOTE: this may set soft-dirty too on some archs */
-                       if (dirty)
-                               entry = pte_mkdirty(entry);
-                       if (soft_dirty)
-                               entry = pte_mksoft_dirty(entry);
-                       if (uffd_wp)
-                               entry = pte_mkuffd_wp(entry);
+
+                       VM_WARN_ON(!pte_none(ptep_get(pte + i)));
+                       set_pte_at(mm, addr, pte + i, entry);
                }
-               VM_BUG_ON(!pte_none(ptep_get(pte)));
-               set_pte_at(mm, addr, pte, entry);
-               pte++;
+       } else {
+               pte_t entry;
+
+               entry = mk_pte(page, READ_ONCE(vma->vm_page_prot));
+               if (write)
+                       entry = pte_mkwrite(entry, vma);
+               if (!young)
+                       entry = pte_mkold(entry);
+               /* NOTE: this may set soft-dirty too on some archs */
+               if (dirty)
+                       entry = pte_mkdirty(entry);
+               if (soft_dirty)
+                       entry = pte_mksoft_dirty(entry);
+               if (uffd_wp)
+                       entry = pte_mkuffd_wp(entry);
+
+               for (i = 0; i < HPAGE_PMD_NR; i++)
+                       VM_WARN_ON(!pte_none(ptep_get(pte + i)));
+
+               set_ptes(mm, haddr, pte, entry, HPAGE_PMD_NR);
        }
-       pte_unmap(pte - 1);
+       pte_unmap(pte);
 
        if (!pmd_migration)
                folio_remove_rmap_pmd(folio, page, vma);
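
Instead of cooking a fresh PTE per iteration, the non-freeze path now builds
one template entry and hands it to set_ptes(), whose generic implementation
writes all HPAGE_PMD_NR consecutive entries, bumping the pfn for each
successive page. A toy model of that contract (userspace stand-ins, not the
arch API; pfns live in the low bits here purely for illustration):

#include <stdio.h>

#define NR 8	/* stand-in for HPAGE_PMD_NR */

/* Toy set_ptes(): write nr entries from one template, advancing the pfn
 * per page the way the generic helper does. */
static void toy_set_ptes(unsigned long *ptes, unsigned long first,
			 unsigned int nr)
{
	unsigned int i;

	for (i = 0; i < nr; i++)
		ptes[i] = first + i;
}

int main(void)
{
	unsigned long ptes[NR] = { 0 };
	int i;

	toy_set_ptes(ptes, 0x1000, NR);	/* one template, NR pages */
	for (i = 0; i < NR; i++)
		printf("pte[%d] = %#lx\n", i, ptes[i]);
	return 0;
}
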
@@ -2698,11 +2729,14 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 
 static void unmap_folio(struct folio *folio)
 {
-       enum ttu_flags ttu_flags = TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD |
-               TTU_SYNC | TTU_BATCH_FLUSH;
+       enum ttu_flags ttu_flags = TTU_RMAP_LOCKED | TTU_SYNC |
+               TTU_BATCH_FLUSH;
 
        VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
 
+       if (folio_test_pmd_mappable(folio))
+               ttu_flags |= TTU_SPLIT_HUGE_PMD;
+
        /*
         * Anon pages need migration entries to preserve them, but file
         * pages can simply be left unmapped, then faulted back on demand.
@@ -2736,7 +2770,6 @@ static void lru_add_page_tail(struct page *head, struct page *tail,
                struct lruvec *lruvec, struct list_head *list)
 {
        VM_BUG_ON_PAGE(!PageHead(head), head);
-       VM_BUG_ON_PAGE(PageCompound(tail), head);
        VM_BUG_ON_PAGE(PageLRU(tail), head);
        lockdep_assert_held(&lruvec->lru_lock);
 
@@ -2757,7 +2790,8 @@ static void lru_add_page_tail(struct page *head, struct page *tail,
 }
 
 static void __split_huge_page_tail(struct folio *folio, int tail,
-               struct lruvec *lruvec, struct list_head *list)
+               struct lruvec *lruvec, struct list_head *list,
+               unsigned int new_order)
 {
        struct page *head = &folio->page;
        struct page *page_tail = head + tail;
@@ -2827,10 +2861,15 @@ static void __split_huge_page_tail(struct folio *folio, int tail,
         * which needs correct compound_head().
         */
        clear_compound_head(page_tail);
+       if (new_order) {
+               prep_compound_page(page_tail, new_order);
+               folio_prep_large_rmappable(new_folio);
+       }
 
        /* Finally unfreeze refcount. Additional reference from page cache. */
-       page_ref_unfreeze(page_tail, 1 + (!folio_test_anon(folio) ||
-                                         folio_test_swapcache(folio)));
+       page_ref_unfreeze(page_tail,
+               1 + ((!folio_test_anon(folio) || folio_test_swapcache(folio)) ?
+                            folio_nr_pages(new_folio) : 0));
 
        if (folio_test_young(folio))
                folio_set_young(new_folio);
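
With new_order in play, the tail walk peels off one order-new_order folio per
step rather than one page, and a page-cache reference now pins
folio_nr_pages(new_folio) pages at unfreeze time. The index arithmetic, worked
for a PMD-sized folio split to order 2 (the numbers follow the loop in
__split_huge_page() below):

#include <stdio.h>

int main(void)
{
	unsigned int order = 9, new_order = 2;	/* PMD THP -> order-2 folios */
	unsigned int nr = 1 << order, new_nr = 1 << new_order;
	unsigned int tails = 0;
	int i;

	/* same walk as __split_huge_page(): tails at 508, 504, ..., 4 */
	for (i = nr - new_nr; i >= (int)new_nr; i -= new_nr)
		tails++;

	printf("order-%u folio -> %u order-%u folios (%u tails + the head)\n",
	       order, tails + 1, new_order, tails);	/* 128 = 127 + 1 */
	return 0;
}
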
@@ -2848,18 +2887,20 @@ static void __split_huge_page_tail(struct folio *folio, int tail,
 }
 
 static void __split_huge_page(struct page *page, struct list_head *list,
-               pgoff_t end)
+               pgoff_t end, unsigned int new_order)
 {
        struct folio *folio = page_folio(page);
        struct page *head = &folio->page;
        struct lruvec *lruvec;
        struct address_space *swap_cache = NULL;
        unsigned long offset = 0;
-       unsigned int nr = thp_nr_pages(head);
        int i, nr_dropped = 0;
+       unsigned int new_nr = 1 << new_order;
+       int order = folio_order(folio);
+       unsigned int nr = 1 << order;
 
        /* complete memcg works before add pages to LRU */
-       split_page_memcg(head, nr);
+       split_page_memcg(head, order, new_order);
 
        if (folio_test_anon(folio) && folio_test_swapcache(folio)) {
                offset = swp_offset(folio->swap);
@@ -2872,13 +2913,13 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
        ClearPageHasHWPoisoned(head);
 
-       for (i = nr - 1; i >= 1; i--) {
-               __split_huge_page_tail(folio, i, lruvec, list);
+       for (i = nr - new_nr; i >= new_nr; i -= new_nr) {
+               __split_huge_page_tail(folio, i, lruvec, list, new_order);
                /* Some pages can be beyond EOF: drop them from page cache */
                if (head[i].index >= end) {
                        struct folio *tail = page_folio(head + i);
 
-                       if (shmem_mapping(head->mapping))
+                       if (shmem_mapping(folio->mapping))
                                nr_dropped++;
                        else if (folio_test_clear_dirty(tail))
                                folio_account_cleaned(tail,
@@ -2886,7 +2927,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                        __filemap_remove_folio(tail, NULL);
                        folio_put(tail);
                } else if (!PageAnon(page)) {
-                       __xa_store(&head->mapping->i_pages, head[i].index,
+                       __xa_store(&folio->mapping->i_pages, head[i].index,
                                        head + i, 0);
                } else if (swap_cache) {
                        __xa_store(&swap_cache->i_pages, offset + i,
@@ -2894,40 +2935,55 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                }
        }
 
-       ClearPageCompound(head);
+       if (!new_order)
+               ClearPageCompound(head);
+       else {
+               struct folio *new_folio = (struct folio *)head;
+
+               folio_set_order(new_folio, new_order);
+       }
        unlock_page_lruvec(lruvec);
        /* Caller disabled irqs, so they are still disabled here */
 
-       split_page_owner(head, nr);
+       split_page_owner(head, order, new_order);
 
        /* See comment in __split_huge_page_tail() */
-       if (PageAnon(head)) {
+       if (folio_test_anon(folio)) {
                /* Additional pin to swap cache */
-               if (PageSwapCache(head)) {
-                       page_ref_add(head, 2);
+               if (folio_test_swapcache(folio)) {
+                       folio_ref_add(folio, 1 + new_nr);
                        xa_unlock(&swap_cache->i_pages);
                } else {
-                       page_ref_inc(head);
+                       folio_ref_inc(folio);
                }
        } else {
                /* Additional pin to page cache */
-               page_ref_add(head, 2);
-               xa_unlock(&head->mapping->i_pages);
+               folio_ref_add(folio, 1 + new_nr);
+               xa_unlock(&folio->mapping->i_pages);
        }
        local_irq_enable();
 
        if (nr_dropped)
-               shmem_uncharge(head->mapping->host, nr_dropped);
+               shmem_uncharge(folio->mapping->host, nr_dropped);
        remap_page(folio, nr);
 
        if (folio_test_swapcache(folio))
                split_swap_cluster(folio->swap);
 
-       for (i = 0; i < nr; i++) {
+       /*
+        * Set @page to its compound_head when splitting to non-order-0
+        * pages, so we can skip unlocking it below: PG_locked is
+        * transferred to the compound head and the caller will unlock it.
+        */
+       if (new_order)
+               page = compound_head(page);
+
+       for (i = 0; i < nr; i += new_nr) {
                struct page *subpage = head + i;
+               struct folio *new_folio = page_folio(subpage);
                if (subpage == page)
                        continue;
-               unlock_page(subpage);
+               folio_unlock(new_folio);
 
                /*
                 * Subpages may be freed if there wasn't any mapping
@@ -2957,29 +3013,36 @@ bool can_split_folio(struct folio *folio, int *pextra_pins)
 }
 
 /*
- * This function splits huge page into normal pages. @page can point to any
- * subpage of huge page to split. Split doesn't change the position of @page.
+ * This function splits huge page into pages in @new_order. @page can point to
+ * any subpage of huge page to split. Split doesn't change the position of
+ * @page.
+ *
+ * NOTE: order-1 anonymous folios are not supported because _deferred_list,
+ * which is used by partially mapped folios, is stored in subpage 2, and an
+ * order-1 folio has only subpages 0 and 1. File-backed order-1 folios are
+ * supported, since they do not use _deferred_list.
  *
  * Only caller must hold pin on the @page, otherwise split fails with -EBUSY.
  * The huge page must be locked.
  *
  * If @list is null, tail pages will be added to LRU list, otherwise, to @list.
  *
- * Both head page and tail pages will inherit mapping, flags, and so on from
- * the hugepage.
+ * After the split, the resulting folios of @new_order inherit the mapping,
+ * flags, and so on from the huge page.
  *
- * GUP pin and PG_locked transferred to @page. Rest subpages can be freed if
- * they are not mapped.
+ * The GUP pin and PG_locked are transferred to @page or to the compound page
+ * @page belongs to. The remaining subpages can be freed if they are not mapped.
  *
  * Returns 0 if the hugepage is split successfully.
  * Returns -EBUSY if the page is pinned or if anon_vma disappeared from under
  * us.
  */
-int split_huge_page_to_list(struct page *page, struct list_head *list)
+int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+                                    unsigned int new_order)
 {
        struct folio *folio = page_folio(page);
        struct deferred_split *ds_queue = get_deferred_split_queue(folio);
-       XA_STATE(xas, &folio->mapping->i_pages, folio->index);
+       /* reset xarray order to new order after split */
+       XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
        struct anon_vma *anon_vma = NULL;
        struct address_space *mapping = NULL;
        int extra_pins, ret;
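
For a sense of how the new entry point is meant to be called, here is a minimal, hypothetical caller (the helper name is invented; the folio is assumed to be locked, referenced, and file-backed):

        /* Split a locked file folio down to order-2 folios; tails go to the
         * LRU because @list is NULL. Order 2 is also safe for anon folios,
         * which only reject order 1 (no room for _deferred_list). */
        static int try_split_file_folio(struct folio *folio)
        {
                if (folio_order(folio) <= 2)
                        return -EINVAL;
                return split_huge_page_to_list_to_order(&folio->page, NULL, 2);
        }
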
@@ -2989,6 +3052,34 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
        VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
 
+       if (new_order >= folio_order(folio))
+               return -EINVAL;
+
+       /* Cannot split anonymous THP to order-1 */
+       if (new_order == 1 && folio_test_anon(folio)) {
+               VM_WARN_ONCE(1, "Cannot split to order-1 folio");
+               return -EINVAL;
+       }
+
+       if (new_order) {
+               /* Only swapping a whole PMD-mapped folio is supported */
+               if (folio_test_swapcache(folio))
+                       return -EINVAL;
+               /* Split shmem folio to non-zero order not supported */
+               if (shmem_mapping(folio->mapping)) {
+                       VM_WARN_ONCE(1,
+                               "Cannot split shmem folio to non-0 order");
+                       return -EINVAL;
+               }
+               /* No split if the file system does not support large folio */
+               if (!mapping_large_folio_support(folio->mapping)) {
+                       VM_WARN_ONCE(1,
+                               "Cannot split file folio to non-0 order");
+                       return -EINVAL;
+               }
+       }
+
        is_hzp = is_huge_zero_page(&folio->page);
        if (is_hzp) {
                pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
@@ -3082,16 +3173,24 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        /* Prevent deferred_split_scan() touching ->_refcount */
        spin_lock(&ds_queue->split_queue_lock);
        if (folio_ref_freeze(folio, 1 + extra_pins)) {
-               if (!list_empty(&folio->_deferred_list)) {
+               if (folio_order(folio) > 1 &&
+                   !list_empty(&folio->_deferred_list)) {
                        ds_queue->split_queue_len--;
-                       list_del(&folio->_deferred_list);
+                       /*
+                        * Reinitialize page_deferred_list after removing the
+                        * page from the split_queue, otherwise a subsequent
+                        * split will see list corruption when checking the
+                        * page_deferred_list.
+                        */
+                       list_del_init(&folio->_deferred_list);
                }
                spin_unlock(&ds_queue->split_queue_lock);
                if (mapping) {
                        int nr = folio_nr_pages(folio);
 
                        xas_split(&xas, folio, folio_order(folio));
-                       if (folio_test_pmd_mappable(folio)) {
+                       if (folio_test_pmd_mappable(folio) &&
+                           new_order < HPAGE_PMD_ORDER) {
                                if (folio_test_swapbacked(folio)) {
                                        __lruvec_stat_mod_folio(folio,
                                                        NR_SHMEM_THPS, -nr);
@@ -3103,7 +3202,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                        }
                }
 
-               __split_huge_page(page, list, end);
+               __split_huge_page(page, list, end, new_order);
                ret = 0;
        } else {
                spin_unlock(&ds_queue->split_queue_lock);
@@ -3133,6 +3232,9 @@ void folio_undo_large_rmappable(struct folio *folio)
        struct deferred_split *ds_queue;
        unsigned long flags;
 
+       if (folio_order(folio) <= 1)
+               return;
+
        /*
         * At this point, there is no one trying to add the folio to
         * deferred_list. If folio is not in deferred_list, it's safe
@@ -3158,7 +3260,12 @@ void deferred_split_folio(struct folio *folio)
 #endif
        unsigned long flags;
 
-       VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio);
+       /*
+        * Order 1 folios have no space for a deferred list, but we also
+        * won't waste much memory by not adding them to the deferred list.
+        */
+       if (folio_order(folio) <= 1)
+               return;
 
        /*
         * The try_to_unmap() in page reclaim path might reach here too,
@@ -3316,7 +3423,7 @@ static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)
 }
 
 static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
-                               unsigned long vaddr_end)
+                               unsigned long vaddr_end, unsigned int new_order)
 {
        int ret = 0;
        struct task_struct *task;
@@ -3379,14 +3486,23 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
                if (!is_transparent_hugepage(folio))
                        goto next;
 
+               if (new_order >= folio_order(folio))
+                       goto next;
+
                total++;
-               if (!can_split_folio(folio, NULL))
+               /*
+                * For folios with private data, split_huge_page_to_list_to_order()
+                * will try to drop it before the split and then check whether
+                * the folio can be split. So skip the check here.
+                */
+               if (!folio_test_private(folio) &&
+                   !can_split_folio(folio, NULL))
                        goto next;
 
                if (!folio_trylock(folio))
                        goto next;
 
-               if (!split_folio(folio))
+               if (!split_folio_to_order(folio, new_order))
                        split++;
 
                folio_unlock(folio);
@@ -3404,7 +3520,7 @@ out:
 }
 
 static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
-                               pgoff_t off_end)
+                               pgoff_t off_end, unsigned int new_order)
 {
        struct filename *file;
        struct file *candidate;
@@ -3440,10 +3556,13 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
                total++;
                nr_pages = folio_nr_pages(folio);
 
+               if (new_order >= folio_order(folio))
+                       goto next;
+
                if (!folio_trylock(folio))
                        goto next;
 
-               if (!split_folio(folio))
+               if (!split_folio_to_order(folio, new_order))
                        split++;
 
                folio_unlock(folio);
@@ -3468,10 +3587,14 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
 {
        static DEFINE_MUTEX(split_debug_mutex);
        ssize_t ret;
-       /* hold pid, start_vaddr, end_vaddr or file_path, off_start, off_end */
+       /*
+        * hold pid, start_vaddr, end_vaddr, new_order or
+        * file_path, off_start, off_end, new_order
+        */
        char input_buf[MAX_INPUT_BUF_SZ];
        int pid;
        unsigned long vaddr_start, vaddr_end;
+       unsigned int new_order = 0;
 
        ret = mutex_lock_interruptible(&split_debug_mutex);
        if (ret)
@@ -3500,29 +3623,29 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
                        goto out;
                }
 
-               ret = sscanf(buf, "0x%lx,0x%lx", &off_start, &off_end);
-               if (ret != 2) {
+               ret = sscanf(buf, "0x%lx,0x%lx,%d", &off_start, &off_end, &new_order);
+               if (ret != 2 && ret != 3) {
                        ret = -EINVAL;
                        goto out;
                }
-               ret = split_huge_pages_in_file(file_path, off_start, off_end);
+               ret = split_huge_pages_in_file(file_path, off_start, off_end, new_order);
                if (!ret)
                        ret = input_len;
 
                goto out;
        }
 
-       ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end);
+       ret = sscanf(input_buf, "%d,0x%lx,0x%lx,%d", &pid, &vaddr_start, &vaddr_end, &new_order);
        if (ret == 1 && pid == 1) {
                split_huge_pages_all();
                ret = strlen(input_buf);
                goto out;
-       } else if (ret != 3) {
+       } else if (ret != 3 && ret != 4) {
                ret = -EINVAL;
                goto out;
        }
 
-       ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end);
+       ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end, new_order);
        if (!ret)
                ret = strlen(input_buf);
 out:
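
As a usage illustration, the extended interface can be driven from userspace like this sketch (pid and addresses are made-up values; the string mirrors the "%d,0x%lx,0x%lx,%d" format parsed above, and omitting the trailing ",order" keeps the old split-to-order-0 behaviour):

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                char buf[80];
                int len, fd = open("/sys/kernel/debug/split_huge_pages", O_WRONLY);

                if (fd < 0)
                        return 1;
                /* Split THPs mapped in [start, end) of pid 1234 to order 4. */
                len = snprintf(buf, sizeof(buf), "%d,0x%lx,0x%lx,%d",
                               1234, 0x700000000000UL, 0x700000200000UL, 4);
                if (write(fd, buf, len) != len)
                        perror("write");
                close(fd);
                return 0;
        }
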
index ed1581b670d42e3e5cc1bd3ea4c01390f10e1016..23ef240ba48a60a77102f7bf1beb2e76a987486d 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/delayacct.h>
 #include <linux/memory.h>
 #include <linux/mm_inline.h>
+#include <linux/padata.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
@@ -68,7 +69,7 @@ static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
 #endif
 static unsigned long hugetlb_cma_size __initdata;
 
-__initdata LIST_HEAD(huge_boot_pages);
+__initdata struct list_head huge_boot_pages[MAX_NUMNODES];
 
 /* for command line parsing */
 static struct hstate * __initdata parsed_hstate;
@@ -1464,15 +1465,15 @@ static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
  * next node from which to allocate, handling wrap at end of node
  * mask.
  */
-static int hstate_next_node_to_alloc(struct hstate *h,
+static int hstate_next_node_to_alloc(int *next_node,
                                        nodemask_t *nodes_allowed)
 {
        int nid;
 
        VM_BUG_ON(!nodes_allowed);
 
-       nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
-       h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
+       nid = get_valid_node_allowed(*next_node, nodes_allowed);
+       *next_node = next_node_allowed(nid, nodes_allowed);
 
        return nid;
 }
@@ -1495,10 +1496,10 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
        return nid;
 }
 
-#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask)          \
+#define for_each_node_mask_to_alloc(next_node, nr_nodes, node, mask)           \
        for (nr_nodes = nodes_weight(*mask);                            \
                nr_nodes > 0 &&                                         \
-               ((node = hstate_next_node_to_alloc(hs, mask)) || 1);    \
+               ((node = hstate_next_node_to_alloc(next_node, mask)) || 1);     \
                nr_nodes--)
 
 #define for_each_node_mask_to_free(hs, nr_nodes, node, mask)           \
@@ -2163,9 +2164,9 @@ static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
  * transparent huge pages.  See the PageTransHuge() documentation for more
  * details.
  */
-int PageHuge(struct page *page)
+int PageHuge(const struct page *page)
 {
-       struct folio *folio;
+       const struct folio *folio;
 
        if (!PageCompound(page))
                return 0;
@@ -2350,12 +2351,13 @@ static void prep_and_add_allocated_folios(struct hstate *h,
  */
 static struct folio *alloc_pool_huge_folio(struct hstate *h,
                                        nodemask_t *nodes_allowed,
-                                       nodemask_t *node_alloc_noretry)
+                                       nodemask_t *node_alloc_noretry,
+                                       int *next_node)
 {
        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
        int nr_nodes, node;
 
-       for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+       for_each_node_mask_to_alloc(next_node, nr_nodes, node, nodes_allowed) {
                struct folio *folio;
 
                folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, node,
@@ -3029,21 +3031,9 @@ static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
 {
        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
        int nid = folio_nid(old_folio);
-       struct folio *new_folio;
+       struct folio *new_folio = NULL;
        int ret = 0;
 
-       /*
-        * Before dissolving the folio, we need to allocate a new one for the
-        * pool to remain stable.  Here, we allocate the folio and 'prep' it
-        * by doing everything but actually updating counters and adding to
-        * the pool.  This simplifies and let us do most of the processing
-        * under the lock.
-        */
-       new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL);
-       if (!new_folio)
-               return -ENOMEM;
-       __prep_new_hugetlb_folio(h, new_folio);
-
 retry:
        spin_lock_irq(&hugetlb_lock);
        if (!folio_test_hugetlb(old_folio)) {
@@ -3073,6 +3063,16 @@ retry:
                cond_resched();
                goto retry;
        } else {
+               if (!new_folio) {
+                       spin_unlock_irq(&hugetlb_lock);
+                       new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid,
+                                                             NULL, NULL);
+                       if (!new_folio)
+                               return -ENOMEM;
+                       __prep_new_hugetlb_folio(h, new_folio);
+                       goto retry;
+               }
+
                /*
                 * Ok, old_folio is still a genuine free hugepage. Remove it from
                 * the freelist and decrease the counters. These will be
@@ -3100,9 +3100,11 @@ retry:
 
 free_new:
        spin_unlock_irq(&hugetlb_lock);
-       /* Folio has a zero ref count, but needs a ref to be freed */
-       folio_ref_unfreeze(new_folio, 1);
-       update_and_free_hugetlb_folio(h, new_folio, false);
+       if (new_folio) {
+               /* Folio has a zero ref count, but needs a ref to be freed */
+               folio_ref_unfreeze(new_folio, 1);
+               update_and_free_hugetlb_folio(h, new_folio, false);
+       }
 
        return ret;
 }
@@ -3299,7 +3301,7 @@ int alloc_bootmem_huge_page(struct hstate *h, int nid)
 int __alloc_bootmem_huge_page(struct hstate *h, int nid)
 {
        struct huge_bootmem_page *m = NULL; /* initialize for clang */
-       int nr_nodes, node;
+       int nr_nodes, node = nid;
 
        /* do node specific alloc */
        if (nid != NUMA_NO_NODE) {
@@ -3310,7 +3312,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
                goto found;
        }
        /* allocate from next node when distributing huge pages */
-       for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) {
+       for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node, &node_states[N_MEMORY]) {
                m = memblock_alloc_try_nid_raw(
                                huge_page_size(h), huge_page_size(h),
                                0, MEMBLOCK_ALLOC_ACCESSIBLE, node);
@@ -3337,7 +3339,7 @@ found:
                huge_page_size(h) - PAGE_SIZE);
        /* Put them into a private list first because mem_map is not up yet */
        INIT_LIST_HEAD(&m->list);
-       list_add(&m->list, &huge_boot_pages);
+       list_add(&m->list, &huge_boot_pages[node]);
        m->hstate = h;
        return 1;
 }
@@ -3388,8 +3390,6 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
        /* Send list for bulk vmemmap optimization processing */
        hugetlb_vmemmap_optimize_folios(h, folio_list);
 
-       /* Add all new pool pages to free lists in one lock cycle */
-       spin_lock_irqsave(&hugetlb_lock, flags);
        list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
                if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
                        /*
@@ -3402,23 +3402,25 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
                                        HUGETLB_VMEMMAP_RESERVE_PAGES,
                                        pages_per_huge_page(h));
                }
+               /* Subdivide locks to achieve better parallel performance */
+               spin_lock_irqsave(&hugetlb_lock, flags);
                __prep_account_new_huge_page(h, folio_nid(folio));
                enqueue_hugetlb_folio(h, folio);
+               spin_unlock_irqrestore(&hugetlb_lock, flags);
        }
-       spin_unlock_irqrestore(&hugetlb_lock, flags);
 }
 
 /*
  * Put bootmem huge pages into the standard lists after mem_map is up.
  * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages.
  */
-static void __init gather_bootmem_prealloc(void)
+static void __init gather_bootmem_prealloc_node(unsigned long nid)
 {
        LIST_HEAD(folio_list);
        struct huge_bootmem_page *m;
        struct hstate *h = NULL, *prev_h = NULL;
 
-       list_for_each_entry(m, &huge_boot_pages, list) {
+       list_for_each_entry(m, &huge_boot_pages[nid], list) {
                struct page *page = virt_to_page(m);
                struct folio *folio = (void *)page;
 
@@ -3451,6 +3453,31 @@ static void __init gather_bootmem_prealloc(void)
        prep_and_add_bootmem_folios(h, &folio_list);
 }
 
+static void __init gather_bootmem_prealloc_parallel(unsigned long start,
+                                                   unsigned long end, void *arg)
+{
+       int nid;
+
+       for (nid = start; nid < end; nid++)
+               gather_bootmem_prealloc_node(nid);
+}
+
+static void __init gather_bootmem_prealloc(void)
+{
+       struct padata_mt_job job = {
+               .thread_fn      = gather_bootmem_prealloc_parallel,
+               .fn_arg         = NULL,
+               .start          = 0,
+               .size           = num_node_state(N_MEMORY),
+               .align          = 1,
+               .min_chunk      = 1,
+               .max_threads    = num_node_state(N_MEMORY),
+               .numa_aware     = true,
+       };
+
+       padata_do_multithreaded(&job);
+}
+
 static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
 {
        unsigned long i;
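
A brief note on the padata job above: padata_do_multithreaded() divides the range [job.start, job.start + job.size) into chunks of at least job.min_chunk units and hands each chunk to job.thread_fn on one of up to job.max_threads workers, so every node id is visited exactly once. A worker with the same shape as gather_bootmem_prealloc_parallel() would look like this hypothetical sketch:

        /* Hypothetical worker: owns the half-open node range [start, end)
         * and must not touch nodes outside it, since other workers may be
         * processing them concurrently. */
        static void __init example_node_worker(unsigned long start,
                                               unsigned long end, void *arg)
        {
                unsigned long nid;

                for (nid = start; nid < end; nid++)
                        pr_info("hugetlb: gathering bootmem on node %lu\n", nid);
        }
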
@@ -3482,6 +3509,108 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
        h->max_huge_pages_node[nid] = i;
 }
 
+static bool __init hugetlb_hstate_alloc_pages_specific_nodes(struct hstate *h)
+{
+       int i;
+       bool node_specific_alloc = false;
+
+       for_each_online_node(i) {
+               if (h->max_huge_pages_node[i] > 0) {
+                       hugetlb_hstate_alloc_pages_onenode(h, i);
+                       node_specific_alloc = true;
+               }
+       }
+
+       return node_specific_alloc;
+}
+
+static void __init hugetlb_hstate_alloc_pages_errcheck(unsigned long allocated, struct hstate *h)
+{
+       if (allocated < h->max_huge_pages) {
+               char buf[32];
+
+               string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
+               pr_warn("HugeTLB: allocating %lu of page size %s failed.  Only allocated %lu hugepages.\n",
+                       h->max_huge_pages, buf, allocated);
+               h->max_huge_pages = allocated;
+       }
+}
+
+static void __init hugetlb_pages_alloc_boot_node(unsigned long start, unsigned long end, void *arg)
+{
+       struct hstate *h = (struct hstate *)arg;
+       int i, num = end - start;
+       nodemask_t node_alloc_noretry;
+       LIST_HEAD(folio_list);
+       int next_node = first_online_node;
+
+       /* Bit mask controlling how hard we retry per-node allocations.*/
+       nodes_clear(node_alloc_noretry);
+
+       for (i = 0; i < num; ++i) {
+               struct folio *folio = alloc_pool_huge_folio(h, &node_states[N_MEMORY],
+                                               &node_alloc_noretry, &next_node);
+               if (!folio)
+                       break;
+
+               list_move(&folio->lru, &folio_list);
+               cond_resched();
+       }
+
+       prep_and_add_allocated_folios(h, &folio_list);
+}
+
+static unsigned long __init hugetlb_gigantic_pages_alloc_boot(struct hstate *h)
+{
+       unsigned long i;
+
+       for (i = 0; i < h->max_huge_pages; ++i) {
+               if (!alloc_bootmem_huge_page(h, NUMA_NO_NODE))
+                       break;
+               cond_resched();
+       }
+
+       return i;
+}
+
+static unsigned long __init hugetlb_pages_alloc_boot(struct hstate *h)
+{
+       struct padata_mt_job job = {
+               .fn_arg         = h,
+               .align          = 1,
+               .numa_aware     = true
+       };
+
+       job.thread_fn   = hugetlb_pages_alloc_boot_node;
+       job.start       = 0;
+       job.size        = h->max_huge_pages;
+
+       /*
+        * job.max_threads is twice num_node_state(N_MEMORY).
+        *
+        * Tests below indicate that a multiplier of 2 significantly improves
+        * performance, and although larger values also provide improvements,
+        * the gains are marginal.
+        *
+        * Therefore, a multiplier of 2 strikes a good balance between
+        * parallel processing capability and efficient resource management.
+        *
+        * +------------+-------+-------+-------+-------+-------+
+        * | multiplier |   1   |   2   |   3   |   4   |   5   |
+        * +------------+-------+-------+-------+-------+-------+
+        * | 256G 2node | 358ms | 215ms | 157ms | 134ms | 126ms |
+        * | 2T   4node | 979ms | 679ms | 543ms | 489ms | 481ms |
+        * | 50G  2node | 71ms  | 44ms  | 37ms  | 30ms  | 31ms  |
+        * +------------+-------+-------+-------+-------+-------+
+        */
+       job.max_threads = num_node_state(N_MEMORY) * 2;
+       job.min_chunk   = h->max_huge_pages / num_node_state(N_MEMORY) / 2;
+       padata_do_multithreaded(&job);
+
+       return h->nr_huge_pages;
+}
+
 /*
  * NOTE: this routine is called in different contexts for gigantic and
  * non-gigantic pages.
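
To make the sizing arithmetic concrete, here is a small standalone calculation with made-up numbers (two memory nodes, 1024 huge pages requested); the values are illustrative, not taken from the measurements above:

        #include <stdio.h>

        int main(void)
        {
                unsigned int nodes = 2;     /* num_node_state(N_MEMORY) */
                unsigned long pages = 1024; /* h->max_huge_pages */

                /* Same arithmetic as hugetlb_pages_alloc_boot(): */
                unsigned int max_threads = nodes * 2;
                unsigned long min_chunk = pages / nodes / 2;

                /* Prints: up to 4 threads, chunks of at least 256 pages */
                printf("up to %u threads, chunks of at least %lu pages\n",
                       max_threads, min_chunk);
                return 0;
        }
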
@@ -3495,11 +3624,8 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
  */
 static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 {
-       unsigned long i;
-       struct folio *folio;
-       LIST_HEAD(folio_list);
-       nodemask_t *node_alloc_noretry;
-       bool node_specific_alloc = false;
+       unsigned long allocated;
+       static bool initialized __initdata;
 
        /* skip gigantic hugepages allocation if hugetlb_cma enabled */
        if (hstate_is_gigantic(h) && hugetlb_cma_size) {
@@ -3507,66 +3633,26 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
                return;
        }
 
-       /* do node specific alloc */
-       for_each_online_node(i) {
-               if (h->max_huge_pages_node[i] > 0) {
-                       hugetlb_hstate_alloc_pages_onenode(h, i);
-                       node_specific_alloc = true;
-               }
+       /*
+        * hugetlb_hstate_alloc_pages() may be called many times; initialize
+        * huge_boot_pages only once.
+        */
+       if (!initialized) {
+               int i = 0;
+
+               for (i = 0; i < MAX_NUMNODES; i++)
+                       INIT_LIST_HEAD(&huge_boot_pages[i]);
+               initialized = true;
        }
 
-       if (node_specific_alloc)
+       /* do node specific alloc */
+       if (hugetlb_hstate_alloc_pages_specific_nodes(h))
                return;
 
        /* below will do all node balanced alloc */
-       if (!hstate_is_gigantic(h)) {
-               /*
-                * Bit mask controlling how hard we retry per-node allocations.
-                * Ignore errors as lower level routines can deal with
-                * node_alloc_noretry == NULL.  If this kmalloc fails at boot
-                * time, we are likely in bigger trouble.
-                */
-               node_alloc_noretry = kmalloc(sizeof(*node_alloc_noretry),
-                                               GFP_KERNEL);
-       } else {
-               /* allocations done at boot time */
-               node_alloc_noretry = NULL;
-       }
-
-       /* bit mask controlling how hard we retry per-node allocations */
-       if (node_alloc_noretry)
-               nodes_clear(*node_alloc_noretry);
-
-       for (i = 0; i < h->max_huge_pages; ++i) {
-               if (hstate_is_gigantic(h)) {
-                       /*
-                        * gigantic pages not added to list as they are not
-                        * added to pools now.
-                        */
-                       if (!alloc_bootmem_huge_page(h, NUMA_NO_NODE))
-                               break;
-               } else {
-                       folio = alloc_pool_huge_folio(h, &node_states[N_MEMORY],
-                                                       node_alloc_noretry);
-                       if (!folio)
-                               break;
-                       list_add(&folio->lru, &folio_list);
-               }
-               cond_resched();
-       }
-
-       /* list will be empty if hstate_is_gigantic */
-       prep_and_add_allocated_folios(h, &folio_list);
-
-       if (i < h->max_huge_pages) {
-               char buf[32];
+       if (hstate_is_gigantic(h))
+               allocated = hugetlb_gigantic_pages_alloc_boot(h);
+       else
+               allocated = hugetlb_pages_alloc_boot(h);
 
-               string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
-               pr_warn("HugeTLB: allocating %lu of page size %s failed.  Only allocated %lu hugepages.\n",
-                       h->max_huge_pages, buf, i);
-               h->max_huge_pages = i;
-       }
-       kfree(node_alloc_noretry);
+       hugetlb_hstate_alloc_pages_errcheck(allocated, h);
 }
 
 static void __init hugetlb_init_hstates(void)
@@ -3668,7 +3754,7 @@ static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
        VM_BUG_ON(delta != -1 && delta != 1);
 
        if (delta < 0) {
-               for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+               for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node, nodes_allowed) {
                        if (h->surplus_huge_pages_node[node])
                                goto found;
                }
@@ -3783,7 +3869,8 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
                cond_resched();
 
                folio = alloc_pool_huge_folio(h, nodes_allowed,
-                                               node_alloc_noretry);
+                                               node_alloc_noretry,
+                                               &h->next_nid_to_alloc);
                if (!folio) {
                        prep_and_add_allocated_folios(h, &page_list);
                        spin_lock_irq(&hugetlb_lock);
@@ -5585,6 +5672,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
        struct page *page;
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
+       bool adjust_reservation = false;
        unsigned long last_addr_mask;
        bool force_flush = false;
 
@@ -5677,7 +5765,31 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
                hugetlb_count_sub(pages_per_huge_page(h), mm);
                hugetlb_remove_rmap(page_folio(page));
 
+               /*
+                * Restore the reservation for an anonymous page; otherwise
+                * the backing page could be stolen by someone.
+                * If we are freeing a surplus page, do not set the restore-
+                * reservation bit.
+                */
+               if (!h->surplus_huge_pages && __vma_private_lock(vma) &&
+                   folio_test_anon(page_folio(page))) {
+                       folio_set_hugetlb_restore_reserve(page_folio(page));
+                       /* Reservation to be adjusted after the spin lock */
+                       adjust_reservation = true;
+               }
+
                spin_unlock(ptl);
+
+               /*
+                * Adjust the reservation for the region that will have the
+                * reserve restored. Keep in mind that vma_needs_reservation() changes
+                * resv->adds_in_progress if it succeeds. If this is not done,
+                * do_exit() will not see it, and will keep the reservation
+                * forever.
+                */
+               if (adjust_reservation && vma_needs_reservation(h, vma, address))
+                       vma_add_reservation(h, vma, address);
+
                tlb_remove_page_size(tlb, page, huge_page_size(h));
                /*
                 * Bail out after unmapping reference page if supplied
@@ -5826,7 +5938,8 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  */
 static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
                       unsigned long address, pte_t *ptep, unsigned int flags,
-                      struct folio *pagecache_folio, spinlock_t *ptl)
+                      struct folio *pagecache_folio, spinlock_t *ptl,
+                      struct vm_fault *vmf)
 {
        const bool unshare = flags & FAULT_FLAG_UNSHARE;
        pte_t pte = huge_ptep_get(ptep);
@@ -5960,10 +6073,9 @@ retry_avoidcopy:
         * When the original hugepage is a shared one, it does not have
         * anon_vma prepared.
         */
-       if (unlikely(anon_vma_prepare(vma))) {
-               ret = VM_FAULT_OOM;
+       ret = vmf_anon_prepare(vmf);
+       if (unlikely(ret))
                goto out_release_all;
-       }
 
        if (copy_user_large_folio(new_folio, old_folio, address, vma)) {
                ret = VM_FAULT_HWPOISON_LARGE;
@@ -6060,39 +6172,21 @@ int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping
        return 0;
 }
 
-static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
+static inline vm_fault_t hugetlb_handle_userfault(struct vm_fault *vmf,
                                                  struct address_space *mapping,
-                                                 pgoff_t idx,
-                                                 unsigned int flags,
-                                                 unsigned long haddr,
-                                                 unsigned long addr,
                                                  unsigned long reason)
 {
        u32 hash;
-       struct vm_fault vmf = {
-               .vma = vma,
-               .address = haddr,
-               .real_address = addr,
-               .flags = flags,
-
-               /*
-                * Hard to debug if it ends up being
-                * used by a callee that assumes
-                * something about the other
-                * uninitialized fields... same as in
-                * memory.c
-                */
-       };
 
        /*
         * vma_lock and hugetlb_fault_mutex must be dropped before handling
         * userfault. Also mmap_lock could be dropped due to handling
         * userfault, any vma operation should be careful from here.
         */
-       hugetlb_vma_unlock_read(vma);
-       hash = hugetlb_fault_mutex_hash(mapping, idx);
+       hugetlb_vma_unlock_read(vmf->vma);
+       hash = hugetlb_fault_mutex_hash(mapping, vmf->pgoff);
        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
-       return handle_userfault(&vmf, reason);
+       return handle_userfault(vmf, reason);
 }
 
 /*
@@ -6116,7 +6210,8 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
                        struct vm_area_struct *vma,
                        struct address_space *mapping, pgoff_t idx,
                        unsigned long address, pte_t *ptep,
-                       pte_t old_pte, unsigned int flags)
+                       pte_t old_pte, unsigned int flags,
+                       struct vm_fault *vmf)
 {
        struct hstate *h = hstate_vma(vma);
        vm_fault_t ret = VM_FAULT_SIGBUS;
@@ -6175,8 +6270,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
                                goto out;
                        }
 
-                       return hugetlb_handle_userfault(vma, mapping, idx, flags,
-                                                       haddr, address,
+                       return hugetlb_handle_userfault(vmf, mapping,
                                                        VM_UFFD_MISSING);
                }
 
@@ -6221,10 +6315,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
                        new_pagecache_folio = true;
                } else {
                        folio_lock(folio);
-                       if (unlikely(anon_vma_prepare(vma))) {
-                               ret = VM_FAULT_OOM;
+
+                       ret = vmf_anon_prepare(vmf);
+                       if (unlikely(ret))
                                goto backout_unlocked;
-                       }
                        anon_rmap = 1;
                }
        } else {
@@ -6248,8 +6342,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
                                ret = 0;
                                goto out;
                        }
-                       return hugetlb_handle_userfault(vma, mapping, idx, flags,
-                                                       haddr, address,
+                       return hugetlb_handle_userfault(vmf, mapping,
                                                        VM_UFFD_MINOR);
                }
        }
@@ -6292,7 +6385,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
        hugetlb_count_add(pages_per_huge_page(h), mm);
        if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
                /* Optimization, do the COW without a second fault */
-               ret = hugetlb_wp(mm, vma, address, ptep, flags, folio, ptl);
+               ret = hugetlb_wp(mm, vma, address, ptep, flags, folio, ptl, vmf);
        }
 
        spin_unlock(ptl);
@@ -6353,19 +6446,25 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        spinlock_t *ptl;
        vm_fault_t ret;
        u32 hash;
-       pgoff_t idx;
        struct folio *folio = NULL;
        struct folio *pagecache_folio = NULL;
        struct hstate *h = hstate_vma(vma);
        struct address_space *mapping;
        int need_wait_lock = 0;
        unsigned long haddr = address & huge_page_mask(h);
+       struct vm_fault vmf = {
+               .vma = vma,
+               .address = haddr,
+               .real_address = address,
+               .flags = flags,
+               .pgoff = vma_hugecache_offset(h, vma, haddr),
+               /* TODO: Track hugetlb faults using vm_fault */
 
-       /* TODO: Handle faults under the VMA lock */
-       if (flags & FAULT_FLAG_VMA_LOCK) {
-               vma_end_read(vma);
-               return VM_FAULT_RETRY;
-       }
+               /*
+                * Some fields may not be initialized, be careful as it may
+                * be hard to debug if called functions make assumptions
+                */
+       };
 
        /*
         * Serialize hugepage allocation and instantiation, so that we don't
@@ -6373,8 +6472,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         * the same page in the page cache.
         */
        mapping = vma->vm_file->f_mapping;
-       idx = vma_hugecache_offset(h, vma, haddr);
-       hash = hugetlb_fault_mutex_hash(mapping, idx);
+       hash = hugetlb_fault_mutex_hash(mapping, vmf.pgoff);
        mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
        /*
@@ -6408,8 +6506,8 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                 * hugetlb_no_page will drop vma lock and hugetlb fault
                 * mutex internally, which make us return immediately.
                 */
-               return hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
-                                     entry, flags);
+               return hugetlb_no_page(mm, vma, mapping, vmf.pgoff, address,
+                                       ptep, entry, flags, &vmf);
        }
 
        ret = 0;
@@ -6455,7 +6553,8 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                /* Just decrements count, does not deallocate */
                vma_end_reservation(h, vma, haddr);
 
-               pagecache_folio = filemap_lock_hugetlb_folio(h, mapping, idx);
+               pagecache_folio = filemap_lock_hugetlb_folio(h, mapping,
+                                                            vmf.pgoff);
                if (IS_ERR(pagecache_folio))
                        pagecache_folio = NULL;
        }
@@ -6470,13 +6569,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(ptep)) &&
            (flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) {
                if (!userfaultfd_wp_async(vma)) {
-                       struct vm_fault vmf = {
-                               .vma = vma,
-                               .address = haddr,
-                               .real_address = address,
-                               .flags = flags,
-                       };
-
                        spin_unlock(ptl);
                        if (pagecache_folio) {
                                folio_unlock(pagecache_folio);
@@ -6510,7 +6602,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
                if (!huge_pte_write(entry)) {
                        ret = hugetlb_wp(mm, vma, address, ptep, flags,
-                                        pagecache_folio, ptl);
+                                        pagecache_folio, ptl, &vmf);
                        goto out_put_page;
                } else if (likely(flags & FAULT_FLAG_WRITE)) {
                        entry = huge_pte_mkdirty(entry);
@@ -6688,11 +6780,20 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
        }
 
        /*
-        * The memory barrier inside __folio_mark_uptodate makes sure that
-        * preceding stores to the page contents become visible before
-        * the set_pte_at() write.
+        * If we just allocated a new page, we need a memory barrier to ensure
+        * that preceding stores to the page become visible before the
+        * set_pte_at() write. The memory barrier inside __folio_mark_uptodate
+        * is what we need.
+        *
+        * In the case where we have not allocated a new page (is_continue),
+        * the page must already be uptodate. UFFDIO_CONTINUE already includes
+        * an earlier smp_wmb() to ensure that prior stores will be visible
+        * before the set_pte_at() write.
         */
-       __folio_mark_uptodate(folio);
+       if (!is_continue)
+               __folio_mark_uptodate(folio);
+       else
+               WARN_ON_ONCE(!folio_test_uptodate(folio));
 
        /* Add shared, newly allocated pages to the page cache. */
        if (vm_shared && !is_continue) {
@@ -7695,6 +7796,13 @@ void __init hugetlb_cma_reserve(int order)
        bool node_specific_cma_alloc = false;
        int nid;
 
+       /*
+        * HugeTLB CMA reservation is required for gigantic
+        * huge pages, which cannot be allocated via the
+        * page allocator. Just warn if there is any change
+        * breaking this assumption.
+        */
+       VM_WARN_ON(order <= MAX_PAGE_ORDER);
        cma_reserve_called = true;
 
        if (!hugetlb_cma_size)
index 4398f572485f00596bbd5af271479ff683b547d9..7e486f2c502cee245991e2468a0655228a81aef5 100644 (file)
@@ -83,6 +83,99 @@ static inline void *folio_raw_mapping(struct folio *folio)
        return (void *)(mapping & ~PAGE_MAPPING_FLAGS);
 }
 
+#ifdef CONFIG_MMU
+
+/* Flags for folio_pte_batch(). */
+typedef int __bitwise fpb_t;
+
+/* Compare PTEs after pte_mkclean(), ignoring the dirty bit. */
+#define FPB_IGNORE_DIRTY               ((__force fpb_t)BIT(0))
+
+/* Compare PTEs after pte_clear_soft_dirty(), ignoring the soft-dirty bit. */
+#define FPB_IGNORE_SOFT_DIRTY          ((__force fpb_t)BIT(1))
+
+static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
+{
+       if (flags & FPB_IGNORE_DIRTY)
+               pte = pte_mkclean(pte);
+       if (likely(flags & FPB_IGNORE_SOFT_DIRTY))
+               pte = pte_clear_soft_dirty(pte);
+       return pte_wrprotect(pte_mkold(pte));
+}
+
+/**
+ * folio_pte_batch - detect a PTE batch for a large folio
+ * @folio: The large folio to detect a PTE batch for.
+ * @addr: The user virtual address the first page is mapped at.
+ * @start_ptep: Page table pointer for the first entry.
+ * @pte: Page table entry for the first page.
+ * @max_nr: The maximum number of table entries to consider.
+ * @flags: Flags to modify the PTE batch semantics.
+ * @any_writable: Optional pointer to indicate whether any entry except the
+ *               first one is writable.
+ *
+ * Detect a PTE batch: consecutive (present) PTEs that map consecutive
+ * pages of the same large folio.
+ *
+ * All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN,
+ * the accessed bit, writable bit, dirty bit (with FPB_IGNORE_DIRTY) and
+ * soft-dirty bit (with FPB_IGNORE_SOFT_DIRTY).
+ *
+ * start_ptep must map any page of the folio. max_nr must be at least one and
+ * must be limited by the caller so scanning cannot exceed a single page table.
+ *
+ * Return: the number of table entries in the batch.
+ */
+static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
+               pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
+               bool *any_writable)
+{
+       unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
+       const pte_t *end_ptep = start_ptep + max_nr;
+       pte_t expected_pte, *ptep;
+       bool writable;
+       int nr;
+
+       if (any_writable)
+               *any_writable = false;
+
+       VM_WARN_ON_FOLIO(!pte_present(pte), folio);
+       VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
+       VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio);
+
+       nr = pte_batch_hint(start_ptep, pte);
+       expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags);
+       ptep = start_ptep + nr;
+
+       while (ptep < end_ptep) {
+               pte = ptep_get(ptep);
+               if (any_writable)
+                       writable = !!pte_write(pte);
+               pte = __pte_batch_clear_ignored(pte, flags);
+
+               if (!pte_same(pte, expected_pte))
+                       break;
+
+               /*
+                * Stop immediately once we reach the end of the folio. In
+                * corner cases the next PFN might fall into a different
+                * folio.
+                */
+               if (pte_pfn(pte) >= folio_end_pfn)
+                       break;
+
+               if (any_writable)
+                       *any_writable |= writable;
+
+               nr = pte_batch_hint(ptep, pte);
+               expected_pte = pte_advance_pfn(expected_pte, nr);
+               ptep += nr;
+       }
+
+       return min(ptep - start_ptep, max_nr);
+}
+#endif /* CONFIG_MMU */
+
 void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
                                                int nr_throttled);
 static inline void acct_reclaim_writeback(struct folio *folio)
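
To show the intended calling convention, a hypothetical caller of folio_pte_batch() might look like the sketch below (the helper name is invented; @max_nr is assumed to already be clamped to the current page table, as the kernel-doc above requires):

        /* Batch over the PTEs mapping one large folio, tolerating dirty and
         * soft-dirty differences between the entries. */
        static int touch_folio_batch(struct folio *folio, unsigned long addr,
                                     pte_t *ptep, pte_t pte, int max_nr)
        {
                const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
                bool any_writable;
                int nr = 1;

                if (folio_test_large(folio))
                        nr = folio_pte_batch(folio, addr, ptep, pte, max_nr,
                                             flags, &any_writable);
                /* ... operate on 'nr' consecutive pages of the folio ... */
                return nr;
        }
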
@@ -103,6 +196,7 @@ static inline void wake_throttle_isolated(pg_data_t *pgdat)
                wake_up(wqh);
 }
 
+vm_fault_t vmf_anon_prepare(struct vm_fault *vmf);
 vm_fault_t do_swap_page(struct vm_fault *vmf);
 void folio_rotate_reclaimable(struct folio *folio);
 bool __folio_end_writeback(struct folio *folio);
@@ -419,8 +513,7 @@ static inline struct folio *page_rmappable_folio(struct page *page)
 {
        struct folio *folio = (struct folio *)page;
 
-       if (folio && folio_order(folio) > 1)
-               folio_prep_large_rmappable(folio);
+       folio_prep_large_rmappable(folio);
        return folio;
 }
 
@@ -447,10 +540,12 @@ extern void prep_compound_page(struct page *page, unsigned int order);
 
 extern void post_alloc_hook(struct page *page, unsigned int order,
                                        gfp_t gfp_flags);
+extern bool free_pages_prepare(struct page *page, unsigned int order);
+
 extern int user_min_free_kbytes;
 
-extern void free_unref_page(struct page *page, unsigned int order);
-extern void free_unref_page_list(struct list_head *list);
+void free_unref_page(struct page *page, unsigned int order);
+void free_unref_folios(struct folio_batch *fbatch);
 
 extern void zone_pcp_reset(struct zone *zone);
 extern void zone_pcp_disable(struct zone *zone);
@@ -481,7 +576,7 @@ int split_free_page(struct page *free_page,
  * completes when free_pfn <= migrate_pfn
  */
 struct compact_control {
-       struct list_head freepages;     /* List of free pages to migrate to */
+       struct list_head freepages[NR_PAGE_ORDERS];     /* List of free pages to migrate to */
        struct list_head migratepages;  /* List of pages being migrated */
        unsigned int nr_freepages;      /* Number of isolated free pages */
        unsigned int nr_migratepages;   /* Number of pages to migrate */
@@ -537,7 +632,8 @@ isolate_migratepages_range(struct compact_control *cc,
                           unsigned long low_pfn, unsigned long end_pfn);
 
 int __alloc_contig_migrate_range(struct compact_control *cc,
-                                       unsigned long start, unsigned long end);
+                                       unsigned long start, unsigned long end,
+                                       int migratetype);
 
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void init_cma_reserved_pageblock(struct page *page);
@@ -866,7 +962,7 @@ extern unsigned long  __must_check vm_mmap_pgoff(struct file *, unsigned long,
         unsigned long, unsigned long);
 
 extern void set_pageblock_order(void);
-unsigned long reclaim_pages(struct list_head *folio_list);
+unsigned long reclaim_pages(struct list_head *folio_list, bool ignore_references);
 unsigned int reclaim_clean_pages_from_list(struct zone *zone,
                                            struct list_head *folio_list);
 /* The ALLOC_WMARK bits are used as an index to zone->watermark */
@@ -1114,6 +1210,15 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma,
 extern bool mirrored_kernelcore;
 extern bool memblock_has_mirror(void);
 
+static __always_inline void vma_set_range(struct vm_area_struct *vma,
+                                         unsigned long start, unsigned long end,
+                                         pgoff_t pgoff)
+{
+       vma->vm_start = start;
+       vma->vm_end = end;
+       vma->vm_pgoff = pgoff;
+}
+
 static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
 {
        /*
index 6ca63e8dda741b5e4094f7205f0b74a163be2e43..e7c9a4dc89f826943a37dd39ce876776be55b23d 100644 (file)
@@ -55,7 +55,7 @@ void kasan_set_track(struct kasan_track *track, depot_stack_handle_t stack)
        u64 ts_nsec = local_clock();
 
        track->cpu = cpu;
-       track->timestamp = ts_nsec >> 3;
+       track->timestamp = ts_nsec >> 9;
 #endif /* CONFIG_KASAN_EXTRA_INFO */
        track->pid = current->pid;
        track->stack = stack;
index 971cfff4ca0b78f259d26acd68801bed7e6e4e4e..7b32be2a3cf0e89353b95e463a65e6c1f9d5d81f 100644 (file)
@@ -440,7 +440,8 @@ static void kmalloc_oob_16(struct kunit *test)
        /* This test is specifically crafted for the generic mode. */
        KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
 
-       ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL);
+       /* RELOC_HIDE to prevent gcc from warning about short alloc */
+       ptr1 = RELOC_HIDE(kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL), 0);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
 
        ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL);
@@ -697,6 +698,84 @@ static void kmalloc_uaf3(struct kunit *test)
        KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr1)[8]);
 }
 
+static void kasan_atomics_helper(struct kunit *test, void *unsafe, void *safe)
+{
+       int *i_unsafe = unsafe;
+
+       KUNIT_EXPECT_KASAN_FAIL(test, READ_ONCE(*i_unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, WRITE_ONCE(*i_unsafe, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, smp_load_acquire(i_unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, smp_store_release(i_unsafe, 42));
+
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_read(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_set(unsafe, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_add(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_sub(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_inc(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_dec(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_and(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_andnot(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_or(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_xor(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_xchg(unsafe, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_cmpxchg(unsafe, 21, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_try_cmpxchg(unsafe, safe, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_try_cmpxchg(safe, unsafe, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_sub_and_test(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_dec_and_test(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_inc_and_test(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_add_negative(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_add_unless(unsafe, 21, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_inc_not_zero(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_inc_unless_negative(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_dec_unless_positive(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_dec_if_positive(unsafe));
+
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_read(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_set(unsafe, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_add(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_sub(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_inc(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_dec(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_and(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_andnot(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_or(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_xor(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_xchg(unsafe, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_cmpxchg(unsafe, 21, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_try_cmpxchg(unsafe, safe, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_try_cmpxchg(safe, unsafe, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_sub_and_test(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_dec_and_test(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_inc_and_test(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_add_negative(42, unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_add_unless(unsafe, 21, 42));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_inc_not_zero(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_inc_unless_negative(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_dec_unless_positive(unsafe));
+       KUNIT_EXPECT_KASAN_FAIL(test, atomic_long_dec_if_positive(unsafe));
+}
+
+static void kasan_atomics(struct kunit *test)
+{
+       void *a1, *a2;
+
+       /*
+        * Just as with kasan_bitops_tags(), we allocate 48 bytes of memory such
+        * that the following 16 bytes will make up the redzone.
+        */
+       a1 = kzalloc(48, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, a1);
+       a2 = kzalloc(sizeof(atomic_long_t), GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, a2);
+
+       /* Use atomics to access the redzone. */
+       kasan_atomics_helper(test, a1 + 48, a2);
+
+       kfree(a1);
+       kfree(a2);
+}
+
 static void kmalloc_double_kzfree(struct kunit *test)
 {
        char *ptr;
@@ -1883,6 +1962,7 @@ static struct kunit_case kasan_kunit_test_cases[] = {
        KUNIT_CASE(kasan_strings),
        KUNIT_CASE(kasan_bitops_generic),
        KUNIT_CASE(kasan_bitops_tags),
+       KUNIT_CASE(kasan_atomics),
        KUNIT_CASE(vmalloc_helpers_tags),
        KUNIT_CASE(vmalloc_oob),
        KUNIT_CASE(vmap_tags),
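
For orientation on the new test: kasan_atomics() hands the helper "a1 + 48"
because a 48-byte request is served from the kmalloc-64 bucket, so bytes
48..63 of the object are redzone; the bucket size is kernel knowledge assumed
here, not visible in the hunk. The arithmetic in toy form:

    #include <stdio.h>

    int main(void)
    {
            unsigned int alloc = 48, bucket = 64;   /* kmalloc-64 cache */

            printf("object:  [0, %u)\n", alloc);
            printf("redzone: [%u, %u), where a1 + 48 points\n", alloc, bucket);
            printf("redzone bytes: %u\n", bucket - alloc);           /* 16 */
            return 0;
    }
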
index 8b7b3ea2c74ea75829c8c3263841f7df0ebe21c3..27ec22767e42220042db9eeb82cc8f771e9b0bf3 100644 (file)
@@ -62,7 +62,7 @@ static noinline void __init copy_user_test(void)
        kfree(kmem);
 }
 
-static int __init test_kasan_module_init(void)
+static int __init kasan_test_module_init(void)
 {
        /*
         * Temporarily enable multi-shot mode. Otherwise, KASAN would only
@@ -77,5 +77,5 @@ static int __init test_kasan_module_init(void)
        return -EAGAIN;
 }
 
-module_init(test_kasan_module_init);
+module_init(kasan_test_module_init);
 MODULE_LICENSE("GPL");
index 7afa4feb03e18f32e37f67c0131d2678249776d3..b48c768acc84d29700fe8196e77850920f72e641 100644 (file)
@@ -267,7 +267,7 @@ static void print_track(struct kasan_track *track, const char *prefix)
        u64 ts_nsec = track->timestamp;
        unsigned long rem_usec;
 
-       ts_nsec <<= 3;
+       ts_nsec <<= 9;
        rem_usec = do_div(ts_nsec, NSEC_PER_SEC) / 1000;
 
        pr_err("%s by task %u on cpu %d at %lu.%06lus:\n",
index 9ef84f31833f3d6cc30e36b5ccf622f03106741e..d6210ca48ddab94342fa61fd86290b4e9c96ebed 100644 (file)
@@ -199,19 +199,12 @@ static bool shadow_mapped(unsigned long addr)
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud))
                return false;
-
-       /*
-        * We can't use pud_large() or pud_huge(), the first one is
-        * arch-specific, the last one depends on HUGETLB_PAGE.  So let's abuse
-        * pud_bad(), if pud is bad then it's bad because it's huge.
-        */
-       if (pud_bad(*pud))
+       if (pud_leaf(*pud))
                return true;
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return false;
-
-       if (pmd_bad(*pmd))
+       if (pmd_leaf(*pmd))
                return true;
        pte = pte_offset_kernel(pmd, addr);
        return !pte_none(ptep_get(pte));
index 2b219acb528e25fd7f16b9f58d85a81048355bd4..38830174608fba663ed416ad4e2661242e484c58 100644 (file)
@@ -410,6 +410,12 @@ static inline int hpage_collapse_test_exit(struct mm_struct *mm)
        return atomic_read(&mm->mm_users) == 0;
 }
 
+static inline int hpage_collapse_test_exit_or_disable(struct mm_struct *mm)
+{
+       return hpage_collapse_test_exit(mm) ||
+              test_bit(MMF_DISABLE_THP, &mm->flags);
+}
+
 void __khugepaged_enter(struct mm_struct *mm)
 {
        struct khugepaged_mm_slot *mm_slot;
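
MMF_DISABLE_THP, which the new helper checks, is the flag a task sets on its
own mm via prctl(). A minimal user of that knob (the fallback defines are for
older libc headers):

    #include <sys/prctl.h>
    #include <stdio.h>

    #ifndef PR_SET_THP_DISABLE
    #define PR_SET_THP_DISABLE 41
    #endif
    #ifndef PR_GET_THP_DISABLE
    #define PR_GET_THP_DISABLE 42
    #endif

    int main(void)
    {
            /* Sets MMF_DISABLE_THP; khugepaged will now skip this mm. */
            if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0))
                    perror("prctl");
            printf("THP disabled: %ld\n",
                   (long)prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0));
            return 0;
    }
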
@@ -683,9 +689,7 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
                                                spinlock_t *ptl,
                                                struct list_head *compound_pagelist)
 {
-       struct folio *src_folio;
-       struct page *src_page;
-       struct page *tmp;
+       struct folio *src, *tmp;
        pte_t *_pte;
        pte_t pteval;
 
@@ -704,10 +708,11 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
                                ksm_might_unmap_zero_page(vma->vm_mm, pteval);
                        }
                } else {
-                       src_page = pte_page(pteval);
-                       src_folio = page_folio(src_page);
-                       if (!folio_test_large(src_folio))
-                               release_pte_folio(src_folio);
+                       struct page *src_page = pte_page(pteval);
+
+                       src = page_folio(src_page);
+                       if (!folio_test_large(src))
+                               release_pte_folio(src);
                        /*
                         * ptl mostly unnecessary, but preempt has to
                         * be disabled to update the per-cpu stats
@@ -715,20 +720,19 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
                         */
                        spin_lock(ptl);
                        ptep_clear(vma->vm_mm, address, _pte);
-                       folio_remove_rmap_pte(src_folio, src_page, vma);
+                       folio_remove_rmap_pte(src, src_page, vma);
                        spin_unlock(ptl);
                        free_page_and_swap_cache(src_page);
                }
        }
 
-       list_for_each_entry_safe(src_page, tmp, compound_pagelist, lru) {
-               list_del(&src_page->lru);
-               mod_node_page_state(page_pgdat(src_page),
-                                   NR_ISOLATED_ANON + page_is_file_lru(src_page),
-                                   -compound_nr(src_page));
-               unlock_page(src_page);
-               free_swap_cache(src_page);
-               putback_lru_page(src_page);
+       list_for_each_entry_safe(src, tmp, compound_pagelist, lru) {
+               list_del(&src->lru);
+               node_stat_sub_folio(src, NR_ISOLATED_ANON +
+                               folio_is_file_lru(src));
+               folio_unlock(src);
+               free_swap_cache(src);
+               folio_putback_lru(src);
        }
 }
 
@@ -914,7 +918,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
 {
        struct vm_area_struct *vma;
 
-       if (unlikely(hpage_collapse_test_exit(mm)))
+       if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
                return SCAN_ANY_PROCESS;
 
        *vmap = vma = find_vma(mm, address);
@@ -1634,7 +1638,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
        /* step 3: set proper refcount and mm_counters. */
        if (nr_ptes) {
                folio_ref_sub(folio, nr_ptes);
-               add_mm_counter(mm, mm_counter_file(&folio->page), -nr_ptes);
+               add_mm_counter(mm, mm_counter_file(folio), -nr_ptes);
        }
 
        /* step 4: remove empty page table */
@@ -1665,7 +1669,7 @@ abort:
        if (nr_ptes) {
                flush_tlb_mm(mm);
                folio_ref_sub(folio, nr_ptes);
-               add_mm_counter(mm, mm_counter_file(&folio->page), -nr_ptes);
+               add_mm_counter(mm, mm_counter_file(folio), -nr_ptes);
        }
        if (start_pte)
                pte_unmap_unlock(start_pte, ptl);
@@ -2360,7 +2364,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
                goto breakouterloop_mmap_lock;
 
        progress++;
-       if (unlikely(hpage_collapse_test_exit(mm)))
+       if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
                goto breakouterloop;
 
        vma_iter_init(&vmi, mm, khugepaged_scan.address);
@@ -2368,7 +2372,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
                unsigned long hstart, hend;
 
                cond_resched();
-               if (unlikely(hpage_collapse_test_exit(mm))) {
+               if (unlikely(hpage_collapse_test_exit_or_disable(mm))) {
                        progress++;
                        break;
                }
@@ -2390,7 +2394,7 @@ skip:
                        bool mmap_locked = true;
 
                        cond_resched();
-                       if (unlikely(hpage_collapse_test_exit(mm)))
+                       if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
                                goto breakouterloop;
 
                        VM_BUG_ON(khugepaged_scan.address < hstart ||
@@ -2408,7 +2412,7 @@ skip:
                                fput(file);
                                if (*result == SCAN_PTE_MAPPED_HUGEPAGE) {
                                        mmap_read_lock(mm);
-                                       if (hpage_collapse_test_exit(mm))
+                                       if (hpage_collapse_test_exit_or_disable(mm))
                                                goto breakouterloop;
                                        *result = collapse_pte_mapped_thp(mm,
                                                khugepaged_scan.address, false);
index 5d6e2dee5692a32f4bff22a9b16d955eb7955dc4..0b09daa188ef6cd6ebcbe9ea587a59a8c1aa6aff 100644 (file)
@@ -359,6 +359,12 @@ void kmsan_handle_dma_sg(struct scatterlist *sg, int nents,
 }
 
 /* Functions from kmsan-checks.h follow. */
+
+/*
+ * To create an origin, kmsan_poison_memory() unwinds the stack and stores it
+ * into the stack depot. This may cause deadlocks if done from within KMSAN
+ * runtime, therefore we bail out if kmsan_in_runtime().
+ */
 void kmsan_poison_memory(const void *address, size_t size, gfp_t flags)
 {
        if (!kmsan_enabled || kmsan_in_runtime())
@@ -371,47 +377,31 @@ void kmsan_poison_memory(const void *address, size_t size, gfp_t flags)
 }
 EXPORT_SYMBOL(kmsan_poison_memory);
 
+/*
+ * Unlike kmsan_poison_memory(), this function can be used from within KMSAN
+ * runtime, because it does not trigger allocations or call instrumented code.
+ */
 void kmsan_unpoison_memory(const void *address, size_t size)
 {
        unsigned long ua_flags;
 
-       if (!kmsan_enabled || kmsan_in_runtime())
+       if (!kmsan_enabled)
                return;
 
        ua_flags = user_access_save();
-       kmsan_enter_runtime();
        /* The users may want to poison/unpoison random memory. */
        kmsan_internal_unpoison_memory((void *)address, size,
                                       KMSAN_POISON_NOCHECK);
-       kmsan_leave_runtime();
        user_access_restore(ua_flags);
 }
 EXPORT_SYMBOL(kmsan_unpoison_memory);
 
 /*
- * Version of kmsan_unpoison_memory() that can be called from within the KMSAN
- * runtime.
- *
- * Non-instrumented IRQ entry functions receive struct pt_regs from assembly
- * code. Those regs need to be unpoisoned, otherwise using them will result in
- * false positives.
- * Using kmsan_unpoison_memory() is not an option in entry code, because the
- * return value of in_task() is inconsistent - as a result, certain calls to
- * kmsan_unpoison_memory() are ignored. kmsan_unpoison_entry_regs() ensures that
- * the registers are unpoisoned even if kmsan_in_runtime() is true in the early
- * entry code.
+ * Version of kmsan_unpoison_memory() called from IRQ entry functions.
  */
 void kmsan_unpoison_entry_regs(const struct pt_regs *regs)
 {
-       unsigned long ua_flags;
-
-       if (!kmsan_enabled)
-               return;
-
-       ua_flags = user_access_save();
-       kmsan_internal_unpoison_memory((void *)regs, sizeof(*regs),
-                                      KMSAN_POISON_NOCHECK);
-       user_access_restore(ua_flags);
+       kmsan_unpoison_memory((void *)regs, sizeof(*regs));
 }
 
 void kmsan_check_memory(const void *addr, size_t size)
index 35b0147542a9ded5f906ce1066c2ea702349d3ae..3fd64736bc4589b87de7d37663139fcc692a5281 100644 (file)
@@ -162,20 +162,6 @@ void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
 }
 EXPORT_SYMBOL_GPL(list_lru_isolate_move);
 
-void list_lru_putback(struct list_lru *lru, struct list_head *item, int nid,
-                     struct mem_cgroup *memcg)
-{
-       struct list_lru_one *list =
-               list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
-
-       if (list_empty(item)) {
-               list_add_tail(item, &list->list);
-               if (!list->nr_items++)
-                       set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
-       }
-}
-EXPORT_SYMBOL_GPL(list_lru_putback);
-
 unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
 {
@@ -257,6 +243,9 @@ restart:
                         */
                        assert_spin_locked(&nlru->lock);
                        goto restart;
+               case LRU_STOP:
+                       assert_spin_locked(&nlru->lock);
+                       goto out;
                default:
                        BUG();
                }
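
LRU_STOP is new in this window; a hypothetical walk callback (not from this
patch, and the callback signature is quoted from memory) shows the intended
use, aborting the whole walk once a caller-supplied budget runs out:

    struct my_scan {
            unsigned long budget;
    };

    static enum lru_status my_isolate(struct list_head *item,
                                      struct list_lru_one *list,
                                      spinlock_t *lock, void *cb_arg)
    {
            struct my_scan *scan = cb_arg;

            if (!scan->budget--)
                    return LRU_STOP;        /* walker leaves via 'goto out' */

            /* ... try to isolate or rotate 'item' here ... */
            return LRU_ROTATE;
    }
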
@@ -567,6 +556,9 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;
+
+       if (mem_cgroup_kmem_disabled())
+               memcg_aware = false;
 #endif
 
        lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
index cfa5e7288261189cb8242e5a0367fe6ffeebca12..44a498c94158c882c624eac2e29a5f07d854e322 100644 (file)
@@ -386,7 +386,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                        return 0;
                }
 
-               if (pmd_young(orig_pmd)) {
+               if (!pageout && pmd_young(orig_pmd)) {
                        pmdp_invalidate(vma, addr, pmd);
                        orig_pmd = pmd_mkold(orig_pmd);
 
@@ -410,7 +410,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 huge_unlock:
                spin_unlock(ptl);
                if (pageout)
-                       reclaim_pages(&folio_list);
+                       reclaim_pages(&folio_list, true);
                return 0;
        }
 
@@ -453,7 +453,7 @@ restart:
                if (folio_test_large(folio)) {
                        int err;
 
-                       if (folio_estimated_sharers(folio) != 1)
+                       if (folio_estimated_sharers(folio) > 1)
                                break;
                        if (pageout_anon_only_filter && !folio_test_anon(folio))
                                break;
@@ -490,7 +490,7 @@ restart:
 
                VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
 
-               if (pte_young(ptent)) {
+               if (!pageout && pte_young(ptent)) {
                        ptent = ptep_get_and_clear_full(mm, addr, pte,
                                                        tlb->fullmm);
                        ptent = pte_mkold(ptent);
@@ -524,7 +524,7 @@ restart:
                pte_unmap_unlock(start_pte, ptl);
        }
        if (pageout)
-               reclaim_pages(&folio_list);
+               reclaim_pages(&folio_list, true);
        cond_resched();
 
        return 0;
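
This path is driven by madvise(MADV_PAGEOUT); with the change above, the
young bit is no longer cleared while paging out, so reference history
survives the hint. A minimal caller, assuming a kernel that has MADV_PAGEOUT
(5.4+):

    #include <sys/mman.h>
    #include <string.h>
    #include <stdio.h>

    #ifndef MADV_PAGEOUT
    #define MADV_PAGEOUT 21
    #endif

    int main(void)
    {
            size_t len = 2 * 1024 * 1024;
            char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED)
                    return 1;
            memset(p, 1, len);                      /* fault the pages in */
            if (madvise(p, len, MADV_PAGEOUT))      /* reclaim them now */
                    perror("madvise");
            return 0;
    }
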
index 61932c9215e7734e4dfc7dc6e427c3692d1c3c6f..fabce2b50c69551e46b0197cbad5a96cb79dde19 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/hugetlb.h>
 #include <linux/pagemap.h>
+#include <linux/pagevec.h>
 #include <linux/vm_event_item.h>
 #include <linux/smp.h>
 #include <linux/page-flags.h>
@@ -3606,22 +3607,24 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 /*
  * Because page_memcg(head) is not set on tails, set it now.
  */
-void split_page_memcg(struct page *head, unsigned int nr)
+void split_page_memcg(struct page *head, int old_order, int new_order)
 {
        struct folio *folio = page_folio(head);
        struct mem_cgroup *memcg = folio_memcg(folio);
        int i;
+       unsigned int old_nr = 1 << old_order;
+       unsigned int new_nr = 1 << new_order;
 
        if (mem_cgroup_disabled() || !memcg)
                return;
 
-       for (i = 1; i < nr; i++)
+       for (i = new_nr; i < old_nr; i += new_nr)
                folio_page(folio, i)->memcg_data = folio->memcg_data;
 
        if (folio_memcg_kmem(folio))
-               obj_cgroup_get_many(__folio_objcg(folio), nr - 1);
+               obj_cgroup_get_many(__folio_objcg(folio), old_nr / new_nr - 1);
        else
-               css_get_many(&memcg->css, nr - 1);
+               css_get_many(&memcg->css, old_nr / new_nr - 1);
 }
 
 #ifdef CONFIG_SWAP
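
The new bounds copy memcg_data once per resulting folio instead of once per
page, and the extra references taken equal old_nr / new_nr - 1. A standalone
check of that arithmetic:

    #include <stdio.h>

    static void split_counts(int old_order, int new_order)
    {
            unsigned int old_nr = 1u << old_order;
            unsigned int new_nr = 1u << new_order;
            unsigned int tails = 0, i;

            for (i = new_nr; i < old_nr; i += new_nr)
                    tails++;        /* one memcg_data copy per new head */

            printf("order %d -> %d: %u copies, %u extra refs\n",
                   old_order, new_order, tails, old_nr / new_nr - 1);
    }

    int main(void)
    {
            split_counts(9, 0);     /* 511 copies, 511 refs */
            split_counts(9, 2);     /* 127 copies, 127 refs */
            return 0;
    }
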
@@ -4800,7 +4803,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
        struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
        struct mem_cgroup *parent;
 
-       mem_cgroup_flush_stats(memcg);
+       mem_cgroup_flush_stats_ratelimited(memcg);
 
        *pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
        *pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
@@ -5621,7 +5624,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
        if (alloc_shrinker_info(memcg))
                goto offline_kmem;
 
-       if (unlikely(mem_cgroup_is_root(memcg)))
+       if (unlikely(mem_cgroup_is_root(memcg)) && !mem_cgroup_disabled())
                queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
                                   FLUSH_TIME);
        lru_gen_online_memcg(memcg);
@@ -5873,7 +5876,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
 }
 
 union mc_target {
-       struct page     *page;
+       struct folio    *folio;
        swp_entry_t     ent;
 };
 
@@ -5965,23 +5968,22 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 }
 
 /**
- * mem_cgroup_move_account - move account of the page
- * @page: the page
+ * mem_cgroup_move_account - move account of the folio
+ * @folio: The folio.
  * @compound: charge the page as compound or small page
- * @from: mem_cgroup which the page is moved from.
- * @to:        mem_cgroup which the page is moved to. @from != @to.
+ * @from: mem_cgroup which the folio is moved from.
+ * @to:        mem_cgroup which the folio is moved to. @from != @to.
  *
- * The page must be locked and not on the LRU.
+ * The folio must be locked and not on the LRU.
  *
  * This function doesn't do "charge" to new cgroup and doesn't do "uncharge"
  * from old cgroup.
  */
-static int mem_cgroup_move_account(struct page *page,
+static int mem_cgroup_move_account(struct folio *folio,
                                   bool compound,
                                   struct mem_cgroup *from,
                                   struct mem_cgroup *to)
 {
-       struct folio *folio = page_folio(page);
        struct lruvec *from_vec, *to_vec;
        struct pglist_data *pgdat;
        unsigned int nr_pages = compound ? folio_nr_pages(folio) : 1;
@@ -6096,7 +6098,7 @@ out:
  * Return:
  * * MC_TARGET_NONE - If the pte is not a target for move charge.
  * * MC_TARGET_PAGE - If the page corresponding to this pte is a target for
- *   move charge. If @target is not NULL, the page is stored in target->page
+ *   move charge. If @target is not NULL, the folio is stored in target->folio
  *   with extra refcnt taken (Caller should release it).
  * * MC_TARGET_SWAP - If the swap entry corresponding to this pte is a
  *   target for charge migration.  If @target is not NULL, the entry is
@@ -6110,6 +6112,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
                unsigned long addr, pte_t ptent, union mc_target *target)
 {
        struct page *page = NULL;
+       struct folio *folio;
        enum mc_target_type ret = MC_TARGET_NONE;
        swp_entry_t ent = { .val = 0 };
 
@@ -6124,9 +6127,11 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
        else if (is_swap_pte(ptent))
                page = mc_handle_swap_pte(vma, ptent, &ent);
 
+       if (page)
+               folio = page_folio(page);
        if (target && page) {
-               if (!trylock_page(page)) {
-                       put_page(page);
+               if (!folio_trylock(folio)) {
+                       folio_put(folio);
                        return ret;
                }
                /*
@@ -6141,8 +6146,8 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
                 * Alas, skip moving the page in this case.
                 */
                if (!pte_present(ptent) && page_mapped(page)) {
-                       unlock_page(page);
-                       put_page(page);
+                       folio_unlock(folio);
+                       folio_put(folio);
                        return ret;
                }
        }
@@ -6155,18 +6160,18 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
                 * mem_cgroup_move_account() checks the page is valid or
                 * not under LRU exclusion.
                 */
-               if (page_memcg(page) == mc.from) {
+               if (folio_memcg(folio) == mc.from) {
                        ret = MC_TARGET_PAGE;
-                       if (is_device_private_page(page) ||
-                           is_device_coherent_page(page))
+                       if (folio_is_device_private(folio) ||
+                           folio_is_device_coherent(folio))
                                ret = MC_TARGET_DEVICE;
                        if (target)
-                               target->page = page;
+                               target->folio = folio;
                }
                if (!ret || !target) {
                        if (target)
-                               unlock_page(page);
-                       put_page(page);
+                               folio_unlock(folio);
+                       folio_put(folio);
                }
        }
        /*
@@ -6192,6 +6197,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
                unsigned long addr, pmd_t pmd, union mc_target *target)
 {
        struct page *page = NULL;
+       struct folio *folio;
        enum mc_target_type ret = MC_TARGET_NONE;
 
        if (unlikely(is_swap_pmd(pmd))) {
@@ -6201,17 +6207,18 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
        }
        page = pmd_page(pmd);
        VM_BUG_ON_PAGE(!page || !PageHead(page), page);
+       folio = page_folio(page);
        if (!(mc.flags & MOVE_ANON))
                return ret;
-       if (page_memcg(page) == mc.from) {
+       if (folio_memcg(folio) == mc.from) {
                ret = MC_TARGET_PAGE;
                if (target) {
-                       get_page(page);
-                       if (!trylock_page(page)) {
-                               put_page(page);
+                       folio_get(folio);
+                       if (!folio_trylock(folio)) {
+                               folio_put(folio);
                                return MC_TARGET_NONE;
                        }
-                       target->page = page;
+                       target->folio = folio;
                }
        }
        return ret;
@@ -6431,7 +6438,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
        spinlock_t *ptl;
        enum mc_target_type target_type;
        union mc_target target;
-       struct page *page;
+       struct folio *folio;
 
        ptl = pmd_trans_huge_lock(pmd, vma);
        if (ptl) {
@@ -6441,26 +6448,26 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
                }
                target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
                if (target_type == MC_TARGET_PAGE) {
-                       page = target.page;
-                       if (isolate_lru_page(page)) {
-                               if (!mem_cgroup_move_account(page, true,
+                       folio = target.folio;
+                       if (folio_isolate_lru(folio)) {
+                               if (!mem_cgroup_move_account(folio, true,
                                                             mc.from, mc.to)) {
                                        mc.precharge -= HPAGE_PMD_NR;
                                        mc.moved_charge += HPAGE_PMD_NR;
                                }
-                               putback_lru_page(page);
+                               folio_putback_lru(folio);
                        }
-                       unlock_page(page);
-                       put_page(page);
+                       folio_unlock(folio);
+                       folio_put(folio);
                } else if (target_type == MC_TARGET_DEVICE) {
-                       page = target.page;
-                       if (!mem_cgroup_move_account(page, true,
+                       folio = target.folio;
+                       if (!mem_cgroup_move_account(folio, true,
                                                     mc.from, mc.to)) {
                                mc.precharge -= HPAGE_PMD_NR;
                                mc.moved_charge += HPAGE_PMD_NR;
                        }
-                       unlock_page(page);
-                       put_page(page);
+                       folio_unlock(folio);
+                       folio_put(folio);
                }
                spin_unlock(ptl);
                return 0;
@@ -6483,28 +6490,28 @@ retry:
                        device = true;
                        fallthrough;
                case MC_TARGET_PAGE:
-                       page = target.page;
+                       folio = target.folio;
                        /*
                         * We can have a part of the split pmd here. Moving it
                         * can be done but it would be too convoluted so simply
                         * ignore such a partial THP and keep it in original
                         * memcg. There should be somebody mapping the head.
                         */
-                       if (PageTransCompound(page))
+                       if (folio_test_large(folio))
                                goto put;
-                       if (!device && !isolate_lru_page(page))
+                       if (!device && !folio_isolate_lru(folio))
                                goto put;
-                       if (!mem_cgroup_move_account(page, false,
+                       if (!mem_cgroup_move_account(folio, false,
                                                mc.from, mc.to)) {
                                mc.precharge--;
                                /* we uncharge from mc.from later. */
                                mc.moved_charge++;
                        }
                        if (!device)
-                               putback_lru_page(page);
+                               folio_putback_lru(folio);
 put:                   /* get_mctgt_type() gets & locks the page */
-                       unlock_page(page);
-                       put_page(page);
+                       folio_unlock(folio);
+                       folio_put(folio);
                        break;
                case MC_TARGET_SWAP:
                        ent = target.ent;
@@ -6977,6 +6984,8 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 
        reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
        while (nr_reclaimed < nr_to_reclaim) {
+               /* Will converge on zero, but reclaim enforces a minimum */
+               unsigned long batch_size = (nr_to_reclaim - nr_reclaimed) / 4;
                unsigned long reclaimed;
 
                if (signal_pending(current))
@@ -6991,8 +7000,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
                        lru_add_drain_all();
 
                reclaimed = try_to_free_mem_cgroup_pages(memcg,
-                                       min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX),
-                                       GFP_KERNEL, reclaim_options);
+                                       batch_size, GFP_KERNEL, reclaim_options);
 
                if (!reclaimed && !nr_retries--)
                        return -EAGAIN;
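
The quarter-of-remaining batch shrinks geometrically toward the floor that
try_to_free_mem_cgroup_pages() already enforces (SWAP_CLUSTER_MAX, 32 pages),
so a large request takes far fewer calls than fixed 32-page batches did. A
quick simulation, assuming every call reclaims its full batch:

    #include <stdio.h>

    #define SWAP_CLUSTER_MAX 32UL           /* the kernel's reclaim floor */

    int main(void)
    {
            unsigned long to_reclaim = 4096, reclaimed = 0;
            int rounds = 0;

            while (reclaimed < to_reclaim) {
                    unsigned long batch = (to_reclaim - reclaimed) / 4;

                    if (batch < SWAP_CLUSTER_MAX)   /* floor inside reclaim */
                            batch = SWAP_CLUSTER_MAX;
                    reclaimed += batch;             /* assume full success */
                    rounds++;
            }
            printf("%d rounds vs. %lu with fixed batches\n",
                   rounds, to_reclaim / SWAP_CLUSTER_MAX);
            return 0;
    }
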
@@ -7505,21 +7513,14 @@ void __mem_cgroup_uncharge(struct folio *folio)
        uncharge_batch(&ug);
 }
 
-/**
- * __mem_cgroup_uncharge_list - uncharge a list of page
- * @page_list: list of pages to uncharge
- *
- * Uncharge a list of pages previously charged with
- * __mem_cgroup_charge().
- */
-void __mem_cgroup_uncharge_list(struct list_head *page_list)
+void __mem_cgroup_uncharge_folios(struct folio_batch *folios)
 {
        struct uncharge_gather ug;
-       struct folio *folio;
+       unsigned int i;
 
        uncharge_gather_clear(&ug);
-       list_for_each_entry(folio, page_list, lru)
-               uncharge_folio(folio, &ug);
+       for (i = 0; i < folios->nr; i++)
+               uncharge_folio(folios->folios[i], &ug);
        if (ug.memcg)
                uncharge_batch(&ug);
 }
index d3a1ba4208c90139f5c172751b7ca7320b2f54a9..7d8d3ab3fa37867cb74a95dcccb822c537cf636d 100644 (file)
 #define MEMFD_TAG_PINNED        PAGECACHE_TAG_TOWRITE
 #define LAST_SCAN               4       /* about 150ms max */
 
+static bool memfd_folio_has_extra_refs(struct folio *folio)
+{
+       return folio_ref_count(folio) - folio_mapcount(folio) !=
+              folio_nr_pages(folio);
+}
+
 static void memfd_tag_pins(struct xa_state *xas)
 {
-       struct page *page;
+       struct folio *folio;
        int latency = 0;
-       int cache_count;
 
        lru_add_drain();
 
        xas_lock_irq(xas);
-       xas_for_each(xas, page, ULONG_MAX) {
-               cache_count = 1;
-               if (!xa_is_value(page) &&
-                   PageTransHuge(page) && !PageHuge(page))
-                       cache_count = HPAGE_PMD_NR;
-
-               if (!xa_is_value(page) &&
-                   page_count(page) - total_mapcount(page) != cache_count)
+       xas_for_each(xas, folio, ULONG_MAX) {
+               if (!xa_is_value(folio) && memfd_folio_has_extra_refs(folio))
                        xas_set_mark(xas, MEMFD_TAG_PINNED);
-               if (cache_count != 1)
-                       xas_set(xas, page->index + cache_count);
 
-               latency += cache_count;
-               if (latency < XA_CHECK_SCHED)
+               if (++latency < XA_CHECK_SCHED)
                        continue;
                latency = 0;
 
@@ -66,16 +62,16 @@ static void memfd_tag_pins(struct xa_state *xas)
 /*
  * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
  * via get_user_pages(), drivers might have some pending I/O without any active
- * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
- * user-space mappings (e.g., direct-IO, AIO). Therefore, we look at all pages
+ * user-space mappings (e.g., direct-IO, AIO). Therefore, we look at all folios
 * and see whether they have an elevated ref-count. If so, we tag them and wait for
  * The caller must guarantee that no new user will acquire writable references
- * to those pages to avoid races.
+ * to those folios to avoid races.
  */
 static int memfd_wait_for_pins(struct address_space *mapping)
 {
        XA_STATE(xas, &mapping->i_pages, 0);
-       struct page *page;
+       struct folio *folio;
        int error, scan;
 
        memfd_tag_pins(&xas);
@@ -83,7 +79,6 @@ static int memfd_wait_for_pins(struct address_space *mapping)
        error = 0;
        for (scan = 0; scan <= LAST_SCAN; scan++) {
                int latency = 0;
-               int cache_count;
 
                if (!xas_marked(&xas, MEMFD_TAG_PINNED))
                        break;
@@ -95,20 +90,15 @@ static int memfd_wait_for_pins(struct address_space *mapping)
 
                xas_set(&xas, 0);
                xas_lock_irq(&xas);
-               xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
+               xas_for_each_marked(&xas, folio, ULONG_MAX, MEMFD_TAG_PINNED) {
                        bool clear = true;
 
-                       cache_count = 1;
-                       if (!xa_is_value(page) &&
-                           PageTransHuge(page) && !PageHuge(page))
-                               cache_count = HPAGE_PMD_NR;
-
-                       if (!xa_is_value(page) && cache_count !=
-                           page_count(page) - total_mapcount(page)) {
+                       if (!xa_is_value(folio) &&
+                           memfd_folio_has_extra_refs(folio)) {
                                /*
                                 * On the last scan, we clean up all those tags
                                 * we inserted; but make a note that we still
-                                * found pages pinned.
+                                * found folios pinned.
                                 */
                                if (scan == LAST_SCAN)
                                        error = -EBUSY;
@@ -118,8 +108,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
                        if (clear)
                                xas_clear_mark(&xas, MEMFD_TAG_PINNED);
 
-                       latency += cache_count;
-                       if (latency < XA_CHECK_SCHED)
+                       if (++latency < XA_CHECK_SCHED)
                                continue;
                        latency = 0;
 
index 5462d9e3c84c7d41af4e29f00c16cd00efb0ace3..0537664620e5f5479e62c461b45e3a3b3db50b9b 100644 (file)
@@ -39,7 +39,7 @@ static LIST_HEAD(memory_tiers);
 static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
 struct memory_dev_type *default_dram_type;
 
-static struct bus_type memory_tier_subsys = {
+static const struct bus_type memory_tier_subsys = {
        .name = "memory_tiering",
        .dev_name = "memory_tier",
 };
@@ -359,6 +359,26 @@ static void disable_all_demotion_targets(void)
        synchronize_rcu();
 }
 
+static void dump_demotion_targets(void)
+{
+       int node;
+
+       for_each_node_state(node, N_MEMORY) {
+               struct memory_tier *memtier = __node_get_memory_tier(node);
+               nodemask_t preferred = node_demotion[node].preferred;
+
+               if (!memtier)
+                       continue;
+
+               if (nodes_empty(preferred))
+                       pr_info("Demotion targets for Node %d: null\n", node);
+               else
+                       pr_info("Demotion targets for Node %d: preferred: %*pbl, fallback: %*pbl\n",
+                               node, nodemask_pr_args(&preferred),
+                               nodemask_pr_args(&memtier->lower_tier_mask));
+       }
+}
+
 /*
  * Find an automatic demotion target for all memory
  * nodes. Failing here is OK.  It might just indicate
@@ -443,7 +463,7 @@ static void establish_demotion_targets(void)
         * Now build the lower_tier mask for each node collecting node mask from
         * all memory tier below it. This allows us to fallback demotion page
         * allocation to a set of nodes that is closer the above selected
-        * perferred node.
+        * preferred node.
         */
        lower_tier = node_states[N_MEMORY];
        list_for_each_entry(memtier, &memory_tiers, list) {
@@ -456,6 +476,8 @@ static void establish_demotion_targets(void)
                nodes_andnot(lower_tier, lower_tier, tier_nodes);
                memtier->lower_tier_mask = lower_tier;
        }
+
+       dump_demotion_targets();
 }
 
 #else
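
dump_demotion_targets() above leans on the kernel's %*pbl specifier, which
renders a bitmap as a comma-separated range list (nodemask_pr_args() supplies
the width and bit array). A userspace imitation of that formatting:

    #include <stdio.h>

    static void print_bitmap_list(unsigned long mask, int nbits)
    {
            int bit = 0, start, first = 1;

            while (bit < nbits) {
                    if (!(mask & (1UL << bit))) {
                            bit++;
                            continue;
                    }
                    start = bit;
                    while (bit < nbits && (mask & (1UL << bit)))
                            bit++;
                    printf("%s%d", first ? "" : ",", start);
                    if (bit - 1 > start)
                            printf("-%d", bit - 1);
                    first = 0;
            }
            printf("\n");
    }

    int main(void)
    {
            print_bitmap_list(0x0d, 8);     /* bits 0,2,3 -> "0,2-3" */
            return 0;
    }
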
index 0bfc8b007c01a3323a15a17d51c4da46a6207540..f2bc6dd15eb830b9c8a0b6602746e2947a6997e6 100644 (file)
@@ -806,9 +806,9 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                }
                rss[MM_SWAPENTS]++;
        } else if (is_migration_entry(entry)) {
-               page = pfn_swap_entry_to_page(entry);
+               folio = pfn_swap_entry_folio(entry);
 
-               rss[mm_counter(page)]++;
+               rss[mm_counter(folio)]++;
 
                if (!is_readable_migration_entry(entry) &&
                                is_cow_mapping(vm_flags)) {
@@ -840,7 +840,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 * keep things as they are.
                 */
                folio_get(folio);
-               rss[mm_counter(page)]++;
+               rss[mm_counter(folio)]++;
                /* Cannot fail as these pages cannot get pinned. */
                folio_try_dup_anon_rmap_pte(folio, page, src_vma);
 
@@ -930,68 +930,111 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
        return 0;
 }
 
+static __always_inline void __copy_present_ptes(struct vm_area_struct *dst_vma,
+               struct vm_area_struct *src_vma, pte_t *dst_pte, pte_t *src_pte,
+               pte_t pte, unsigned long addr, int nr)
+{
+       struct mm_struct *src_mm = src_vma->vm_mm;
+
+       /* If it's a COW mapping, write protect it in both processes. */
+       if (is_cow_mapping(src_vma->vm_flags) && pte_write(pte)) {
+               wrprotect_ptes(src_mm, addr, src_pte, nr);
+               pte = pte_wrprotect(pte);
+       }
+
+       /* If it's a shared mapping, mark it clean in the child. */
+       if (src_vma->vm_flags & VM_SHARED)
+               pte = pte_mkclean(pte);
+       pte = pte_mkold(pte);
+
+       if (!userfaultfd_wp(dst_vma))
+               pte = pte_clear_uffd_wp(pte);
+
+       set_ptes(dst_vma->vm_mm, addr, dst_pte, pte, nr);
+}
+
 /*
- * Copy one pte.  Returns 0 if succeeded, or -EAGAIN if one preallocated page
- * is required to copy this pte.
+ * Copy one present PTE, trying to batch-process subsequent PTEs that map
+ * consecutive pages of the same folio by copying them as well.
+ *
+ * Returns -EAGAIN if one preallocated page is required to copy the next PTE.
+ * Otherwise, returns the number of copied PTEs (at least 1).
  */
 static inline int
-copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
-                pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
-                struct folio **prealloc)
+copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
+                pte_t *dst_pte, pte_t *src_pte, pte_t pte, unsigned long addr,
+                int max_nr, int *rss, struct folio **prealloc)
 {
-       struct mm_struct *src_mm = src_vma->vm_mm;
-       unsigned long vm_flags = src_vma->vm_flags;
-       pte_t pte = ptep_get(src_pte);
        struct page *page;
        struct folio *folio;
+       bool any_writable;
+       fpb_t flags = 0;
+       int err, nr;
 
        page = vm_normal_page(src_vma, addr, pte);
-       if (page)
-               folio = page_folio(page);
-       if (page && folio_test_anon(folio)) {
+       if (unlikely(!page))
+               goto copy_pte;
+
+       folio = page_folio(page);
+
+       /*
+        * If we likely have to copy, just don't bother with batching. Make
+        * sure that the common "small folio" case is as fast as possible
+        * by keeping the batching logic separate.
+        */
+       if (unlikely(!*prealloc && folio_test_large(folio) && max_nr != 1)) {
+               if (src_vma->vm_flags & VM_SHARED)
+                       flags |= FPB_IGNORE_DIRTY;
+               if (!vma_soft_dirty_enabled(src_vma))
+                       flags |= FPB_IGNORE_SOFT_DIRTY;
+
+               nr = folio_pte_batch(folio, addr, src_pte, pte, max_nr, flags,
+                                    &any_writable);
+               folio_ref_add(folio, nr);
+               if (folio_test_anon(folio)) {
+                       if (unlikely(folio_try_dup_anon_rmap_ptes(folio, page,
+                                                                 nr, src_vma))) {
+                               folio_ref_sub(folio, nr);
+                               return -EAGAIN;
+                       }
+                       rss[MM_ANONPAGES] += nr;
+                       VM_WARN_ON_FOLIO(PageAnonExclusive(page), folio);
+               } else {
+                       folio_dup_file_rmap_ptes(folio, page, nr);
+                       rss[mm_counter_file(folio)] += nr;
+               }
+               if (any_writable)
+                       pte = pte_mkwrite(pte, src_vma);
+               __copy_present_ptes(dst_vma, src_vma, dst_pte, src_pte, pte,
+                                   addr, nr);
+               return nr;
+       }
+
+       folio_get(folio);
+       if (folio_test_anon(folio)) {
                /*
                 * If this page may have been pinned by the parent process,
                 * copy the page immediately for the child so that we'll always
                 * guarantee the pinned page won't be randomly replaced in the
                 * future.
                 */
-               folio_get(folio);
                if (unlikely(folio_try_dup_anon_rmap_pte(folio, page, src_vma))) {
                        /* Page may be pinned, we have to copy. */
                        folio_put(folio);
-                       return copy_present_page(dst_vma, src_vma, dst_pte, src_pte,
-                                                addr, rss, prealloc, page);
+                       err = copy_present_page(dst_vma, src_vma, dst_pte, src_pte,
+                                               addr, rss, prealloc, page);
+                       return err ? err : 1;
                }
                rss[MM_ANONPAGES]++;
-       } else if (page) {
-               folio_get(folio);
+               VM_WARN_ON_FOLIO(PageAnonExclusive(page), folio);
+       } else {
                folio_dup_file_rmap_pte(folio, page);
-               rss[mm_counter_file(page)]++;
-       }
-
-       /*
-        * If it's a COW mapping, write protect it both
-        * in the parent and the child
-        */
-       if (is_cow_mapping(vm_flags) && pte_write(pte)) {
-               ptep_set_wrprotect(src_mm, addr, src_pte);
-               pte = pte_wrprotect(pte);
+               rss[mm_counter_file(folio)]++;
        }
-       VM_BUG_ON(page && folio_test_anon(folio) && PageAnonExclusive(page));
 
-       /*
-        * If it's a shared mapping, mark it clean in
-        * the child
-        */
-       if (vm_flags & VM_SHARED)
-               pte = pte_mkclean(pte);
-       pte = pte_mkold(pte);
-
-       if (!userfaultfd_wp(dst_vma))
-               pte = pte_clear_uffd_wp(pte);
-
-       set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte);
-       return 0;
+copy_pte:
+       __copy_present_ptes(dst_vma, src_vma, dst_pte, src_pte, pte, addr, 1);
+       return 1;
 }
 
 static inline struct folio *folio_prealloc(struct mm_struct *src_mm,
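
folio_pte_batch() is the engine of the batching above. Roughly, it counts how
many subsequent PTEs point at the immediately following pages of the same
folio, with dirty/soft-dirty differences masked out by the fpb flags; that
semantic is assumed here rather than quoted. A standalone model of the count:

    #include <stdio.h>

    static int pte_batch(const unsigned long *pfns, int max_nr,
                         unsigned long folio_start, unsigned long folio_nr)
    {
            int nr = 1;

            while (nr < max_nr &&
                   pfns[nr] == pfns[0] + nr &&              /* consecutive */
                   pfns[nr] - folio_start < folio_nr)       /* same folio */
                    nr++;
            return nr;
    }

    int main(void)
    {
            unsigned long pfns[] = { 100, 101, 102, 200, 201 };

            /* folio spans pfns [96, 112): the first three PTEs batch */
            printf("batch = %d\n", pte_batch(pfns, 5, 96, 16));     /* 3 */
            return 0;
    }
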
@@ -1028,10 +1071,11 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
        pte_t *src_pte, *dst_pte;
        pte_t ptent;
        spinlock_t *src_ptl, *dst_ptl;
-       int progress, ret = 0;
+       int progress, max_nr, ret = 0;
        int rss[NR_MM_COUNTERS];
        swp_entry_t entry = (swp_entry_t){0};
        struct folio *prealloc = NULL;
+       int nr;
 
 again:
        progress = 0;
@@ -1062,6 +1106,8 @@ again:
        arch_enter_lazy_mmu_mode();
 
        do {
+               nr = 1;
+
                /*
                 * We are holding two locks at this point - either of them
                 * could generate latencies in another task on another CPU.
@@ -1091,6 +1137,8 @@ again:
                                progress += 8;
                                continue;
                        }
+                       ptent = ptep_get(src_pte);
+                       VM_WARN_ON_ONCE(!pte_present(ptent));
 
                        /*
                         * Device exclusive entry restored, continue by copying
@@ -1098,9 +1146,10 @@ again:
                         */
                        WARN_ON_ONCE(ret != -ENOENT);
                }
-               /* copy_present_pte() will clear `*prealloc' if consumed */
-               ret = copy_present_pte(dst_vma, src_vma, dst_pte, src_pte,
-                                      addr, rss, &prealloc);
+               /* copy_present_ptes() will clear `*prealloc' if consumed */
+               max_nr = (end - addr) / PAGE_SIZE;
+               ret = copy_present_ptes(dst_vma, src_vma, dst_pte, src_pte,
+                                       ptent, addr, max_nr, rss, &prealloc);
                /*
                 * If we need a pre-allocated page for this pte, drop the
                 * locks, allocate, and try again.
@@ -1117,8 +1166,10 @@ again:
                        folio_put(prealloc);
                        prealloc = NULL;
                }
-               progress += 8;
-       } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+               nr = ret;
+               progress += 8 * nr;
+       } while (dst_pte += nr, src_pte += nr, addr += PAGE_SIZE * nr,
+                addr != end);
 
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(orig_src_pte, src_ptl);
@@ -1139,7 +1190,7 @@ again:
                prealloc = folio_prealloc(src_mm, src_vma, addr, false);
                if (!prealloc)
                        return -ENOMEM;
-       } else if (ret) {
+       } else if (ret < 0) {
                VM_WARN_ON_ONCE(1);
        }
 
@@ -1369,19 +1420,16 @@ static inline bool should_zap_cows(struct zap_details *details)
        return details->even_cows;
 }
 
-/* Decides whether we should zap this page with the page pointer specified */
-static inline bool should_zap_page(struct zap_details *details, struct page *page)
+/* Decides whether we should zap this folio, given its resolved folio pointer */
+static inline bool should_zap_folio(struct zap_details *details,
+                                   struct folio *folio)
 {
-       /* If we can make a decision without *page.. */
+       /* If we can make a decision without *folio.. */
        if (should_zap_cows(details))
                return true;
 
-       /* E.g. the caller passes NULL for the case of a zero page */
-       if (!page)
-               return true;
-
-       /* Otherwise we should only zap non-anon pages */
-       return !PageAnon(page);
+       /* Otherwise we should only zap non-anon folios */
+       return !folio_test_anon(folio);
 }
 
 static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
@@ -1398,7 +1446,7 @@ static inline bool zap_drop_file_uffd_wp(struct zap_details *details)
  */
 static inline void
 zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
-                             unsigned long addr, pte_t *pte,
+                             unsigned long addr, pte_t *pte, int nr,
                              struct zap_details *details, pte_t pteval)
 {
        /* Zap on anonymous always means dropping everything */
@@ -1408,7 +1456,111 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
        if (zap_drop_file_uffd_wp(details))
                return;
 
-       pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
+       for (;;) {
+               /* the PFN in the PTE is irrelevant. */
+               pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
+               if (--nr == 0)
+                       break;
+               pte++;
+               addr += PAGE_SIZE;
+       }
+}
+
+static __always_inline void zap_present_folio_ptes(struct mmu_gather *tlb,
+               struct vm_area_struct *vma, struct folio *folio,
+               struct page *page, pte_t *pte, pte_t ptent, unsigned int nr,
+               unsigned long addr, struct zap_details *details, int *rss,
+               bool *force_flush, bool *force_break)
+{
+       struct mm_struct *mm = tlb->mm;
+       bool delay_rmap = false;
+
+       if (!folio_test_anon(folio)) {
+               ptent = get_and_clear_full_ptes(mm, addr, pte, nr, tlb->fullmm);
+               if (pte_dirty(ptent)) {
+                       folio_mark_dirty(folio);
+                       if (tlb_delay_rmap(tlb)) {
+                               delay_rmap = true;
+                               *force_flush = true;
+                       }
+               }
+               if (pte_young(ptent) && likely(vma_has_recency(vma)))
+                       folio_mark_accessed(folio);
+               rss[mm_counter(folio)] -= nr;
+       } else {
+               /* We don't need up-to-date accessed/dirty bits. */
+               clear_full_ptes(mm, addr, pte, nr, tlb->fullmm);
+               rss[MM_ANONPAGES] -= nr;
+       }
+       /* Checking a single PTE in a batch is sufficient. */
+       arch_check_zapped_pte(vma, ptent);
+       tlb_remove_tlb_entries(tlb, pte, nr, addr);
+       if (unlikely(userfaultfd_pte_wp(vma, ptent)))
+               zap_install_uffd_wp_if_needed(vma, addr, pte, nr, details,
+                                             ptent);
+
+       if (!delay_rmap) {
+               folio_remove_rmap_ptes(folio, page, nr, vma);
+
+               /* Only sanity-check the first page in a batch. */
+               if (unlikely(page_mapcount(page) < 0))
+                       print_bad_pte(vma, addr, ptent, page);
+       }
+       if (unlikely(__tlb_remove_folio_pages(tlb, page, nr, delay_rmap))) {
+               *force_flush = true;
+               *force_break = true;
+       }
+}
+
+/*
+ * Zap or skip at least one present PTE, trying to batch-process subsequent
+ * PTEs that map consecutive pages of the same folio.
+ *
+ * Returns the number of processed (skipped or zapped) PTEs (at least 1).
+ */
+static inline int zap_present_ptes(struct mmu_gather *tlb,
+               struct vm_area_struct *vma, pte_t *pte, pte_t ptent,
+               unsigned int max_nr, unsigned long addr,
+               struct zap_details *details, int *rss, bool *force_flush,
+               bool *force_break)
+{
+       const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
+       struct mm_struct *mm = tlb->mm;
+       struct folio *folio;
+       struct page *page;
+       int nr;
+
+       page = vm_normal_page(vma, addr, ptent);
+       if (!page) {
+               /* We don't need up-to-date accessed/dirty bits. */
+               ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm);
+               arch_check_zapped_pte(vma, ptent);
+               tlb_remove_tlb_entry(tlb, pte, addr);
+               VM_WARN_ON_ONCE(userfaultfd_wp(vma));
+               ksm_might_unmap_zero_page(mm, ptent);
+               return 1;
+       }
+
+       folio = page_folio(page);
+       if (unlikely(!should_zap_folio(details, folio)))
+               return 1;
+
+       /*
+        * Make sure that the common "small folio" case is as fast as possible
+        * by keeping the batching logic separate.
+        */
+       if (unlikely(folio_test_large(folio) && max_nr != 1)) {
+               nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags,
+                                    NULL);
+
+               zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, nr,
+                                      addr, details, rss, force_flush,
+                                      force_break);
+               return nr;
+       }
+       zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, 1, addr,
+                              details, rss, force_flush, force_break);
+       return 1;
 }
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -1416,13 +1568,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                                unsigned long addr, unsigned long end,
                                struct zap_details *details)
 {
+       bool force_flush = false, force_break = false;
        struct mm_struct *mm = tlb->mm;
-       int force_flush = 0;
        int rss[NR_MM_COUNTERS];
        spinlock_t *ptl;
        pte_t *start_pte;
        pte_t *pte;
        swp_entry_t entry;
+       int nr;
 
        tlb_change_page_size(tlb, PAGE_SIZE);
        init_rss_vec(rss);
@@ -1436,7 +1589,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                pte_t ptent = ptep_get(pte);
                struct folio *folio;
                struct page *page;
+               int max_nr;
 
+               nr = 1;
                if (pte_none(ptent))
                        continue;
 
@@ -1444,44 +1599,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                        break;
 
                if (pte_present(ptent)) {
-                       unsigned int delay_rmap;
-
-                       page = vm_normal_page(vma, addr, ptent);
-                       if (unlikely(!should_zap_page(details, page)))
-                               continue;
-                       ptent = ptep_get_and_clear_full(mm, addr, pte,
-                                                       tlb->fullmm);
-                       arch_check_zapped_pte(vma, ptent);
-                       tlb_remove_tlb_entry(tlb, pte, addr);
-                       zap_install_uffd_wp_if_needed(vma, addr, pte, details,
-                                                     ptent);
-                       if (unlikely(!page)) {
-                               ksm_might_unmap_zero_page(mm, ptent);
-                               continue;
-                       }
-
-                       folio = page_folio(page);
-                       delay_rmap = 0;
-                       if (!folio_test_anon(folio)) {
-                               if (pte_dirty(ptent)) {
-                                       folio_mark_dirty(folio);
-                                       if (tlb_delay_rmap(tlb)) {
-                                               delay_rmap = 1;
-                                               force_flush = 1;
-                                       }
-                               }
-                               if (pte_young(ptent) && likely(vma_has_recency(vma)))
-                                       folio_mark_accessed(folio);
-                       }
-                       rss[mm_counter(page)]--;
-                       if (!delay_rmap) {
-                               folio_remove_rmap_pte(folio, page, vma);
-                               if (unlikely(page_mapcount(page) < 0))
-                                       print_bad_pte(vma, addr, ptent, page);
-                       }
-                       if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
-                               force_flush = 1;
-                               addr += PAGE_SIZE;
+                       max_nr = (end - addr) / PAGE_SIZE;
+                       nr = zap_present_ptes(tlb, vma, pte, ptent, max_nr,
+                                             addr, details, rss, &force_flush,
+                                             &force_break);
+                       if (unlikely(force_break)) {
+                               addr += nr * PAGE_SIZE;
                                break;
                        }
                        continue;
@@ -1492,7 +1615,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                    is_device_exclusive_entry(entry)) {
                        page = pfn_swap_entry_to_page(entry);
                        folio = page_folio(page);
-                       if (unlikely(!should_zap_page(details, page)))
+                       if (unlikely(!should_zap_folio(details, folio)))
                                continue;
                        /*
                         * Both device private/exclusive mappings should only
@@ -1501,7 +1624,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                         * see zap_install_uffd_wp_if_needed().
                         */
                        WARN_ON_ONCE(!vma_is_anonymous(vma));
-                       rss[mm_counter(page)]--;
+                       rss[mm_counter(folio)]--;
                        if (is_device_private_entry(entry))
                                folio_remove_rmap_pte(folio, page, vma);
                        folio_put(folio);
@@ -1513,10 +1636,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                        if (unlikely(!free_swap_and_cache(entry)))
                                print_bad_pte(vma, addr, ptent, NULL);
                } else if (is_migration_entry(entry)) {
-                       page = pfn_swap_entry_to_page(entry);
-                       if (!should_zap_page(details, page))
+                       folio = pfn_swap_entry_folio(entry);
+                       if (!should_zap_folio(details, folio))
                                continue;
-                       rss[mm_counter(page)]--;
+                       rss[mm_counter(folio)]--;
                } else if (pte_marker_entry_uffd_wp(entry)) {
                        /*
                         * For anon: always drop the marker; for file: only
@@ -1535,8 +1658,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                        WARN_ON_ONCE(1);
                }
                pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
-               zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent);
-       } while (pte++, addr += PAGE_SIZE, addr != end);
+               zap_install_uffd_wp_if_needed(vma, addr, pte, 1, details, ptent);
+       } while (pte += nr, addr += PAGE_SIZE * nr, addr != end);
 
        add_mm_rss_vec(mm, rss);
        arch_leave_lazy_mmu_mode();
@@ -1870,7 +1993,7 @@ static int insert_page_into_pte_locked(struct vm_area_struct *vma, pte_t *pte,
                return -EBUSY;
        /* Ok, finally just insert the thing.. */
        folio_get(folio);
-       inc_mm_counter(vma->vm_mm, mm_counter_file(page));
+       inc_mm_counter(vma->vm_mm, mm_counter_file(folio));
        folio_add_file_rmap_pte(folio, page, vma);
        set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot));
        return 0;
@@ -3081,7 +3204,7 @@ static inline vm_fault_t vmf_can_call_fault(const struct vm_fault *vmf)
        return VM_FAULT_RETRY;
 }
 
-static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
 {
        struct vm_area_struct *vma = vmf->vma;
 
@@ -3175,7 +3298,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
        if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
                if (old_folio) {
                        if (!folio_test_anon(old_folio)) {
-                               dec_mm_counter(mm, mm_counter_file(&old_folio->page));
+                               dec_mm_counter(mm, mm_counter_file(old_folio));
                                inc_mm_counter(mm, MM_ANONPAGES);
                        }
                } else {
@@ -3253,7 +3376,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
                folio_put(new_folio);
        if (old_folio) {
                if (page_copied)
-                       free_swap_cache(&old_folio->page);
+                       free_swap_cache(old_folio);
                folio_put(old_folio);
        }
 
@@ -3375,6 +3498,16 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf, struct folio *folio)
 static bool wp_can_reuse_anon_folio(struct folio *folio,
                                    struct vm_area_struct *vma)
 {
+       /*
+        * We could currently only reuse a subpage of a large folio if no
+        * other subpages of the large folio are still mapped. However,
+        * let's just consistently not reuse subpages even if we could
+        * reuse in that scenario, and give back a large folio a bit
+        * sooner.
+        */
+       if (folio_test_large(folio))
+               return false;
+
        /*
         * We have to verify under folio lock: these early checks are
         * just an optimization to avoid locking the folio and freeing
@@ -4170,8 +4303,8 @@ static bool pte_range_none(pte_t *pte, int nr_pages)
 
 static struct folio *alloc_anon_folio(struct vm_fault *vmf)
 {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        struct vm_area_struct *vma = vmf->vma;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        unsigned long orders;
        struct folio *folio;
        unsigned long addr;
@@ -4223,15 +4356,21 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
                addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order);
                folio = vma_alloc_folio(gfp, order, vma, addr, true);
                if (folio) {
+                       if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) {
+                               folio_put(folio);
+                               goto next;
+                       }
+                       folio_throttle_swaprate(folio, gfp);
                        clear_huge_page(&folio->page, vmf->address, 1 << order);
                        return folio;
                }
+next:
                order = next_order(&orders, order);
        }
 
 fallback:
 #endif
-       return vma_alloc_zeroed_movable_folio(vmf->vma, vmf->address);
+       return folio_prealloc(vma->vm_mm, vma, vmf->address, true);
 }
 
 /*
@@ -4298,10 +4437,6 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
        nr_pages = folio_nr_pages(folio);
        addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE);
 
-       if (mem_cgroup_charge(folio, vma->vm_mm, GFP_KERNEL))
-               goto oom_free_page;
-       folio_throttle_swaprate(folio, GFP_KERNEL);
-
        /*
         * The memory barrier inside __folio_mark_uptodate makes sure that
         * preceding stores to the page contents become visible before
@@ -4355,8 +4490,6 @@ unlock:
 release:
        folio_put(folio);
        goto unlock;
-oom_free_page:
-       folio_put(folio);
 oom:
        return VM_FAULT_OOM;
 }
@@ -4480,7 +4613,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
        if (write)
                entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
-       add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
+       add_mm_counter(vma->vm_mm, mm_counter_file(folio), HPAGE_PMD_NR);
        folio_add_file_rmap_pmd(folio, page, vma);
 
        /*
@@ -4543,7 +4676,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
                folio_add_new_anon_rmap(folio, vma, addr);
                folio_add_lru_vma(folio, vma);
        } else {
-               add_mm_counter(vma->vm_mm, mm_counter_file(page), nr);
+               add_mm_counter(vma->vm_mm, mm_counter_file(folio), nr);
                folio_add_file_rmap_ptes(folio, page, nr, vma);
        }
        set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr);
@@ -4653,7 +4786,8 @@ static int fault_around_bytes_set(void *data, u64 val)
         * The minimum value is 1 page; however, this results in no fault-around
         * at all. See should_fault_around().
         */
-       fault_around_pages = max(rounddown_pow_of_two(val) >> PAGE_SHIFT, 1UL);
+       val = max(val, PAGE_SIZE);
+       fault_around_pages = rounddown_pow_of_two(val) >> PAGE_SHIFT;
 
        return 0;
 }
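
The clamped computation above maps the value written in bytes to a power-of-two
page count. For example, writing 17000 with 4 KiB pages gives
rounddown_pow_of_two(17000) = 16384, so fault_around_pages = 16384 >> 12 = 4.
A standalone rendering of the same arithmetic, with PAGE_SIZE/PAGE_SHIFT
hardcoded for illustration:

	#include <stdio.h>

	/* Same rounding behaviour as the kernel's rounddown_pow_of_two(). */
	static unsigned long rounddown_pow_of_two(unsigned long val)
	{
		while (val & (val - 1))
			val &= val - 1;	/* clear low set bits; the top bit remains */
		return val;
	}

	int main(void)
	{
		const unsigned long page_size = 4096, page_shift = 12;
		unsigned long val = 17000;

		if (val < page_size)	/* val = max(val, PAGE_SIZE) */
			val = page_size;
		printf("fault_around_pages = %lu\n",
		       rounddown_pow_of_two(val) >> page_shift);	/* prints 4 */
		return 0;
	}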
@@ -4928,18 +5062,18 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
        int flags = 0;
 
        /*
-        * The "pte" at this point cannot be used safely without
-        * validation through pte_unmap_same(). It's of NUMA type but
-        * the pfn may be screwed if the read is non atomic.
+        * The pte cannot be used safely until we verify, while holding the page
+        * table lock, that its contents have not changed during fault handling.
         */
        spin_lock(vmf->ptl);
-       if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+       /* Read the live PTE from the page tables: */
+       old_pte = ptep_get(vmf->pte);
+
+       if (unlikely(!pte_same(old_pte, vmf->orig_pte))) {
                pte_unmap_unlock(vmf->pte, vmf->ptl);
                goto out;
        }
 
-       /* Get the normal PTE  */
-       old_pte = ptep_get(vmf->pte);
        pte = pte_modify(old_pte, vma->vm_page_prot);
 
        /*
@@ -6163,7 +6297,7 @@ static int clear_subpage(unsigned long addr, int idx, void *arg)
 {
        struct page *page = arg;
 
-       clear_user_highpage(page + idx, addr);
+       clear_user_highpage(nth_page(page, idx), addr);
        return 0;
 }
 
@@ -6213,10 +6347,11 @@ struct copy_subpage_arg {
 static int copy_subpage(unsigned long addr, int idx, void *arg)
 {
        struct copy_subpage_arg *copy_arg = arg;
+       struct page *dst = nth_page(copy_arg->dst, idx);
+       struct page *src = nth_page(copy_arg->src, idx);
 
-       if (copy_mc_user_highpage(copy_arg->dst + idx, copy_arg->src + idx,
-                                 addr, copy_arg->vma)) {
-               memory_failure_queue(page_to_pfn(copy_arg->src + idx), 0);
+       if (copy_mc_user_highpage(dst, src, addr, copy_arg->vma)) {
+               memory_failure_queue(page_to_pfn(src), 0);
                return -EHWPOISON;
        }
        return 0;
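
Taken together, the alloc_anon_folio() and do_anonymous_page() hunks above move
memcg charging into the per-order fallback loop: a charge failure at a large
order now releases the folio and retries at the next smaller order instead of
failing the fault outright. A condensed sketch of the resulting flow (not the
literal kernel code; folio_prealloc() is assumed to perform the order-0 charge
itself, as its use elsewhere in this series suggests):

	order = highest_order(orders);
	while (orders) {
		addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order);
		folio = vma_alloc_folio(gfp, order, vma, addr, true);
		if (folio) {
			if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) {
				folio_put(folio);	/* charge failed: retry smaller */
				goto next;
			}
			folio_throttle_swaprate(folio, gfp);
			clear_huge_page(&folio->page, vmf->address, 1 << order);
			return folio;
		}
next:
		order = next_order(&orders, order);
	}
	/* Order-0 fallback: allocation and charge in one helper. */
	return folio_prealloc(vma->vm_mm, vma, vmf->address, true);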
index 21890994c1d3cc6d407143218851ee48287d1114..a444e2d7dd2bffa3ffe496830a76395f21e7767e 100644
@@ -1087,7 +1087,7 @@ void adjust_present_page_count(struct page *page, struct memory_group *group,
 }
 
 int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
-                             struct zone *zone)
+                             struct zone *zone, bool mhp_off_inaccessible)
 {
        unsigned long end_pfn = pfn + nr_pages;
        int ret, i;
@@ -1096,6 +1096,15 @@ int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
        if (ret)
                return ret;
 
+       /*
+        * Memory block is accessible at this stage and hence poison the struct
+        * pages now.  If the memory block is accessible during memory hotplug
+        * addition phase, then page poisoning is already performed in
+        * sparse_add_section().
+        */
+       if (mhp_off_inaccessible)
+               page_init_poison(pfn_to_page(pfn), sizeof(struct page) * nr_pages);
+
        move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_UNMOVABLE);
 
        for (i = 0; i < nr_pages; i++)
@@ -1328,7 +1337,7 @@ static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
 }
 #endif
 
-static bool mhp_supports_memmap_on_memory(unsigned long size)
+bool mhp_supports_memmap_on_memory(void)
 {
        unsigned long vmemmap_size = memory_block_memmap_size();
        unsigned long memmap_pages = memory_block_memmap_on_memory_pages();
@@ -1337,17 +1346,11 @@ static bool mhp_supports_memmap_on_memory(unsigned long size)
         * Besides having arch support and the feature enabled at runtime, we
         * need a few more assumptions to hold true:
         *
-        * a) We span a single memory block: memory onlining/offlining happens
-        *    in memory block granularity. We don't want the vmemmap of online
-        *    memory blocks to reside on offline memory blocks. In the future,
-        *    we might want to support variable-sized memory blocks to make the
-        *    feature more versatile.
-        *
-        * b) The vmemmap pages span complete PMDs: We don't want vmemmap code
+        * a) The vmemmap pages span complete PMDs: We don't want vmemmap code
         *    to populate memory from the altmap for unrelated parts (i.e.,
         *    other memory blocks)
         *
-        * c) The vmemmap pages (and thereby the pages that will be exposed to
+        * b) The vmemmap pages (and thereby the pages that will be exposed to
         *    the buddy) have to cover full pageblocks: memory onlining/offlining
         *    code requires applicable ranges to be page-aligned, for example, to
         *    set the migratetypes properly.
@@ -1359,7 +1362,7 @@ static bool mhp_supports_memmap_on_memory(unsigned long size)
         *       altmap as an alternative source of memory, and we do not exactly
         *       populate a single PMD.
         */
-       if (!mhp_memmap_on_memory() || size != memory_block_size_bytes())
+       if (!mhp_memmap_on_memory())
                return false;
 
        /*
@@ -1382,6 +1385,7 @@ static bool mhp_supports_memmap_on_memory(unsigned long size)
 
        return arch_supports_memmap_on_memory(vmemmap_size);
 }
+EXPORT_SYMBOL_GPL(mhp_supports_memmap_on_memory);
 
 static void __ref remove_memory_blocks_and_altmaps(u64 start, u64 size)
 {
@@ -1415,7 +1419,7 @@ static void __ref remove_memory_blocks_and_altmaps(u64 start, u64 size)
 }
 
 static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
-                                           u64 start, u64 size)
+                                           u64 start, u64 size, mhp_t mhp_flags)
 {
        unsigned long memblock_size = memory_block_size_bytes();
        u64 cur_start;
@@ -1431,6 +1435,8 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
                };
 
                mhp_altmap.free = memory_block_memmap_on_memory_pages();
+               if (mhp_flags & MHP_OFFLINE_INACCESSIBLE)
+                       mhp_altmap.inaccessible = true;
                params.altmap = kmemdup(&mhp_altmap, sizeof(struct vmem_altmap),
                                        GFP_KERNEL);
                if (!params.altmap) {
@@ -1515,8 +1521,8 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
         * Self hosted memmap array
         */
        if ((mhp_flags & MHP_MEMMAP_ON_MEMORY) &&
-           mhp_supports_memmap_on_memory(memory_block_size_bytes())) {
-               ret = create_altmaps_and_memory_blocks(nid, group, start, size);
+           mhp_supports_memmap_on_memory()) {
+               ret = create_altmaps_and_memory_blocks(nid, group, start, size, mhp_flags);
                if (ret)
                        goto error;
        } else {
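
With mhp_supports_memmap_on_memory() taking no size argument and now being
exported, a hotplug driver can probe for the self-hosted memmap before adding
memory. A hypothetical caller (only the two mhp symbols touched by this patch
are taken from it; the surrounding names and base flags are illustrative):

	int rc;
	mhp_t mhp_flags = MHP_NID_IS_MGID;

	if (mhp_supports_memmap_on_memory())
		mhp_flags |= MHP_MEMMAP_ON_MEMORY;
	/*
	 * Memory that must not be touched before onlining would also set
	 * MHP_OFFLINE_INACCESSIBLE, deferring the struct-page poisoning to
	 * mhp_init_memmap_on_memory() as shown above.
	 */
	rc = add_memory_driver_managed(mgid, start, size,
				       "System RAM (mydev)", mhp_flags);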
index 10a590ee1c89974c353a28aa0c4eb393a375e31b..0fe77738d971dec2b586bb78fb3c072159b9ba05 100644
  *                for anonymous memory. For process policy a per-process counter
  *                is used.
  *
+ * weighted interleave
+ *                Allocate memory interleaved over a set of nodes based on
+ *                a set of weights (per-node), with normal fallback if it
+ *                fails.  Otherwise operates the same as interleave.
+ *                Example: nodeset(0,1) & weights (2,1) - 2 pages allocated
+ *                on node 0 for every 1 page allocated on node 1.
+ *
  * bind           Only allocate memory on a specific set of nodes,
  *                no fallback.
  *                FIXME: memory is allocated starting with the first node
@@ -131,6 +138,32 @@ static struct mempolicy default_policy = {
 
 static struct mempolicy preferred_node_policy[MAX_NUMNODES];
 
+/*
+ * iw_table is the sysfs-set interleave weight table; a value of 0 denotes that the
+ * system-default value should be used. A NULL iw_table also denotes that
+ * system-default values should be used. Until the system-default table
+ * is implemented, the system-default is always 1.
+ *
+ * iw_table is RCU protected
+ */
+static u8 __rcu *iw_table;
+static DEFINE_MUTEX(iw_table_lock);
+
+static u8 get_il_weight(int node)
+{
+       u8 *table;
+       u8 weight;
+
+       rcu_read_lock();
+       table = rcu_dereference(iw_table);
+       /* if no iw_table, use system default */
+       weight = table ? table[node] : 1;
+       /* if value in iw_table is 0, use system default */
+       weight = weight ? weight : 1;
+       rcu_read_unlock();
+       return weight;
+}
+
 /**
  * numa_nearest_node - Find nearest node by state
  * @node: Node id to start the search
@@ -415,6 +448,10 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
                .create = mpol_new_nodemask,
                .rebind = mpol_rebind_preferred,
        },
+       [MPOL_WEIGHTED_INTERLEAVE] = {
+               .create = mpol_new_nodemask,
+               .rebind = mpol_rebind_nodemask,
+       },
 };
 
 static bool migrate_folio_add(struct folio *folio, struct list_head *foliolist,
@@ -654,7 +691,6 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 {
        struct vm_area_struct *next, *vma = walk->vma;
        struct queue_pages *qp = walk->private;
-       unsigned long endvma = vma->vm_end;
        unsigned long flags = qp->flags;
 
        /* range check first */
@@ -682,9 +718,6 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
            !(flags & MPOL_MF_STRICT))
                return 1;
 
-       if (endvma > end)
-               endvma = end;
-
        /*
         * Check page nodes, and queue pages to move, in the current vma.
         * But if no moving, and no strict checking, the scan can be skipped.
@@ -836,8 +869,11 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 
        old = current->mempolicy;
        current->mempolicy = new;
-       if (new && new->mode == MPOL_INTERLEAVE)
+       if (new && (new->mode == MPOL_INTERLEAVE ||
+                   new->mode == MPOL_WEIGHTED_INTERLEAVE)) {
                current->il_prev = MAX_NUMNODES-1;
+               current->il_weight = 0;
+       }
        task_unlock(current);
        mpol_put(old);
        ret = 0;
@@ -862,6 +898,7 @@ static void get_policy_nodemask(struct mempolicy *pol, nodemask_t *nodes)
        case MPOL_INTERLEAVE:
        case MPOL_PREFERRED:
        case MPOL_PREFERRED_MANY:
+       case MPOL_WEIGHTED_INTERLEAVE:
                *nodes = pol->nodes;
                break;
        case MPOL_LOCAL:
@@ -946,6 +983,13 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
                } else if (pol == current->mempolicy &&
                                pol->mode == MPOL_INTERLEAVE) {
                        *policy = next_node_in(current->il_prev, pol->nodes);
+               } else if (pol == current->mempolicy &&
+                               pol->mode == MPOL_WEIGHTED_INTERLEAVE) {
+                       if (current->il_weight)
+                               *policy = current->il_prev;
+                       else
+                               *policy = next_node_in(current->il_prev,
+                                                      pol->nodes);
                } else {
                        err = -EINVAL;
                        goto out;
@@ -1310,30 +1354,32 @@ static long do_mbind(unsigned long start, unsigned long len,
                 * VMAs, the nodes will still be interleaved from the targeted
                 * nodemask, but one by one may be selected differently.
                 */
-               if (new->mode == MPOL_INTERLEAVE) {
-                       struct page *page;
+               if (new->mode == MPOL_INTERLEAVE ||
+                   new->mode == MPOL_WEIGHTED_INTERLEAVE) {
+                       struct folio *folio;
                        unsigned int order;
                        unsigned long addr = -EFAULT;
 
-                       list_for_each_entry(page, &pagelist, lru) {
-                               if (!PageKsm(page))
+                       list_for_each_entry(folio, &pagelist, lru) {
+                               if (!folio_test_ksm(folio))
                                        break;
                        }
-                       if (!list_entry_is_head(page, &pagelist, lru)) {
+                       if (!list_entry_is_head(folio, &pagelist, lru)) {
                                vma_iter_init(&vmi, mm, start);
                                for_each_vma_range(vmi, vma, end) {
-                                       addr = page_address_in_vma(page, vma);
+                                       addr = page_address_in_vma(
+                                               folio_page(folio, 0), vma);
                                        if (addr != -EFAULT)
                                                break;
                                }
                        }
                        if (addr != -EFAULT) {
-                               order = compound_order(page);
+                               order = folio_order(folio);
                                /* We already know the pol, but not the ilx */
                                mpol_cond_put(get_vma_policy(vma, addr, order,
                                                             &mmpol.ilx));
                                /* Set base from which to increment by index */
-                               mmpol.ilx -= page->index >> order;
+                               mmpol.ilx -= folio->index >> order;
                        }
                }
        }
@@ -1758,7 +1804,8 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * @vma: virtual memory area whose policy is sought
  * @addr: address in @vma for shared policy lookup
  * @order: 0, or appropriate huge_page_order for interleaving
- * @ilx: interleave index (output), for use only when MPOL_INTERLEAVE
+ * @ilx: interleave index (output), for use only when MPOL_INTERLEAVE or
+ *       MPOL_WEIGHTED_INTERLEAVE
  *
  * Returns effective policy for a VMA at specified address.
  * Falls back to current->mempolicy or system default policy, as necessary.
@@ -1775,7 +1822,8 @@ struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
        pol = __get_vma_policy(vma, addr, ilx);
        if (!pol)
                pol = get_task_policy(current);
-       if (pol->mode == MPOL_INTERLEAVE) {
+       if (pol->mode == MPOL_INTERLEAVE ||
+           pol->mode == MPOL_WEIGHTED_INTERLEAVE) {
                *ilx += vma->vm_pgoff >> order;
                *ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order);
        }
@@ -1825,12 +1873,40 @@ bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
        return zone >= dynamic_policy_zone;
 }
 
+static unsigned int weighted_interleave_nodes(struct mempolicy *policy)
+{
+       unsigned int node;
+       unsigned int cpuset_mems_cookie;
+
+retry:
+       /* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */
+       cpuset_mems_cookie = read_mems_allowed_begin();
+       node = current->il_prev;
+       if (!current->il_weight || !node_isset(node, policy->nodes)) {
+               node = next_node_in(node, policy->nodes);
+               if (read_mems_allowed_retry(cpuset_mems_cookie))
+                       goto retry;
+               if (node == MAX_NUMNODES)
+                       return node;
+               current->il_prev = node;
+               current->il_weight = get_il_weight(node);
+       }
+       current->il_weight--;
+       return node;
+}
+
 /* Do dynamic interleaving for a process */
 static unsigned int interleave_nodes(struct mempolicy *policy)
 {
        unsigned int nid;
+       unsigned int cpuset_mems_cookie;
+
+       /* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */
+       do {
+               cpuset_mems_cookie = read_mems_allowed_begin();
+               nid = next_node_in(current->il_prev, policy->nodes);
+       } while (read_mems_allowed_retry(cpuset_mems_cookie));
 
-       nid = next_node_in(current->il_prev, policy->nodes);
        if (nid < MAX_NUMNODES)
                current->il_prev = nid;
        return nid;
@@ -1859,6 +1935,9 @@ unsigned int mempolicy_slab_node(void)
        case MPOL_INTERLEAVE:
                return interleave_nodes(policy);
 
+       case MPOL_WEIGHTED_INTERLEAVE:
+               return weighted_interleave_nodes(policy);
+
        case MPOL_BIND:
        case MPOL_PREFERRED_MANY:
        {
@@ -1883,6 +1962,59 @@ unsigned int mempolicy_slab_node(void)
        }
 }
 
+static unsigned int read_once_policy_nodemask(struct mempolicy *pol,
+                                             nodemask_t *mask)
+{
+       /*
+        * barrier stabilizes the nodemask locally so that it can be iterated
+        * over safely without concern for changes. Allocators validate that node
+        * selection does not violate mems_allowed, so this is safe.
+        */
+       barrier();
+       memcpy(mask, &pol->nodes, sizeof(nodemask_t));
+       barrier();
+       return nodes_weight(*mask);
+}
+
+static unsigned int weighted_interleave_nid(struct mempolicy *pol, pgoff_t ilx)
+{
+       nodemask_t nodemask;
+       unsigned int target, nr_nodes;
+       u8 *table;
+       unsigned int weight_total = 0;
+       u8 weight;
+       int nid;
+
+       nr_nodes = read_once_policy_nodemask(pol, &nodemask);
+       if (!nr_nodes)
+               return numa_node_id();
+
+       rcu_read_lock();
+       table = rcu_dereference(iw_table);
+       /* calculate the total weight */
+       for_each_node_mask(nid, nodemask) {
+               /* detect system default usage */
+               weight = table ? table[nid] : 1;
+               weight = weight ? weight : 1;
+               weight_total += weight;
+       }
+
+       /* Calculate the node offset based on totals */
+       target = ilx % weight_total;
+       nid = first_node(nodemask);
+       while (target) {
+               /* detect system default usage */
+               weight = table ? table[nid] : 1;
+               weight = weight ? weight : 1;
+               if (target < weight)
+                       break;
+               target -= weight;
+               nid = next_node_in(nid, nodemask);
+       }
+       rcu_read_unlock();
+       return nid;
+}
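
Concretely, for nodemask {0,1} with weights {2,1}, weight_total is 3 and
ilx % 3 selects node 0 for offsets 0 and 1 and node 1 for offset 2. A
standalone rendering of the same walk:

	#include <stdio.h>

	int main(void)
	{
		const int nodes[] = { 0, 1 };			/* nodemask {0,1} */
		const unsigned int weights[] = { 2, 1 };	/* weight_total = 3 */

		for (unsigned long ilx = 0; ilx < 6; ilx++) {
			unsigned int target = ilx % 3;
			int i = 0;

			/* same walk as the kernel loop above */
			while (target >= weights[i]) {
				target -= weights[i];
				i++;
			}
			printf("ilx=%lu -> node%d\n", ilx, nodes[i]);
		}
		return 0;	/* prints node0, node0, node1, node0, node0, node1 */
	}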
+
 /*
  * Do static interleaving for interleave index @ilx.  Returns the ilx'th
  * node in pol->nodes (starting from ilx=0), wrapping around if ilx
@@ -1890,20 +2022,12 @@ unsigned int mempolicy_slab_node(void)
  */
 static unsigned int interleave_nid(struct mempolicy *pol, pgoff_t ilx)
 {
-       nodemask_t nodemask = pol->nodes;
+       nodemask_t nodemask;
        unsigned int target, nnodes;
        int i;
        int nid;
-       /*
-        * The barrier will stabilize the nodemask in a register or on
-        * the stack so that it will stop changing under the code.
-        *
-        * Between first_node() and next_node(), pol->nodes could be changed
-        * by other threads. So we put pol->nodes in a local stack.
-        */
-       barrier();
 
-       nnodes = nodes_weight(nodemask);
+       nnodes = read_once_policy_nodemask(pol, &nodemask);
        if (!nnodes)
                return numa_node_id();
        target = ilx % nnodes;
@@ -1951,6 +2075,11 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *pol,
                *nid = (ilx == NO_INTERLEAVE_INDEX) ?
                        interleave_nodes(pol) : interleave_nid(pol, ilx);
                break;
+       case MPOL_WEIGHTED_INTERLEAVE:
+               *nid = (ilx == NO_INTERLEAVE_INDEX) ?
+                       weighted_interleave_nodes(pol) :
+                       weighted_interleave_nid(pol, ilx);
+               break;
        }
 
        return nodemask;
@@ -2012,6 +2141,7 @@ bool init_nodemask_of_mempolicy(nodemask_t *mask)
        case MPOL_PREFERRED_MANY:
        case MPOL_BIND:
        case MPOL_INTERLEAVE:
+       case MPOL_WEIGHTED_INTERLEAVE:
                *mask = mempolicy->nodes;
                break;
 
@@ -2112,6 +2242,7 @@ struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
                 * node in its nodemask, we allocate the standard way.
                 */
                if (pol->mode != MPOL_INTERLEAVE &&
+                   pol->mode != MPOL_WEIGHTED_INTERLEAVE &&
                    (!nodemask || node_isset(nid, *nodemask))) {
                        /*
                         * First, try to allocate THP only on local node, but
@@ -2247,6 +2378,121 @@ static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
        return total_allocated;
 }
 
+static unsigned long alloc_pages_bulk_array_weighted_interleave(gfp_t gfp,
+               struct mempolicy *pol, unsigned long nr_pages,
+               struct page **page_array)
+{
+       struct task_struct *me = current;
+       unsigned int cpuset_mems_cookie;
+       unsigned long total_allocated = 0;
+       unsigned long nr_allocated = 0;
+       unsigned long rounds;
+       unsigned long node_pages, delta;
+       u8 *table, *weights, weight;
+       unsigned int weight_total = 0;
+       unsigned long rem_pages = nr_pages;
+       nodemask_t nodes;
+       int nnodes, node;
+       int resume_node = MAX_NUMNODES - 1;
+       u8 resume_weight = 0;
+       int prev_node;
+       int i;
+
+       if (!nr_pages)
+               return 0;
+
+       /* read the nodes onto the stack, retry if done during rebind */
+       do {
+               cpuset_mems_cookie = read_mems_allowed_begin();
+               nnodes = read_once_policy_nodemask(pol, &nodes);
+       } while (read_mems_allowed_retry(cpuset_mems_cookie));
+
+       /* if the nodemask has become invalid, we cannot do anything */
+       if (!nnodes)
+               return 0;
+
+       /* Continue allocating from most recent node and adjust the nr_pages */
+       node = me->il_prev;
+       weight = me->il_weight;
+       if (weight && node_isset(node, nodes)) {
+               node_pages = min(rem_pages, weight);
+               nr_allocated = __alloc_pages_bulk(gfp, node, NULL, node_pages,
+                                                 NULL, page_array);
+               page_array += nr_allocated;
+               total_allocated += nr_allocated;
+               /* if that's all the pages, no need to interleave */
+               if (rem_pages <= weight) {
+                       me->il_weight -= rem_pages;
+                       return total_allocated;
+               }
+               /* Otherwise we adjust remaining pages, continue from there */
+               rem_pages -= weight;
+       }
+       /* clear active weight in case of an allocation failure */
+       me->il_weight = 0;
+       prev_node = node;
+
+       /* create a local copy of node weights to operate on outside rcu */
+       weights = kzalloc(nr_node_ids, GFP_KERNEL);
+       if (!weights)
+               return total_allocated;
+
+       rcu_read_lock();
+       table = rcu_dereference(iw_table);
+       if (table)
+               memcpy(weights, table, nr_node_ids);
+       rcu_read_unlock();
+
+       /* calculate total, detect system default usage */
+       for_each_node_mask(node, nodes) {
+               if (!weights[node])
+                       weights[node] = 1;
+               weight_total += weights[node];
+       }
+
+       /*
+        * Calculate rounds/partial rounds to minimize __alloc_pages_bulk calls.
+        * Track which node weighted interleave should resume from.
+        *
+        * If (rounds > 0) and (delta == 0), resume_node will always be
+        * the node following prev_node, and resume_weight that node's weight.
+        */
+       rounds = rem_pages / weight_total;
+       delta = rem_pages % weight_total;
+       resume_node = next_node_in(prev_node, nodes);
+       resume_weight = weights[resume_node];
+       for (i = 0; i < nnodes; i++) {
+               node = next_node_in(prev_node, nodes);
+               weight = weights[node];
+               node_pages = weight * rounds;
+               /* If a delta exists, add this node's portion of the delta */
+               if (delta > weight) {
+                       node_pages += weight;
+                       delta -= weight;
+               } else if (delta) {
+                       /* when delta is depleted, resume from that node */
+                       node_pages += delta;
+                       resume_node = node;
+                       resume_weight = weight - delta;
+                       delta = 0;
+               }
+               /* node_pages can be 0 if an allocation fails and rounds == 0 */
+               if (!node_pages)
+                       break;
+               nr_allocated = __alloc_pages_bulk(gfp, node, NULL, node_pages,
+                                                 NULL, page_array);
+               page_array += nr_allocated;
+               total_allocated += nr_allocated;
+               if (total_allocated == nr_pages)
+                       break;
+               prev_node = node;
+       }
+       me->il_prev = resume_node;
+       me->il_weight = resume_weight;
+       kfree(weights);
+       return total_allocated;
+}
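
A worked instance of the rounds/delta split: take nodes {0,1}, weights {2,1}
(weight_total = 3), rem_pages = 7 and prev_node = 1. Then rounds = 7 / 3 = 2
and delta = 7 % 3 = 1. Node 0 is visited first and receives 2 * 2 + 1 = 5
pages, node 1 receives 2 * 1 = 2, and because the delta was depleted at node 0,
the task resumes there with il_weight = 2 - 1 = 1.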
+
 static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
                struct mempolicy *pol, unsigned long nr_pages,
                struct page **page_array)
@@ -2287,6 +2533,10 @@ unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
                return alloc_pages_bulk_array_interleave(gfp, pol,
                                                         nr_pages, page_array);
 
+       if (pol->mode == MPOL_WEIGHTED_INTERLEAVE)
+               return alloc_pages_bulk_array_weighted_interleave(
+                                 gfp, pol, nr_pages, page_array);
+
        if (pol->mode == MPOL_PREFERRED_MANY)
                return alloc_pages_bulk_array_preferred_many(gfp,
                                numa_node_id(), pol, nr_pages, page_array);
@@ -2362,6 +2612,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
        case MPOL_INTERLEAVE:
        case MPOL_PREFERRED:
        case MPOL_PREFERRED_MANY:
+       case MPOL_WEIGHTED_INTERLEAVE:
                return !!nodes_equal(a->nodes, b->nodes);
        case MPOL_LOCAL:
                return true;
@@ -2498,6 +2749,10 @@ int mpol_misplaced(struct folio *folio, struct vm_area_struct *vma,
                polnid = interleave_nid(pol, ilx);
                break;
 
+       case MPOL_WEIGHTED_INTERLEAVE:
+               polnid = weighted_interleave_nid(pol, ilx);
+               break;
+
        case MPOL_PREFERRED:
                if (node_isset(curnid, pol->nodes))
                        goto out;
@@ -2872,6 +3127,7 @@ static const char * const policy_modes[] =
        [MPOL_PREFERRED]  = "prefer",
        [MPOL_BIND]       = "bind",
        [MPOL_INTERLEAVE] = "interleave",
+       [MPOL_WEIGHTED_INTERLEAVE] = "weighted interleave",
        [MPOL_LOCAL]      = "local",
        [MPOL_PREFERRED_MANY]  = "prefer (many)",
 };
@@ -2931,6 +3187,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol)
                }
                break;
        case MPOL_INTERLEAVE:
+       case MPOL_WEIGHTED_INTERLEAVE:
                /*
                 * Default to online nodes with memory if no nodelist
                 */
@@ -3041,6 +3298,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
        case MPOL_PREFERRED_MANY:
        case MPOL_BIND:
        case MPOL_INTERLEAVE:
+       case MPOL_WEIGHTED_INTERLEAVE:
                nodes = pol->nodes;
                break;
        default:
@@ -3067,3 +3325,200 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
                p += scnprintf(p, buffer + maxlen - p, ":%*pbl",
                               nodemask_pr_args(&nodes));
 }
+
+#ifdef CONFIG_SYSFS
+struct iw_node_attr {
+       struct kobj_attribute kobj_attr;
+       int nid;
+};
+
+static ssize_t node_show(struct kobject *kobj, struct kobj_attribute *attr,
+                        char *buf)
+{
+       struct iw_node_attr *node_attr;
+       u8 weight;
+
+       node_attr = container_of(attr, struct iw_node_attr, kobj_attr);
+       weight = get_il_weight(node_attr->nid);
+       return sysfs_emit(buf, "%d\n", weight);
+}
+
+static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr,
+                         const char *buf, size_t count)
+{
+       struct iw_node_attr *node_attr;
+       u8 *new;
+       u8 *old;
+       u8 weight = 0;
+
+       node_attr = container_of(attr, struct iw_node_attr, kobj_attr);
+       if (count == 0 || sysfs_streq(buf, ""))
+               weight = 0;
+       else if (kstrtou8(buf, 0, &weight))
+               return -EINVAL;
+
+       new = kzalloc(nr_node_ids, GFP_KERNEL);
+       if (!new)
+               return -ENOMEM;
+
+       mutex_lock(&iw_table_lock);
+       old = rcu_dereference_protected(iw_table,
+                                       lockdep_is_held(&iw_table_lock));
+       if (old)
+               memcpy(new, old, nr_node_ids);
+       new[node_attr->nid] = weight;
+       rcu_assign_pointer(iw_table, new);
+       mutex_unlock(&iw_table_lock);
+       synchronize_rcu();
+       kfree(old);
+       return count;
+}
+
+static struct iw_node_attr **node_attrs;
+
+static void sysfs_wi_node_release(struct iw_node_attr *node_attr,
+                                 struct kobject *parent)
+{
+       if (!node_attr)
+               return;
+       sysfs_remove_file(parent, &node_attr->kobj_attr.attr);
+       kfree(node_attr->kobj_attr.attr.name);
+       kfree(node_attr);
+}
+
+static void sysfs_wi_release(struct kobject *wi_kobj)
+{
+       int i;
+
+       for (i = 0; i < nr_node_ids; i++)
+               sysfs_wi_node_release(node_attrs[i], wi_kobj);
+       kobject_put(wi_kobj);
+}
+
+static const struct kobj_type wi_ktype = {
+       .sysfs_ops = &kobj_sysfs_ops,
+       .release = sysfs_wi_release,
+};
+
+static int add_weight_node(int nid, struct kobject *wi_kobj)
+{
+       struct iw_node_attr *node_attr;
+       char *name;
+
+       node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL);
+       if (!node_attr)
+               return -ENOMEM;
+
+       name = kasprintf(GFP_KERNEL, "node%d", nid);
+       if (!name) {
+               kfree(node_attr);
+               return -ENOMEM;
+       }
+
+       sysfs_attr_init(&node_attr->kobj_attr.attr);
+       node_attr->kobj_attr.attr.name = name;
+       node_attr->kobj_attr.attr.mode = 0644;
+       node_attr->kobj_attr.show = node_show;
+       node_attr->kobj_attr.store = node_store;
+       node_attr->nid = nid;
+
+       if (sysfs_create_file(wi_kobj, &node_attr->kobj_attr.attr)) {
+               kfree(node_attr->kobj_attr.attr.name);
+               kfree(node_attr);
+               pr_err("failed to add attribute to weighted_interleave\n");
+               return -ENOMEM;
+       }
+
+       node_attrs[nid] = node_attr;
+       return 0;
+}
+
+static int add_weighted_interleave_group(struct kobject *root_kobj)
+{
+       struct kobject *wi_kobj;
+       int nid, err;
+
+       wi_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
+       if (!wi_kobj)
+               return -ENOMEM;
+
+       err = kobject_init_and_add(wi_kobj, &wi_ktype, root_kobj,
+                                  "weighted_interleave");
+       if (err) {
+               kfree(wi_kobj);
+               return err;
+       }
+
+       for_each_node_state(nid, N_POSSIBLE) {
+               err = add_weight_node(nid, wi_kobj);
+               if (err) {
+                       pr_err("failed to add sysfs [node%d]\n", nid);
+                       break;
+               }
+       }
+       if (err)
+               kobject_put(wi_kobj);
+       return 0;
+}
+
+static void mempolicy_kobj_release(struct kobject *kobj)
+{
+       u8 *old;
+
+       mutex_lock(&iw_table_lock);
+       old = rcu_dereference_protected(iw_table,
+                                       lockdep_is_held(&iw_table_lock));
+       rcu_assign_pointer(iw_table, NULL);
+       mutex_unlock(&iw_table_lock);
+       synchronize_rcu();
+       kfree(old);
+       kfree(node_attrs);
+       kfree(kobj);
+}
+
+static const struct kobj_type mempolicy_ktype = {
+       .release = mempolicy_kobj_release
+};
+
+static int __init mempolicy_sysfs_init(void)
+{
+       int err;
+       static struct kobject *mempolicy_kobj;
+
+       mempolicy_kobj = kzalloc(sizeof(*mempolicy_kobj), GFP_KERNEL);
+       if (!mempolicy_kobj) {
+               err = -ENOMEM;
+               goto err_out;
+       }
+
+       node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *),
+                            GFP_KERNEL);
+       if (!node_attrs) {
+               err = -ENOMEM;
+               goto mempol_out;
+       }
+
+       err = kobject_init_and_add(mempolicy_kobj, &mempolicy_ktype, mm_kobj,
+                                  "mempolicy");
+       if (err)
+               goto node_out;
+
+       err = add_weighted_interleave_group(mempolicy_kobj);
+       if (err) {
+               pr_err("mempolicy sysfs structure failed to initialize\n");
+               kobject_put(mempolicy_kobj);
+               return err;
+       }
+
+       return err;
+node_out:
+       kfree(node_attrs);
+mempol_out:
+       kfree(mempolicy_kobj);
+err_out:
+       pr_err("failed to add mempolicy kobject to the system\n");
+       return err;
+}
+
+late_initcall(mempolicy_sysfs_init);
+#endif /* CONFIG_SYSFS */
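
Given the kobject hierarchy built above (mm_kobj corresponds to
/sys/kernel/mm), the per-node weights should surface as
/sys/kernel/mm/mempolicy/weighted_interleave/nodeN. Each file takes a u8;
per node_store(), writing an empty string stores 0, which get_il_weight()
reports as the system default of 1. Illustrative usage from a shell:

	echo 4 > /sys/kernel/mm/mempolicy/weighted_interleave/node0
	echo 1 > /sys/kernel/mm/mempolicy/weighted_interleave/node1

A task then opts in with the MPOL_WEIGHTED_INTERLEAVE mode via set_mempolicy()
or mbind(), after which its allocations spread 4:1 across nodes 0 and 1.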
index dbbf0e9fb42467911516c3904c63829164cce9c3..076c736f5f1ff8df1a9450736f5e061e073f6257 100644
@@ -590,6 +590,19 @@ void mempool_kfree(void *element, void *pool_data)
 }
 EXPORT_SYMBOL(mempool_kfree);
 
+void *mempool_kvmalloc(gfp_t gfp_mask, void *pool_data)
+{
+       size_t size = (size_t)pool_data;
+       return kvmalloc(size, gfp_mask);
+}
+EXPORT_SYMBOL(mempool_kvmalloc);
+
+void mempool_kvfree(void *element, void *pool_data)
+{
+       kvfree(element);
+}
+EXPORT_SYMBOL(mempool_kvfree);
+
 /*
  * A simple mempool-backed page allocator that allocates pages
  * of the order specified by pool_data.
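
These helpers extend the existing mempool_kmalloc()/mempool_kfree() pattern to
kvmalloc-backed pools; as with the kmalloc variants, pool_data smuggles the
element size through the void pointer. A hypothetical pool of eight 64 KiB
elements:

	void *elem;
	mempool_t *pool;

	pool = mempool_create(8, mempool_kvmalloc, mempool_kvfree,
			      (void *)(size_t)SZ_64K);
	if (!pool)
		return -ENOMEM;

	elem = mempool_alloc(pool, GFP_KERNEL);	/* cannot fail with GFP_KERNEL */
	mempool_free(elem, pool);
	mempool_destroy(pool);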
index 32f3e9dda8370f9967988886162434b40fb2d18c..c2c609c3911994a8a877b16a6194860e573f8331 100644
@@ -51,10 +51,10 @@ static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size
        last_bad = 0;
 
        for (p = start; p < end; p++)
-               *p = pattern;
+               WRITE_ONCE(*p, pattern);
 
        for (p = start; p < end; p++, start_phys_aligned += incr) {
-               if (*p == pattern)
+               if (READ_ONCE(*p) == pattern)
                        continue;
                if (start_phys_aligned == last_bad + incr) {
                        last_bad += incr;
index c27b1f8097d4a72e569ce5a06be42b93184e9db0..73a052a382f13a21bd72e23fb5996ae07c3022d3 100644
@@ -211,14 +211,17 @@ static bool remove_migration_pte(struct folio *folio,
                folio_get(folio);
                pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
                old_pte = ptep_get(pvmw.pte);
-               if (pte_swp_soft_dirty(old_pte))
-                       pte = pte_mksoft_dirty(pte);
 
                entry = pte_to_swp_entry(old_pte);
                if (!is_migration_entry_young(entry))
                        pte = pte_mkold(pte);
                if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
                        pte = pte_mkdirty(pte);
+               if (pte_swp_soft_dirty(old_pte))
+                       pte = pte_mksoft_dirty(pte);
+               else
+                       pte = pte_clear_soft_dirty(pte);
+
                if (is_writable_migration_entry(entry))
                        pte = pte_mkwrite(pte, vma);
                else if (pte_swp_uffd_wp(old_pte))
index 086546ac5766db712752fa79a9e8dabb1a81cc71..1ed2f2ab37cd18a08fb1b4069c4aee372a30b934 100644
@@ -206,8 +206,7 @@ static void mlock_folio_batch(struct folio_batch *fbatch)
 
        if (lruvec)
                unlock_page_lruvec_irq(lruvec);
-       folios_put(fbatch->folios, folio_batch_count(fbatch));
-       folio_batch_reinit(fbatch);
+       folios_put(fbatch);
 }
 
 void mlock_drain_local(void)
index 2c19f5515e36c47eb3bb23f02d9c7c2c1c646393..549e76af8f82a8ed2fea2e1439e96f06f7f25b19 100644
@@ -2231,6 +2231,7 @@ static int __init deferred_init_memmap(void *data)
                        .align       = PAGES_PER_SECTION,
                        .min_chunk   = PAGES_PER_SECTION,
                        .max_threads = max_threads,
+                       .numa_aware  = false,
                };
 
                padata_do_multithreaded(&job);
index 3281287771c9c6100ebefde692bca06e247ae0f8..04da02114c6f992e251f6b52fa10482286947798 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -105,7 +105,7 @@ void vma_set_page_prot(struct vm_area_struct *vma)
  * Requires inode->i_mapping->i_mmap_rwsem
  */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
-               struct file *file, struct address_space *mapping)
+                                     struct address_space *mapping)
 {
        if (vma_is_shared_maywrite(vma))
                mapping_unmap_writable(mapping);
@@ -126,7 +126,7 @@ void unlink_file_vma(struct vm_area_struct *vma)
        if (file) {
                struct address_space *mapping = file->f_mapping;
                i_mmap_lock_write(mapping);
-               __remove_shared_vm_struct(vma, file, mapping);
+               __remove_shared_vm_struct(vma, mapping);
                i_mmap_unlock_write(mapping);
        }
 }
@@ -392,26 +392,30 @@ static void __vma_link_file(struct vm_area_struct *vma,
        flush_dcache_mmap_unlock(mapping);
 }
 
+static void vma_link_file(struct vm_area_struct *vma)
+{
+       struct file *file = vma->vm_file;
+       struct address_space *mapping;
+
+       if (file) {
+               mapping = file->f_mapping;
+               i_mmap_lock_write(mapping);
+               __vma_link_file(vma, mapping);
+               i_mmap_unlock_write(mapping);
+       }
+}
+
 static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
 {
        VMA_ITERATOR(vmi, mm, 0);
-       struct address_space *mapping = NULL;
 
        vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
        if (vma_iter_prealloc(&vmi, vma))
                return -ENOMEM;
 
        vma_start_write(vma);
-
        vma_iter_store(&vmi, vma);
-
-       if (vma->vm_file) {
-               mapping = vma->vm_file->f_mapping;
-               i_mmap_lock_write(mapping);
-               __vma_link_file(vma, mapping);
-               i_mmap_unlock_write(mapping);
-       }
-
+       vma_link_file(vma);
        mm->map_count++;
        validate_mm(mm);
        return 0;
@@ -519,10 +523,9 @@ static inline void vma_complete(struct vma_prepare *vp,
        }
 
        if (vp->remove && vp->file) {
-               __remove_shared_vm_struct(vp->remove, vp->file, vp->mapping);
+               __remove_shared_vm_struct(vp->remove, vp->mapping);
                if (vp->remove2)
-                       __remove_shared_vm_struct(vp->remove2, vp->file,
-                                                 vp->mapping);
+                       __remove_shared_vm_struct(vp->remove2, vp->mapping);
        } else if (vp->insert) {
                /*
                 * split_vma has split insert from vma, and needs
@@ -660,9 +663,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
 
        vma_prepare(&vp);
        vma_adjust_trans_huge(vma, start, end, 0);
-       vma->vm_start = start;
-       vma->vm_end = end;
-       vma->vm_pgoff = pgoff;
+       vma_set_range(vma, start, end, pgoff);
        vma_iter_store(vmi, vma);
 
        vma_complete(&vp, vmi, vma->vm_mm);
@@ -705,9 +706,7 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
        vma_adjust_trans_huge(vma, start, end, 0);
 
        vma_iter_clear(vmi);
-       vma->vm_start = start;
-       vma->vm_end = end;
-       vma->vm_pgoff = pgoff;
+       vma_set_range(vma, start, end, pgoff);
        vma_complete(&vp, vmi, vma->vm_mm);
        return 0;
 }
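
vma_set_range() is a new helper introduced elsewhere in this series; judging by
the triplets of assignments it replaces throughout this file, it is presumably
equivalent to:

	static inline void vma_set_range(struct vm_area_struct *vma,
					 unsigned long start, unsigned long end,
					 pgoff_t pgoff)
	{
		vma->vm_start = start;
		vma->vm_end = end;
		vma->vm_pgoff = pgoff;
	}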
@@ -861,13 +860,15 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
  *      area is returned, or the function will return NULL
  */
 static struct vm_area_struct
-*vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
-          struct vm_area_struct *prev, unsigned long addr, unsigned long end,
-          unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file,
-          pgoff_t pgoff, struct mempolicy *policy,
+*vma_merge(struct vma_iterator *vmi, struct vm_area_struct *prev,
+          struct vm_area_struct *src, unsigned long addr, unsigned long end,
+          unsigned long vm_flags, pgoff_t pgoff, struct mempolicy *policy,
           struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
           struct anon_vma_name *anon_name)
 {
+       struct mm_struct *mm = src->vm_mm;
+       struct anon_vma *anon_vma = src->anon_vma;
+       struct file *file = src->vm_file;
        struct vm_area_struct *curr, *next, *res;
        struct vm_area_struct *vma, *adjust, *remove, *remove2;
        struct vm_area_struct *anon_dup = NULL;
@@ -1020,10 +1021,7 @@ static struct vm_area_struct
 
        vma_prepare(&vp);
        vma_adjust_trans_huge(vma, vma_start, vma_end, adj_start);
-
-       vma->vm_start = vma_start;
-       vma->vm_end = vma_end;
-       vma->vm_pgoff = vma_pgoff;
+       vma_set_range(vma, vma_start, vma_end, vma_pgoff);
 
        if (vma_expanded)
                vma_iter_store(vmi, vma);
@@ -2056,7 +2054,6 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                }
        }
        anon_vma_unlock_write(vma->anon_vma);
-       khugepaged_enter_vma(vma, vma->vm_flags);
        mas_destroy(&mas);
        validate_mm(mm);
        return error;
@@ -2150,7 +2147,6 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
                }
        }
        anon_vma_unlock_write(vma->anon_vma);
-       khugepaged_enter_vma(vma, vma->vm_flags);
        mas_destroy(&mas);
        validate_mm(mm);
        return error;
@@ -2440,9 +2436,8 @@ struct vm_area_struct *vma_modify(struct vma_iterator *vmi,
        pgoff_t pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        struct vm_area_struct *merged;
 
-       merged = vma_merge(vmi, vma->vm_mm, prev, start, end, vm_flags,
-                          vma->anon_vma, vma->vm_file, pgoff, policy,
-                          uffd_ctx, anon_name);
+       merged = vma_merge(vmi, prev, vma, start, end, vm_flags,
+                          pgoff, policy, uffd_ctx, anon_name);
        if (merged)
                return merged;
 
@@ -2472,9 +2467,8 @@ static struct vm_area_struct
                   struct vm_area_struct *vma, unsigned long start,
                   unsigned long end, pgoff_t pgoff)
 {
-       return vma_merge(vmi, vma->vm_mm, prev, start, end, vma->vm_flags,
-                        vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-                        vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+       return vma_merge(vmi, prev, vma, start, end, vma->vm_flags, pgoff,
+                        vma_policy(vma), vma->vm_userfaultfd_ctx, anon_vma_name(vma));
 }
 
 /*
@@ -2488,10 +2482,9 @@ struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
        pgoff_t pgoff = vma->vm_pgoff + vma_pages(vma);
 
        /* vma is specified as prev, so case 1 or 2 will apply. */
-       return vma_merge(vmi, vma->vm_mm, vma, vma->vm_end, vma->vm_end + delta,
-                        vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff,
-                        vma_policy(vma), vma->vm_userfaultfd_ctx,
-                        anon_vma_name(vma));
+       return vma_merge(vmi, vma, vma, vma->vm_end, vma->vm_end + delta,
+                        vma->vm_flags, pgoff, vma_policy(vma),
+                        vma->vm_userfaultfd_ctx, anon_vma_name(vma));
 }
 
 /*
@@ -2818,11 +2811,9 @@ cannot_expand:
        }
 
        vma_iter_config(&vmi, addr, end);
-       vma->vm_start = addr;
-       vma->vm_end = end;
+       vma_set_range(vma, addr, end, pgoff);
        vm_flags_init(vma, vm_flags);
        vma->vm_page_prot = vm_get_page_prot(vm_flags);
-       vma->vm_pgoff = pgoff;
 
        if (file) {
                vma->vm_file = get_file(file);
@@ -2899,16 +2890,7 @@ cannot_expand:
        vma_start_write(vma);
        vma_iter_store(&vmi, vma);
        mm->map_count++;
-       if (vma->vm_file) {
-               i_mmap_lock_write(vma->vm_file->f_mapping);
-               if (vma_is_shared_maywrite(vma))
-                       mapping_allow_writable(vma->vm_file->f_mapping);
-
-               flush_dcache_mmap_lock(vma->vm_file->f_mapping);
-               vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);
-               flush_dcache_mmap_unlock(vma->vm_file->f_mapping);
-               i_mmap_unlock_write(vma->vm_file->f_mapping);
-       }
+       vma_link_file(vma);
 
        /*
         * vma_merge() calls khugepaged_enter_vma() as well; the below
@@ -3181,9 +3163,7 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
                goto unacct_fail;
 
        vma_set_anonymous(vma);
-       vma->vm_start = addr;
-       vma->vm_end = addr + len;
-       vma->vm_pgoff = addr >> PAGE_SHIFT;
+       vma_set_range(vma, addr, addr + len, addr >> PAGE_SHIFT);
        vm_flags_init(vma, flags);
        vma->vm_page_prot = vm_get_page_prot(flags);
        vma_start_write(vma);
@@ -3420,9 +3400,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                new_vma = vm_area_dup(vma);
                if (!new_vma)
                        goto out;
-               new_vma->vm_start = addr;
-               new_vma->vm_end = addr + len;
-               new_vma->vm_pgoff = pgoff;
+               vma_set_range(new_vma, addr, addr + len, pgoff);
                if (vma_dup_policy(vma, new_vma))
                        goto out_free_vma;
                if (anon_vma_clone(new_vma, vma))
@@ -3590,9 +3568,7 @@ static struct vm_area_struct *__install_special_mapping(
        if (unlikely(vma == NULL))
                return ERR_PTR(-ENOMEM);
 
-       vma->vm_start = addr;
-       vma->vm_end = addr + len;
-
+       vma_set_range(vma, addr, addr + len, 0);
        vm_flags_init(vma, (vm_flags | mm->def_flags |
                      VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK);
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
@@ -3876,7 +3852,7 @@ static int init_user_reserve(void)
 
        free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));
 
-       sysctl_user_reserve_kbytes = min(free_kbytes / 32, 1UL << 17);
+       sysctl_user_reserve_kbytes = min(free_kbytes / 32, SZ_128K);
        return 0;
 }
 subsys_initcall(init_user_reserve);
@@ -3897,7 +3873,7 @@ static int init_admin_reserve(void)
 
        free_kbytes = K(global_zone_page_state(NR_FREE_PAGES));
 
-       sysctl_admin_reserve_kbytes = min(free_kbytes / 32, 1UL << 13);
+       sysctl_admin_reserve_kbytes = min(free_kbytes / 32, SZ_8K);
        return 0;
 }
 subsys_initcall(init_admin_reserve);
@@ -3929,12 +3905,12 @@ static int reserve_mem_notifier(struct notifier_block *nb,
        case MEM_ONLINE:
                /* Default max is 128MB. Leave alone if modified by operator. */
                tmp = sysctl_user_reserve_kbytes;
-               if (0 < tmp && tmp < (1UL << 17))
+               if (tmp > 0 && tmp < SZ_128K)
                        init_user_reserve();
 
                /* Default max is 8MB.  Leave alone if modified by operator. */
                tmp = sysctl_admin_reserve_kbytes;
-               if (0 < tmp && tmp < (1UL << 13))
+               if (tmp > 0 && tmp < SZ_8K)
                        init_admin_reserve();
 
                break;
index 604ddf08affed2063923549bc503f0e18c6fd34e..99b3e9408aa0fb8961c980fe7cf18162fde1d427 100644
@@ -50,12 +50,21 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 #ifdef CONFIG_SMP
 static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_struct *vma)
 {
+       struct encoded_page **pages = batch->encoded_pages;
+
        for (int i = 0; i < batch->nr; i++) {
-               struct encoded_page *enc = batch->encoded_pages[i];
+               struct encoded_page *enc = pages[i];
 
-               if (encoded_page_flags(enc)) {
+               if (encoded_page_flags(enc) & ENCODED_PAGE_BIT_DELAY_RMAP) {
                        struct page *page = encoded_page_ptr(enc);
-                       folio_remove_rmap_pte(page_folio(page), page, vma);
+                       unsigned int nr_pages = 1;
+
+                       if (unlikely(encoded_page_flags(enc) &
+                                    ENCODED_PAGE_BIT_NR_PAGES_NEXT))
+                               nr_pages = encoded_nr_pages(pages[++i]);
+
+                       folio_remove_rmap_ptes(page_folio(page), page, nr_pages,
+                                              vma);
                }
        }
 }
@@ -82,26 +91,62 @@ void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
 }
 #endif
 
-static void tlb_batch_pages_flush(struct mmu_gather *tlb)
+/*
+ * We might end up freeing a lot of pages. Reschedule on a regular
+ * basis to avoid soft lockups in configurations without full
+ * preemption enabled. The magic number of 512 folios seems to work.
+ */
+#define MAX_NR_FOLIOS_PER_FREE         512
+
+static void __tlb_batch_free_encoded_pages(struct mmu_gather_batch *batch)
 {
-       struct mmu_gather_batch *batch;
+       struct encoded_page **pages = batch->encoded_pages;
+       unsigned int nr, nr_pages;
 
-       for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
-               struct encoded_page **pages = batch->encoded_pages;
+       while (batch->nr) {
+               if (!page_poisoning_enabled_static() && !want_init_on_free()) {
+                       nr = min(MAX_NR_FOLIOS_PER_FREE, batch->nr);
 
-               do {
                        /*
-                        * limit free batch count when PAGE_SIZE > 4K
+                        * Make sure we cover page + nr_pages, and don't leave
+                        * nr_pages behind when capping the number of entries.
+                        */
+                       if (unlikely(encoded_page_flags(pages[nr - 1]) &
+                                    ENCODED_PAGE_BIT_NR_PAGES_NEXT))
+                               nr++;
+               } else {
+                       /*
+                        * With page poisoning and init_on_free, the time it
+                        * takes to free memory grows proportionally with the
+                        * actual memory size. Therefore, limit based on the
+                        * actual memory size and not the number of involved
+                        * folios.
                         */
-                       unsigned int nr = min(512U, batch->nr);
+                       for (nr = 0, nr_pages = 0;
+                            nr < batch->nr && nr_pages < MAX_NR_FOLIOS_PER_FREE;
+                            nr++) {
+                               if (unlikely(encoded_page_flags(pages[nr]) &
+                                            ENCODED_PAGE_BIT_NR_PAGES_NEXT))
+                                       nr_pages += encoded_nr_pages(pages[++nr]);
+                               else
+                                       nr_pages++;
+                       }
+               }
 
-                       free_pages_and_swap_cache(pages, nr);
-                       pages += nr;
-                       batch->nr -= nr;
+               free_pages_and_swap_cache(pages, nr);
+               pages += nr;
+               batch->nr -= nr;
 
-                       cond_resched();
-               } while (batch->nr);
+               cond_resched();
        }
+}
+
+static void tlb_batch_pages_flush(struct mmu_gather *tlb)
+{
+       struct mmu_gather_batch *batch;
+
+       for (batch = &tlb->local; batch && batch->nr; batch = batch->next)
+               __tlb_batch_free_encoded_pages(batch);
        tlb->active = &tlb->local;
 }
 
@@ -116,14 +161,19 @@ static void tlb_batch_list_free(struct mmu_gather *tlb)
        tlb->local.next = NULL;
 }
 
-bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, int page_size)
+static bool __tlb_remove_folio_pages_size(struct mmu_gather *tlb,
+               struct page *page, unsigned int nr_pages, bool delay_rmap,
+               int page_size)
 {
+       int flags = delay_rmap ? ENCODED_PAGE_BIT_DELAY_RMAP : 0;
        struct mmu_gather_batch *batch;
 
        VM_BUG_ON(!tlb->end);
 
 #ifdef CONFIG_MMU_GATHER_PAGE_SIZE
        VM_WARN_ON(tlb->page_size != page_size);
+       VM_WARN_ON_ONCE(nr_pages != 1 && page_size != PAGE_SIZE);
+       VM_WARN_ON_ONCE(page_folio(page) != page_folio(page + nr_pages - 1));
 #endif
 
        batch = tlb->active;
@@ -131,17 +181,40 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, i
         * Add the page and check if we are full. If so
         * force a flush.
         */
-       batch->encoded_pages[batch->nr++] = page;
-       if (batch->nr == batch->max) {
+       if (likely(nr_pages == 1)) {
+               batch->encoded_pages[batch->nr++] = encode_page(page, flags);
+       } else {
+               flags |= ENCODED_PAGE_BIT_NR_PAGES_NEXT;
+               batch->encoded_pages[batch->nr++] = encode_page(page, flags);
+               batch->encoded_pages[batch->nr++] = encode_nr_pages(nr_pages);
+       }
+       /*
+        * Make sure that we can always add another "page" + "nr_pages",
+        * requiring two entries instead of only a single one.
+        */
+       if (batch->nr >= batch->max - 1) {
                if (!tlb_next_batch(tlb))
                        return true;
                batch = tlb->active;
        }
-       VM_BUG_ON_PAGE(batch->nr > batch->max, encoded_page_ptr(page));
+       VM_BUG_ON_PAGE(batch->nr > batch->max - 1, page);
 
        return false;
 }
 
+bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page,
+               unsigned int nr_pages, bool delay_rmap)
+{
+       return __tlb_remove_folio_pages_size(tlb, page, nr_pages, delay_rmap,
+                                            PAGE_SIZE);
+}
+
+bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+               bool delay_rmap, int page_size)
+{
+       return __tlb_remove_folio_pages_size(tlb, page, 1, delay_rmap, page_size);
+}
+
 #endif /* MMU_GATHER_NO_GATHER */
 
 #ifdef CONFIG_MMU_GATHER_TABLE_FREE
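
For illustration, a user-space model of the two-slot encoding used above:
the low pointer bits carry flags, and when ENCODED_PAGE_BIT_NR_PAGES_NEXT is
set, the following array slot holds a page count rather than a pointer. The
flag values and packing here are assumptions for the sketch, not the
kernel's exact layout:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ENCODED_PAGE_BIT_DELAY_RMAP     1UL
    #define ENCODED_PAGE_BIT_NR_PAGES_NEXT  2UL
    #define ENCODED_PAGE_BITS               3UL

    typedef uintptr_t encoded_page_t;

    static encoded_page_t encode_page(void *page, unsigned long flags)
    {
            assert(((uintptr_t)page & ENCODED_PAGE_BITS) == 0);
            return (uintptr_t)page | flags;
    }

    static unsigned long encoded_page_flags(encoded_page_t enc)
    {
            return enc & ENCODED_PAGE_BITS;
    }

    static encoded_page_t encode_nr_pages(unsigned long nr)
    {
            return nr;      /* the whole slot is the count */
    }

    static unsigned long encoded_nr_pages(encoded_page_t enc)
    {
            return enc;
    }

    int main(void)
    {
            static int fake_pages[8];       /* stand-in for struct page */
            encoded_page_t batch[2];

            /*
             * A 5-page run needs two slots, so the batch fills one entry
             * early -- hence the "batch->max - 1" checks above.
             */
            batch[0] = encode_page(fake_pages,
                                   ENCODED_PAGE_BIT_DELAY_RMAP |
                                   ENCODED_PAGE_BIT_NR_PAGES_NEXT);
            batch[1] = encode_nr_pages(5);

            if (encoded_page_flags(batch[0]) & ENCODED_PAGE_BIT_NR_PAGES_NEXT)
                    printf("nr_pages = %lu\n", encoded_nr_pages(batch[1]));
            return 0;
    }
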
index 81991102f7859e94cacd9670d42891038e92b214..f8a4544b4601db4e4ff8e575934ea9502850cd8a 100644 (file)
@@ -198,13 +198,13 @@ static long change_pte_range(struct mmu_gather *tlb,
                        pte_t newpte;
 
                        if (is_writable_migration_entry(entry)) {
-                               struct page *page = pfn_swap_entry_to_page(entry);
+                               struct folio *folio = pfn_swap_entry_folio(entry);
 
                                /*
                                 * A protection check is difficult so
                                 * just be safe and disable write
                                 */
-                               if (PageAnon(page))
+                               if (folio_test_anon(folio))
                                        entry = make_readable_exclusive_migration_entry(
                                                             swp_offset(entry));
                                else
index b6dc558d31440831e51ff12ec68e2f2f69df1633..5ec8f44e7ce976016cf7916bb8ca3df48d47d713 100644 (file)
@@ -131,8 +131,6 @@ int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 }
 EXPORT_SYMBOL(follow_pfn);
 
-LIST_HEAD(vmap_area_list);
-
 void vfree(const void *addr)
 {
        kfree(addr);
index 91ccd82097c2ba7f9e9eb55437d07c38dfcd25a3..8d6a207c3c59052b93881fb51d4926dbd9d76f37 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/kthread.h>
 #include <linux/init.h>
 #include <linux/mmu_notifier.h>
+#include <linux/cred.h>
 
 #include <asm/tlb.h>
 #include "internal.h"
@@ -754,6 +755,7 @@ static inline void queue_oom_reaper(struct task_struct *tsk)
  */
 static void mark_oom_victim(struct task_struct *tsk)
 {
+       const struct cred *cred;
        struct mm_struct *mm = tsk->mm;
 
        WARN_ON(oom_killer_disabled);
@@ -773,7 +775,9 @@ static void mark_oom_victim(struct task_struct *tsk)
         */
        __thaw_task(tsk);
        atomic_inc(&oom_victims);
-       trace_mark_victim(tsk->pid);
+       cred = get_task_cred(tsk);
+       trace_mark_victim(tsk, cred->uid.val);
+       put_cred(cred);
 }
 
 /**
index 3f255534986a2fda07e2d35187bb385f64749c5c..3e19b87049db1742dc12c0ce2d246f68e026d012 100644 (file)
@@ -2325,18 +2325,18 @@ void __init page_writeback_init(void)
 }
 
 /**
- * tag_pages_for_writeback - tag pages to be written by write_cache_pages
+ * tag_pages_for_writeback - tag pages to be written by writeback
  * @mapping: address space structure to write
  * @start: starting page index
  * @end: ending page index (inclusive)
  *
  * This function scans the page range from @start to @end (inclusive) and tags
- * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
- * that write_cache_pages (or whoever calls this function) will then use
- * TOWRITE tag to identify pages eligible for writeback.  This mechanism is
- * used to avoid livelocking of writeback by a process steadily creating new
- * dirty pages in the file (thus it is important for this function to be quick
- * so that it can tag pages faster than a dirtying process can create them).
+ * all pages that have DIRTY tag set with a special TOWRITE tag.  The caller
+ * can then use the TOWRITE tag to identify pages eligible for writeback.
+ * This mechanism is used to avoid livelocking of writeback by a process
+ * steadily creating new dirty pages in the file (thus it is important for this
+ * function to be quick so that it can tag pages faster than a dirtying process
+ * can create them).
  */
 void tag_pages_for_writeback(struct address_space *mapping,
                             pgoff_t start, pgoff_t end)
@@ -2360,183 +2360,242 @@ void tag_pages_for_writeback(struct address_space *mapping,
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
+static bool folio_prepare_writeback(struct address_space *mapping,
+               struct writeback_control *wbc, struct folio *folio)
+{
+       /*
+        * Folio truncated or invalidated. We can freely skip it then,
+        * even for data integrity operations: the folio has disappeared
+        * concurrently, so there could be no real expectation of this
+        * data integrity operation even if there is now a new, dirty
+        * folio at the same pagecache index.
+        */
+       if (unlikely(folio->mapping != mapping))
+               return false;
+
+       /*
+        * Did somebody else write it for us?
+        */
+       if (!folio_test_dirty(folio))
+               return false;
+
+       if (folio_test_writeback(folio)) {
+               if (wbc->sync_mode == WB_SYNC_NONE)
+                       return false;
+               folio_wait_writeback(folio);
+       }
+       BUG_ON(folio_test_writeback(folio));
+
+       if (!folio_clear_dirty_for_io(folio))
+               return false;
+
+       return true;
+}
+
+static xa_mark_t wbc_to_tag(struct writeback_control *wbc)
+{
+       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+               return PAGECACHE_TAG_TOWRITE;
+       return PAGECACHE_TAG_DIRTY;
+}
+
+static pgoff_t wbc_end(struct writeback_control *wbc)
+{
+       if (wbc->range_cyclic)
+               return -1;
+       return wbc->range_end >> PAGE_SHIFT;
+}
+
+static struct folio *writeback_get_folio(struct address_space *mapping,
+               struct writeback_control *wbc)
+{
+       struct folio *folio;
+
+retry:
+       folio = folio_batch_next(&wbc->fbatch);
+       if (!folio) {
+               folio_batch_release(&wbc->fbatch);
+               cond_resched();
+               filemap_get_folios_tag(mapping, &wbc->index, wbc_end(wbc),
+                               wbc_to_tag(wbc), &wbc->fbatch);
+               folio = folio_batch_next(&wbc->fbatch);
+               if (!folio)
+                       return NULL;
+       }
+
+       folio_lock(folio);
+       if (unlikely(!folio_prepare_writeback(mapping, wbc, folio))) {
+               folio_unlock(folio);
+               goto retry;
+       }
+
+       trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
+       return folio;
+}
+
 /**
- * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
+ * writeback_iter - iterate the folios of a mapping for writeback
  * @mapping: address space structure to write
- * @wbc: subtract the number of written pages from *@wbc->nr_to_write
- * @writepage: function called for each page
- * @data: data passed to writepage function
+ * @wbc: writeback context
+ * @folio: previously iterated folio (%NULL to start)
+ * @error: in-out pointer for writeback errors (see below)
  *
- * If a page is already under I/O, write_cache_pages() skips it, even
- * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
- * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
- * and msync() need to guarantee that all the data which was dirty at the time
- * the call was made get new I/O started against them.  If wbc->sync_mode is
- * WB_SYNC_ALL then we were called for data integrity and we must wait for
- * existing IO to complete.
- *
- * To avoid livelocks (when other process dirties new pages), we first tag
- * pages which should be written back with TOWRITE tag and only then start
- * writing them. For data-integrity sync we have to be careful so that we do
- * not miss some pages (e.g., because some other process has cleared TOWRITE
- * tag we set). The rule we follow is that TOWRITE tag can be cleared only
- * by the process clearing the DIRTY tag (and submitting the page for IO).
- *
- * To avoid deadlocks between range_cyclic writeback and callers that hold
- * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
- * we do not loop back to the start of the file. Doing so causes a page
- * lock/page writeback access order inversion - we should only ever lock
- * multiple pages in ascending page->index order, and looping back to the start
- * of the file violates that rule and causes deadlocks.
+ * This function returns the next folio for the writeback operation described by
+ * @wbc on @mapping and should be called in a while loop in the ->writepages
+ * implementation.
  *
- * Return: %0 on success, negative error code otherwise
+ * To start the writeback operation, %NULL is passed in the @folio argument, and
+ * for every subsequent iteration the folio returned previously should be passed
+ * back in.
+ *
+ * If there was an error in the per-folio writeback inside the writeback_iter()
+ * loop, @error should be set to the error value.
+ *
+ * Once the writeback described in @wbc has finished, this function returns
+ * %NULL, and any error encountered during the iteration is reported in
+ * @error.
+ *
+ * Note: callers should not manually break out of the loop using break or goto
+ * but must keep calling writeback_iter() until it returns %NULL.
+ *
+ * Return: the folio to write or %NULL if the loop is done.
  */
-int write_cache_pages(struct address_space *mapping,
-                     struct writeback_control *wbc, writepage_t writepage,
-                     void *data)
+struct folio *writeback_iter(struct address_space *mapping,
+               struct writeback_control *wbc, struct folio *folio, int *error)
 {
-       int ret = 0;
-       int done = 0;
-       int error;
-       struct folio_batch fbatch;
-       int nr_folios;
-       pgoff_t index;
-       pgoff_t end;            /* Inclusive */
-       pgoff_t done_index;
-       int range_whole = 0;
-       xa_mark_t tag;
-
-       folio_batch_init(&fbatch);
-       if (wbc->range_cyclic) {
-               index = mapping->writeback_index; /* prev offset */
-               end = -1;
-       } else {
-               index = wbc->range_start >> PAGE_SHIFT;
-               end = wbc->range_end >> PAGE_SHIFT;
-               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-                       range_whole = 1;
-       }
-       if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
-               tag_pages_for_writeback(mapping, index, end);
-               tag = PAGECACHE_TAG_TOWRITE;
-       } else {
-               tag = PAGECACHE_TAG_DIRTY;
-       }
-       done_index = index;
-       while (!done && (index <= end)) {
-               int i;
-
-               nr_folios = filemap_get_folios_tag(mapping, &index, end,
-                               tag, &fbatch);
-
-               if (nr_folios == 0)
-                       break;
+       if (!folio) {
+               folio_batch_init(&wbc->fbatch);
+               wbc->saved_err = *error = 0;
 
-               for (i = 0; i < nr_folios; i++) {
-                       struct folio *folio = fbatch.folios[i];
-                       unsigned long nr;
+               /*
+                * For range cyclic writeback we remember where we stopped so
+                * that we can continue from there next time.
+                *
+                * For non-cyclic writeback we always start at the beginning of
+                * the passed in range.
+                */
+               if (wbc->range_cyclic)
+                       wbc->index = mapping->writeback_index;
+               else
+                       wbc->index = wbc->range_start >> PAGE_SHIFT;
 
-                       done_index = folio->index;
+               /*
+                * To avoid livelocks when other processes dirty new pages, we
+                * first tag pages which should be written back and only then
+                * start writing them.
+                *
+                * For data-integrity writeback we have to be careful so that we
+                * do not miss some pages (e.g., because some other process has
+                * cleared the TOWRITE tag we set).  The rule we follow is that
+                * TOWRITE tag can be cleared only by the process clearing the
+                * DIRTY tag (and submitting the page for I/O).
+                */
+               if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+                       tag_pages_for_writeback(mapping, wbc->index,
+                                       wbc_end(wbc));
+       } else {
+               wbc->nr_to_write -= folio_nr_pages(folio);
 
-                       folio_lock(folio);
+               WARN_ON_ONCE(*error > 0);
 
-                       /*
-                        * Page truncated or invalidated. We can freely skip it
-                        * then, even for data integrity operations: the page
-                        * has disappeared concurrently, so there could be no
-                        * real expectation of this data integrity operation
-                        * even if there is now a new, dirty page at the same
-                        * pagecache address.
-                        */
-                       if (unlikely(folio->mapping != mapping)) {
-continue_unlock:
-                               folio_unlock(folio);
-                               continue;
-                       }
+               /*
+                * For integrity writeback we have to keep going until we have
+                * written all the folios we tagged for writeback above, even if
+                * we run past wbc->nr_to_write or encounter errors.
+                * We stash away the first error we encounter in wbc->saved_err
+                * so that it can be retrieved when we're done.  This is because
+                * the file system may still have state to clear for each folio.
+                *
+                * For background writeback we exit as soon as we run past
+                * wbc->nr_to_write or encounter the first error.
+                */
+               if (wbc->sync_mode == WB_SYNC_ALL) {
+                       if (*error && !wbc->saved_err)
+                               wbc->saved_err = *error;
+               } else {
+                       if (*error || wbc->nr_to_write <= 0)
+                               goto done;
+               }
+       }
 
-                       if (!folio_test_dirty(folio)) {
-                               /* someone wrote it for us */
-                               goto continue_unlock;
-                       }
+       folio = writeback_get_folio(mapping, wbc);
+       if (!folio) {
+               /*
+                * To avoid deadlocks between range_cyclic writeback and callers
+                * that hold pages in PageWriteback to aggregate I/O until
+                * the writeback iteration finishes, we do not loop back to the
+                * start of the file.  Doing so causes a page lock/page
+                * writeback access order inversion - we should only ever lock
+                * multiple pages in ascending page->index order, and looping
+                * back to the start of the file violates that rule and causes
+                * deadlocks.
+                */
+               if (wbc->range_cyclic)
+                       mapping->writeback_index = 0;
 
-                       if (folio_test_writeback(folio)) {
-                               if (wbc->sync_mode != WB_SYNC_NONE)
-                                       folio_wait_writeback(folio);
-                               else
-                                       goto continue_unlock;
-                       }
+               /*
+                * Return the first error we encountered (if there was any) to
+                * the caller.
+                */
+               *error = wbc->saved_err;
+       }
+       return folio;
 
-                       BUG_ON(folio_test_writeback(folio));
-                       if (!folio_clear_dirty_for_io(folio))
-                               goto continue_unlock;
+done:
+       if (wbc->range_cyclic)
+               mapping->writeback_index = folio->index + folio_nr_pages(folio);
+       folio_batch_release(&wbc->fbatch);
+       return NULL;
+}
 
-                       trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
-                       error = writepage(folio, wbc, data);
-                       nr = folio_nr_pages(folio);
-                       if (unlikely(error)) {
-                               /*
-                                * Handle errors according to the type of
-                                * writeback. There's no need to continue for
-                                * background writeback. Just push done_index
-                                * past this page so media errors won't choke
-                                * writeout for the entire file. For integrity
-                                * writeback, we must process the entire dirty
-                                * set regardless of errors because the fs may
-                                * still have state to clear for each page. In
-                                * that case we continue processing and return
-                                * the first error.
-                                */
-                               if (error == AOP_WRITEPAGE_ACTIVATE) {
-                                       folio_unlock(folio);
-                                       error = 0;
-                               } else if (wbc->sync_mode != WB_SYNC_ALL) {
-                                       ret = error;
-                                       done_index = folio->index + nr;
-                                       done = 1;
-                                       break;
-                               }
-                               if (!ret)
-                                       ret = error;
-                       }
+/**
+ * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ * @writepage: function called for each page
+ * @data: data passed to writepage function
+ *
+ * Return: %0 on success, negative error code otherwise
+ *
+ * Note: please use writeback_iter() instead.
+ */
+int write_cache_pages(struct address_space *mapping,
+                     struct writeback_control *wbc, writepage_t writepage,
+                     void *data)
+{
+       struct folio *folio = NULL;
+       int error;
 
-                       /*
-                        * We stop writing back only if we are not doing
-                        * integrity sync. In case of integrity sync we have to
-                        * keep going until we have written all the pages
-                        * we tagged for writeback prior to entering this loop.
-                        */
-                       wbc->nr_to_write -= nr;
-                       if (wbc->nr_to_write <= 0 &&
-                           wbc->sync_mode == WB_SYNC_NONE) {
-                               done = 1;
-                               break;
-                       }
+       while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
+               error = writepage(folio, wbc, data);
+               if (error == AOP_WRITEPAGE_ACTIVATE) {
+                       folio_unlock(folio);
+                       error = 0;
                }
-               folio_batch_release(&fbatch);
-               cond_resched();
        }
 
-       /*
-        * If we hit the last page and there is more work to be done: wrap
-        * back the index back to the start of the file for the next
-        * time we are called.
-        */
-       if (wbc->range_cyclic && !done)
-               done_index = 0;
-       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-               mapping->writeback_index = done_index;
-
-       return ret;
+       return error;
 }
 EXPORT_SYMBOL(write_cache_pages);
 
-static int writepage_cb(struct folio *folio, struct writeback_control *wbc,
-               void *data)
+static int writeback_use_writepage(struct address_space *mapping,
+               struct writeback_control *wbc)
 {
-       struct address_space *mapping = data;
-       int ret = mapping->a_ops->writepage(&folio->page, wbc);
-       mapping_set_error(mapping, ret);
-       return ret;
+       struct folio *folio = NULL;
+       struct blk_plug plug;
+       int err;
+
+       blk_start_plug(&plug);
+       while ((folio = writeback_iter(mapping, wbc, folio, &err))) {
+               err = mapping->a_ops->writepage(&folio->page, wbc);
+               if (err == AOP_WRITEPAGE_ACTIVATE) {
+                       folio_unlock(folio);
+                       err = 0;
+               }
+               mapping_set_error(mapping, err);
+       }
+       blk_finish_plug(&plug);
+
+       return err;
 }
 
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
@@ -2552,12 +2611,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
                if (mapping->a_ops->writepages) {
                        ret = mapping->a_ops->writepages(mapping, wbc);
                } else if (mapping->a_ops->writepage) {
-                       struct blk_plug plug;
-
-                       blk_start_plug(&plug);
-                       ret = write_cache_pages(mapping, wbc, writepage_cb,
-                                               mapping);
-                       blk_finish_plug(&plug);
+                       ret = writeback_use_writepage(mapping, wbc);
                } else {
                        /* deal with chardevs and other special files */
                        ret = 0;
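
A minimal sketch of a ->writepages implementation built on the
writeback_iter() calling convention documented above; myfs_write_folio() is
a hypothetical per-folio writeout helper, not an existing kernel function:

    static int myfs_writepages(struct address_space *mapping,
                               struct writeback_control *wbc)
    {
            struct folio *folio = NULL;
            int error = 0;

            /*
             * Pass NULL on the first call, then feed each returned folio
             * back in.  Never break out early: writeback_iter() releases
             * its folio batch and reports the saved error only when it
             * finally returns NULL.
             */
            while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
                    error = myfs_write_folio(folio, wbc);
                    /*
                     * A real implementation would also handle
                     * AOP_WRITEPAGE_ACTIVATE here, as write_cache_pages()
                     * does.
                     */
            }
            return error;
    }
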
index 62fc2e8f2733eac2e32dfa0bfc1436ea4f919d93..14d39f34d3367fbb9683683be7e6ed150f97fae2 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/sysctl.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/pagevec.h>
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmstat.h>
@@ -464,19 +465,19 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 /*
  * Temporary debugging check for pages not lying within a given zone.
  */
-static int __maybe_unused bad_range(struct zone *zone, struct page *page)
+static bool __maybe_unused bad_range(struct zone *zone, struct page *page)
 {
        if (page_outside_zone_boundaries(zone, page))
-               return 1;
+               return true;
        if (zone != page_zone(page))
-               return 1;
+               return true;
 
-       return 0;
+       return false;
 }
 #else
-static inline int __maybe_unused bad_range(struct zone *zone, struct page *page)
+static inline bool __maybe_unused bad_range(struct zone *zone, struct page *page)
 {
-       return 0;
+       return false;
 }
 #endif
 
@@ -1061,7 +1062,7 @@ out:
  * on-demand allocation and then freed again before the deferred pages
  * initialization is done, but this is not likely to happen.
  */
-static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
+static inline bool should_skip_kasan_poison(struct page *page)
 {
        if (IS_ENABLED(CONFIG_KASAN_GENERIC))
                return deferred_pages_enabled();
@@ -1080,11 +1081,11 @@ static void kernel_init_pages(struct page *page, int numpages)
        kasan_enable_current();
 }
 
-static __always_inline bool free_pages_prepare(struct page *page,
-                       unsigned int order, fpi_t fpi_flags)
+__always_inline bool free_pages_prepare(struct page *page,
+                       unsigned int order)
 {
        int bad = 0;
-       bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags);
+       bool skip_kasan_poison = should_skip_kasan_poison(page);
        bool init = want_init_on_free();
        bool compound = PageCompound(page);
 
@@ -1266,7 +1267,7 @@ static void __free_pages_ok(struct page *page, unsigned int order,
        unsigned long pfn = page_to_pfn(page);
        struct zone *zone = page_zone(page);
 
-       if (!free_pages_prepare(page, order, fpi_flags))
+       if (!free_pages_prepare(page, order))
                return;
 
        /*
@@ -1422,14 +1423,14 @@ static void check_new_page_bad(struct page *page)
 /*
  * This page is about to be returned from the page allocator
  */
-static int check_new_page(struct page *page)
+static bool check_new_page(struct page *page)
 {
        if (likely(page_expected_state(page,
                                PAGE_FLAGS_CHECK_AT_PREP|__PG_HWPOISON)))
-               return 0;
+               return false;
 
        check_new_page_bad(page);
-       return 1;
+       return true;
 }
 
 static inline bool check_new_pages(struct page *page, unsigned int order)
@@ -2343,7 +2344,7 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
 {
        int migratetype;
 
-       if (!free_pages_prepare(page, order, FPI_NONE))
+       if (!free_pages_prepare(page, order))
                return false;
 
        migratetype = get_pfnblock_migratetype(page, pfn);
@@ -2515,66 +2516,70 @@ void free_unref_page(struct page *page, unsigned int order)
 }
 
 /*
- * Free a list of 0-order pages
+ * Free a batch of folios
  */
-void free_unref_page_list(struct list_head *list)
+void free_unref_folios(struct folio_batch *folios)
 {
        unsigned long __maybe_unused UP_flags;
-       struct page *page, *next;
        struct per_cpu_pages *pcp = NULL;
        struct zone *locked_zone = NULL;
-       int batch_count = 0;
-       int migratetype;
+       int i, j, migratetype;
+
+       /* Prepare folios for freeing */
+       for (i = 0, j = 0; i < folios->nr; i++) {
+               struct folio *folio = folios->folios[i];
+               unsigned long pfn = folio_pfn(folio);
+               unsigned int order = folio_order(folio);
 
-       /* Prepare pages for freeing */
-       list_for_each_entry_safe(page, next, list, lru) {
-               unsigned long pfn = page_to_pfn(page);
-               if (!free_unref_page_prepare(page, pfn, 0)) {
-                       list_del(&page->lru);
+               if (order > 0 && folio_test_large_rmappable(folio))
+                       folio_undo_large_rmappable(folio);
+               if (!free_unref_page_prepare(&folio->page, pfn, order))
                        continue;
-               }
 
                /*
-                * Free isolated pages directly to the allocator, see
-                * comment in free_unref_page.
+                * Free isolated folios and orders not handled on the PCP
+                * directly to the allocator, see comment in free_unref_page.
                 */
-               migratetype = get_pcppage_migratetype(page);
-               if (unlikely(is_migrate_isolate(migratetype))) {
-                       list_del(&page->lru);
-                       free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
+               migratetype = get_pcppage_migratetype(&folio->page);
+               if (!pcp_allowed_order(order) ||
+                   is_migrate_isolate(migratetype)) {
+                       free_one_page(folio_zone(folio), &folio->page, pfn,
+                                       order, migratetype, FPI_NONE);
                        continue;
                }
+               folio->private = (void *)(unsigned long)order;
+               if (j != i)
+                       folios->folios[j] = folio;
+               j++;
        }
+       folios->nr = j;
 
-       list_for_each_entry_safe(page, next, list, lru) {
-               struct zone *zone = page_zone(page);
+       for (i = 0; i < folios->nr; i++) {
+               struct folio *folio = folios->folios[i];
+               struct zone *zone = folio_zone(folio);
+               unsigned int order = (unsigned long)folio->private;
 
-               list_del(&page->lru);
-               migratetype = get_pcppage_migratetype(page);
+               folio->private = NULL;
+               migratetype = get_pcppage_migratetype(&folio->page);
 
-               /*
-                * Either different zone requiring a different pcp lock or
-                * excessive lock hold times when freeing a large list of
-                * pages.
-                */
-               if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {
+               /* Different zone requires a different pcp lock */
+               if (zone != locked_zone) {
                        if (pcp) {
                                pcp_spin_unlock(pcp);
                                pcp_trylock_finish(UP_flags);
                        }
 
-                       batch_count = 0;
-
                        /*
-                        * trylock is necessary as pages may be getting freed
+                        * trylock is necessary as folios may be getting freed
                         * from IRQ or SoftIRQ context after an IO completion.
                         */
                        pcp_trylock_prepare(UP_flags);
                        pcp = pcp_spin_trylock(zone->per_cpu_pageset);
                        if (unlikely(!pcp)) {
                                pcp_trylock_finish(UP_flags);
-                               free_one_page(zone, page, page_to_pfn(page),
-                                             0, migratetype, FPI_NONE);
+                               free_one_page(zone, &folio->page,
+                                               folio_pfn(folio), order,
+                                               migratetype, FPI_NONE);
                                locked_zone = NULL;
                                continue;
                        }
@@ -2588,15 +2593,16 @@ void free_unref_page_list(struct list_head *list)
                if (unlikely(migratetype >= MIGRATE_PCPTYPES))
                        migratetype = MIGRATE_MOVABLE;
 
-               trace_mm_page_free_batched(page);
-               free_unref_page_commit(zone, pcp, page, migratetype, 0);
-               batch_count++;
+               trace_mm_page_free_batched(&folio->page);
+               free_unref_page_commit(zone, pcp, &folio->page, migratetype,
+                               order);
        }
 
        if (pcp) {
                pcp_spin_unlock(pcp);
                pcp_trylock_finish(UP_flags);
        }
+       folio_batch_reinit(folios);
 }
 
 /*
@@ -2616,8 +2622,8 @@ void split_page(struct page *page, unsigned int order)
 
        for (i = 1; i < (1 << order); i++)
                set_page_refcounted(page + i);
-       split_page_owner(page, 1 << order);
-       split_page_memcg(page, 1 << order);
+       split_page_owner(page, order, 0);
+       split_page_memcg(page, order, 0);
 }
 EXPORT_SYMBOL_GPL(split_page);
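
split_page_owner() and split_page_memcg() now take (old_order, new_order)
instead of a raw page count; passing new_order == 0, as above, reproduces
the old behaviour of 1 << order order-0 pages. A quick standalone check of
the arithmetic:

    #include <assert.h>

    int main(void)
    {
            unsigned int old_order = 3, new_order = 0;

            /* an order-3 block splits into 1 << (3 - 0) = 8 order-0 pages */
            assert((1u << (old_order - new_order)) == 8);
            /* total pages are preserved across the split */
            assert((1u << (old_order - new_order)) * (1u << new_order) ==
                   (1u << old_order));
            return 0;
    }
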
 
@@ -4813,8 +4819,8 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
                struct page *page = virt_to_page((void *)addr);
                struct page *last = page + nr;
 
-               split_page_owner(page, 1 << order);
-               split_page_memcg(page, 1 << order);
+               split_page_owner(page, order, 0);
+               split_page_memcg(page, order, 0);
                while (page < --last)
                        set_page_refcounted(last);
 
@@ -5584,37 +5590,34 @@ static void zone_pcp_update(struct zone *zone, int cpu_online)
        mutex_unlock(&pcp_batch_high_lock);
 }
 
-static void zone_pcp_update_cacheinfo(struct zone *zone)
+static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
 {
-       int cpu;
        struct per_cpu_pages *pcp;
        struct cpu_cacheinfo *cci;
 
-       for_each_online_cpu(cpu) {
-               pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
-               cci = get_cpu_cacheinfo(cpu);
-               /*
-                * If data cache slice of CPU is large enough, "pcp->batch"
-                * pages can be preserved in PCP before draining PCP for
-                * consecutive high-order pages freeing without allocation.
-                * This can reduce zone lock contention without hurting
-                * cache-hot pages sharing.
-                */
-               spin_lock(&pcp->lock);
-               if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
-                       pcp->flags |= PCPF_FREE_HIGH_BATCH;
-               else
-                       pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
-               spin_unlock(&pcp->lock);
-       }
+       pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+       cci = get_cpu_cacheinfo(cpu);
+       /*
+        * If data cache slice of CPU is large enough, "pcp->batch"
+        * pages can be preserved in PCP before draining PCP for
+        * consecutive high-order pages freeing without allocation.
+        * This can reduce zone lock contention without hurting
+        * cache-hot pages sharing.
+        */
+       spin_lock(&pcp->lock);
+       if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
+               pcp->flags |= PCPF_FREE_HIGH_BATCH;
+       else
+               pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
+       spin_unlock(&pcp->lock);
 }
 
-void setup_pcp_cacheinfo(void)
+void setup_pcp_cacheinfo(unsigned int cpu)
 {
        struct zone *zone;
 
        for_each_populated_zone(zone)
-               zone_pcp_update_cacheinfo(zone);
+               zone_pcp_update_cacheinfo(zone, cpu);
 }
 
 /*
@@ -5857,7 +5860,7 @@ static void __setup_per_zone_wmarks(void)
 
                spin_lock_irqsave(&zone->lock, flags);
                tmp = (u64)pages_min * zone_managed_pages(zone);
-               do_div(tmp, lowmem_pages);
+               tmp = div64_ul(tmp, lowmem_pages);
                if (is_highmem(zone) || zone_idx(zone) == ZONE_MOVABLE) {
                        /*
                         * __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -6231,9 +6234,14 @@ static void alloc_contig_dump_pages(struct list_head *page_list)
        }
 }
 
-/* [start, end) must belong to a single zone. */
+/*
+ * [start, end) must belong to a single zone.
+ * @migratetype: migratetype of the range, used to annotate
+ *             trace_mm_alloc_contig_migrate_range_info so that events can
+ *             be filtered by migration type.
+ */
 int __alloc_contig_migrate_range(struct compact_control *cc,
-                                       unsigned long start, unsigned long end)
+                                       unsigned long start, unsigned long end,
+                                       int migratetype)
 {
        /* This function is based on compact_zone() from compaction.c. */
        unsigned int nr_reclaimed;
@@ -6244,6 +6252,10 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
                .nid = zone_to_nid(cc->zone),
                .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
        };
+       struct page *page;
+       unsigned long total_mapped = 0;
+       unsigned long total_migrated = 0;
+       unsigned long total_reclaimed = 0;
 
        lru_cache_disable();
 
@@ -6269,9 +6281,18 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
                                                        &cc->migratepages);
                cc->nr_migratepages -= nr_reclaimed;
 
+               if (trace_mm_alloc_contig_migrate_range_info_enabled()) {
+                       total_reclaimed += nr_reclaimed;
+                       list_for_each_entry(page, &cc->migratepages, lru)
+                               total_mapped += page_mapcount(page);
+               }
+
                ret = migrate_pages(&cc->migratepages, alloc_migration_target,
                        NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL);
 
+               if (trace_mm_alloc_contig_migrate_range_info_enabled() && !ret)
+                       total_migrated += cc->nr_migratepages;
+
                /*
                 * On -ENOMEM, migrate_pages() bails out right away. It is pointless
                 * to retry again over this error, so do the same here.
@@ -6285,9 +6306,13 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
                if (!(cc->gfp_mask & __GFP_NOWARN) && ret == -EBUSY)
                        alloc_contig_dump_pages(&cc->migratepages);
                putback_movable_pages(&cc->migratepages);
-               return ret;
        }
-       return 0;
+
+       trace_mm_alloc_contig_migrate_range_info(start, end, migratetype,
+                                                total_migrated,
+                                                total_reclaimed,
+                                                total_mapped);
+       return (ret < 0) ? ret : 0;
 }
 
 /**
@@ -6367,7 +6392,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
         * allocated.  So, if we fall through be sure to clear ret so that
         * -EBUSY is not accidentally used or returned to caller.
         */
-       ret = __alloc_contig_migrate_range(&cc, start, end);
+       ret = __alloc_contig_migrate_range(&cc, start, end, migratetype);
        if (ret && ret != -EBUSY)
                goto done;
        ret = 0;
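
free_unref_folios() above frees a whole folio_batch through the per-cpu
lists and reinitializes the batch before returning. A hedged sketch of the
intended caller pattern, where next_folio_to_free() is a hypothetical
stand-in for whatever produces the folios:

    static void free_some_folios(struct folio *(*next_folio_to_free)(void))
    {
            struct folio_batch fbatch;
            struct folio *folio;

            folio_batch_init(&fbatch);
            while ((folio = next_folio_to_free())) {
                    /* folio_batch_add() returns the slots left; 0 means full */
                    if (!folio_batch_add(&fbatch, folio))
                            free_unref_folios(&fbatch);
            }
            if (folio_batch_count(&fbatch))
                    free_unref_folios(&fbatch);
    }
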
index cd0ea36682533ae7956a25ba50086c3584ec4bfa..a5c8fa4c2a75c35a68349e0e6a44e8f5b51435d5 100644 (file)
@@ -434,7 +434,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
                                }
 
                                ret = __alloc_contig_migrate_range(&cc, head_pfn,
-                                                       head_pfn + nr_pages);
+                                                       head_pfn + nr_pages, page_mt);
 
                                /*
                                 * restore the page's migratetype so that it can
index 5634e5d890f881b083e10ff514c4ca50a7287a15..e7139952ffd9dee593fd51fa88a9c69d8d830cd3 100644 (file)
@@ -36,6 +36,15 @@ struct page_owner {
        pid_t free_tgid;
 };
 
+struct stack {
+       struct stack_record *stack_record;
+       struct stack *next;
+};
+static struct stack dummy_stack;
+static struct stack failure_stack;
+static struct stack *stack_list;
+static DEFINE_SPINLOCK(stack_list_lock);
+
 static bool page_owner_enabled __initdata;
 DEFINE_STATIC_KEY_FALSE(page_owner_inited);
 
@@ -95,6 +104,15 @@ static __init void init_page_owner(void)
        register_early_stack();
        static_branch_enable(&page_owner_inited);
        init_early_allocated_pages();
+       /* Initialize dummy and failure stacks and link them to stack_list */
+       dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);
+       failure_stack.stack_record = __stack_depot_get_stack_record(failure_handle);
+       if (dummy_stack.stack_record)
+               refcount_set(&dummy_stack.stack_record->count, 1);
+       if (failure_stack.stack_record)
+               refcount_set(&failure_stack.stack_record->count, 1);
+       dummy_stack.next = &failure_stack;
+       stack_list = &dummy_stack;
 }
 
 struct page_ext_operations page_owner_ops = {
@@ -135,11 +153,74 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
        return handle;
 }
 
+static void add_stack_record_to_list(struct stack_record *stack_record,
+                                    gfp_t gfp_mask)
+{
+       unsigned long flags;
+       struct stack *stack;
+
+       /* Filter gfp_mask the same way stackdepot does, for consistency */
+       gfp_mask &= ~GFP_ZONEMASK;
+       gfp_mask &= (GFP_ATOMIC | GFP_KERNEL);
+       gfp_mask |= __GFP_NOWARN;
+
+       stack = kmalloc(sizeof(*stack), gfp_mask);
+       if (!stack)
+               return;
+
+       stack->stack_record = stack_record;
+       stack->next = NULL;
+
+       spin_lock_irqsave(&stack_list_lock, flags);
+       stack->next = stack_list;
+       /*
+        * This pairs with the smp_load_acquire() in stack_start(),
+        * guaranteeing that stack_start() sees the updated stack_list
+        * before it starts traversing the list.
+        */
+       smp_store_release(&stack_list, stack);
+       spin_unlock_irqrestore(&stack_list_lock, flags);
+}
+
+static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
+{
+       struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
+
+       if (!stack_record)
+               return;
+
+       /*
+        * New stack_records that do not use STACK_DEPOT_FLAG_GET start
+        * with REFCOUNT_SATURATED to catch spurious increments of their
+        * refcount.
+        * Since we do not use the STACK_DEPOT_FLAG_GET API, set the
+        * refcount to 1 ourselves.
+        */
+       if (refcount_read(&stack_record->count) == REFCOUNT_SATURATED) {
+               int old = REFCOUNT_SATURATED;
+
+               if (atomic_try_cmpxchg_relaxed(&stack_record->count.refs, &old, 1))
+                       /* Add the new stack_record to our list */
+                       add_stack_record_to_list(stack_record, gfp_mask);
+       }
+       refcount_inc(&stack_record->count);
+}
+
+static void dec_stack_record_count(depot_stack_handle_t handle)
+{
+       struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
+
+       if (stack_record)
+               refcount_dec(&stack_record->count);
+}
+
 void __reset_page_owner(struct page *page, unsigned short order)
 {
        int i;
        struct page_ext *page_ext;
        depot_stack_handle_t handle;
+       depot_stack_handle_t alloc_handle;
        struct page_owner *page_owner;
        u64 free_ts_nsec = local_clock();
 
@@ -147,17 +228,29 @@ void __reset_page_owner(struct page *page, unsigned short order)
        if (unlikely(!page_ext))
                return;
 
+       page_owner = get_page_owner(page_ext);
+       alloc_handle = page_owner->handle;
+
        handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
        for (i = 0; i < (1 << order); i++) {
                __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
-               page_owner = get_page_owner(page_ext);
                page_owner->free_handle = handle;
                page_owner->free_ts_nsec = free_ts_nsec;
                page_owner->free_pid = current->pid;
                page_owner->free_tgid = current->tgid;
                page_ext = page_ext_next(page_ext);
+               page_owner = get_page_owner(page_ext);
        }
        page_ext_put(page_ext);
+       if (alloc_handle != early_handle)
+               /*
+                * early_handle is used as the handle for all pages
+                * allocated before this machinery was ready; see
+                * init_pages_in_zone().  Their refcount was never
+                * incremented, so we must not decrement it either.
+                */
+               dec_stack_record_count(alloc_handle);
 }
 
 static inline void __set_page_owner_handle(struct page_ext *page_ext,
@@ -199,6 +292,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
                return;
        __set_page_owner_handle(page_ext, handle, order, gfp_mask);
        page_ext_put(page_ext);
+       inc_stack_record_count(handle, gfp_mask);
 }
 
 void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -214,7 +308,7 @@ void __set_page_owner_migrate_reason(struct page *page, int reason)
        page_ext_put(page_ext);
 }
 
-void __split_page_owner(struct page *page, unsigned int nr)
+void __split_page_owner(struct page *page, int old_order, int new_order)
 {
        int i;
        struct page_ext *page_ext = page_ext_get(page);
@@ -223,9 +317,9 @@ void __split_page_owner(struct page *page, unsigned int nr)
        if (unlikely(!page_ext))
                return;
 
-       for (i = 0; i < nr; i++) {
+       for (i = 0; i < (1 << old_order); i++) {
                page_owner = get_page_owner(page_ext);
-               page_owner->order = 0;
+               page_owner->order = new_order;
                page_ext = page_ext_next(page_ext);
        }
        page_ext_put(page_ext);
@@ -719,8 +813,111 @@ static const struct file_operations proc_page_owner_operations = {
        .llseek         = lseek_page_owner,
 };
 
+static void *stack_start(struct seq_file *m, loff_t *ppos)
+{
+       struct stack *stack;
+
+       if (*ppos == -1UL)
+               return NULL;
+
+       if (!*ppos) {
+               /*
+                * This pairs with the smp_store_release() in
+                * add_stack_record_to_list(), so we read a consistent
+                * value of stack_list.
+                */
+               stack = smp_load_acquire(&stack_list);
+       } else {
+               stack = m->private;
+               stack = stack->next;
+       }
+
+       m->private = stack;
+
+       return stack;
+}
+
+static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
+{
+       struct stack *stack = v;
+
+       stack = stack->next;
+       *ppos = stack ? *ppos + 1 : -1UL;
+       m->private = stack;
+
+       return stack;
+}
+
+static unsigned long page_owner_stack_threshold;
+
+static int stack_print(struct seq_file *m, void *v)
+{
+       int i, stack_count;
+       struct stack *stack = v;
+       unsigned long *entries;
+       unsigned long nr_entries;
+       struct stack_record *stack_record = stack->stack_record;
+
+       if (!stack->stack_record)
+               return 0;
+
+       nr_entries = stack_record->size;
+       entries = stack_record->entries;
+       stack_count = refcount_read(&stack_record->count) - 1;
+
+       if (stack_count < 1 || stack_count < page_owner_stack_threshold)
+               return 0;
+
+       for (i = 0; i < nr_entries; i++)
+               seq_printf(m, " %pS\n", (void *)entries[i]);
+       seq_printf(m, "stack_count: %d\n\n", stack_count);
+
+       return 0;
+}
+
+static void stack_stop(struct seq_file *m, void *v)
+{
+}
+
+static const struct seq_operations page_owner_stack_op = {
+       .start  = stack_start,
+       .next   = stack_next,
+       .stop   = stack_stop,
+       .show   = stack_print
+};
+
+static int page_owner_stack_open(struct inode *inode, struct file *file)
+{
+       return seq_open_private(file, &page_owner_stack_op, 0);
+}
+
+static const struct file_operations page_owner_stack_operations = {
+       .open           = page_owner_stack_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+};
+
+static int page_owner_threshold_get(void *data, u64 *val)
+{
+       *val = READ_ONCE(page_owner_stack_threshold);
+       return 0;
+}
+
+static int page_owner_threshold_set(void *data, u64 val)
+{
+       WRITE_ONCE(page_owner_stack_threshold, val);
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(proc_page_owner_threshold, &page_owner_threshold_get,
+                       &page_owner_threshold_set, "%llu");
+
+
 static int __init pageowner_init(void)
 {
+       struct dentry *dir;
+
        if (!static_branch_unlikely(&page_owner_inited)) {
                pr_info("page_owner is disabled\n");
                return 0;
@@ -728,6 +925,11 @@ static int __init pageowner_init(void)
 
        debugfs_create_file("page_owner", 0400, NULL, NULL,
                            &proc_page_owner_operations);
+       dir = debugfs_create_dir("page_owner_stacks", NULL);
+       debugfs_create_file("show_stacks", 0400, dir, NULL,
+                           &page_owner_stack_operations);
+       debugfs_create_file("count_threshold", 0600, dir, NULL,
+                           &proc_page_owner_threshold);
 
        return 0;
 }
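
The stack_list publication above relies only on release/acquire ordering:
writers (serialized by stack_list_lock) link the new node and publish it
with smp_store_release(), while lockless readers smp_load_acquire() the head
and walk. A user-space model of the same pattern with C11 atomics (names and
data are illustrative):

    #include <stdatomic.h>
    #include <stddef.h>

    struct stack {
            int data;
            struct stack *next;
    };

    static _Atomic(struct stack *) stack_list;

    /*
     * Writer side; concurrent writers must be serialized externally, as
     * stack_list_lock does in the kernel code.
     */
    static void publish(struct stack *new)
    {
            new->next = atomic_load_explicit(&stack_list,
                                             memory_order_relaxed);
            /* pairs with the acquire load in iterate() */
            atomic_store_explicit(&stack_list, new, memory_order_release);
    }

    /* reader side; safe without the writer lock */
    static int iterate(void)
    {
            int sum = 0;

            for (struct stack *s = atomic_load_explicit(&stack_list,
                                            memory_order_acquire);
                 s; s = s->next)
                    sum += s->data;
            return sum;
    }

    int main(void)
    {
            static struct stack a = { .data = 1 }, b = { .data = 2 };

            publish(&a);
            publish(&b);
            return iterate() == 3 ? 0 : 1;
    }
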
index 03c1bdae4a4368ecb0f65f3e9dc9f3087e619a81..106e1d66e9f9ee0612c9005f9fdb1faa3053610f 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/pagewalk.h>
+#include <linux/debugfs.h>
 #include <linux/ptdump.h>
 #include <linux/kasan.h>
 
@@ -163,3 +164,24 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
        /* Flush out the last page */
        st->note_page(st, 0, -1, 0);
 }
+
+static int check_wx_show(struct seq_file *m, void *v)
+{
+       if (ptdump_check_wx())
+               seq_puts(m, "SUCCESS\n");
+       else
+               seq_puts(m, "FAILED\n");
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(check_wx);
+
+static int ptdump_debugfs_init(void)
+{
+       debugfs_create_file("check_wx_pages", 0400, NULL, NULL, &check_wx_fops);
+
+       return 0;
+}
+
+device_initcall(ptdump_debugfs_init);
index 2648ec4f04947b2e837377da68d7b8ae1fd48f7a..130c0e7df99f585a5cd8343ad26aa6af7f50cf51 100644 (file)
@@ -500,10 +500,8 @@ void page_cache_ra_order(struct readahead_control *ractl,
 
        if (new_order < MAX_PAGECACHE_ORDER) {
                new_order += 2;
-               if (new_order > MAX_PAGECACHE_ORDER)
-                       new_order = MAX_PAGECACHE_ORDER;
-               while ((1 << new_order) > ra->size)
-                       new_order--;
+               new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);
+               new_order = min_t(unsigned int, new_order, ilog2(ra->size));
        }
 
        filemap_invalidate_lock_shared(mapping);
@@ -516,9 +514,6 @@ void page_cache_ra_order(struct readahead_control *ractl,
                /* Don't allocate pages past EOF */
                while (index + (1UL << order) - 1 > limit)
                        order--;
-               /* THP machinery does not support order-1 */
-               if (order == 1)
-                       order = 0;
                err = ra_alloc_folio(ractl, index, mark, order, gfp);
                if (err)
                        break;
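
The min_t()/ilog2() form above is equivalent to the removed decrement loop
whenever ra->size >= 1: 1 << order <= size exactly when order <= ilog2(size).
A quick user-space check, with ilog2() modeled by a compiler builtin:

    #include <assert.h>

    /* floor(log2(x)); assumes x >= 1 */
    static unsigned int ilog2_u(unsigned long x)
    {
            return (unsigned int)(8 * sizeof(long) - 1 - __builtin_clzl(x));
    }

    int main(void)
    {
            for (unsigned long size = 1; size <= 512; size++) {
                    for (unsigned int order = 0; order <= 9; order++) {
                            unsigned int old_way = order;
                            unsigned int new_way;

                            while ((1UL << old_way) > size)
                                    old_way--;
                            new_way = order < ilog2_u(size) ?
                                      order : ilog2_u(size);
                            assert(old_way == new_way);
                    }
            }
            return 0;
    }
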
index f5d43edad529a76858a9aab5536a755f0a50ec67..3746a553101832164b8171c188fd9f9d6af1b6af 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1780,7 +1780,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                                set_huge_pte_at(mm, address, pvmw.pte, pteval,
                                                hsz);
                        } else {
-                               dec_mm_counter(mm, mm_counter(&folio->page));
+                               dec_mm_counter(mm, mm_counter(folio));
                                set_pte_at(mm, address, pvmw.pte, pteval);
                        }
 
@@ -1795,7 +1795,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                         * migration) will not expect userfaults on already
                         * copied pages.
                         */
-                       dec_mm_counter(mm, mm_counter(&folio->page));
+                       dec_mm_counter(mm, mm_counter(folio));
                } else if (folio_test_anon(folio)) {
                        swp_entry_t entry = page_swap_entry(subpage);
                        pte_t swp_pte;
@@ -1903,7 +1903,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
                         *
                         * See Documentation/mm/mmu_notifier.rst
                         */
-                       dec_mm_counter(mm, mm_counter_file(&folio->page));
+                       dec_mm_counter(mm, mm_counter_file(folio));
                }
 discard:
                if (unlikely(folio_test_hugetlb(folio)))
@@ -2169,7 +2169,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
                                swp_pte = pte_swp_mkuffd_wp(swp_pte);
                        set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
                        trace_set_migration_pte(pvmw.address, pte_val(swp_pte),
-                                               compound_order(&folio->page));
+                                               folio_order(folio));
                        /*
                         * No need to invalidate here it will synchronize on
                         * against the special swap migration pte.
@@ -2181,7 +2181,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
                                set_huge_pte_at(mm, address, pvmw.pte, pteval,
                                                hsz);
                        } else {
-                               dec_mm_counter(mm, mm_counter(&folio->page));
+                               dec_mm_counter(mm, mm_counter(folio));
                                set_pte_at(mm, address, pvmw.pte, pteval);
                        }
 
@@ -2196,7 +2196,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
                         * migration) will not expect userfaults on already
                         * copied pages.
                         */
-                       dec_mm_counter(mm, mm_counter(&folio->page));
+                       dec_mm_counter(mm, mm_counter(folio));
                } else {
                        swp_entry_t entry;
                        pte_t swp_pte;
@@ -2261,7 +2261,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
                        else
                                set_pte_at(mm, address, pvmw.pte, swp_pte);
                        trace_set_migration_pte(address, pte_val(swp_pte),
-                                               compound_order(&folio->page));
+                                               folio_order(folio));
                        /*
                         * No need to invalidate here: it will synchronize
                         * against the special swap migration pte.
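
All of the rmap.c hunks above are the same mechanical conversion: the counter
type is derived from the folio itself instead of its head page. A minimal
sketch of the pattern (helper name invented):

	static void unmap_dec_counter(struct mm_struct *mm, struct folio *folio)
	{
		/*
		 * mm_counter(folio) resolves to MM_ANONPAGES for anon folios
		 * and MM_SHMEMPAGES or MM_FILEPAGES otherwise, so callers no
		 * longer reach into &folio->page.
		 */
		dec_mm_counter(mm, mm_counter(folio));
	}
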
index a7603db21bcadbc77637ec9193cb50c85b1b290b..0aad0d9a621b80e7a3f758125806bfb64e984c12 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -4298,6 +4298,24 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
        mpol_put(mpol);
        if (sbinfo->noswap)
                seq_printf(seq, ",noswap");
+#ifdef CONFIG_TMPFS_QUOTA
+       if (sb_has_quota_active(root->d_sb, USRQUOTA))
+               seq_printf(seq, ",usrquota");
+       if (sb_has_quota_active(root->d_sb, GRPQUOTA))
+               seq_printf(seq, ",grpquota");
+       if (sbinfo->qlimits.usrquota_bhardlimit)
+               seq_printf(seq, ",usrquota_block_hardlimit=%lld",
+                          sbinfo->qlimits.usrquota_bhardlimit);
+       if (sbinfo->qlimits.grpquota_bhardlimit)
+               seq_printf(seq, ",grpquota_block_hardlimit=%lld",
+                          sbinfo->qlimits.grpquota_bhardlimit);
+       if (sbinfo->qlimits.usrquota_ihardlimit)
+               seq_printf(seq, ",usrquota_inode_hardlimit=%lld",
+                          sbinfo->qlimits.usrquota_ihardlimit);
+       if (sbinfo->qlimits.grpquota_ihardlimit)
+               seq_printf(seq, ",grpquota_inode_hardlimit=%lld",
+                          sbinfo->qlimits.grpquota_ihardlimit);
+#endif
        return 0;
 }
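
For context, a hedged userspace sketch of how these options could be exercised
so that shmem_show_options() echoes them back via /proc/mounts; the mount point
and limit value are invented and CONFIG_TMPFS_QUOTA is assumed:

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* Invented path and limit; tmpfs parses the limit value. */
		if (mount("tmpfs", "/mnt/tq", "tmpfs", 0,
			  "usrquota,usrquota_block_hardlimit=4194304"))
			perror("mount");
		/* /proc/mounts should now echo the quota options back. */
		return 0;
	}
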
 
index 23af762148cacf386bc051189ab1a758518921aa..f5234672f03ceab3b4c017f3b1acfc53cadb2393 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -655,7 +655,7 @@ static struct kmem_cache *__init create_kmalloc_cache(const char *name,
 
 struct kmem_cache *
 kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
-{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
+{ /* initialization for https://llvm.org/pr42570 */ };
 EXPORT_SYMBOL(kmalloc_caches);
 
 #ifdef CONFIG_RANDOM_KMALLOC_CACHES
index 338cf946dee8de3a9cc1ea4f335805138b76bfbe..aed0951b87fa04d8efabb85cff28cca5aaa9976a 100644 (file)
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -908,7 +908,8 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
         * Poison uninitialized struct pages in order to catch invalid flags
         * combinations.
         */
-       page_init_poison(memmap, sizeof(struct page) * nr_pages);
+       if (!altmap || !altmap->inaccessible)
+               page_init_poison(memmap, sizeof(struct page) * nr_pages);
 
        ms = __nr_to_section(section_nr);
        set_section_nid(section_nr, nid);
index cd8f0150ba3aa8cde8828d2760f34516a605fb1d..500a09a48dfd3afe33f06722305532d325e43727 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -74,22 +74,21 @@ static DEFINE_PER_CPU(struct cpu_fbatches, cpu_fbatches) = {
        .lock = INIT_LOCAL_LOCK(lock),
 };
 
-/*
- * This path almost never happens for VM activity - pages are normally freed
- * in batches.  But it gets used by networking - and for compound pages.
- */
-static void __page_cache_release(struct folio *folio)
+static void __page_cache_release(struct folio *folio, struct lruvec **lruvecp,
+               unsigned long *flagsp)
 {
        if (folio_test_lru(folio)) {
-               struct lruvec *lruvec;
-               unsigned long flags;
-
-               lruvec = folio_lruvec_lock_irqsave(folio, &flags);
-               lruvec_del_folio(lruvec, folio);
+               folio_lruvec_relock_irqsave(folio, lruvecp, flagsp);
+               lruvec_del_folio(*lruvecp, folio);
                __folio_clear_lru_flags(folio);
-               unlock_page_lruvec_irqrestore(lruvec, flags);
        }
-       /* See comment on folio_test_mlocked in release_pages() */
+
+       /*
+        * In rare cases, when truncation or holepunching raced with
+        * munlock after VM_LOCKED was cleared, Mlocked may still be
+        * found set here.  This does not indicate a problem, unless
+        * "unevictable_pgs_cleared" appears worryingly large.
+        */
        if (unlikely(folio_test_mlocked(folio))) {
                long nr_pages = folio_nr_pages(folio);
 
@@ -99,9 +98,23 @@ static void __page_cache_release(struct folio *folio)
        }
 }
 
+/*
+ * This path almost never happens for VM activity - pages are normally freed
+ * in batches.  But it gets used by networking - and for compound pages.
+ */
+static void page_cache_release(struct folio *folio)
+{
+       struct lruvec *lruvec = NULL;
+       unsigned long flags;
+
+       __page_cache_release(folio, &lruvec, &flags);
+       if (lruvec)
+               unlock_page_lruvec_irqrestore(lruvec, flags);
+}
+
 static void __folio_put_small(struct folio *folio)
 {
-       __page_cache_release(folio);
+       page_cache_release(folio);
        mem_cgroup_uncharge(folio);
        free_unref_page(&folio->page, 0);
 }
@@ -115,7 +128,7 @@ static void __folio_put_large(struct folio *folio)
         * be called for hugetlb (it has a separate hugetlb_cgroup.)
         */
        if (!folio_test_hugetlb(folio))
-               __page_cache_release(folio);
+               page_cache_release(folio);
        destroy_large_folio(folio);
 }
 
@@ -138,22 +151,25 @@ EXPORT_SYMBOL(__folio_put);
  */
 void put_pages_list(struct list_head *pages)
 {
+       struct folio_batch fbatch;
        struct folio *folio, *next;
 
+       folio_batch_init(&fbatch);
        list_for_each_entry_safe(folio, next, pages, lru) {
-               if (!folio_put_testzero(folio)) {
-                       list_del(&folio->lru);
+               if (!folio_put_testzero(folio))
                        continue;
-               }
                if (folio_test_large(folio)) {
-                       list_del(&folio->lru);
                        __folio_put_large(folio);
                        continue;
                }
                /* LRU flag must be clear because it's passed using the lru */
+               if (folio_batch_add(&fbatch, folio) > 0)
+                       continue;
+               free_unref_folios(&fbatch);
        }
 
-       free_unref_page_list(pages);
+       if (fbatch.nr)
+               free_unref_folios(&fbatch);
        INIT_LIST_HEAD(pages);
 }
 EXPORT_SYMBOL(put_pages_list);
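
put_pages_list() above now follows the series' batch-and-flush idiom:
folio_batch_add() returns the slots still available, so a return of 0 means
the batch is full and must be flushed. A condensed sketch with an invented
name (the large-folio special case omitted):

	static void free_folio_list_sketch(struct list_head *list)
	{
		struct folio_batch fbatch;
		struct folio *folio, *next;

		folio_batch_init(&fbatch);
		list_for_each_entry_safe(folio, next, list, lru) {
			if (!folio_put_testzero(folio))
				continue;	/* someone else holds a ref */
			if (folio_batch_add(&fbatch, folio) == 0)
				free_unref_folios(&fbatch);	/* full: flush */
		}
		if (folio_batch_count(&fbatch))
			free_unref_folios(&fbatch);		/* flush the tail */
	}
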
@@ -175,7 +191,7 @@ static void lru_add_fn(struct lruvec *lruvec, struct folio *folio)
         * while the LRU lock is held.
         *
         * (That is not true of __page_cache_release(), and not necessarily
-        * true of release_pages(): but those only clear the mlocked flag after
+        * true of folios_put(): but those only clear the mlocked flag after
         * folio_put_testzero() has excluded any other users of the folio.)
         */
        if (folio_evictable(folio)) {
@@ -213,7 +229,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
                if (move_fn != lru_add_fn && !folio_test_clear_lru(folio))
                        continue;
 
-               lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags);
+               folio_lruvec_relock_irqsave(folio, &lruvec, &flags);
                move_fn(lruvec, folio);
 
                folio_set_lru(folio);
@@ -221,8 +237,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
 
        if (lruvec)
                unlock_page_lruvec_irqrestore(lruvec, flags);
-       folios_put(fbatch->folios, folio_batch_count(fbatch));
-       folio_batch_reinit(fbatch);
+       folios_put(fbatch);
 }
 
 static void folio_batch_add_and_move(struct folio_batch *fbatch,
@@ -946,41 +961,29 @@ void lru_cache_disable(void)
 }
 
 /**
- * release_pages - batched put_page()
- * @arg: array of pages to release
- * @nr: number of pages
+ * folios_put_refs - Reduce the reference count on a batch of folios.
+ * @folios: The folios.
+ * @refs: The number of refs to subtract from each folio.
  *
- * Decrement the reference count on all the pages in @arg.  If it
- * fell to zero, remove the page from the LRU and free it.
+ * Like folio_put(), but for a batch of folios.  This is more efficient
+ * than writing the loop yourself as it will optimise the locks which need
+ * to be taken if the folios are freed.  The folios batch is returned
+ * empty and ready to be reused for another batch; there is no need
+ * to reinitialise it.  If @refs is NULL, we subtract one from each
+ * folio refcount.
  *
- * Note that the argument can be an array of pages, encoded pages,
- * or folio pointers. We ignore any encoded bits, and turn any of
- * them into just a folio that gets free'd.
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context.  May be called while holding a spinlock.
  */
-void release_pages(release_pages_arg arg, int nr)
+void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
 {
-       int i;
-       struct encoded_page **encoded = arg.encoded_pages;
-       LIST_HEAD(pages_to_free);
+       int i, j;
        struct lruvec *lruvec = NULL;
        unsigned long flags = 0;
-       unsigned int lock_batch;
 
-       for (i = 0; i < nr; i++) {
-               struct folio *folio;
-
-               /* Turn any of the argument types into a folio */
-               folio = page_folio(encoded_page_ptr(encoded[i]));
-
-               /*
-                * Make sure the IRQ-safe lock-holding time does not get
-                * excessive with a continuous string of pages from the
-                * same lruvec. The lock is held only if lruvec != NULL.
-                */
-               if (lruvec && ++lock_batch == SWAP_CLUSTER_MAX) {
-                       unlock_page_lruvec_irqrestore(lruvec, flags);
-                       lruvec = NULL;
-               }
+       for (i = 0, j = 0; i < folios->nr; i++) {
+               struct folio *folio = folios->folios[i];
+               unsigned int nr_refs = refs ? refs[i] : 1;
 
                if (is_huge_zero_page(&folio->page))
                        continue;
@@ -990,56 +993,85 @@ void release_pages(release_pages_arg arg, int nr)
                                unlock_page_lruvec_irqrestore(lruvec, flags);
                                lruvec = NULL;
                        }
-                       if (put_devmap_managed_page(&folio->page))
+                       if (put_devmap_managed_page_refs(&folio->page, nr_refs))
                                continue;
-                       if (folio_put_testzero(folio))
+                       if (folio_ref_sub_and_test(folio, nr_refs))
                                free_zone_device_page(&folio->page);
                        continue;
                }
 
-               if (!folio_put_testzero(folio))
+               if (!folio_ref_sub_and_test(folio, nr_refs))
                        continue;
 
-               if (folio_test_large(folio)) {
+               /* hugetlb has its own memcg */
+               if (folio_test_hugetlb(folio)) {
                        if (lruvec) {
                                unlock_page_lruvec_irqrestore(lruvec, flags);
                                lruvec = NULL;
                        }
-                       __folio_put_large(folio);
+                       free_huge_folio(folio);
                        continue;
                }
+               if (folio_test_large(folio) &&
+                   folio_test_large_rmappable(folio))
+                       folio_undo_large_rmappable(folio);
 
-               if (folio_test_lru(folio)) {
-                       struct lruvec *prev_lruvec = lruvec;
+               __page_cache_release(folio, &lruvec, &flags);
 
-                       lruvec = folio_lruvec_relock_irqsave(folio, lruvec,
-                                                                       &flags);
-                       if (prev_lruvec != lruvec)
-                               lock_batch = 0;
+               if (j != i)
+                       folios->folios[j] = folio;
+               j++;
+       }
+       if (lruvec)
+               unlock_page_lruvec_irqrestore(lruvec, flags);
+       if (!j) {
+               folio_batch_reinit(folios);
+               return;
+       }
 
-                       lruvec_del_folio(lruvec, folio);
-                       __folio_clear_lru_flags(folio);
-               }
+       folios->nr = j;
+       mem_cgroup_uncharge_folios(folios);
+       free_unref_folios(folios);
+}
+EXPORT_SYMBOL(folios_put_refs);
 
-               /*
-                * In rare cases, when truncation or holepunching raced with
-                * munlock after VM_LOCKED was cleared, Mlocked may still be
-                * found set here.  This does not indicate a problem, unless
-                * "unevictable_pgs_cleared" appears worryingly large.
-                */
-               if (unlikely(folio_test_mlocked(folio))) {
-                       __folio_clear_mlocked(folio);
-                       zone_stat_sub_folio(folio, NR_MLOCK);
-                       count_vm_event(UNEVICTABLE_PGCLEARED);
-               }
+/**
+ * release_pages - batched put_page()
+ * @arg: array of pages to release
+ * @nr: number of pages
+ *
+ * Decrement the reference count on all the pages in @arg.  If it
+ * fell to zero, remove the page from the LRU and free it.
+ *
+ * Note that the argument can be an array of pages, encoded pages,
+ * or folio pointers. We ignore any encoded bits, and turn any of
+ * them into just a folio that gets free'd.
+ */
+void release_pages(release_pages_arg arg, int nr)
+{
+       struct folio_batch fbatch;
+       int refs[PAGEVEC_SIZE];
+       struct encoded_page **encoded = arg.encoded_pages;
+       int i;
+
+       folio_batch_init(&fbatch);
+       for (i = 0; i < nr; i++) {
+               /* Turn any of the argument types into a folio */
+               struct folio *folio = page_folio(encoded_page_ptr(encoded[i]));
+
+               /* Is our next entry actually "nr_pages" -> "nr_refs" ? */
+               refs[fbatch.nr] = 1;
+               if (unlikely(encoded_page_flags(encoded[i]) &
+                            ENCODED_PAGE_BIT_NR_PAGES_NEXT))
+                       refs[fbatch.nr] = encoded_nr_pages(encoded[++i]);
 
-               list_add(&folio->lru, &pages_to_free);
+               if (folio_batch_add(&fbatch, folio) > 0)
+                       continue;
+               folios_put_refs(&fbatch, refs);
        }
-       if (lruvec)
-               unlock_page_lruvec_irqrestore(lruvec, flags);
 
-       mem_cgroup_uncharge_list(&pages_to_free);
-       free_unref_page_list(&pages_to_free);
+       if (fbatch.nr)
+               folios_put_refs(&fbatch, refs);
 }
 EXPORT_SYMBOL(release_pages);
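
A usage sketch for the new folios_put_refs() (caller invented): passing NULL
for @refs drops exactly one reference per folio, which is what the folios_put()
wrapper used elsewhere in this diff relies on, and the batch comes back
reinitialised, ready for reuse:

	static void drop_gathered(struct folio **folios, unsigned int nr)
	{
		struct folio_batch fbatch;
		unsigned int i;

		folio_batch_init(&fbatch);
		for (i = 0; i < nr; i++) {
			if (folio_batch_add(&fbatch, folios[i]) == 0)
				folios_put_refs(&fbatch, NULL);	/* full: flush */
		}
		if (folio_batch_count(&fbatch))
			folios_put_refs(&fbatch, NULL);
	}
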
 
@@ -1059,8 +1091,7 @@ void __folio_batch_release(struct folio_batch *fbatch)
                lru_add_drain();
                fbatch->percpu_pvec_drained = true;
        }
-       release_pages(fbatch->folios, folio_batch_count(fbatch));
-       folio_batch_reinit(fbatch);
+       folios_put(fbatch);
 }
 EXPORT_SYMBOL(__folio_batch_release);
 
index 0bec1f705f8e09313e1fcdcf87568cd5bf68da38..90973ce7881db2a65c38d21b7b2fad28e60ade36 100644 (file)
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -273,6 +273,9 @@ void free_swap_slot(swp_entry_t entry)
 {
        struct swap_slots_cache *cache;
 
+       /* Large folio swap slot is not covered. */
+       zswap_invalidate(entry);
+
        cache = raw_cpu_ptr(&swp_slots);
        if (likely(use_swap_slot_cache && cache->slots_ret)) {
                spin_lock_irq(&cache->free_lock);
index 7255c01a1e4e16d758186019f904e70a7890a5cc..bfc7e8c58a6d34b948d73916ccd9bf0f1be14e21 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -15,6 +15,7 @@
 #include <linux/swapops.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
+#include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 #include <linux/migrate.h>
@@ -282,10 +283,8 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin,
  * folio_free_swap() _with_ the lock.
  *                                     - Marcelo
  */
-void free_swap_cache(struct page *page)
+void free_swap_cache(struct folio *folio)
 {
-       struct folio *folio = page_folio(page);
-
        if (folio_test_swapcache(folio) && !folio_mapped(folio) &&
            folio_trylock(folio)) {
                folio_free_swap(folio);
@@ -299,9 +298,11 @@ void free_swap_cache(struct page *page)
  */
 void free_page_and_swap_cache(struct page *page)
 {
-       free_swap_cache(page);
+       struct folio *folio = page_folio(page);
+
+       free_swap_cache(folio);
        if (!is_huge_zero_page(page))
-               put_page(page);
+               folio_put(folio);
 }
 
 /*
@@ -310,10 +311,25 @@ void free_page_and_swap_cache(struct page *page)
  */
 void free_pages_and_swap_cache(struct encoded_page **pages, int nr)
 {
+       struct folio_batch folios;
+       unsigned int refs[PAGEVEC_SIZE];
+
        lru_add_drain();
-       for (int i = 0; i < nr; i++)
-               free_swap_cache(encoded_page_ptr(pages[i]));
-       release_pages(pages, nr);
+       folio_batch_init(&folios);
+       for (int i = 0; i < nr; i++) {
+               struct folio *folio = page_folio(encoded_page_ptr(pages[i]));
+
+               free_swap_cache(folio);
+               refs[folios.nr] = 1;
+               if (unlikely(encoded_page_flags(pages[i]) &
+                            ENCODED_PAGE_BIT_NR_PAGES_NEXT))
+                       refs[folios.nr] = encoded_nr_pages(pages[++i]);
+
+               if (folio_batch_add(&folios, folio) == 0)
+                       folios_put_refs(&folios, refs);
+       }
+       if (folios.nr)
+               folios_put_refs(&folios, refs);
 }
 
 static inline bool swap_use_vma_readahead(void)
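
release_pages() and free_pages_and_swap_cache() decode the same mmu_gather
convention: an entry flagged ENCODED_PAGE_BIT_NR_PAGES_NEXT is followed by an
entry carrying a reference count rather than a page pointer, and refs[] is
indexed with the batch's current nr, i.e. the slot the folio is about to
occupy. The shared decode step, isolated as a sketch:

	static void decode_refs_sketch(struct encoded_page **pages, int nr,
				       struct folio_batch *fbatch,
				       unsigned int *refs)
	{
		for (int i = 0; i < nr; i++) {
			struct folio *folio = page_folio(encoded_page_ptr(pages[i]));

			/* fill the slot before folio_batch_add() bumps nr */
			refs[fbatch->nr] = 1;
			if (unlikely(encoded_page_flags(pages[i]) &
				     ENCODED_PAGE_BIT_NR_PAGES_NEXT))
				refs[fbatch->nr] = encoded_nr_pages(pages[++i]);

			if (folio_batch_add(fbatch, folio) == 0)
				folios_put_refs(fbatch, refs);	/* full: flush */
		}
	}
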
index 573843d9cc91ca8061ab45fbabbd3fe319bedc78..4919423cce76a3a44a2f26e6f31e2fd7c3577e0a 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -737,8 +737,6 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
                if (was_full && (si->flags & SWP_WRITEOK))
                        add_to_avail_list(si);
        }
-       atomic_long_add(nr_entries, &nr_swap_pages);
-       WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
        if (si->flags & SWP_BLKDEV)
                swap_slot_free_notify =
                        si->bdev->bd_disk->fops->swap_slot_free_notify;
@@ -746,12 +744,19 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
                swap_slot_free_notify = NULL;
        while (offset <= end) {
                arch_swap_invalidate_page(si->type, offset);
-               zswap_invalidate(si->type, offset);
                if (swap_slot_free_notify)
                        swap_slot_free_notify(si->bdev, offset);
                offset++;
        }
        clear_shadow_from_swap_cache(si->type, begin, end);
+
+       /*
+        * Make sure that try_to_unuse() observes si->inuse_pages reaching 0
+        * only after the above cleanups are done.
+        */
+       smp_wmb();
+       atomic_long_add(nr_entries, &nr_swap_pages);
+       WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr_entries);
 }
 
 static void set_cluster_next(struct swap_info_struct *si, unsigned long next)
@@ -1227,6 +1232,11 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
  * with get_swap_device() and put_swap_device(), unless the swap
  * functions call get/put_swap_device() by themselves.
  *
+ * Note that when only holding the PTL, swapoff might succeed immediately
+ * after freeing a swap entry. Therefore, immediately after
+ * __swap_entry_free(), the swap info might become stale and should not
+ * be touched without a prior get_swap_device().
+ *
  * Check whether swap entry is valid in the swap device.  If so,
  * return pointer to swap_info_struct, and keep the swap entry valid
  * via preventing the swap device from being swapoff, until
@@ -1604,13 +1614,19 @@ int free_swap_and_cache(swp_entry_t entry)
        if (non_swap_entry(entry))
                return 1;
 
-       p = _swap_info_get(entry);
+       p = get_swap_device(entry);
        if (p) {
+               if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) {
+                       put_swap_device(p);
+                       return 0;
+               }
+
                count = __swap_entry_free(p, entry);
                if (count == SWAP_HAS_CACHE &&
                    !swap_page_trans_huge_swapped(p, entry))
                        __try_to_reclaim_swap(p, swp_offset(entry),
                                              TTRS_UNMAPPED | TTRS_FULL);
+               put_swap_device(p);
        }
        return p != NULL;
 }
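
The switch from _swap_info_get() to get_swap_device() pins the device across
the free, per the comment added above; the guard pattern, sketched with the
body elided:

	static void free_entry_pinned(swp_entry_t entry)
	{
		struct swap_info_struct *si;

		si = get_swap_device(entry);	/* holds off swapoff */
		if (!si)
			return;
		/* ... __swap_entry_free() and optional reclaim, as above ... */
		put_swap_device(si);		/* si may go stale after this */
	}
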
@@ -2049,7 +2065,7 @@ static int try_to_unuse(unsigned int type)
        unsigned int i;
 
        if (!READ_ONCE(si->inuse_pages))
-               return 0;
+               goto success;
 
 retry:
        retval = shmem_unuse(type);
@@ -2130,6 +2146,12 @@ retry:
                return -EINTR;
        }
 
+success:
+       /*
+        * Make sure that further cleanups after try_to_unuse() returns happen
+        * after swap_range_free() reduces si->inuse_pages to 0.
+        */
+       smp_mb();
        return 0;
 }
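
The smp_mb() on the success path pairs with the smp_wmb() added to
swap_range_free() above; condensed, the protocol looks like this (sketch,
names taken from the two hunks):

	/* Writer side, as in swap_range_free(): */
	static void range_free_sketch(struct swap_info_struct *si, int nr)
	{
		/* ... arch/shadow cleanups for the freed range ... */
		smp_wmb();	/* order cleanups before the counter update */
		WRITE_ONCE(si->inuse_pages, si->inuse_pages - nr);
	}

	/* Reader side, as in try_to_unuse(): */
	static bool all_unused_sketch(struct swap_info_struct *si)
	{
		if (READ_ONCE(si->inuse_pages))
			return false;
		smp_mb();	/* pairs with the smp_wmb() above */
		return true;	/* later cleanups see the range freed */
	}
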
 
@@ -2348,8 +2370,6 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
                                unsigned char *swap_map,
                                struct swap_cluster_info *cluster_info)
 {
-       zswap_swapon(p->type);
-
        spin_lock(&swap_lock);
        spin_lock(&p->lock);
        setup_swap_info(p, prio, swap_map, cluster_info);
@@ -3167,6 +3187,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        if (error)
                goto bad_swap_unlock_inode;
 
+       error = zswap_swapon(p->type, maxpages);
+       if (error)
+               goto free_swap_address_space;
+
        /*
         * Flush any pending IO and dirty mappings before we start using this
         * swap device.
@@ -3175,7 +3199,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        error = inode_drain_writes(inode);
        if (error) {
                inode->i_flags &= ~S_SWAPFILE;
-               goto free_swap_address_space;
+               goto free_swap_zswap;
        }
 
        mutex_lock(&swapon_mutex);
@@ -3199,6 +3223,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
        error = 0;
        goto out;
+free_swap_zswap:
+       zswap_swapoff(p->type);
 free_swap_address_space:
        exit_swap_address_space(p->type);
 bad_swap_unlock_inode:
@@ -3320,7 +3346,8 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
        } else
                err = -ENOENT;                  /* unused swap entry */
 
-       WRITE_ONCE(p->swap_map[offset], count | has_cache);
+       if (!err)
+               WRITE_ONCE(p->swap_map[offset], count | has_cache);
 
 unlock_out:
        unlock_cluster_or_swap_info(p, ci);
index 313f1c42768a621d59385a0673e0cdf85d5c1720..712160cd41ecac1a875ad4afb5b565dddc4bc2f2 100644 (file)
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
 #include "internal.h"
 
 static __always_inline
-struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
-                                   unsigned long dst_start,
-                                   unsigned long len)
+bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end)
 {
-       /*
-        * Make sure that the dst range is both valid and fully within a
-        * single existing vma.
-        */
-       struct vm_area_struct *dst_vma;
-
-       dst_vma = find_vma(dst_mm, dst_start);
-       if (!range_in_vma(dst_vma, dst_start, dst_start + len))
-               return NULL;
+       /* Make sure that the dst range is fully within dst_vma. */
+       if (dst_end > dst_vma->vm_end)
+               return false;
 
        /*
         * Check the vma is registered in uffd, this is required to
@@ -40,11 +32,122 @@ struct vm_area_struct *find_dst_vma(struct mm_struct *dst_mm,
         * time.
         */
        if (!dst_vma->vm_userfaultfd_ctx.ctx)
-               return NULL;
+               return false;
+
+       return true;
+}
+
+static __always_inline
+struct vm_area_struct *find_vma_and_prepare_anon(struct mm_struct *mm,
+                                                unsigned long addr)
+{
+       struct vm_area_struct *vma;
+
+       mmap_assert_locked(mm);
+       vma = vma_lookup(mm, addr);
+       if (!vma)
+               vma = ERR_PTR(-ENOENT);
+       else if (!(vma->vm_flags & VM_SHARED) &&
+                unlikely(anon_vma_prepare(vma)))
+               vma = ERR_PTR(-ENOMEM);
+
+       return vma;
+}
+
+#ifdef CONFIG_PER_VMA_LOCK
+/*
+ * lock_vma() - Lookup and lock vma corresponding to @address.
+ * @mm: mm to search vma in.
+ * @address: address that the vma should contain.
+ *
+ * Should be called without holding mmap_lock. vma should be unlocked after use
+ * with unlock_vma().
+ *
+ * Return: A locked vma containing @address, -ENOENT if no vma is found, or
+ * -ENOMEM if anon_vma couldn't be allocated.
+ */
+static struct vm_area_struct *lock_vma(struct mm_struct *mm,
+                                      unsigned long address)
+{
+       struct vm_area_struct *vma;
 
+       vma = lock_vma_under_rcu(mm, address);
+       if (vma) {
+               /*
+                * lock_vma_under_rcu() only checks anon_vma for private
+                * anonymous mappings. But we need to ensure it is assigned in
+                * private file-backed vmas as well.
+                */
+               if (!(vma->vm_flags & VM_SHARED) && unlikely(!vma->anon_vma))
+                       vma_end_read(vma);
+               else
+                       return vma;
+       }
+
+       mmap_read_lock(mm);
+       vma = find_vma_and_prepare_anon(mm, address);
+       if (!IS_ERR(vma)) {
+               /*
+                * We cannot use vma_start_read() as it may fail due to a
+                * false-locked result (see comment in vma_start_read()). We
+                * can avoid that by directly locking vm_lock under
+                * mmap_lock, which guarantees that nobody can lock the
+                * vma for write (vma_start_write()) under us.
+                */
+               down_read(&vma->vm_lock->lock);
+       }
+
+       mmap_read_unlock(mm);
+       return vma;
+}
+
+static struct vm_area_struct *uffd_mfill_lock(struct mm_struct *dst_mm,
+                                             unsigned long dst_start,
+                                             unsigned long len)
+{
+       struct vm_area_struct *dst_vma;
+
+       dst_vma = lock_vma(dst_mm, dst_start);
+       if (IS_ERR(dst_vma) || validate_dst_vma(dst_vma, dst_start + len))
+               return dst_vma;
+
+       vma_end_read(dst_vma);
+       return ERR_PTR(-ENOENT);
+}
+
+static void uffd_mfill_unlock(struct vm_area_struct *vma)
+{
+       vma_end_read(vma);
+}
+
+#else
+
+static struct vm_area_struct *uffd_mfill_lock(struct mm_struct *dst_mm,
+                                             unsigned long dst_start,
+                                             unsigned long len)
+{
+       struct vm_area_struct *dst_vma;
+
+       mmap_read_lock(dst_mm);
+       dst_vma = find_vma_and_prepare_anon(dst_mm, dst_start);
+       if (IS_ERR(dst_vma))
+               goto out_unlock;
+
+       if (validate_dst_vma(dst_vma, dst_start + len))
+               return dst_vma;
+
+       dst_vma = ERR_PTR(-ENOENT);
+out_unlock:
+       mmap_read_unlock(dst_mm);
        return dst_vma;
 }
 
+static void uffd_mfill_unlock(struct vm_area_struct *vma)
+{
+       mmap_read_unlock(vma->vm_mm);
+}
+#endif
+
 /* Check if dst_addr is outside of file's size. Must be called with ptl held. */
 static bool mfill_file_over_size(struct vm_area_struct *dst_vma,
                                 unsigned long dst_addr)
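
Whichever configuration is built, callers now see a single locking shape; a
minimal caller sketch (function name invented):

	static long mfill_caller_sketch(struct mm_struct *mm,
					unsigned long start, unsigned long len)
	{
		struct vm_area_struct *dst_vma;

		dst_vma = uffd_mfill_lock(mm, start, len);
		if (IS_ERR(dst_vma))
			return PTR_ERR(dst_vma);	/* -ENOENT or -ENOMEM */
		/* ... install pages into dst_vma ... */
		uffd_mfill_unlock(dst_vma);
		return 0;
	}
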
@@ -124,7 +227,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
         * Must happen after rmap, as mm_counter() checks mapping (via
         * PageAnon()), which is set by __page_set_anon_rmap().
         */
-       inc_mm_counter(dst_mm, mm_counter(page));
+       inc_mm_counter(dst_mm, mm_counter(folio));
 
        set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 
@@ -350,18 +453,18 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
 #ifdef CONFIG_HUGETLB_PAGE
 /*
  * mfill_atomic processing for HUGETLB vmas.  Note that this routine is
- * called with mmap_lock held, it will release mmap_lock before returning.
+ * called with either vma-lock or mmap_lock held, it will release the lock
+ * before returning.
  */
 static __always_inline ssize_t mfill_atomic_hugetlb(
+                                             struct userfaultfd_ctx *ctx,
                                              struct vm_area_struct *dst_vma,
                                              unsigned long dst_start,
                                              unsigned long src_start,
                                              unsigned long len,
-                                             atomic_t *mmap_changing,
                                              uffd_flags_t flags)
 {
        struct mm_struct *dst_mm = dst_vma->vm_mm;
-       int vm_shared = dst_vma->vm_flags & VM_SHARED;
        ssize_t err;
        pte_t *dst_pte;
        unsigned long src_addr, dst_addr;
@@ -379,7 +482,8 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
         * feature is not supported.
         */
        if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) {
-               mmap_read_unlock(dst_mm);
+               up_read(&ctx->map_changing_lock);
+               uffd_mfill_unlock(dst_vma);
                return -EINVAL;
        }
 
@@ -402,24 +506,28 @@ retry:
         * retry, dst_vma will be set to NULL and we must lookup again.
         */
        if (!dst_vma) {
+               dst_vma = uffd_mfill_lock(dst_mm, dst_start, len);
+               if (IS_ERR(dst_vma)) {
+                       err = PTR_ERR(dst_vma);
+                       goto out;
+               }
+
                err = -ENOENT;
-               dst_vma = find_dst_vma(dst_mm, dst_start, len);
-               if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
-                       goto out_unlock;
+               if (!is_vm_hugetlb_page(dst_vma))
+                       goto out_unlock_vma;
 
                err = -EINVAL;
                if (vma_hpagesize != vma_kernel_pagesize(dst_vma))
-                       goto out_unlock;
-
-               vm_shared = dst_vma->vm_flags & VM_SHARED;
-       }
+                       goto out_unlock_vma;
 
-       /*
-        * If not shared, ensure the dst_vma has a anon_vma.
-        */
-       err = -ENOMEM;
-       if (!vm_shared) {
-               if (unlikely(anon_vma_prepare(dst_vma)))
+               /*
+                * If memory mappings are changing because of non-cooperative
+                * operation (e.g. mremap) running in parallel, bail out and
+                * request the user to retry later
+                */
+               down_read(&ctx->map_changing_lock);
+               err = -EAGAIN;
+               if (atomic_read(&ctx->mmap_changing))
                        goto out_unlock;
        }
 
@@ -463,7 +571,8 @@ retry:
                cond_resched();
 
                if (unlikely(err == -ENOENT)) {
-                       mmap_read_unlock(dst_mm);
+                       up_read(&ctx->map_changing_lock);
+                       uffd_mfill_unlock(dst_vma);
                        BUG_ON(!folio);
 
                        err = copy_folio_from_user(folio,
@@ -472,16 +581,6 @@ retry:
                                err = -EFAULT;
                                goto out;
                        }
-                       mmap_read_lock(dst_mm);
-                       /*
-                        * If memory mappings are changing because of non-cooperative
-                        * operation (e.g. mremap) running in parallel, bail out and
-                        * request the user to retry later
-                        */
-                       if (mmap_changing && atomic_read(mmap_changing)) {
-                               err = -EAGAIN;
-                               break;
-                       }
 
                        dst_vma = NULL;
                        goto retry;
@@ -501,7 +600,9 @@ retry:
        }
 
 out_unlock:
-       mmap_read_unlock(dst_mm);
+       up_read(&ctx->map_changing_lock);
+out_unlock_vma:
+       uffd_mfill_unlock(dst_vma);
 out:
        if (folio)
                folio_put(folio);
@@ -512,11 +613,11 @@ out:
 }
 #else /* !CONFIG_HUGETLB_PAGE */
 /* fail at build time if gcc attempts to use this */
-extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma,
+extern ssize_t mfill_atomic_hugetlb(struct userfaultfd_ctx *ctx,
+                                   struct vm_area_struct *dst_vma,
                                    unsigned long dst_start,
                                    unsigned long src_start,
                                    unsigned long len,
-                                   atomic_t *mmap_changing,
                                    uffd_flags_t flags);
 #endif /* CONFIG_HUGETLB_PAGE */
 
@@ -564,13 +665,13 @@ static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
        return err;
 }
 
-static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
+static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
                                            unsigned long dst_start,
                                            unsigned long src_start,
                                            unsigned long len,
-                                           atomic_t *mmap_changing,
                                            uffd_flags_t flags)
 {
+       struct mm_struct *dst_mm = ctx->mm;
        struct vm_area_struct *dst_vma;
        ssize_t err;
        pmd_t *dst_pmd;
@@ -593,24 +694,24 @@ static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm,
        copied = 0;
        folio = NULL;
 retry:
-       mmap_read_lock(dst_mm);
+       /*
+        * Make sure the vma is not shared and that the dst range is
+        * both valid and fully within a single existing vma.
+        */
+       dst_vma = uffd_mfill_lock(dst_mm, dst_start, len);
+       if (IS_ERR(dst_vma)) {
+               err = PTR_ERR(dst_vma);
+               goto out;
+       }
 
        /*
         * If memory mappings are changing because of non-cooperative
         * operation (e.g. mremap) running in parallel, bail out and
         * request the user to retry later
         */
+       down_read(&ctx->map_changing_lock);
        err = -EAGAIN;
-       if (mmap_changing && atomic_read(mmap_changing))
-               goto out_unlock;
-
-       /*
-        * Make sure the vma is not shared, that the dst range is
-        * both valid and fully within a single existing vma.
-        */
-       err = -ENOENT;
-       dst_vma = find_dst_vma(dst_mm, dst_start, len);
-       if (!dst_vma)
+       if (atomic_read(&ctx->mmap_changing))
                goto out_unlock;
 
        err = -EINVAL;
@@ -633,8 +734,8 @@ retry:
         * If this is a HUGETLB vma, pass off to appropriate routine
         */
        if (is_vm_hugetlb_page(dst_vma))
-               return  mfill_atomic_hugetlb(dst_vma, dst_start, src_start,
-                                            len, mmap_changing, flags);
+               return  mfill_atomic_hugetlb(ctx, dst_vma, dst_start,
+                                            src_start, len, flags);
 
        if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
                goto out_unlock;
@@ -642,16 +743,6 @@ retry:
            uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
                goto out_unlock;
 
-       /*
-        * Ensure the dst_vma has a anon_vma or this page
-        * would get a NULL anon_vma when moved in the
-        * dst_vma.
-        */
-       err = -ENOMEM;
-       if (!(dst_vma->vm_flags & VM_SHARED) &&
-           unlikely(anon_vma_prepare(dst_vma)))
-               goto out_unlock;
-
        while (src_addr < src_start + len) {
                pmd_t dst_pmdval;
 
@@ -693,7 +784,8 @@ retry:
                if (unlikely(err == -ENOENT)) {
                        void *kaddr;
 
-                       mmap_read_unlock(dst_mm);
+                       up_read(&ctx->map_changing_lock);
+                       uffd_mfill_unlock(dst_vma);
                        BUG_ON(!folio);
 
                        kaddr = kmap_local_folio(folio, 0);
@@ -723,7 +815,8 @@ retry:
        }
 
 out_unlock:
-       mmap_read_unlock(dst_mm);
+       up_read(&ctx->map_changing_lock);
+       uffd_mfill_unlock(dst_vma);
 out:
        if (folio)
                folio_put(folio);
@@ -733,34 +826,42 @@ out:
        return copied ? copied : err;
 }
 
-ssize_t mfill_atomic_copy(struct mm_struct *dst_mm, unsigned long dst_start,
+ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start,
                          unsigned long src_start, unsigned long len,
-                         atomic_t *mmap_changing, uffd_flags_t flags)
+                         uffd_flags_t flags)
 {
-       return mfill_atomic(dst_mm, dst_start, src_start, len, mmap_changing,
+       return mfill_atomic(ctx, dst_start, src_start, len,
                            uffd_flags_set_mode(flags, MFILL_ATOMIC_COPY));
 }
 
-ssize_t mfill_atomic_zeropage(struct mm_struct *dst_mm, unsigned long start,
-                             unsigned long len, atomic_t *mmap_changing)
+ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx,
+                             unsigned long start,
+                             unsigned long len)
 {
-       return mfill_atomic(dst_mm, start, 0, len, mmap_changing,
+       return mfill_atomic(ctx, start, 0, len,
                            uffd_flags_set_mode(0, MFILL_ATOMIC_ZEROPAGE));
 }
 
-ssize_t mfill_atomic_continue(struct mm_struct *dst_mm, unsigned long start,
-                             unsigned long len, atomic_t *mmap_changing,
-                             uffd_flags_t flags)
+ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long start,
+                             unsigned long len, uffd_flags_t flags)
 {
-       return mfill_atomic(dst_mm, start, 0, len, mmap_changing,
+       /*
+        * A caller might reasonably assume that UFFDIO_CONTINUE contains an
+        * smp_wmb() to ensure that any writes to the about-to-be-mapped page by
+        * the thread doing the UFFDIO_CONTINUE are guaranteed to be visible to
+        * subsequent loads from the page through the newly mapped address range.
+        */
+       smp_wmb();
+
+       return mfill_atomic(ctx, start, 0, len,
                            uffd_flags_set_mode(flags, MFILL_ATOMIC_CONTINUE));
 }
 
-ssize_t mfill_atomic_poison(struct mm_struct *dst_mm, unsigned long start,
-                           unsigned long len, atomic_t *mmap_changing,
-                           uffd_flags_t flags)
+ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start,
+                           unsigned long len, uffd_flags_t flags)
 {
-       return mfill_atomic(dst_mm, start, 0, len, mmap_changing,
+       return mfill_atomic(ctx, start, 0, len,
                            uffd_flags_set_mode(flags, MFILL_ATOMIC_POISON));
 }
 
@@ -793,10 +894,10 @@ long uffd_wp_range(struct vm_area_struct *dst_vma,
        return ret;
 }
 
-int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
-                       unsigned long len, bool enable_wp,
-                       atomic_t *mmap_changing)
+int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
+                       unsigned long len, bool enable_wp)
 {
+       struct mm_struct *dst_mm = ctx->mm;
        unsigned long end = start + len;
        unsigned long _start, _end;
        struct vm_area_struct *dst_vma;
@@ -820,8 +921,9 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
         * operation (e.g. mremap) running in parallel, bail out and
         * request the user to retry later
         */
+       down_read(&ctx->map_changing_lock);
        err = -EAGAIN;
-       if (mmap_changing && atomic_read(mmap_changing))
+       if (atomic_read(&ctx->mmap_changing))
                goto out_unlock;
 
        err = -ENOENT;
@@ -850,6 +952,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
                err = 0;
        }
 out_unlock:
+       up_read(&ctx->map_changing_lock);
        mmap_read_unlock(dst_mm);
        return err;
 }
@@ -959,6 +1062,33 @@ static int move_swap_pte(struct mm_struct *mm,
        return 0;
 }
 
+static int move_zeropage_pte(struct mm_struct *mm,
+                            struct vm_area_struct *dst_vma,
+                            struct vm_area_struct *src_vma,
+                            unsigned long dst_addr, unsigned long src_addr,
+                            pte_t *dst_pte, pte_t *src_pte,
+                            pte_t orig_dst_pte, pte_t orig_src_pte,
+                            spinlock_t *dst_ptl, spinlock_t *src_ptl)
+{
+       pte_t zero_pte;
+
+       double_pt_lock(dst_ptl, src_ptl);
+       if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
+           !pte_same(ptep_get(dst_pte), orig_dst_pte)) {
+               double_pt_unlock(dst_ptl, src_ptl);
+               return -EAGAIN;
+       }
+
+       zero_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
+                                        dst_vma->vm_page_prot));
+       ptep_clear_flush(src_vma, src_addr, src_pte);
+       set_pte_at(mm, dst_addr, dst_pte, zero_pte);
+       double_pt_unlock(dst_ptl, src_ptl);
+
+       return 0;
+}
+
 /*
  * The mmap_lock for reading is held by the caller. Just move the page
  * from src_pmd to dst_pmd if possible, and return true if succeeded
@@ -1041,6 +1171,14 @@ retry:
        }
 
        if (pte_present(orig_src_pte)) {
+               if (is_zero_pfn(pte_pfn(orig_src_pte))) {
+                       err = move_zeropage_pte(mm, dst_vma, src_vma,
+                                              dst_addr, src_addr, dst_pte, src_pte,
+                                              orig_dst_pte, orig_src_pte,
+                                              dst_ptl, src_ptl);
+                       goto out;
+               }
+
                /*
                 * Pin and lock both source folio and anon_vma. Since we are in
                 * RCU read section, we can't block, so on contention have to
@@ -1224,27 +1362,136 @@ static int validate_move_areas(struct userfaultfd_ctx *ctx,
        if (!vma_is_anonymous(src_vma) || !vma_is_anonymous(dst_vma))
                return -EINVAL;
 
+       return 0;
+}
+
+static __always_inline
+int find_vmas_mm_locked(struct mm_struct *mm,
+                       unsigned long dst_start,
+                       unsigned long src_start,
+                       struct vm_area_struct **dst_vmap,
+                       struct vm_area_struct **src_vmap)
+{
+       struct vm_area_struct *vma;
+
+       mmap_assert_locked(mm);
+       vma = find_vma_and_prepare_anon(mm, dst_start);
+       if (IS_ERR(vma))
+               return PTR_ERR(vma);
+
+       *dst_vmap = vma;
+       /* Skip finding src_vma if src_start is in dst_vma */
+       if (src_start >= vma->vm_start && src_start < vma->vm_end)
+               goto out_success;
+
+       vma = vma_lookup(mm, src_start);
+       if (!vma)
+               return -ENOENT;
+out_success:
+       *src_vmap = vma;
+       return 0;
+}
+
+#ifdef CONFIG_PER_VMA_LOCK
+static int uffd_move_lock(struct mm_struct *mm,
+                         unsigned long dst_start,
+                         unsigned long src_start,
+                         struct vm_area_struct **dst_vmap,
+                         struct vm_area_struct **src_vmap)
+{
+       struct vm_area_struct *vma;
+       int err;
+
+       vma = lock_vma(mm, dst_start);
+       if (IS_ERR(vma))
+               return PTR_ERR(vma);
+
+       *dst_vmap = vma;
        /*
-        * Ensure the dst_vma has a anon_vma or this page
-        * would get a NULL anon_vma when moved in the
-        * dst_vma.
+        * Skip finding src_vma if src_start is in dst_vma. This also ensures
+        * that we don't lock the same vma twice.
         */
-       if (unlikely(anon_vma_prepare(dst_vma)))
-               return -ENOMEM;
+       if (src_start >= vma->vm_start && src_start < vma->vm_end) {
+               *src_vmap = vma;
+               return 0;
+       }
 
-       return 0;
+       /*
+        * Using lock_vma() to get src_vma can lead to following deadlock:
+        *
+        * Thread1                              Thread2
+        * -------                              -------
+        * vma_start_read(dst_vma)
+        *                                      mmap_write_lock(mm)
+        *                                      vma_start_write(src_vma)
+        * vma_start_read(src_vma)
+        * mmap_read_lock(mm)
+        *                                      vma_start_write(dst_vma)
+        */
+       *src_vmap = lock_vma_under_rcu(mm, src_start);
+       if (likely(*src_vmap))
+               return 0;
+
+       /* Undo any locking and retry in mmap_lock critical section */
+       vma_end_read(*dst_vmap);
+
+       mmap_read_lock(mm);
+       err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
+       if (!err) {
+               /*
+                * See comment in lock_vma() as to why not using
+                * vma_start_read() here.
+                */
+               down_read(&(*dst_vmap)->vm_lock->lock);
+               if (*dst_vmap != *src_vmap)
+                       down_read(&(*src_vmap)->vm_lock->lock);
+       }
+       mmap_read_unlock(mm);
+       return err;
+}
+
+static void uffd_move_unlock(struct vm_area_struct *dst_vma,
+                            struct vm_area_struct *src_vma)
+{
+       vma_end_read(src_vma);
+       if (src_vma != dst_vma)
+               vma_end_read(dst_vma);
+}
+
+#else
+
+static int uffd_move_lock(struct mm_struct *mm,
+                         unsigned long dst_start,
+                         unsigned long src_start,
+                         struct vm_area_struct **dst_vmap,
+                         struct vm_area_struct **src_vmap)
+{
+       int err;
+
+       mmap_read_lock(mm);
+       err = find_vmas_mm_locked(mm, dst_start, src_start, dst_vmap, src_vmap);
+       if (err)
+               mmap_read_unlock(mm);
+       return err;
+}
+
+static void uffd_move_unlock(struct vm_area_struct *dst_vma,
+                            struct vm_area_struct *src_vma)
+{
+       mmap_assert_locked(src_vma->vm_mm);
+       mmap_read_unlock(dst_vma->vm_mm);
 }
+#endif
 
 /**
  * move_pages - move arbitrary anonymous pages of an existing vma
  * @ctx: pointer to the userfaultfd context
- * @mm: the address space to move pages
  * @dst_start: start of the destination virtual memory range
  * @src_start: start of the source virtual memory range
  * @len: length of the virtual memory range
  * @mode: flags from uffdio_move.mode
  *
- * Must be called with mmap_lock held for read.
+ * It will either use the mmap_lock in read mode or per-vma locks
  *
  * move_pages() remaps arbitrary anonymous pages atomically in zero
  * copy. It only works on non shared anonymous pages because those can
@@ -1312,10 +1559,10 @@ static int validate_move_areas(struct userfaultfd_ctx *ctx,
  * could be obtained. This is the only additional complexity added to
  * the rmap code to provide this anonymous page remapping functionality.
  */
-ssize_t move_pages(struct userfaultfd_ctx *ctx, struct mm_struct *mm,
-                  unsigned long dst_start, unsigned long src_start,
-                  unsigned long len, __u64 mode)
+ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
+                  unsigned long src_start, unsigned long len, __u64 mode)
 {
+       struct mm_struct *mm = ctx->mm;
        struct vm_area_struct *src_vma, *dst_vma;
        unsigned long src_addr, dst_addr;
        pmd_t *src_pmd, *dst_pmd;
@@ -1333,28 +1580,34 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, struct mm_struct *mm,
            WARN_ON_ONCE(dst_start + len <= dst_start))
                goto out;
 
+       err = uffd_move_lock(mm, dst_start, src_start, &dst_vma, &src_vma);
+       if (err)
+               goto out;
+
+       /* Re-check after taking map_changing_lock */
+       err = -EAGAIN;
+       down_read(&ctx->map_changing_lock);
+       if (likely(atomic_read(&ctx->mmap_changing)))
+               goto out_unlock;
        /*
         * Make sure the vma is not shared, that the src and dst remap
         * ranges are both valid and fully within a single existing
         * vma.
         */
-       src_vma = find_vma(mm, src_start);
-       if (!src_vma || (src_vma->vm_flags & VM_SHARED))
-               goto out;
-       if (src_start < src_vma->vm_start ||
-           src_start + len > src_vma->vm_end)
-               goto out;
+       err = -EINVAL;
+       if (src_vma->vm_flags & VM_SHARED)
+               goto out_unlock;
+       if (src_start + len > src_vma->vm_end)
+               goto out_unlock;
 
-       dst_vma = find_vma(mm, dst_start);
-       if (!dst_vma || (dst_vma->vm_flags & VM_SHARED))
-               goto out;
-       if (dst_start < dst_vma->vm_start ||
-           dst_start + len > dst_vma->vm_end)
-               goto out;
+       if (dst_vma->vm_flags & VM_SHARED)
+               goto out_unlock;
+       if (dst_start + len > dst_vma->vm_end)
+               goto out_unlock;
 
        err = validate_move_areas(ctx, src_vma, dst_vma);
        if (err)
-               goto out;
+               goto out_unlock;
 
        for (src_addr = src_start, dst_addr = dst_start;
             src_addr < src_start + len;) {
@@ -1404,19 +1657,14 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, struct mm_struct *mm,
                                err = -ENOENT;
                                break;
                        }
-                       /* Avoid moving zeropages for now */
-                       if (is_huge_zero_pmd(*src_pmd)) {
-                               spin_unlock(ptl);
-                               err = -EBUSY;
-                               break;
-                       }
 
                        /* Check if we can move the pmd without splitting it. */
                        if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) ||
                            !pmd_none(dst_pmdval)) {
                                struct folio *folio = pfn_folio(pmd_pfn(*src_pmd));
 
-                               if (!folio || !PageAnonExclusive(&folio->page)) {
+                               if (!folio || (!is_huge_zero_page(&folio->page) &&
+                                              !PageAnonExclusive(&folio->page))) {
                                        spin_unlock(ptl);
                                        err = -EBUSY;
                                        break;
@@ -1476,6 +1724,9 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, struct mm_struct *mm,
                moved += step_size;
        }
 
+out_unlock:
+       up_read(&ctx->map_changing_lock);
+       uffd_move_unlock(dst_vma, src_vma);
 out:
        VM_WARN_ON(moved < 0);
        VM_WARN_ON(err > 0);
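
From userspace this path is driven by the UFFDIO_MOVE ioctl; a hedged sketch
(addresses invented, the uffd assumed registered over both ranges):

	#include <linux/userfaultfd.h>
	#include <stdio.h>
	#include <sys/ioctl.h>

	static int uffdio_move_sketch(int uffd, unsigned long dst,
				      unsigned long src, unsigned long len)
	{
		struct uffdio_move move = {
			.dst = dst,
			.src = src,
			.len = len,
			.mode = 0,	/* or UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES */
		};

		if (ioctl(uffd, UFFDIO_MOVE, &move) < 0) {
			perror("UFFDIO_MOVE");	/* EAGAIN asks for a retry */
			return -1;
		}
		return 0;	/* move.move holds the bytes actually moved */
	}
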
index 5faf3adc6f433ab153465a3f391027ee73aa4a31..669397235787b90e0b7cd44cc8257dcfb0847fd9 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -959,6 +959,7 @@ EXPORT_SYMBOL_GPL(vm_memory_committed);
 int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 {
        long allowed;
+       unsigned long bytes_failed;
 
        vm_acct_memory(pages);
 
@@ -993,8 +994,9 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
        if (percpu_counter_read_positive(&vm_committed_as) < allowed)
                return 0;
 error:
-       pr_warn_ratelimited("%s: pid: %d, comm: %s, not enough memory for the allocation\n",
-                           __func__, current->pid, current->comm);
+       bytes_failed = pages << PAGE_SHIFT;
+       pr_warn_ratelimited("%s: pid: %d, comm: %s, bytes: %lu not enough memory for the allocation\n",
+                           __func__, current->pid, current->comm, bytes_failed);
        vm_unacct_memory(pages);
 
        return -ENOMEM;
index 1e36322d83d895ca8964240d8c5fa9dfb14868a9..22aa63f4ef6322a71030c5dd163706c83ef5cd8d 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -800,17 +800,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
 
 
-static DEFINE_SPINLOCK(vmap_area_lock);
 static DEFINE_SPINLOCK(free_vmap_area_lock);
-/* Export for kexec only */
-LIST_HEAD(vmap_area_list);
-static struct rb_root vmap_area_root = RB_ROOT;
 static bool vmap_initialized __read_mostly;
 
-static struct rb_root purge_vmap_area_root = RB_ROOT;
-static LIST_HEAD(purge_vmap_area_list);
-static DEFINE_SPINLOCK(purge_vmap_area_lock);
-
 /*
  * This kmem_cache is used for vmap_area objects. Instead of
  * allocating from slab we reuse an object from this cache to
@@ -844,6 +836,129 @@ static struct rb_root free_vmap_area_root = RB_ROOT;
  */
 static DEFINE_PER_CPU(struct vmap_area *, ne_fit_preload_node);
 
+/*
+ * This structure defines a single, solid model where a list and
+ * rb-tree are part of one entity protected by the lock. Nodes are
+ * sorted in ascending order, thus for O(1) access to left/right
+ * neighbors a list is used as well as for sequential traversal.
+ */
+struct rb_list {
+       struct rb_root root;
+       struct list_head head;
+       spinlock_t lock;
+};
+
+/*
+ * A fast size storage contains VAs up to 1M size. A pool consists
+ * of ready-to-go VAs of certain sizes linked to each other. An
+ * index in the pool-array corresponds to the number of pages + 1.
+ */
+#define MAX_VA_SIZE_PAGES 256
+
+struct vmap_pool {
+       struct list_head head;
+       unsigned long len;
+};
+
+/*
+ * An effective vmap-node logic. Users make use of nodes instead
+ * of a global heap, which balances access and mitigates
+ * contention.
+ */
+static struct vmap_node {
+       /* Simple size segregated storage. */
+       struct vmap_pool pool[MAX_VA_SIZE_PAGES];
+       spinlock_t pool_lock;
+       bool skip_populate;
+
+       /* Bookkeeping data of this node. */
+       struct rb_list busy;
+       struct rb_list lazy;
+
+       /*
+        * Ready-to-free areas.
+        */
+       struct list_head purge_list;
+       struct work_struct purge_work;
+       unsigned long nr_purged;
+} single;
+
+/*
+ * Initial setup consists of one single node, i.e. a balancing
+ * is fully disabled. Later on, after vmap is initialized these
+ * parameters are updated based on a system capacity.
+ */
+static struct vmap_node *vmap_nodes = &single;
+static __read_mostly unsigned int nr_vmap_nodes = 1;
+static __read_mostly unsigned int vmap_zone_size = 1;
+
+static inline unsigned int
+addr_to_node_id(unsigned long addr)
+{
+       return (addr / vmap_zone_size) % nr_vmap_nodes;
+}
+
+static inline struct vmap_node *
+addr_to_node(unsigned long addr)
+{
+       return &vmap_nodes[addr_to_node_id(addr)];
+}
+
+static inline struct vmap_node *
+id_to_node(unsigned int id)
+{
+       return &vmap_nodes[id % nr_vmap_nodes];
+}
+
+/*
+ * We use the value 0 to represent "no node"; that is why
+ * an encoded value is the node-id incremented by 1. It is
+ * always greater than 0. A valid node_id that can be
+ * encoded is [0:nr_vmap_nodes - 1]. If a passed node_id
+ * is not valid, 0 is returned.
+ */
+static unsigned int
+encode_vn_id(unsigned int node_id)
+{
+       /* Can store U8_MAX [0:254] nodes. */
+       if (node_id < nr_vmap_nodes)
+               return (node_id + 1) << BITS_PER_BYTE;
+
+       /* Warn and no node encoded. */
+       WARN_ONCE(1, "Encode wrong node id (%u)\n", node_id);
+       return 0;
+}
+
+/*
+ * Returns the decoded node-id; the valid range is
+ * [0:nr_vmap_nodes-1]. Otherwise, nr_vmap_nodes is
+ * returned if the extracted data is wrong.
+ */
+static unsigned int
+decode_vn_id(unsigned int val)
+{
+       unsigned int node_id = (val >> BITS_PER_BYTE) - 1;
+
+       /* Can store U8_MAX [0:254] nodes. */
+       if (node_id < nr_vmap_nodes)
+               return node_id;
+
+       /* If it was _not_ zero, warn. */
+       WARN_ONCE(node_id != UINT_MAX,
+               "Decode wrong node id (%d)\n", node_id);
+
+       return nr_vmap_nodes;
+}
+
+static bool
+is_vn_id_valid(unsigned int node_id)
+{
+       return node_id < nr_vmap_nodes;
+}
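
A minimal round-trip sketch of the encoding above, assuming
BITS_PER_BYTE == 8 and an example node count. Note how the "no node"
value 0 decodes to UINT_MAX before the range check, which is why
decode_vn_id() can reject it without warning:

#include <stdio.h>

#define BITS_PER_BYTE 8
static unsigned int nr_vmap_nodes = 4;  /* example value */

static unsigned int encode_vn_id(unsigned int node_id)
{
        return (node_id + 1) << BITS_PER_BYTE;  /* 0 means "no node" */
}

static unsigned int decode_vn_id(unsigned int val)
{
        unsigned int node_id = (val >> BITS_PER_BYTE) - 1;

        return node_id < nr_vmap_nodes ? node_id : nr_vmap_nodes;
}

int main(void)
{
        unsigned int enc = encode_vn_id(2);

        printf("encoded: %#x, decoded: %u\n", enc, decode_vn_id(enc));
        printf("value 0 decodes to %u (== nr_vmap_nodes)\n", decode_vn_id(0));
        return 0;
}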
+
 static __always_inline unsigned long
 va_size(struct vmap_area *va)
 {
@@ -875,10 +990,11 @@ unsigned long vmalloc_nr_pages(void)
 }
 
 /* Look up the first VA which satisfies addr < va_end, NULL if none. */
-static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
+static struct vmap_area *
+__find_vmap_area_exceed_addr(unsigned long addr, struct rb_root *root)
 {
        struct vmap_area *va = NULL;
-       struct rb_node *n = vmap_area_root.rb_node;
+       struct rb_node *n = root->rb_node;
 
        addr = (unsigned long)kasan_reset_tag((void *)addr);
 
@@ -899,6 +1015,41 @@ static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
        return va;
 }
 
+/*
+ * Returns the node where the first VA that satisfies addr < va_end resides.
+ * On success, the node is locked. The user is responsible for unlocking it
+ * when the VA no longer needs to be accessed.
+ *
+ * Returns NULL if nothing found.
+ */
+static struct vmap_node *
+find_vmap_area_exceed_addr_lock(unsigned long addr, struct vmap_area **va)
+{
+       struct vmap_node *vn, *va_node = NULL;
+       struct vmap_area *va_lowest;
+       int i;
+
+       for (i = 0; i < nr_vmap_nodes; i++) {
+               vn = &vmap_nodes[i];
+
+               spin_lock(&vn->busy.lock);
+               va_lowest = __find_vmap_area_exceed_addr(addr, &vn->busy.root);
+               if (va_lowest) {
+                       if (!va_node || va_lowest->va_start < (*va)->va_start) {
+                               if (va_node)
+                                       spin_unlock(&va_node->busy.lock);
+
+                               *va = va_lowest;
+                               va_node = vn;
+                               continue;
+                       }
+               }
+               spin_unlock(&vn->busy.lock);
+       }
+
+       return va_node;
+}
+
 static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
 {
        struct rb_node *n = root->rb_node;
@@ -1454,9 +1605,9 @@ classify_va_fit_type(struct vmap_area *va,
 }
 
 static __always_inline int
-adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
-                     struct vmap_area *va, unsigned long nva_start_addr,
-                     unsigned long size)
+va_clip(struct rb_root *root, struct list_head *head,
+               struct vmap_area *va, unsigned long nva_start_addr,
+               unsigned long size)
 {
        struct vmap_area *lva = NULL;
        enum fit_type type = classify_va_fit_type(va, nva_start_addr, size);
@@ -1553,6 +1704,32 @@ adjust_va_to_fit_type(struct rb_root *root, struct list_head *head,
        return 0;
 }
 
+static unsigned long
+va_alloc(struct vmap_area *va,
+               struct rb_root *root, struct list_head *head,
+               unsigned long size, unsigned long align,
+               unsigned long vstart, unsigned long vend)
+{
+       unsigned long nva_start_addr;
+       int ret;
+
+       if (va->va_start > vstart)
+               nva_start_addr = ALIGN(va->va_start, align);
+       else
+               nva_start_addr = ALIGN(vstart, align);
+
+       /* Check the "vend" restriction. */
+       if (nva_start_addr + size > vend)
+               return vend;
+
+       /* Update the free vmap_area. */
+       ret = va_clip(root, head, va, nva_start_addr, size);
+       if (WARN_ON_ONCE(ret))
+               return vend;
+
+       return nva_start_addr;
+}
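
va_alloc() reports failure by returning vend as a sentinel rather than
an error code, which is why its caller tests nva_start_addr == vend. A
small sketch of the start-address computation with made-up numbers,
where ALIGN is the usual power-of-two round-up:

#include <stdio.h>

#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned long vstart = 0x1000, vend = 0x9000;
        unsigned long va_start = 0x2300, size = 0x2000, align = 0x1000;
        unsigned long addr;

        addr = ALIGN(va_start > vstart ? va_start : vstart, align);
        if (addr + size > vend)
                addr = vend;    /* sentinel: does not fit before vend */

        printf("start %#lx (%s)\n", addr, addr == vend ? "failed" : "ok");
        return 0;
}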
+
 /*
  * Returns a start address of the newly allocated area, if success.
  * Otherwise a vend is returned that indicates failure.
@@ -1565,7 +1742,6 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
        bool adjust_search_size = true;
        unsigned long nva_start_addr;
        struct vmap_area *va;
-       int ret;
 
        /*
         * Do not adjust when:
@@ -1583,18 +1759,8 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
        if (unlikely(!va))
                return vend;
 
-       if (va->va_start > vstart)
-               nva_start_addr = ALIGN(va->va_start, align);
-       else
-               nva_start_addr = ALIGN(vstart, align);
-
-       /* Check the "vend" restriction. */
-       if (nva_start_addr + size > vend)
-               return vend;
-
-       /* Update the free vmap_area. */
-       ret = adjust_va_to_fit_type(root, head, va, nva_start_addr, size);
-       if (WARN_ON_ONCE(ret))
+       nva_start_addr = va_alloc(va, root, head, size, align, vstart, vend);
+       if (nva_start_addr == vend)
                return vend;
 
 #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
@@ -1609,12 +1775,14 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
  */
 static void free_vmap_area(struct vmap_area *va)
 {
+       struct vmap_node *vn = addr_to_node(va->va_start);
+
        /*
         * Remove from the busy tree/list.
         */
-       spin_lock(&vmap_area_lock);
-       unlink_va(va, &vmap_area_root);
-       spin_unlock(&vmap_area_lock);
+       spin_lock(&vn->busy.lock);
+       unlink_va(va, &vn->busy.root);
+       spin_unlock(&vn->busy.lock);
 
        /*
         * Insert/Merge it back to the free tree/list.
@@ -1647,6 +1815,104 @@ preload_this_cpu_lock(spinlock_t *lock, gfp_t gfp_mask, int node)
                kmem_cache_free(vmap_area_cachep, va);
 }
 
+static struct vmap_pool *
+size_to_va_pool(struct vmap_node *vn, unsigned long size)
+{
+       unsigned int idx = (size - 1) / PAGE_SIZE;
+
+       if (idx < MAX_VA_SIZE_PAGES)
+               return &vn->pool[idx];
+
+       return NULL;
+}
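
The mapping above means pool[i] serves allocations of exactly i + 1
pages, and any size above MAX_VA_SIZE_PAGES pages bypasses the pools
entirely. A quick check, assuming a 4K page size:

#include <stdio.h>

#define PAGE_SIZE               4096UL
#define MAX_VA_SIZE_PAGES       256

static long size_to_pool_idx(unsigned long size)
{
        unsigned long idx = (size - 1) / PAGE_SIZE;

        return idx < MAX_VA_SIZE_PAGES ? (long)idx : -1;        /* -1: no pool */
}

int main(void)
{
        printf("4096 bytes  -> pool %ld\n", size_to_pool_idx(4096));
        printf("4097 bytes  -> pool %ld\n", size_to_pool_idx(4097));
        printf("1M          -> pool %ld\n", size_to_pool_idx(1UL << 20));
        printf("1M + 1 page -> pool %ld\n",
               size_to_pool_idx((1UL << 20) + PAGE_SIZE));
        return 0;
}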
+
+static bool
+node_pool_add_va(struct vmap_node *n, struct vmap_area *va)
+{
+       struct vmap_pool *vp;
+
+       vp = size_to_va_pool(n, va_size(va));
+       if (!vp)
+               return false;
+
+       spin_lock(&n->pool_lock);
+       list_add(&va->list, &vp->head);
+       WRITE_ONCE(vp->len, vp->len + 1);
+       spin_unlock(&n->pool_lock);
+
+       return true;
+}
+
+static struct vmap_area *
+node_pool_del_va(struct vmap_node *vn, unsigned long size,
+               unsigned long align, unsigned long vstart,
+               unsigned long vend)
+{
+       struct vmap_area *va = NULL;
+       struct vmap_pool *vp;
+       int err = 0;
+
+       vp = size_to_va_pool(vn, size);
+       if (!vp || list_empty(&vp->head))
+               return NULL;
+
+       spin_lock(&vn->pool_lock);
+       if (!list_empty(&vp->head)) {
+               va = list_first_entry(&vp->head, struct vmap_area, list);
+
+               if (IS_ALIGNED(va->va_start, align)) {
+                       /*
+                        * Do some sanity checks and emit a warning
+                        * if one of the checks below detects an error.
+                        */
+                       err |= (va_size(va) != size);
+                       err |= (va->va_start < vstart);
+                       err |= (va->va_end > vend);
+
+                       if (!WARN_ON_ONCE(err)) {
+                               list_del_init(&va->list);
+                               WRITE_ONCE(vp->len, vp->len - 1);
+                       } else {
+                               va = NULL;
+                       }
+               } else {
+                       list_move_tail(&va->list, &vp->head);
+                       va = NULL;
+               }
+       }
+       spin_unlock(&vn->pool_lock);
+
+       return va;
+}
+
+static struct vmap_area *
+node_alloc(unsigned long size, unsigned long align,
+               unsigned long vstart, unsigned long vend,
+               unsigned long *addr, unsigned int *vn_id)
+{
+       struct vmap_area *va;
+
+       *vn_id = 0;
+       *addr = vend;
+
+       /*
+        * Fall back to the global heap if the request is not
+        * for the vmalloc space or there is only one node.
+        */
+       if (vstart != VMALLOC_START || vend != VMALLOC_END ||
+                       nr_vmap_nodes == 1)
+               return NULL;
+
+       *vn_id = raw_smp_processor_id() % nr_vmap_nodes;
+       va = node_pool_del_va(id_to_node(*vn_id), size, align, vstart, vend);
+       *vn_id = encode_vn_id(*vn_id);
+
+       if (va)
+               *addr = va->va_start;
+
+       return va;
+}
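
Taken together, node_alloc() is a per-CPU-indexed fast path in front of
the global free tree: a hit hands out a cached VA from the node's pool,
while a miss falls through to the global heap but still tags the VA
with the node id so it drains back into that pool on lazy free. A toy
model of just the hit/miss flow, with all values invented:

#include <stdio.h>

#define NR_NODES 4

static int pool_len[NR_NODES];  /* cached VAs per node */
static unsigned long global_allocs;

static int alloc_va(int cpu)
{
        int node = cpu % NR_NODES;

        if (pool_len[node] > 0) {
                pool_len[node]--;       /* fast path: pool hit */
                return node;
        }

        global_allocs++;                /* slow path: global heap */
        return -1;
}

int main(void)
{
        pool_len[1] = 2;        /* pretend node 1 has two cached VAs */

        for (int cpu = 0; cpu < 6; cpu++)
                printf("cpu %d -> %s\n", cpu,
                       alloc_va(cpu) >= 0 ? "pool hit" : "global heap");

        printf("global allocations: %lu\n", global_allocs);
        return 0;
}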
+
 /*
  * Allocate a region of KVA of the specified size and alignment, within the
  * vstart and vend.
@@ -1657,9 +1923,11 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
                                int node, gfp_t gfp_mask,
                                unsigned long va_flags)
 {
+       struct vmap_node *vn;
        struct vmap_area *va;
        unsigned long freed;
        unsigned long addr;
+       unsigned int vn_id;
        int purged = 0;
        int ret;
 
@@ -1670,23 +1938,37 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
                return ERR_PTR(-EBUSY);
 
        might_sleep();
-       gfp_mask = gfp_mask & GFP_RECLAIM_MASK;
-
-       va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
-       if (unlikely(!va))
-               return ERR_PTR(-ENOMEM);
 
        /*
-        * Only scan the relevant parts containing pointers to other objects
-        * to avoid false negatives.
+        * Even if a VA is obtained from the global heap (i.e. if
+        * this fast path fails), it is still marked with this
+        * "vn_id", so it is returned to this node's pool later.
+        * This makes it possible to populate pools based on user
+        * demand.
+        *
+        * On success a ready to go VA is returned.
         */
-       kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);
+       va = node_alloc(size, align, vstart, vend, &addr, &vn_id);
+       if (!va) {
+               gfp_mask = gfp_mask & GFP_RECLAIM_MASK;
+
+               va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node);
+               if (unlikely(!va))
+                       return ERR_PTR(-ENOMEM);
+
+               /*
+                * Only scan the relevant parts containing pointers to other objects
+                * to avoid false negatives.
+                */
+               kmemleak_scan_area(&va->rb_node, SIZE_MAX, gfp_mask);
+       }
 
 retry:
-       preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
-       addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
-               size, align, vstart, vend);
-       spin_unlock(&free_vmap_area_lock);
+       if (addr == vend) {
+               preload_this_cpu_lock(&free_vmap_area_lock, gfp_mask, node);
+               addr = __alloc_vmap_area(&free_vmap_area_root, &free_vmap_area_list,
+                       size, align, vstart, vend);
+               spin_unlock(&free_vmap_area_lock);
+       }
 
        trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend);
 
@@ -1700,11 +1982,13 @@ retry:
        va->va_start = addr;
        va->va_end = addr + size;
        va->vm = NULL;
-       va->flags = va_flags;
+       va->flags = (va_flags | vn_id);
 
-       spin_lock(&vmap_area_lock);
-       insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
-       spin_unlock(&vmap_area_lock);
+       vn = addr_to_node(va->va_start);
+
+       spin_lock(&vn->busy.lock);
+       insert_vmap_area(va, &vn->busy.root, &vn->busy.head);
+       spin_unlock(&vn->busy.lock);
 
        BUG_ON(!IS_ALIGNED(va->va_start, align));
        BUG_ON(va->va_start < vstart);
@@ -1789,70 +2073,199 @@ static DEFINE_MUTEX(vmap_purge_lock);
 
 /* for per-CPU blocks */
 static void purge_fragmented_blocks_allcpus(void);
+static cpumask_t purge_nodes;
 
-/*
- * Purges all lazily-freed vmap areas.
- */
-static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
+static void
+reclaim_list_global(struct list_head *head)
 {
-       unsigned long resched_threshold;
-       unsigned int num_purged_areas = 0;
-       struct list_head local_purge_list;
-       struct vmap_area *va, *n_va;
+       struct vmap_area *va, *n;
 
-       lockdep_assert_held(&vmap_purge_lock);
+       if (list_empty(head))
+               return;
 
-       spin_lock(&purge_vmap_area_lock);
-       purge_vmap_area_root = RB_ROOT;
-       list_replace_init(&purge_vmap_area_list, &local_purge_list);
-       spin_unlock(&purge_vmap_area_lock);
+       spin_lock(&free_vmap_area_lock);
+       list_for_each_entry_safe(va, n, head, list)
+               merge_or_add_vmap_area_augment(va,
+                       &free_vmap_area_root, &free_vmap_area_list);
+       spin_unlock(&free_vmap_area_lock);
+}
 
-       if (unlikely(list_empty(&local_purge_list)))
-               goto out;
+static void
+decay_va_pool_node(struct vmap_node *vn, bool full_decay)
+{
+       struct vmap_area *va, *nva;
+       struct list_head decay_list;
+       struct rb_root decay_root;
+       unsigned long n_decay;
+       int i;
 
-       start = min(start,
-               list_first_entry(&local_purge_list,
-                       struct vmap_area, list)->va_start);
+       decay_root = RB_ROOT;
+       INIT_LIST_HEAD(&decay_list);
 
-       end = max(end,
-               list_last_entry(&local_purge_list,
-                       struct vmap_area, list)->va_end);
+       for (i = 0; i < MAX_VA_SIZE_PAGES; i++) {
+               struct list_head tmp_list;
 
-       flush_tlb_kernel_range(start, end);
-       resched_threshold = lazy_max_pages() << 1;
+               if (list_empty(&vn->pool[i].head))
+                       continue;
 
-       spin_lock(&free_vmap_area_lock);
-       list_for_each_entry_safe(va, n_va, &local_purge_list, list) {
-               unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
-               unsigned long orig_start = va->va_start;
-               unsigned long orig_end = va->va_end;
+               INIT_LIST_HEAD(&tmp_list);
+
+               /* Detach the pool, so no-one can access it. */
+               spin_lock(&vn->pool_lock);
+               list_replace_init(&vn->pool[i].head, &tmp_list);
+               spin_unlock(&vn->pool_lock);
+
+               if (full_decay)
+                       WRITE_ONCE(vn->pool[i].len, 0);
+
+               /* Decay a pool by ~25% of the remaining objects. */
+               n_decay = vn->pool[i].len >> 2;
+
+               list_for_each_entry_safe(va, nva, &tmp_list, list) {
+                       list_del_init(&va->list);
+                       merge_or_add_vmap_area(va, &decay_root, &decay_list);
+
+                       if (!full_decay) {
+                               WRITE_ONCE(vn->pool[i].len, vn->pool[i].len - 1);
+
+                               if (!--n_decay)
+                                       break;
+                       }
+               }
 
                /*
-                * Finally insert or merge lazily-freed area. It is
-                * detached and there is no need to "unlink" it from
-                * anything.
+                * Attach the pool back if it has been partly decayed.
+                * Note that it is assumed that nobody (no other context)
+                * can populate the pool, therefore a simple list-replace
+                * operation takes place here.
                 */
-               va = merge_or_add_vmap_area_augment(va, &free_vmap_area_root,
-                               &free_vmap_area_list);
+               if (!full_decay && !list_empty(&tmp_list)) {
+                       spin_lock(&vn->pool_lock);
+                       list_replace_init(&tmp_list, &vn->pool[i].head);
+                       spin_unlock(&vn->pool_lock);
+               }
+       }
 
-               if (!va)
-                       continue;
+       reclaim_list_global(&decay_list);
+}
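
The ~25% figure above comes from n_decay = len >> 2. One reading worth
noting: for a pool shorter than four entries n_decay starts at zero, so
the !--n_decay pre-decrement wraps around and the loop drains that pool
completely. A quick table of the shift, with example lengths:

#include <stdio.h>

int main(void)
{
        unsigned long lens[] = { 1, 3, 4, 8, 100 };

        for (int i = 0; i < 5; i++)
                printf("pool len %3lu -> n_decay %lu\n", lens[i], lens[i] >> 2);

        return 0;
}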
+
+static void purge_vmap_node(struct work_struct *work)
+{
+       struct vmap_node *vn = container_of(work,
+               struct vmap_node, purge_work);
+       struct vmap_area *va, *n_va;
+       LIST_HEAD(local_list);
+
+       vn->nr_purged = 0;
+
+       list_for_each_entry_safe(va, n_va, &vn->purge_list, list) {
+               unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
+               unsigned long orig_start = va->va_start;
+               unsigned long orig_end = va->va_end;
+               unsigned int vn_id = decode_vn_id(va->flags);
+
+               list_del_init(&va->list);
 
                if (is_vmalloc_or_module_addr((void *)orig_start))
                        kasan_release_vmalloc(orig_start, orig_end,
                                              va->va_start, va->va_end);
 
                atomic_long_sub(nr, &vmap_lazy_nr);
-               num_purged_areas++;
+               vn->nr_purged++;
 
-               if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
-                       cond_resched_lock(&free_vmap_area_lock);
+               if (is_vn_id_valid(vn_id) && !vn->skip_populate)
+                       if (node_pool_add_va(vn, va))
+                               continue;
+
+               /* Go back to global. */
+               list_add(&va->list, &local_list);
        }
-       spin_unlock(&free_vmap_area_lock);
 
-out:
-       trace_purge_vmap_area_lazy(start, end, num_purged_areas);
-       return num_purged_areas > 0;
+       reclaim_list_global(&local_list);
+}
+
+/*
+ * Purges all lazily-freed vmap areas.
+ */
+static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end,
+               bool full_pool_decay)
+{
+       unsigned long nr_purged_areas = 0;
+       unsigned int nr_purge_helpers;
+       unsigned int nr_purge_nodes;
+       struct vmap_node *vn;
+       int i;
+
+       lockdep_assert_held(&vmap_purge_lock);
+
+       /*
+        * Use the cpumask to mark which nodes have to be processed.
+        */
+       purge_nodes = CPU_MASK_NONE;
+
+       for (i = 0; i < nr_vmap_nodes; i++) {
+               vn = &vmap_nodes[i];
+
+               INIT_LIST_HEAD(&vn->purge_list);
+               vn->skip_populate = full_pool_decay;
+               decay_va_pool_node(vn, full_pool_decay);
+
+               if (RB_EMPTY_ROOT(&vn->lazy.root))
+                       continue;
+
+               spin_lock(&vn->lazy.lock);
+               WRITE_ONCE(vn->lazy.root.rb_node, NULL);
+               list_replace_init(&vn->lazy.head, &vn->purge_list);
+               spin_unlock(&vn->lazy.lock);
+
+               start = min(start, list_first_entry(&vn->purge_list,
+                       struct vmap_area, list)->va_start);
+
+               end = max(end, list_last_entry(&vn->purge_list,
+                       struct vmap_area, list)->va_end);
+
+               cpumask_set_cpu(i, &purge_nodes);
+       }
+
+       nr_purge_nodes = cpumask_weight(&purge_nodes);
+       if (nr_purge_nodes > 0) {
+               flush_tlb_kernel_range(start, end);
+
+               /* Spawn one extra helper per full lazy_max_pages() set, minus one. */
+               nr_purge_helpers = atomic_long_read(&vmap_lazy_nr) / lazy_max_pages();
+               nr_purge_helpers = clamp(nr_purge_helpers, 1U, nr_purge_nodes) - 1;
+
+               for_each_cpu(i, &purge_nodes) {
+                       vn = &vmap_nodes[i];
+
+                       if (nr_purge_helpers > 0) {
+                               INIT_WORK(&vn->purge_work, purge_vmap_node);
+
+                               if (cpumask_test_cpu(i, cpu_online_mask))
+                                       schedule_work_on(i, &vn->purge_work);
+                               else
+                                       schedule_work(&vn->purge_work);
+
+                               nr_purge_helpers--;
+                       } else {
+                               vn->purge_work.func = NULL;
+                               purge_vmap_node(&vn->purge_work);
+                               nr_purged_areas += vn->nr_purged;
+                       }
+               }
+
+               for_each_cpu(i, &purge_nodes) {
+                       vn = &vmap_nodes[i];
+
+                       if (vn->purge_work.func) {
+                               flush_work(&vn->purge_work);
+                               nr_purged_areas += vn->nr_purged;
+                       }
+               }
+       }
+
+       trace_purge_vmap_area_lazy(start, end, nr_purged_areas);
+       return nr_purged_areas > 0;
 }
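
The helper sizing above boils down to
clamp(vmap_lazy_nr / lazy_max_pages(), 1, nr_purge_nodes) - 1: no extra
workers are scheduled until at least two full lazy_max_pages() sets are
pending, and at most nr_purge_nodes - 1 helpers ever run next to the
caller. A quick check with invented numbers:

#include <stdio.h>

static unsigned int clampu(unsigned int v, unsigned int lo, unsigned int hi)
{
        return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
        unsigned long lazy_max = 32UL * 1024;   /* example lazy_max_pages() */
        unsigned int nr_purge_nodes = 8;
        unsigned long lazy_nr[] = { 1000, 70000, 100000, 1000000 };

        for (int i = 0; i < 4; i++) {
                unsigned int helpers =
                        clampu(lazy_nr[i] / lazy_max, 1, nr_purge_nodes) - 1;

                printf("vmap_lazy_nr=%-8lu -> %u extra helper(s)\n",
                       lazy_nr[i], helpers);
        }
        return 0;
}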
 
 /*
@@ -1863,22 +2276,15 @@ static void reclaim_and_purge_vmap_areas(void)
 {
        mutex_lock(&vmap_purge_lock);
        purge_fragmented_blocks_allcpus();
-       __purge_vmap_area_lazy(ULONG_MAX, 0);
+       __purge_vmap_area_lazy(ULONG_MAX, 0, true);
        mutex_unlock(&vmap_purge_lock);
 }
 
 static void drain_vmap_area_work(struct work_struct *work)
 {
-       unsigned long nr_lazy;
-
-       do {
-               mutex_lock(&vmap_purge_lock);
-               __purge_vmap_area_lazy(ULONG_MAX, 0);
-               mutex_unlock(&vmap_purge_lock);
-
-               /* Recheck if further work is required. */
-               nr_lazy = atomic_long_read(&vmap_lazy_nr);
-       } while (nr_lazy > lazy_max_pages());
+       mutex_lock(&vmap_purge_lock);
+       __purge_vmap_area_lazy(ULONG_MAX, 0, false);
+       mutex_unlock(&vmap_purge_lock);
 }
 
 /*
@@ -1890,6 +2296,8 @@ static void free_vmap_area_noflush(struct vmap_area *va)
 {
        unsigned long nr_lazy_max = lazy_max_pages();
        unsigned long va_start = va->va_start;
+       unsigned int vn_id = decode_vn_id(va->flags);
+       struct vmap_node *vn;
        unsigned long nr_lazy;
 
        if (WARN_ON_ONCE(!list_empty(&va->list)))
@@ -1899,12 +2307,15 @@ static void free_vmap_area_noflush(struct vmap_area *va)
                                PAGE_SHIFT, &vmap_lazy_nr);
 
        /*
-        * Merge or place it to the purge tree/list.
+        * If it was requested by a certain node, we would like to
+        * return it to that node, i.e. to its pool for later reuse.
         */
-       spin_lock(&purge_vmap_area_lock);
-       merge_or_add_vmap_area(va,
-               &purge_vmap_area_root, &purge_vmap_area_list);
-       spin_unlock(&purge_vmap_area_lock);
+       vn = is_vn_id_valid(vn_id) ?
+               id_to_node(vn_id) : addr_to_node(va->va_start);
+
+       spin_lock(&vn->lazy.lock);
+       insert_vmap_area(va, &vn->lazy.root, &vn->lazy.head);
+       spin_unlock(&vn->lazy.lock);
 
        trace_free_vmap_area_noflush(va_start, nr_lazy, nr_lazy_max);
 
@@ -1928,26 +2339,62 @@ static void free_unmap_vmap_area(struct vmap_area *va)
 
 struct vmap_area *find_vmap_area(unsigned long addr)
 {
+       struct vmap_node *vn;
        struct vmap_area *va;
+       int i, j;
 
-       spin_lock(&vmap_area_lock);
-       va = __find_vmap_area(addr, &vmap_area_root);
-       spin_unlock(&vmap_area_lock);
+       /*
+        * addr_to_node_id(addr) converts an address to the index of
+        * the node where a VA is located. If the VA spans several
+        * zones and the passed addr is not the same as va->va_start,
+        * which is not common, we may need to scan extra nodes. See
+        * an example:
+        *
+        *      <----va---->
+        * -|-----|-----|-----|-----|-
+        *     1     2     0     1
+        *
+        * The VA resides in node 1 whereas it spans nodes 1, 2 and 0.
+        * If the passed addr falls into node 2 or 0, we must do extra
+        * work.
+        */
+       i = j = addr_to_node_id(addr);
+       do {
+               vn = &vmap_nodes[i];
 
-       return va;
+               spin_lock(&vn->busy.lock);
+               va = __find_vmap_area(addr, &vn->busy.root);
+               spin_unlock(&vn->busy.lock);
+
+               if (va)
+                       return va;
+       } while ((i = (i + 1) % nr_vmap_nodes) != j);
+
+       return NULL;
 }
 
 static struct vmap_area *find_unlink_vmap_area(unsigned long addr)
 {
+       struct vmap_node *vn;
        struct vmap_area *va;
+       int i, j;
 
-       spin_lock(&vmap_area_lock);
-       va = __find_vmap_area(addr, &vmap_area_root);
-       if (va)
-               unlink_va(va, &vmap_area_root);
-       spin_unlock(&vmap_area_lock);
+       /*
+        * See the comment in find_vmap_area() about the loop.
+        */
+       i = j = addr_to_node_id(addr);
+       do {
+               vn = &vmap_nodes[i];
 
-       return va;
+               spin_lock(&vn->busy.lock);
+               va = __find_vmap_area(addr, &vn->busy.root);
+               if (va)
+                       unlink_va(va, &vn->busy.root);
+               spin_unlock(&vn->busy.lock);
+
+               if (va)
+                       return va;
+       } while ((i = (i + 1) % nr_vmap_nodes) != j);
+
+       return NULL;
 }
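
Both lookups above start at the hashed node and wrap once around all
nodes, so the worst case (an address that hashes to the "wrong" node
for a spanning VA) is a full scan. The traversal order in isolation,
with an example node count and start index:

#include <stdio.h>

int main(void)
{
        unsigned int nr_vmap_nodes = 4; /* example value */
        unsigned int start = 2;         /* addr_to_node_id(addr) */
        unsigned int i, j;

        i = j = start;
        do {
                printf("checking node %u\n", i);
        } while ((i = (i + 1) % nr_vmap_nodes) != j);

        return 0;
}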
 
 /*** Per cpu kva allocator ***/
@@ -2149,6 +2596,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
 
 static void free_vmap_block(struct vmap_block *vb)
 {
+       struct vmap_node *vn;
        struct vmap_block *tmp;
        struct xarray *xa;
 
@@ -2156,9 +2604,10 @@ static void free_vmap_block(struct vmap_block *vb)
        tmp = xa_erase(xa, addr_to_vb_idx(vb->va->va_start));
        BUG_ON(tmp != vb);
 
-       spin_lock(&vmap_area_lock);
-       unlink_va(vb->va, &vmap_area_root);
-       spin_unlock(&vmap_area_lock);
+       vn = addr_to_node(vb->va->va_start);
+       spin_lock(&vn->busy.lock);
+       unlink_va(vb->va, &vn->busy.root);
+       spin_unlock(&vn->busy.lock);
 
        free_vmap_area_noflush(vb->va);
        kfree_rcu(vb, rcu_head);
@@ -2375,7 +2824,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
        }
        free_purged_blocks(&purge_list);
 
-       if (!__purge_vmap_area_lazy(start, end) && flush)
+       if (!__purge_vmap_area_lazy(start, end, false) && flush)
                flush_tlb_kernel_range(start, end);
        mutex_unlock(&vmap_purge_lock);
 }
@@ -2569,47 +3018,6 @@ void __init vm_area_register_early(struct vm_struct *vm, size_t align)
        kasan_populate_early_vm_area_shadow(vm->addr, vm->size);
 }
 
-static void vmap_init_free_space(void)
-{
-       unsigned long vmap_start = 1;
-       const unsigned long vmap_end = ULONG_MAX;
-       struct vmap_area *busy, *free;
-
-       /*
-        *     B     F     B     B     B     F
-        * -|-----|.....|-----|-----|-----|.....|-
-        *  |           The KVA space           |
-        *  |<--------------------------------->|
-        */
-       list_for_each_entry(busy, &vmap_area_list, list) {
-               if (busy->va_start - vmap_start > 0) {
-                       free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
-                       if (!WARN_ON_ONCE(!free)) {
-                               free->va_start = vmap_start;
-                               free->va_end = busy->va_start;
-
-                               insert_vmap_area_augment(free, NULL,
-                                       &free_vmap_area_root,
-                                               &free_vmap_area_list);
-                       }
-               }
-
-               vmap_start = busy->va_end;
-       }
-
-       if (vmap_end - vmap_start > 0) {
-               free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
-               if (!WARN_ON_ONCE(!free)) {
-                       free->va_start = vmap_start;
-                       free->va_end = vmap_end;
-
-                       insert_vmap_area_augment(free, NULL,
-                               &free_vmap_area_root,
-                                       &free_vmap_area_list);
-               }
-       }
-}
-
 static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
        struct vmap_area *va, unsigned long flags, const void *caller)
 {
@@ -2623,9 +3031,11 @@ static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
 static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
                              unsigned long flags, const void *caller)
 {
-       spin_lock(&vmap_area_lock);
+       struct vmap_node *vn = addr_to_node(va->va_start);
+
+       spin_lock(&vn->busy.lock);
        setup_vmalloc_vm_locked(vm, va, flags, caller);
-       spin_unlock(&vmap_area_lock);
+       spin_unlock(&vn->busy.lock);
 }
 
 static void clear_vm_uninitialized_flag(struct vm_struct *vm)
@@ -3813,10 +4223,12 @@ finished:
  */
 long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
 {
+       struct vmap_node *vn;
        struct vmap_area *va;
        struct vm_struct *vm;
        char *vaddr;
        size_t n, size, flags, remains;
+       unsigned long next;
 
        addr = kasan_reset_tag(addr);
 
@@ -3826,16 +4238,15 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
 
        remains = count;
 
-       spin_lock(&vmap_area_lock);
-       va = find_vmap_area_exceed_addr((unsigned long)addr);
-       if (!va)
+       vn = find_vmap_area_exceed_addr_lock((unsigned long) addr, &va);
+       if (!vn)
                goto finished_zero;
 
        /* no intersects with alive vmap_area */
        if ((unsigned long)addr + remains <= va->va_start)
                goto finished_zero;
 
-       list_for_each_entry_from(va, &vmap_area_list, list) {
+       do {
                size_t copied;
 
                if (remains == 0)
@@ -3850,10 +4261,10 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
                WARN_ON(flags == VMAP_BLOCK);
 
                if (!vm && !flags)
-                       continue;
+                       goto next_va;
 
                if (vm && (vm->flags & VM_UNINITIALIZED))
-                       continue;
+                       goto next_va;
 
                /* Pair with smp_wmb() in clear_vm_uninitialized_flag() */
                smp_rmb();
@@ -3862,7 +4273,7 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
                size = vm ? get_vm_area_size(vm) : va_size(va);
 
                if (addr >= vaddr + size)
-                       continue;
+                       goto next_va;
 
                if (addr < vaddr) {
                        size_t to_zero = min_t(size_t, vaddr - addr, remains);
@@ -3891,15 +4302,22 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
 
                if (copied != n)
                        goto finished;
-       }
+
+       next_va:
+               next = va->va_end;
+               spin_unlock(&vn->busy.lock);
+       } while ((vn = find_vmap_area_exceed_addr_lock(next, &va)));
 
 finished_zero:
-       spin_unlock(&vmap_area_lock);
+       if (vn)
+               spin_unlock(&vn->busy.lock);
+
        /* zero-fill memory holes */
        return count - remains + zero_iter(iter, remains);
 finished:
        /* Nothing remains, or We couldn't copy/zero everything. */
-       spin_unlock(&vmap_area_lock);
+       if (vn)
+               spin_unlock(&vn->busy.lock);
 
        return count - remains;
 }
@@ -4212,9 +4630,8 @@ retry:
                        /* It is a BUG(), but trigger recovery instead. */
                        goto recovery;
 
-               ret = adjust_va_to_fit_type(&free_vmap_area_root,
-                                           &free_vmap_area_list,
-                                           va, start, size);
+               ret = va_clip(&free_vmap_area_root,
+                       &free_vmap_area_list, va, start, size);
                if (WARN_ON_ONCE(unlikely(ret)))
                        /* It is a BUG(), but trigger recovery instead. */
                        goto recovery;
@@ -4234,14 +4651,15 @@ retry:
        }
 
        /* insert all vm's */
-       spin_lock(&vmap_area_lock);
        for (area = 0; area < nr_vms; area++) {
-               insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list);
+               struct vmap_node *vn = addr_to_node(vas[area]->va_start);
 
+               spin_lock(&vn->busy.lock);
+               insert_vmap_area(vas[area], &vn->busy.root, &vn->busy.head);
                setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
                                 pcpu_get_vm_areas);
+               spin_unlock(&vn->busy.lock);
        }
-       spin_unlock(&vmap_area_lock);
 
        /*
         * Mark allocated areas as accessible. Do it now as a best-effort
@@ -4350,60 +4768,39 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 #ifdef CONFIG_PRINTK
 bool vmalloc_dump_obj(void *object)
 {
-       void *objp = (void *)PAGE_ALIGN((unsigned long)object);
        const void *caller;
        struct vm_struct *vm;
        struct vmap_area *va;
+       struct vmap_node *vn;
        unsigned long addr;
        unsigned int nr_pages;
 
-       if (!spin_trylock(&vmap_area_lock))
+       addr = PAGE_ALIGN((unsigned long) object);
+       vn = addr_to_node(addr);
+
+       if (!spin_trylock(&vn->busy.lock))
                return false;
-       va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
-       if (!va) {
-               spin_unlock(&vmap_area_lock);
+
+       va = __find_vmap_area(addr, &vn->busy.root);
+       if (!va || !va->vm) {
+               spin_unlock(&vn->busy.lock);
                return false;
        }
 
        vm = va->vm;
-       if (!vm) {
-               spin_unlock(&vmap_area_lock);
-               return false;
-       }
-       addr = (unsigned long)vm->addr;
+       addr = (unsigned long) vm->addr;
        caller = vm->caller;
        nr_pages = vm->nr_pages;
-       spin_unlock(&vmap_area_lock);
+       spin_unlock(&vn->busy.lock);
+
        pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
                nr_pages, addr, caller);
+
        return true;
 }
 #endif
 
 #ifdef CONFIG_PROC_FS
-static void *s_start(struct seq_file *m, loff_t *pos)
-       __acquires(&vmap_purge_lock)
-       __acquires(&vmap_area_lock)
-{
-       mutex_lock(&vmap_purge_lock);
-       spin_lock(&vmap_area_lock);
-
-       return seq_list_start(&vmap_area_list, *pos);
-}
-
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
-{
-       return seq_list_next(p, &vmap_area_list, pos);
-}
-
-static void s_stop(struct seq_file *m, void *p)
-       __releases(&vmap_area_lock)
-       __releases(&vmap_purge_lock)
-{
-       spin_unlock(&vmap_area_lock);
-       mutex_unlock(&vmap_purge_lock);
-}
-
 static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 {
        if (IS_ENABLED(CONFIG_NUMA)) {
@@ -4430,105 +4827,237 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 
 static void show_purge_info(struct seq_file *m)
 {
+       struct vmap_node *vn;
        struct vmap_area *va;
+       int i;
+
+       for (i = 0; i < nr_vmap_nodes; i++) {
+               vn = &vmap_nodes[i];
 
-       spin_lock(&purge_vmap_area_lock);
-       list_for_each_entry(va, &purge_vmap_area_list, list) {
-               seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
-                       (void *)va->va_start, (void *)va->va_end,
-                       va->va_end - va->va_start);
+               spin_lock(&vn->lazy.lock);
+               list_for_each_entry(va, &vn->lazy.head, list) {
+                       seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
+                               (void *)va->va_start, (void *)va->va_end,
+                               va->va_end - va->va_start);
+               }
+               spin_unlock(&vn->lazy.lock);
        }
-       spin_unlock(&purge_vmap_area_lock);
 }
 
-static int s_show(struct seq_file *m, void *p)
+static int vmalloc_info_show(struct seq_file *m, void *p)
 {
+       struct vmap_node *vn;
        struct vmap_area *va;
        struct vm_struct *v;
+       int i;
 
-       va = list_entry(p, struct vmap_area, list);
+       for (i = 0; i < nr_vmap_nodes; i++) {
+               vn = &vmap_nodes[i];
 
-       if (!va->vm) {
-               if (va->flags & VMAP_RAM)
-                       seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
-                               (void *)va->va_start, (void *)va->va_end,
-                               va->va_end - va->va_start);
+               spin_lock(&vn->busy.lock);
+               list_for_each_entry(va, &vn->busy.head, list) {
+                       if (!va->vm) {
+                               if (va->flags & VMAP_RAM)
+                                       seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
+                                               (void *)va->va_start, (void *)va->va_end,
+                                               va->va_end - va->va_start);
 
-               goto final;
-       }
+                               continue;
+                       }
 
-       v = va->vm;
+                       v = va->vm;
 
-       seq_printf(m, "0x%pK-0x%pK %7ld",
-               v->addr, v->addr + v->size, v->size);
+                       seq_printf(m, "0x%pK-0x%pK %7ld",
+                               v->addr, v->addr + v->size, v->size);
 
-       if (v->caller)
-               seq_printf(m, " %pS", v->caller);
+                       if (v->caller)
+                               seq_printf(m, " %pS", v->caller);
 
-       if (v->nr_pages)
-               seq_printf(m, " pages=%d", v->nr_pages);
+                       if (v->nr_pages)
+                               seq_printf(m, " pages=%d", v->nr_pages);
 
-       if (v->phys_addr)
-               seq_printf(m, " phys=%pa", &v->phys_addr);
+                       if (v->phys_addr)
+                               seq_printf(m, " phys=%pa", &v->phys_addr);
 
-       if (v->flags & VM_IOREMAP)
-               seq_puts(m, " ioremap");
+                       if (v->flags & VM_IOREMAP)
+                               seq_puts(m, " ioremap");
 
-       if (v->flags & VM_SPARSE)
-               seq_puts(m, " sparse");
+                       if (v->flags & VM_SPARSE)
+                               seq_puts(m, " sparse");
 
-       if (v->flags & VM_ALLOC)
-               seq_puts(m, " vmalloc");
+                       if (v->flags & VM_ALLOC)
+                               seq_puts(m, " vmalloc");
 
-       if (v->flags & VM_MAP)
-               seq_puts(m, " vmap");
+                       if (v->flags & VM_MAP)
+                               seq_puts(m, " vmap");
 
-       if (v->flags & VM_USERMAP)
-               seq_puts(m, " user");
+                       if (v->flags & VM_USERMAP)
+                               seq_puts(m, " user");
 
-       if (v->flags & VM_DMA_COHERENT)
-               seq_puts(m, " dma-coherent");
+                       if (v->flags & VM_DMA_COHERENT)
+                               seq_puts(m, " dma-coherent");
 
-       if (is_vmalloc_addr(v->pages))
-               seq_puts(m, " vpages");
+                       if (is_vmalloc_addr(v->pages))
+                               seq_puts(m, " vpages");
 
-       show_numa_info(m, v);
-       seq_putc(m, '\n');
+                       show_numa_info(m, v);
+                       seq_putc(m, '\n');
+               }
+               spin_unlock(&vn->busy.lock);
+       }
 
        /*
         * As a final step, dump "unpurged" areas.
         */
-final:
-       if (list_is_last(&va->list, &vmap_area_list))
-               show_purge_info(m);
-
+       show_purge_info(m);
        return 0;
 }
 
-static const struct seq_operations vmalloc_op = {
-       .start = s_start,
-       .next = s_next,
-       .stop = s_stop,
-       .show = s_show,
-};
-
 static int __init proc_vmalloc_init(void)
 {
+       void *priv_data = NULL;
+
        if (IS_ENABLED(CONFIG_NUMA))
-               proc_create_seq_private("vmallocinfo", 0400, NULL,
-                               &vmalloc_op,
-                               nr_node_ids * sizeof(unsigned int), NULL);
-       else
-               proc_create_seq("vmallocinfo", 0400, NULL, &vmalloc_op);
+               priv_data = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
+
+       proc_create_single_data("vmallocinfo",
+               0400, NULL, vmalloc_info_show, priv_data);
+
        return 0;
 }
 module_init(proc_vmalloc_init);
 
 #endif
 
+static void __init vmap_init_free_space(void)
+{
+       unsigned long vmap_start = 1;
+       const unsigned long vmap_end = ULONG_MAX;
+       struct vmap_area *free;
+       struct vm_struct *busy;
+
+       /*
+        *     B     F     B     B     B     F
+        * -|-----|.....|-----|-----|-----|.....|-
+        *  |           The KVA space           |
+        *  |<--------------------------------->|
+        */
+       for (busy = vmlist; busy; busy = busy->next) {
+               if ((unsigned long) busy->addr - vmap_start > 0) {
+                       free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
+                       if (!WARN_ON_ONCE(!free)) {
+                               free->va_start = vmap_start;
+                               free->va_end = (unsigned long) busy->addr;
+
+                               insert_vmap_area_augment(free, NULL,
+                                       &free_vmap_area_root,
+                                               &free_vmap_area_list);
+                       }
+               }
+
+               vmap_start = (unsigned long) busy->addr + busy->size;
+       }
+
+       if (vmap_end - vmap_start > 0) {
+               free = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
+               if (!WARN_ON_ONCE(!free)) {
+                       free->va_start = vmap_start;
+                       free->va_end = vmap_end;
+
+                       insert_vmap_area_augment(free, NULL,
+                               &free_vmap_area_root,
+                                       &free_vmap_area_list);
+               }
+       }
+}
+
+static void vmap_init_nodes(void)
+{
+       struct vmap_node *vn;
+       int i, n;
+
+#if BITS_PER_LONG == 64
+       /*
+        * The maximum number of nodes is fixed and bound to 128, so
+        * the scale factor is 1 for systems where the number of cores
+        * is less than or equal to this threshold.
+        *
+        * As for NUMA awareness: for bigger systems, for example
+        * multi-socket NUMA where we can end up with thousands of
+        * cores in total, "sub-numa-clustering" should be added.
+        *
+        * In that case a NUMA domain is considered a single entity
+        * with dedicated sub-nodes in it, each describing one group
+        * or set of cores. Per-domain purging is then supposed to be
+        * added, as well as per-domain balancing.
+        */
+       n = clamp_t(unsigned int, num_possible_cpus(), 1, 128);
+
+       if (n > 1) {
+               vn = kmalloc_array(n, sizeof(*vn), GFP_NOWAIT | __GFP_NOWARN);
+               if (vn) {
+                       /* Node partition is 16 pages. */
+                       vmap_zone_size = (1 << 4) * PAGE_SIZE;
+                       nr_vmap_nodes = n;
+                       vmap_nodes = vn;
+               } else {
+                       pr_err("Failed to allocate an array. Disable a node layer\n");
+               }
+       }
+#endif
+
+       for (n = 0; n < nr_vmap_nodes; n++) {
+               vn = &vmap_nodes[n];
+               vn->busy.root = RB_ROOT;
+               INIT_LIST_HEAD(&vn->busy.head);
+               spin_lock_init(&vn->busy.lock);
+
+               vn->lazy.root = RB_ROOT;
+               INIT_LIST_HEAD(&vn->lazy.head);
+               spin_lock_init(&vn->lazy.lock);
+
+               for (i = 0; i < MAX_VA_SIZE_PAGES; i++) {
+                       INIT_LIST_HEAD(&vn->pool[i].head);
+                       WRITE_ONCE(vn->pool[i].len, 0);
+               }
+
+               spin_lock_init(&vn->pool_lock);
+       }
+}
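
The sizing policy in vmap_init_nodes() is simply the possible-CPU count
clamped to [1, 128], combined with the fixed 16-page zone stride set
above. For instance, with invented CPU counts:

#include <stdio.h>

int main(void)
{
        unsigned int cpus[] = { 1, 8, 64, 256 };

        for (int i = 0; i < 4; i++) {
                unsigned int n = cpus[i];

                if (n > 128)
                        n = 128;        /* clamp_t(..., 1, 128) */

                printf("%3u possible CPUs -> %3u vmap nodes\n", cpus[i], n);
        }
        return 0;
}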
+
+static unsigned long
+vmap_node_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+       unsigned long count;
+       struct vmap_node *vn;
+       int i, j;
+
+       for (count = 0, i = 0; i < nr_vmap_nodes; i++) {
+               vn = &vmap_nodes[i];
+
+               for (j = 0; j < MAX_VA_SIZE_PAGES; j++)
+                       count += READ_ONCE(vn->pool[j].len);
+       }
+
+       return count ? count : SHRINK_EMPTY;
+}
+
+static unsigned long
+vmap_node_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+       int i;
+
+       for (i = 0; i < nr_vmap_nodes; i++)
+               decay_va_pool_node(&vmap_nodes[i], true);
+
+       return SHRINK_STOP;
+}
+
 void __init vmalloc_init(void)
 {
+       struct shrinker *vmap_node_shrinker;
        struct vmap_area *va;
+       struct vmap_node *vn;
        struct vm_struct *tmp;
        int i;
 
@@ -4550,6 +5079,11 @@ void __init vmalloc_init(void)
                xa_init(&vbq->vmap_blocks);
        }
 
+       /*
+        * Setup nodes before importing vmlist.
+        */
+       vmap_init_nodes();
+
        /* Import existing vmlist entries. */
        for (tmp = vmlist; tmp; tmp = tmp->next) {
                va = kmem_cache_zalloc(vmap_area_cachep, GFP_NOWAIT);
@@ -4559,7 +5093,9 @@ void __init vmalloc_init(void)
                va->va_start = (unsigned long)tmp->addr;
                va->va_end = va->va_start + tmp->size;
                va->vm = tmp;
-               insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
+
+               vn = addr_to_node(va->va_start);
+               insert_vmap_area(va, &vn->busy.root, &vn->busy.head);
        }
 
        /*
@@ -4567,4 +5103,14 @@ void __init vmalloc_init(void)
         */
        vmap_init_free_space();
        vmap_initialized = true;
+
+       vmap_node_shrinker = shrinker_alloc(0, "vmap-node");
+       if (!vmap_node_shrinker) {
+               pr_err("Failed to allocate vmap-node shrinker!\n");
+               return;
+       }
+
+       vmap_node_shrinker->count_objects = vmap_node_shrink_count;
+       vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
+       shrinker_register(vmap_node_shrinker);
 }
index 4255619a1a314717df613e20090b160fce72a7e9..3ef654addd44c26f999c84ba82a10f005804ece4 100644 (file)
@@ -108,6 +108,12 @@ struct scan_control {
        /* Can folios be swapped as part of reclaim? */
        unsigned int may_swap:1;
 
+       /* Disallow cache_trim_mode from being turned on as part of reclaim? */
+       unsigned int no_cache_trim_mode:1;
+
+       /* Has cache_trim_mode failed at least once? */
+       unsigned int cache_trim_mode_failed:1;
+
        /* Proactive reclaim invoked by userspace through memory.reclaim */
        unsigned int proactive:1;
 
@@ -1006,14 +1012,15 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
                struct pglist_data *pgdat, struct scan_control *sc,
                struct reclaim_stat *stat, bool ignore_references)
 {
+       struct folio_batch free_folios;
        LIST_HEAD(ret_folios);
-       LIST_HEAD(free_folios);
        LIST_HEAD(demote_folios);
        unsigned int nr_reclaimed = 0;
        unsigned int pgactivate = 0;
        bool do_demote_pass;
        struct swap_iocb *plug = NULL;
 
+       folio_batch_init(&free_folios);
        memset(stat, 0, sizeof(*stat));
        cond_resched();
        do_demote_pass = can_demote(pgdat->node_id, sc);
@@ -1412,14 +1419,14 @@ free_it:
                 */
                nr_reclaimed += nr_pages;
 
-               /*
-                * Is there need to periodically free_folio_list? It would
-                * appear not as the counts should be low
-                */
-               if (unlikely(folio_test_large(folio)))
-                       destroy_large_folio(folio);
-               else
-                       list_add(&folio->lru, &free_folios);
+               if (folio_test_large(folio) &&
+                   folio_test_large_rmappable(folio))
+                       folio_undo_large_rmappable(folio);
+               if (folio_batch_add(&free_folios, folio) == 0) {
+                       mem_cgroup_uncharge_folios(&free_folios);
+                       try_to_unmap_flush();
+                       free_unref_folios(&free_folios);
+               }
                continue;
 
 activate_locked_split:
@@ -1483,9 +1490,9 @@ keep:
 
        pgactivate = stat->nr_activate[0] + stat->nr_activate[1];
 
-       mem_cgroup_uncharge_list(&free_folios);
+       mem_cgroup_uncharge_folios(&free_folios);
        try_to_unmap_flush();
-       free_unref_page_list(&free_folios);
+       free_unref_folios(&free_folios);
 
        list_splice(&ret_folios, folio_list);
        count_vm_events(PGACTIVATE, pgactivate);
@@ -1744,17 +1751,17 @@ bool folio_isolate_lru(struct folio *folio)
  * the LRU list will go small and be scanned faster than necessary, leading to
  * unnecessary swapping, thrashing and OOM.
  */
-static int too_many_isolated(struct pglist_data *pgdat, int file,
+static bool too_many_isolated(struct pglist_data *pgdat, int file,
                struct scan_control *sc)
 {
        unsigned long inactive, isolated;
        bool too_many;
 
        if (current_is_kswapd())
-               return 0;
+               return false;
 
        if (!writeback_throttling_sane(sc))
-               return 0;
+               return false;
 
        if (file) {
                inactive = node_page_state(pgdat, NR_INACTIVE_FILE);
@@ -1783,7 +1790,6 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
 
 /*
  * move_folios_to_lru() moves folios from private @list to appropriate LRU list.
- * On return, @list is reused as a list of folios to be freed by the caller.
  *
  * Returns the number of pages moved to the given lruvec.
  */
@@ -1791,8 +1797,9 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec,
                struct list_head *list)
 {
        int nr_pages, nr_moved = 0;
-       LIST_HEAD(folios_to_free);
+       struct folio_batch free_folios;
 
+       folio_batch_init(&free_folios);
        while (!list_empty(list)) {
                struct folio *folio = lru_to_folio(list);
 
@@ -1821,12 +1828,15 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec,
                if (unlikely(folio_put_testzero(folio))) {
                        __folio_clear_lru_flags(folio);
 
-                       if (unlikely(folio_test_large(folio))) {
+                       if (folio_test_large(folio) &&
+                           folio_test_large_rmappable(folio))
+                               folio_undo_large_rmappable(folio);
+                       if (folio_batch_add(&free_folios, folio) == 0) {
                                spin_unlock_irq(&lruvec->lru_lock);
-                               destroy_large_folio(folio);
+                               mem_cgroup_uncharge_folios(&free_folios);
+                               free_unref_folios(&free_folios);
                                spin_lock_irq(&lruvec->lru_lock);
-                       } else
-                               list_add(&folio->lru, &folios_to_free);
+                       }
 
                        continue;
                }
@@ -1843,10 +1853,12 @@ static unsigned int move_folios_to_lru(struct lruvec *lruvec,
                        workingset_age_nonresident(lruvec, nr_pages);
        }
 
-       /*
-        * To save our caller's stack, now use input list for pages to free.
-        */
-       list_splice(&folios_to_free, list);
+       if (free_folios.nr) {
+               spin_unlock_irq(&lruvec->lru_lock);
+               mem_cgroup_uncharge_folios(&free_folios);
+               free_unref_folios(&free_folios);
+               spin_lock_irq(&lruvec->lru_lock);
+       }
 
        return nr_moved;
 }
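
Both vmscan conversions above replace open-coded free lists with the
same folio_batch idiom: folio_batch_add() returns the space left after
adding, so a zero return means "batch just filled, flush now", with one
final flush for the tail. A shape-only userspace sketch of that pattern
(the batch size and payload are stand-ins, not the kernel types):

#include <stdio.h>

#define BATCH_SIZE 31   /* stand-in for the kernel's batch capacity */

struct batch {
        unsigned int nr;
        int items[BATCH_SIZE];
};

/* Returns remaining space after adding; 0 means the batch just filled. */
static unsigned int batch_add(struct batch *b, int item)
{
        b->items[b->nr++] = item;
        return BATCH_SIZE - b->nr;
}

static void batch_flush(struct batch *b)
{
        if (b->nr)
                printf("flushing %u items\n", b->nr);
        b->nr = 0;
}

int main(void)
{
        struct batch b = { 0 };

        for (int i = 0; i < 70; i++)
                if (batch_add(&b, i) == 0)
                        batch_flush(&b);        /* mid-loop flush when full */

        batch_flush(&b);                        /* final flush for the tail */
        return 0;
}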
@@ -1925,8 +1937,6 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
        spin_unlock_irq(&lruvec->lru_lock);
 
        lru_note_cost(lruvec, file, stat.nr_pageout, nr_scanned - nr_reclaimed);
-       mem_cgroup_uncharge_list(&folio_list);
-       free_unref_page_list(&folio_list);
 
        /*
         * If dirty folios are scanned that are not queued for IO, it
@@ -1998,7 +2008,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
        LIST_HEAD(l_inactive);
        unsigned nr_deactivate, nr_activate;
        unsigned nr_rotated = 0;
-       int file = is_file_lru(lru);
+       bool file = is_file_lru(lru);
        struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 
        lru_add_drain();
@@ -2067,8 +2077,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
        nr_activate = move_folios_to_lru(lruvec, &l_active);
        nr_deactivate = move_folios_to_lru(lruvec, &l_inactive);
-       /* Keep all free folios in l_active list */
-       list_splice(&l_inactive, &l_active);
 
        __count_vm_events(PGDEACTIVATE, nr_deactivate);
        __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
@@ -2078,14 +2086,13 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
        if (nr_rotated)
                lru_note_cost(lruvec, file, 0, nr_rotated);
-       mem_cgroup_uncharge_list(&l_active);
-       free_unref_page_list(&l_active);
        trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
                        nr_deactivate, nr_rotated, sc->priority, file);
 }
 
 static unsigned int reclaim_folio_list(struct list_head *folio_list,
-                                     struct pglist_data *pgdat)
+                                     struct pglist_data *pgdat,
+                                     bool ignore_references)
 {
        struct reclaim_stat dummy_stat;
        unsigned int nr_reclaimed;
@@ -2098,7 +2105,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
                .no_demotion = 1,
        };
 
-       nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, false);
+       nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, ignore_references);
        while (!list_empty(folio_list)) {
                folio = lru_to_folio(folio_list);
                list_del(&folio->lru);
@@ -2108,7 +2115,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
        return nr_reclaimed;
 }
 
-unsigned long reclaim_pages(struct list_head *folio_list)
+unsigned long reclaim_pages(struct list_head *folio_list, bool ignore_references)
 {
        int nid;
        unsigned int nr_reclaimed = 0;
@@ -2130,11 +2137,12 @@ unsigned long reclaim_pages(struct list_head *folio_list)
                        continue;
                }
 
-               nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid));
+               nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid),
+                                                  ignore_references);
                nid = folio_nid(lru_to_folio(folio_list));
        } while (!list_empty(folio_list));
 
-       nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid));
+       nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid), ignore_references);
 
        memalloc_noreclaim_restore(noreclaim_flag);
 
@@ -2269,7 +2277,8 @@ static void prepare_scan_control(pg_data_t *pgdat, struct scan_control *sc)
         * anonymous pages.
         */
        file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
-       if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
+       if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE) &&
+           !sc->no_cache_trim_mode)
                sc->cache_trim_mode = 1;
        else
                sc->cache_trim_mode = 0;
@@ -2412,7 +2421,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
        denominator = ap + fp;
 out:
        for_each_evictable_lru(lru) {
-               int file = is_file_lru(lru);
+               bool file = is_file_lru(lru);
                unsigned long lruvec_size;
                unsigned long low, min;
                unsigned long scan;
@@ -2879,38 +2888,37 @@ static struct mm_struct *get_next_mm(struct lru_gen_mm_walk *walk)
 
 #endif
 
-static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last)
+static void reset_mm_stats(struct lru_gen_mm_walk *walk, bool last)
 {
        int i;
        int hist;
+       struct lruvec *lruvec = walk->lruvec;
        struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
 
        lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock);
 
-       if (walk) {
-               hist = lru_hist_from_seq(walk->max_seq);
+       hist = lru_hist_from_seq(walk->seq);
 
-               for (i = 0; i < NR_MM_STATS; i++) {
-                       WRITE_ONCE(mm_state->stats[hist][i],
-                                  mm_state->stats[hist][i] + walk->mm_stats[i]);
-                       walk->mm_stats[i] = 0;
-               }
+       for (i = 0; i < NR_MM_STATS; i++) {
+               WRITE_ONCE(mm_state->stats[hist][i],
+                          mm_state->stats[hist][i] + walk->mm_stats[i]);
+               walk->mm_stats[i] = 0;
        }
 
        if (NR_HIST_GENS > 1 && last) {
-               hist = lru_hist_from_seq(mm_state->seq + 1);
+               hist = lru_hist_from_seq(walk->seq + 1);
 
                for (i = 0; i < NR_MM_STATS; i++)
                        WRITE_ONCE(mm_state->stats[hist][i], 0);
        }
 }
 
-static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
-                           struct mm_struct **iter)
+static bool iterate_mm_list(struct lru_gen_mm_walk *walk, struct mm_struct **iter)
 {
        bool first = false;
        bool last = false;
        struct mm_struct *mm = NULL;
+       struct lruvec *lruvec = walk->lruvec;
        struct mem_cgroup *memcg = lruvec_memcg(lruvec);
        struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
        struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
@@ -2927,9 +2935,9 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
         */
        spin_lock(&mm_list->lock);
 
-       VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq);
+       VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->seq);
 
-       if (walk->max_seq <= mm_state->seq)
+       if (walk->seq <= mm_state->seq)
                goto done;
 
        if (!mm_state->head)
@@ -2954,12 +2962,12 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
        } while (!(mm = get_next_mm(walk)));
 done:
        if (*iter || last)
-               reset_mm_stats(lruvec, walk, last);
+               reset_mm_stats(walk, last);
 
        spin_unlock(&mm_list->lock);
 
        if (mm && first)
-               reset_bloom_filter(mm_state, walk->max_seq + 1);
+               reset_bloom_filter(mm_state, walk->seq + 1);
 
        if (*iter)
                mmput_async(*iter);
@@ -2969,7 +2977,7 @@ done:
        return last;
 }
 
-static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
+static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long seq)
 {
        bool success = false;
        struct mem_cgroup *memcg = lruvec_memcg(lruvec);
@@ -2978,13 +2986,12 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
 
        spin_lock(&mm_list->lock);
 
-       VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq);
+       VM_WARN_ON_ONCE(mm_state->seq + 1 < seq);
 
-       if (max_seq > mm_state->seq) {
+       if (seq > mm_state->seq) {
                mm_state->head = NULL;
                mm_state->tail = NULL;
                WRITE_ONCE(mm_state->seq, mm_state->seq + 1);
-               reset_mm_stats(lruvec, NULL, true);
                success = true;
        }
 
@@ -3159,9 +3166,10 @@ static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio,
        walk->nr_pages[new_gen][type][zone] += delta;
 }
 
-static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk)
+static void reset_batch_size(struct lru_gen_mm_walk *walk)
 {
        int gen, type, zone;
+       struct lruvec *lruvec = walk->lruvec;
        struct lru_gen_folio *lrugen = &lruvec->lrugen;
 
        walk->batched = 0;
@@ -3331,7 +3339,8 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
        struct lru_gen_mm_walk *walk = args->private;
        struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
        struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
-       int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
+       DEFINE_MAX_SEQ(walk->lruvec);
+       int old_gen, new_gen = lru_gen_from_seq(max_seq);
 
        pte = pte_offset_map_nolock(args->mm, pmd, start & PMD_MASK, &ptl);
        if (!pte)
@@ -3398,7 +3407,8 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
        struct lru_gen_mm_walk *walk = args->private;
        struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
        struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
-       int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
+       DEFINE_MAX_SEQ(walk->lruvec);
+       int old_gen, new_gen = lru_gen_from_seq(max_seq);
 
        VM_WARN_ON_ONCE(pud_leaf(*pud));
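
walk->max_seq is gone from the walk state here; the page-table walkers instead take a fresh read of the generation counter through DEFINE_MAX_SEQ(). Elsewhere in mm/vmscan.c that helper is essentially a single torn-free load (paraphrased):

        /* paraphrase of DEFINE_MAX_SEQ(lruvec) in mm/vmscan.c */
        unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq);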
 
@@ -3529,7 +3539,7 @@ restart:
                        walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
                }
 
-               if (!walk->force_scan && !test_bloom_filter(mm_state, walk->max_seq, pmd + i))
+               if (!walk->force_scan && !test_bloom_filter(mm_state, walk->seq, pmd + i))
                        continue;
 
                walk->mm_stats[MM_NONLEAF_FOUND]++;
@@ -3540,7 +3550,7 @@ restart:
                walk->mm_stats[MM_NONLEAF_ADDED]++;
 
                /* carry over to the next generation */
-               update_bloom_filter(mm_state, walk->max_seq + 1, pmd + i);
+               update_bloom_filter(mm_state, walk->seq + 1, pmd + i);
        }
 
        walk_pmd_range_locked(pud, -1, vma, args, bitmap, &first);
@@ -3591,7 +3601,7 @@ done:
        return -EAGAIN;
 }
 
-static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_mm_walk *walk)
+static void walk_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
 {
        static const struct mm_walk_ops mm_walk_ops = {
                .test_walk = should_skip_vma,
@@ -3600,6 +3610,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
        };
 
        int err;
+       struct lruvec *lruvec = walk->lruvec;
        struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 
        walk->next_addr = FIRST_USER_ADDRESS;
@@ -3610,7 +3621,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
                err = -EBUSY;
 
                /* another thread might have called inc_max_seq() */
-               if (walk->max_seq != max_seq)
+               if (walk->seq != max_seq)
                        break;
 
                /* folio_update_gen() requires stable folio_memcg() */
@@ -3628,7 +3639,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
 
                if (walk->batched) {
                        spin_lock_irq(&lruvec->lru_lock);
-                       reset_batch_size(lruvec, walk);
+                       reset_batch_size(walk);
                        spin_unlock_irq(&lruvec->lru_lock);
                }
 
@@ -3747,7 +3758,7 @@ next:
        return success;
 }
 
-static bool inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+static bool inc_max_seq(struct lruvec *lruvec, unsigned long seq,
                        bool can_swap, bool force_scan)
 {
        bool success;
@@ -3755,14 +3766,14 @@ static bool inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
        int type, zone;
        struct lru_gen_folio *lrugen = &lruvec->lrugen;
 restart:
-       if (max_seq < READ_ONCE(lrugen->max_seq))
+       if (seq < READ_ONCE(lrugen->max_seq))
                return false;
 
        spin_lock_irq(&lruvec->lru_lock);
 
        VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
 
-       success = max_seq == lrugen->max_seq;
+       success = seq == lrugen->max_seq;
        if (!success)
                goto unlock;
 
@@ -3815,8 +3826,8 @@ unlock:
        return success;
 }
 
-static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
-                              struct scan_control *sc, bool can_swap, bool force_scan)
+static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long seq,
+                              bool can_swap, bool force_scan)
 {
        bool success;
        struct lru_gen_mm_walk *walk;
@@ -3824,13 +3835,13 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
        struct lru_gen_folio *lrugen = &lruvec->lrugen;
        struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
 
-       VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));
+       VM_WARN_ON_ONCE(seq > READ_ONCE(lrugen->max_seq));
 
        if (!mm_state)
-               return inc_max_seq(lruvec, max_seq, can_swap, force_scan);
+               return inc_max_seq(lruvec, seq, can_swap, force_scan);
 
        /* see the comment in iterate_mm_list() */
-       if (max_seq <= READ_ONCE(mm_state->seq))
+       if (seq <= READ_ONCE(mm_state->seq))
                return false;
 
        /*
@@ -3840,29 +3851,29 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
         * is less efficient, but it avoids bursty page faults.
         */
        if (!should_walk_mmu()) {
-               success = iterate_mm_list_nowalk(lruvec, max_seq);
+               success = iterate_mm_list_nowalk(lruvec, seq);
                goto done;
        }
 
        walk = set_mm_walk(NULL, true);
        if (!walk) {
-               success = iterate_mm_list_nowalk(lruvec, max_seq);
+               success = iterate_mm_list_nowalk(lruvec, seq);
                goto done;
        }
 
        walk->lruvec = lruvec;
-       walk->max_seq = max_seq;
+       walk->seq = seq;
        walk->can_swap = can_swap;
        walk->force_scan = force_scan;
 
        do {
-               success = iterate_mm_list(lruvec, walk, &mm);
+               success = iterate_mm_list(walk, &mm);
                if (mm)
-                       walk_mm(lruvec, mm, walk);
+                       walk_mm(mm, walk);
        } while (mm);
 done:
        if (success) {
-               success = inc_max_seq(lruvec, max_seq, can_swap, force_scan);
+               success = inc_max_seq(lruvec, seq, can_swap, force_scan);
                WARN_ON_ONCE(!success);
        }
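
With the lruvec stored in the walk itself, aging reduces to: seed the walk with the target sequence number, drain the memcg's mm list, then bump max_seq once the last walker finishes. A standalone model of the sequence gate that serializes this (stub types, illustrative):

        #include <stdbool.h>

        struct mm_state_stub { unsigned long seq; };

        /* model: only one aging pass per target seq may proceed */
        static bool try_advance(struct mm_state_stub *state, unsigned long seq)
        {
                if (seq <= state->seq)
                        return false;   /* someone already aged this far */
                /* ... walk all mm's for this generation ... */
                state->seq = seq;       /* publish completion */
                return true;
        }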
 
@@ -4287,7 +4298,7 @@ static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct sca
 {
        bool success;
 
-       /* swapping inhibited */
+       /* swap constrained */
        if (!(sc->gfp_mask & __GFP_IO) &&
            (folio_test_dirty(folio) ||
             (folio_test_anon(folio) && !folio_test_swapcache(folio))))
@@ -4456,9 +4467,12 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw
        DEFINE_MIN_SEQ(lruvec);
 
        /*
-        * Try to make the obvious choice first. When anon and file are both
-        * available from the same generation, interpret swappiness 1 as file
-        * first and 200 as anon first.
+        * Try to make the obvious choice first, and if anon and file are both
+        * available from the same generation,
+        * 1. Interpret swappiness 1 as file first and MAX_SWAPPINESS as anon
+        *    first.
+        * 2. If !__GFP_IO, file first since clean pagecache is more likely to
+        *    exist than clean swapcache.
         */
        if (!swappiness)
                type = LRU_GEN_FILE;
@@ -4468,6 +4482,8 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw
                type = LRU_GEN_FILE;
        else if (swappiness == 200)
                type = LRU_GEN_ANON;
+       else if (!(sc->gfp_mask & __GFP_IO))
+               type = LRU_GEN_FILE;
        else
                type = get_type_to_scan(lruvec, swappiness, &tier);
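
The ladder above gains the !__GFP_IO branch. A standalone model of its tail (names illustrative; the generation comparisons elided by the diff come before the swappiness checks):

        #include <stdbool.h>

        #define MAX_SWAPPINESS 200

        enum lru_type { TYPE_FILE, TYPE_ANON, TYPE_BY_COST };

        static enum lru_type pick_type(int swappiness, bool can_do_io)
        {
                if (!swappiness)
                        return TYPE_FILE;
                /* ... min_seq comparisons elided ... */
                if (swappiness == 1)
                        return TYPE_FILE;       /* file first */
                if (swappiness == MAX_SWAPPINESS)
                        return TYPE_ANON;       /* anon first */
                if (!can_do_io)
                        return TYPE_FILE;       /* clean page cache likelier */
                return TYPE_BY_COST;            /* get_type_to_scan() */
        }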
 
@@ -4558,8 +4574,10 @@ retry:
        move_folios_to_lru(lruvec, &list);
 
        walk = current->reclaim_state->mm_walk;
-       if (walk && walk->batched)
-               reset_batch_size(lruvec, walk);
+       if (walk && walk->batched) {
+               walk->lruvec = lruvec;
+               reset_batch_size(walk);
+       }
 
        item = PGSTEAL_KSWAPD + reclaimer_offset();
        if (!cgroup_reclaim(sc))
@@ -4569,10 +4587,6 @@ retry:
 
        spin_unlock_irq(&lruvec->lru_lock);
 
-       mem_cgroup_uncharge_list(&list);
-       free_unref_page_list(&list);
-
-       INIT_LIST_HEAD(&list);
        list_splice_init(&clean, &list);
 
        if (!list_empty(&list)) {
@@ -4584,14 +4598,13 @@ retry:
 }
 
 static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
-                            struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan)
+                            bool can_swap, unsigned long *nr_to_scan)
 {
        int gen, type, zone;
        unsigned long old = 0;
        unsigned long young = 0;
        unsigned long total = 0;
        struct lru_gen_folio *lrugen = &lruvec->lrugen;
-       struct mem_cgroup *memcg = lruvec_memcg(lruvec);
        DEFINE_MIN_SEQ(lruvec);
 
        /* whether this lruvec is completely out of cold folios */
@@ -4619,13 +4632,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
                }
        }
 
-       /* try to scrape all its memory if this memcg was deleted */
-       if (!mem_cgroup_online(memcg)) {
-               *nr_to_scan = total;
-               return false;
-       }
-
-       *nr_to_scan = total >> sc->priority;
+       *nr_to_scan = total;
 
        /*
         * The aging tries to be lazy to reduce the overhead, while the eviction
@@ -4657,6 +4664,7 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
  */
 static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap)
 {
+       bool success;
        unsigned long nr_to_scan;
        struct mem_cgroup *memcg = lruvec_memcg(lruvec);
        DEFINE_MAX_SEQ(lruvec);
@@ -4664,15 +4672,18 @@ static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool
        if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg))
                return -1;
 
-       if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan))
-               return nr_to_scan;
+       success = should_run_aging(lruvec, max_seq, can_swap, &nr_to_scan);
 
-       /* skip the aging path at the default priority */
-       if (sc->priority == DEF_PRIORITY)
+       /* try to scrape all its memory if this memcg was deleted */
+       if (nr_to_scan && !mem_cgroup_online(memcg))
                return nr_to_scan;
 
-       /* skip this lruvec as it's low on cold folios */
-       return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0;
+       /* try to get away with not aging at the default priority */
+       if (!success || sc->priority == DEF_PRIORITY)
+               return nr_to_scan >> sc->priority;
+
+       /* stop scanning this lruvec as it's low on cold folios */
+       return try_to_inc_max_seq(lruvec, max_seq, can_swap, false) ? -1 : 0;
 }
 
 static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
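
The memcg-offline special case moves out of should_run_aging() and into get_nr_to_scan(), and the raw total is now scaled by reclaim priority here rather than inside the helper. The resulting policy, as a small standalone model:

        #include <stdbool.h>

        #define DEF_PRIORITY 12

        /* model: a dying memcg is scraped in full; otherwise scan
         * total >> priority (at DEF_PRIORITY that is 1/4096 of it) */
        static long nr_to_scan_policy(unsigned long total, int priority,
                                      bool memcg_online)
        {
                if (total && !memcg_online)
                        return total;
                return total >> priority;
        }
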
@@ -4712,10 +4723,6 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
        unsigned long scanned = 0;
        int swappiness = get_swappiness(lruvec, sc);
 
-       /* clean file folios are more likely to exist */
-       if (swappiness && !(sc->gfp_mask & __GFP_IO))
-               swappiness = 1;
-
        while (true) {
                int delta;
 
@@ -4878,7 +4885,6 @@ static void set_initial_priority(struct pglist_data *pgdat, struct scan_control
 {
        int priority;
        unsigned long reclaimable;
-       struct lruvec *lruvec = mem_cgroup_lruvec(NULL, pgdat);
 
        if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
                return;
@@ -4888,7 +4894,7 @@ static void set_initial_priority(struct pglist_data *pgdat, struct scan_control
         * where reclaimed_to_scanned_ratio = inactive / total.
         */
        reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
-       if (get_swappiness(lruvec, sc))
+       if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
                reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
 
        /* round down reclaimable and round up sc->nr_to_reclaim */
@@ -5332,7 +5338,7 @@ static const struct seq_operations lru_gen_seq_ops = {
        .show = lru_gen_seq_show,
 };
 
-static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc,
+static int run_aging(struct lruvec *lruvec, unsigned long seq,
                     bool can_swap, bool force_scan)
 {
        DEFINE_MAX_SEQ(lruvec);
@@ -5347,7 +5353,7 @@ static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_contr
        if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq)
                return -ERANGE;
 
-       try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, force_scan);
+       try_to_inc_max_seq(lruvec, max_seq, can_swap, force_scan);
 
        return 0;
 }
@@ -5415,7 +5421,7 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq,
 
        switch (cmd) {
        case '+':
-               err = run_aging(lruvec, seq, sc, swappiness, opt);
+               err = run_aging(lruvec, seq, swappiness, opt);
                break;
        case '-':
                err = run_eviction(lruvec, seq, sc, swappiness, opt);
@@ -5987,6 +5993,8 @@ again:
         */
        if (reclaimable)
                pgdat->kswapd_failures = 0;
+       else if (sc->cache_trim_mode)
+               sc->cache_trim_mode_failed = 1;
 }
 
 /*
@@ -6799,6 +6807,7 @@ restart:
                bool raise_priority = true;
                bool balanced;
                bool ret;
+               bool was_frozen;
 
                sc.reclaim_idx = highest_zoneidx;
 
@@ -6897,9 +6906,9 @@ restart:
 
                /* Check if kswapd should be suspending */
                __fs_reclaim_release(_THIS_IP_);
-               ret = try_to_freeze();
+               ret = kthread_freezable_should_stop(&was_frozen);
                __fs_reclaim_acquire(_THIS_IP_);
-               if (ret || kthread_should_stop())
+               if (was_frozen || ret)
                        break;
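
kthread_freezable_should_stop() folds the old try_to_freeze() plus kthread_should_stop() pair into one call: it returns true when the kthread should stop, and reports through its argument whether the thread slept in the freezer on the way. Both call sites in this file now share the same shape (sketch):

        bool was_frozen;

        if (kthread_freezable_should_stop(&was_frozen))
                break;          /* kthread_stop() was called */
        if (was_frozen)
                continue;       /* just thawed: re-read state first */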
 
                /*
@@ -6921,6 +6930,16 @@ restart:
                        sc.priority--;
        } while (sc.priority >= 1);
 
+       /*
+        * Restart only if it went through the priority loop all the way,
+        * but cache_trim_mode didn't work.
+        */
+       if (!sc.nr_reclaimed && sc.priority < 1 &&
+           !sc.no_cache_trim_mode && sc.cache_trim_mode_failed) {
+               sc.no_cache_trim_mode = 1;
+               goto restart;
+       }
+
        if (!sc.nr_reclaimed)
                pgdat->kswapd_failures++;
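
So kswapd makes at most one extra full sweep: if an entire priority loop reclaimed nothing while cache_trim_mode kept failing, the mode is disabled and the loop restarts once; the sc.no_cache_trim_mode check prevents a second restart. A minimal model of that control flow, with balance_once() as an illustrative stand-in for the whole priority loop:

        #include <stdbool.h>

        /* illustrative stand-in for one full priority sweep */
        static unsigned long balance_once(bool allow_cache_trim,
                                          bool *trim_failed)
        {
                *trim_failed = allow_cache_trim; /* pretend trimming failed */
                return allow_cache_trim ? 0 : 32;
        }

        static unsigned long balance_model(void)
        {
                bool trim_failed = false;
                unsigned long reclaimed = balance_once(true, &trim_failed);

                if (!reclaimed && trim_failed)  /* retry exactly once */
                        reclaimed = balance_once(false, &trim_failed);
                return reclaimed;
        }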
 
@@ -7105,7 +7124,7 @@ static int kswapd(void *p)
        WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
        atomic_set(&pgdat->nr_writeback_throttled, 0);
        for ( ; ; ) {
-               bool ret;
+               bool was_frozen;
 
                alloc_order = reclaim_order = READ_ONCE(pgdat->kswapd_order);
                highest_zoneidx = kswapd_highest_zoneidx(pgdat,
@@ -7122,15 +7141,14 @@ kswapd_try_sleep:
                WRITE_ONCE(pgdat->kswapd_order, 0);
                WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
 
-               ret = try_to_freeze();
-               if (kthread_should_stop())
+               if (kthread_freezable_should_stop(&was_frozen))
                        break;
 
                /*
                 * We can speed up thawing tasks if we don't call balance_pgdat
                 * after returning from the refrigerator
                 */
-               if (ret)
+               if (was_frozen)
                        continue;
 
                /*
index 7c76b396b74cfdb39b761124556ee4e428510fdf..7ab05621052dc5a4b5f3add96a92ca54e76b10c2 100644 (file)
@@ -364,8 +364,9 @@ static inline int __idx(struct z3fold_header *zhdr, enum buddy bud)
 }
 
 /*
- * Encodes the handle of a particular buddy within a z3fold page
- * Pool lock should be held as this function accesses first_num
+ * Encodes the handle of a particular buddy within a z3fold page.
+ * Zhdr->page_lock should be held as this function accesses first_num
+ * zhdr->page_lock should be held as this function accesses first_num
+ * if bud != HEADLESS.
  */
 static unsigned long __encode_handle(struct z3fold_header *zhdr,
                                struct z3fold_buddy_slots *slots,
index c937635e0ad15e8ad9fce753914eac9f8ebdbfa1..7d7cb3eaabe029beaaf522c8521e8589cd0fafdd 100644 (file)
 #define OBJ_TAG_BITS   1
 #define OBJ_TAG_MASK   OBJ_ALLOCATED_TAG
 
-#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
+#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
 #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
 
 #define HUGE_BITS      1
 #define FULLNESS_BITS  4
 #define CLASS_BITS     8
-#define ISOLATED_BITS  5
 #define MAGIC_VAL_BITS 8
 
 #define MAX(a, b) ((a) >= (b) ? (a) : (b))
@@ -246,7 +245,6 @@ struct zspage {
                unsigned int huge:HUGE_BITS;
                unsigned int fullness:FULLNESS_BITS;
                unsigned int class:CLASS_BITS + 1;
-               unsigned int isolated:ISOLATED_BITS;
                unsigned int magic:MAGIC_VAL_BITS;
        };
        unsigned int inuse;
@@ -278,18 +276,14 @@ static bool ZsHugePage(struct zspage *zspage)
 static void migrate_lock_init(struct zspage *zspage);
 static void migrate_read_lock(struct zspage *zspage);
 static void migrate_read_unlock(struct zspage *zspage);
-
-#ifdef CONFIG_COMPACTION
 static void migrate_write_lock(struct zspage *zspage);
-static void migrate_write_lock_nested(struct zspage *zspage);
 static void migrate_write_unlock(struct zspage *zspage);
+
+#ifdef CONFIG_COMPACTION
 static void kick_deferred_free(struct zs_pool *pool);
 static void init_deferred_free(struct zs_pool *pool);
 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
 #else
-static void migrate_write_lock(struct zspage *zspage) {}
-static void migrate_write_lock_nested(struct zspage *zspage) {}
-static void migrate_write_unlock(struct zspage *zspage) {}
 static void kick_deferred_free(struct zs_pool *pool) {}
 static void init_deferred_free(struct zs_pool *pool) {}
 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
@@ -476,30 +470,12 @@ static inline void set_freeobj(struct zspage *zspage, unsigned int obj)
        zspage->freeobj = obj;
 }
 
-static void get_zspage_mapping(struct zspage *zspage,
-                              unsigned int *class_idx,
-                              int *fullness)
-{
-       BUG_ON(zspage->magic != ZSPAGE_MAGIC);
-
-       *fullness = zspage->fullness;
-       *class_idx = zspage->class;
-}
-
 static struct size_class *zspage_class(struct zs_pool *pool,
                                       struct zspage *zspage)
 {
        return pool->size_class[zspage->class];
 }
 
-static void set_zspage_mapping(struct zspage *zspage,
-                              unsigned int class_idx,
-                              int fullness)
-{
-       zspage->class = class_idx;
-       zspage->fullness = fullness;
-}
-
 /*
  * zsmalloc divides the pool into various size classes where each
  * class maintains a list of zspages where each zspage is divided
@@ -694,16 +670,17 @@ static void insert_zspage(struct size_class *class,
 {
        class_stat_inc(class, fullness, 1);
        list_add(&zspage->list, &class->fullness_list[fullness]);
+       zspage->fullness = fullness;
 }
 
 /*
  * This function removes the given zspage from the freelist identified
  * by <class, fullness_group>.
  */
-static void remove_zspage(struct size_class *class,
-                               struct zspage *zspage,
-                               int fullness)
+static void remove_zspage(struct size_class *class, struct zspage *zspage)
 {
+       int fullness = zspage->fullness;
+
        VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
 
        list_del_init(&zspage->list);
@@ -721,17 +698,14 @@ static void remove_zspage(struct size_class *class,
  */
 static int fix_fullness_group(struct size_class *class, struct zspage *zspage)
 {
-       int class_idx;
-       int currfg, newfg;
+       int newfg;
 
-       get_zspage_mapping(zspage, &class_idx, &currfg);
        newfg = get_fullness_group(class, zspage);
-       if (newfg == currfg)
+       if (newfg == zspage->fullness)
                goto out;
 
-       remove_zspage(class, zspage, currfg);
+       remove_zspage(class, zspage);
        insert_zspage(class, zspage, newfg);
-       set_zspage_mapping(zspage, class_idx, newfg);
 out:
        return newfg;
 }
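
With class and fullness recorded in the zspage itself, get_zspage_mapping() and set_zspage_mapping() disappear: insert stamps zspage->fullness, remove reads it back, and fix_fullness_group() compares against the stored value. A condensed standalone model of that invariant (stub types):

        #include <stdbool.h>

        struct zspage_stub { int fullness; /* index of the list it is on */ };

        static void insert(struct zspage_stub *z, int fullness)
        {
                /* list_add(&z->list, &class->fullness_list[fullness]); */
                z->fullness = fullness;         /* membership recorded here */
        }

        static bool fix_fullness(struct zspage_stub *z, int newfg)
        {
                if (newfg == z->fullness)
                        return false;           /* already on the right list */
                /* remove() reads z->fullness, no parameter needed */
                insert(z, newfg);
                return true;
        }
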
@@ -763,14 +737,12 @@ static struct page *get_next_page(struct page *page)
 static void obj_to_location(unsigned long obj, struct page **page,
                                unsigned int *obj_idx)
 {
-       obj >>= OBJ_TAG_BITS;
        *page = pfn_to_page(obj >> OBJ_INDEX_BITS);
        *obj_idx = (obj & OBJ_INDEX_MASK);
 }
 
 static void obj_to_page(unsigned long obj, struct page **page)
 {
-       obj >>= OBJ_TAG_BITS;
        *page = pfn_to_page(obj >> OBJ_INDEX_BITS);
 }
 
@@ -785,7 +757,6 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
 
        obj = page_to_pfn(page) << OBJ_INDEX_BITS;
        obj |= obj_idx & OBJ_INDEX_MASK;
-       obj <<= OBJ_TAG_BITS;
 
        return obj;
 }
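
Dropping the OBJ_TAG_BITS shift makes the location encoding a plain two-field pack: pfn in the high bits, object index in the low OBJ_INDEX_BITS, which is now BITS_PER_LONG - _PFN_BITS, so the index field grows by one bit. A standalone round-trip check with an illustrative field width:

        #include <assert.h>

        #define IDX_BITS 12                     /* illustrative width */
        #define IDX_MASK ((1UL << IDX_BITS) - 1)

        static unsigned long encode(unsigned long pfn, unsigned int idx)
        {
                return (pfn << IDX_BITS) | (idx & IDX_MASK);
        }

        static void decode(unsigned long obj, unsigned long *pfn,
                           unsigned int *idx)
        {
                *pfn = obj >> IDX_BITS;
                *idx = obj & IDX_MASK;
        }

        int main(void)
        {
                unsigned long pfn;
                unsigned int idx;

                decode(encode(0xabcdUL, 7), &pfn, &idx);
                assert(pfn == 0xabcdUL && idx == 7);
                return 0;
        }
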
@@ -849,15 +820,11 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
                                struct zspage *zspage)
 {
        struct page *page, *next;
-       int fg;
-       unsigned int class_idx;
-
-       get_zspage_mapping(zspage, &class_idx, &fg);
 
        assert_spin_locked(&pool->lock);
 
        VM_BUG_ON(get_zspage_inuse(zspage));
-       VM_BUG_ON(fg != ZS_INUSE_RATIO_0);
+       VM_BUG_ON(zspage->fullness != ZS_INUSE_RATIO_0);
 
        next = page = get_first_page(zspage);
        do {
@@ -892,7 +859,7 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
                return;
        }
 
-       remove_zspage(class, zspage, ZS_INUSE_RATIO_0);
+       remove_zspage(class, zspage);
        __free_zspage(pool, class, zspage);
 }
 
@@ -1011,6 +978,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
        create_page_chain(class, zspage, pages);
        init_zspage(class, zspage);
        zspage->pool = pool;
+       zspage->class = class->index;
 
        return zspage;
 }
@@ -1403,7 +1371,6 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
        obj = obj_malloc(pool, zspage, handle);
        newfg = get_fullness_group(class, zspage);
        insert_zspage(class, zspage, newfg);
-       set_zspage_mapping(zspage, class->index, newfg);
        record_obj(handle, obj);
        atomic_long_add(class->pages_per_zspage, &pool->pages_allocated);
        class_stat_inc(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
@@ -1623,7 +1590,7 @@ static struct zspage *isolate_src_zspage(struct size_class *class)
                zspage = list_first_entry_or_null(&class->fullness_list[fg],
                                                  struct zspage, list);
                if (zspage) {
-                       remove_zspage(class, zspage, fg);
+                       remove_zspage(class, zspage);
                        return zspage;
                }
        }
@@ -1640,7 +1607,7 @@ static struct zspage *isolate_dst_zspage(struct size_class *class)
                zspage = list_first_entry_or_null(&class->fullness_list[fg],
                                                  struct zspage, list);
                if (zspage) {
-                       remove_zspage(class, zspage, fg);
+                       remove_zspage(class, zspage);
                        return zspage;
                }
        }
@@ -1661,7 +1628,6 @@ static int putback_zspage(struct size_class *class, struct zspage *zspage)
 
        fullness = get_fullness_group(class, zspage);
        insert_zspage(class, zspage, fullness);
-       set_zspage_mapping(zspage, class->index, fullness);
 
        return fullness;
 }
@@ -1725,33 +1691,17 @@ static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
        read_unlock(&zspage->lock);
 }
 
-#ifdef CONFIG_COMPACTION
 static void migrate_write_lock(struct zspage *zspage)
 {
        write_lock(&zspage->lock);
 }
 
-static void migrate_write_lock_nested(struct zspage *zspage)
-{
-       write_lock_nested(&zspage->lock, SINGLE_DEPTH_NESTING);
-}
-
 static void migrate_write_unlock(struct zspage *zspage)
 {
        write_unlock(&zspage->lock);
 }
 
-/* Number of isolated subpage for *page migration* in this zspage */
-static void inc_zspage_isolation(struct zspage *zspage)
-{
-       zspage->isolated++;
-}
-
-static void dec_zspage_isolation(struct zspage *zspage)
-{
-       VM_BUG_ON(zspage->isolated == 0);
-       zspage->isolated--;
-}
+#ifdef CONFIG_COMPACTION
 
 static const struct movable_operations zsmalloc_mops;
 
@@ -1780,21 +1730,12 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
 
 static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 {
-       struct zs_pool *pool;
-       struct zspage *zspage;
-
        /*
         * Page is locked so zspage couldn't be destroyed. For detail, look at
         * lock_zspage in free_zspage.
         */
        VM_BUG_ON_PAGE(PageIsolated(page), page);
 
-       zspage = get_zspage(page);
-       pool = zspage->pool;
-       spin_lock(&pool->lock);
-       inc_zspage_isolation(zspage);
-       spin_unlock(&pool->lock);
-
        return true;
 }
 
@@ -1859,7 +1800,6 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
        kunmap_atomic(s_addr);
 
        replace_sub_page(class, zspage, newpage, page);
-       dec_zspage_isolation(zspage);
        /*
         * Since we complete the data copy and set up new zspage structure,
         * it's okay to release the pool's lock.
@@ -1881,16 +1821,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 
 static void zs_page_putback(struct page *page)
 {
-       struct zs_pool *pool;
-       struct zspage *zspage;
-
        VM_BUG_ON_PAGE(!PageIsolated(page), page);
-
-       zspage = get_zspage(page);
-       pool = zspage->pool;
-       spin_lock(&pool->lock);
-       dec_zspage_isolation(zspage);
-       spin_unlock(&pool->lock);
 }
 
 static const struct movable_operations zsmalloc_mops = {
@@ -1907,8 +1838,6 @@ static void async_free_zspage(struct work_struct *work)
 {
        int i;
        struct size_class *class;
-       unsigned int class_idx;
-       int fullness;
        struct zspage *zspage, *tmp;
        LIST_HEAD(free_pages);
        struct zs_pool *pool = container_of(work, struct zs_pool,
@@ -1929,10 +1858,8 @@ static void async_free_zspage(struct work_struct *work)
                list_del(&zspage->list);
                lock_zspage(zspage);
 
-               get_zspage_mapping(zspage, &class_idx, &fullness);
-               VM_BUG_ON(fullness != ZS_INUSE_RATIO_0);
-               class = pool->size_class[class_idx];
                spin_lock(&pool->lock);
+               class = zspage_class(pool, zspage);
                __free_zspage(pool, class, zspage);
                spin_unlock(&pool->lock);
        }
@@ -2006,19 +1933,17 @@ static unsigned long __zs_compact(struct zs_pool *pool,
                        dst_zspage = isolate_dst_zspage(class);
                        if (!dst_zspage)
                                break;
-                       migrate_write_lock(dst_zspage);
                }
 
                src_zspage = isolate_src_zspage(class);
                if (!src_zspage)
                        break;
 
-               migrate_write_lock_nested(src_zspage);
-
+               migrate_write_lock(src_zspage);
                migrate_zspage(pool, src_zspage, dst_zspage);
-               fg = putback_zspage(class, src_zspage);
                migrate_write_unlock(src_zspage);
 
+               fg = putback_zspage(class, src_zspage);
                if (fg == ZS_INUSE_RATIO_0) {
                        free_zspage(pool, class, src_zspage);
                        pages_freed += class->pages_per_zspage;
@@ -2028,7 +1953,6 @@ static unsigned long __zs_compact(struct zs_pool *pool,
                if (get_fullness_group(class, dst_zspage) == ZS_INUSE_RATIO_100
                    || spin_is_contended(&pool->lock)) {
                        putback_zspage(class, dst_zspage);
-                       migrate_write_unlock(dst_zspage);
                        dst_zspage = NULL;
 
                        spin_unlock(&pool->lock);
@@ -2037,15 +1961,12 @@ static unsigned long __zs_compact(struct zs_pool *pool,
                }
        }
 
-       if (src_zspage) {
+       if (src_zspage)
                putback_zspage(class, src_zspage);
-               migrate_write_unlock(src_zspage);
-       }
 
-       if (dst_zspage) {
+       if (dst_zspage)
                putback_zspage(class, dst_zspage);
-               migrate_write_unlock(dst_zspage);
-       }
+
        spin_unlock(&pool->lock);
 
        return pages_freed;
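
The compaction path now write-locks only the source zspage, and only for the duration of migrate_zspage(); putback happens after the unlock, and the destination zspage is never write-locked at all, relying on pool->lock alone. That leaves migrate_write_lock_nested() without a user, so it is deleted. The new critical sections, in outline:

        src = isolate_src_zspage(class);        /* under pool->lock */
        migrate_write_lock(src);
        migrate_zspage(pool, src, dst);         /* only this needs the lock */
        migrate_write_unlock(src);
        fg = putback_zspage(class, src);        /* pool->lock still held */
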
index db4625af65fb7f6655a057e145bbe20dd64f7ae9..9dec853647c8e4c6fc1d0a4b0de0849ea3102a47 100644 (file)
@@ -71,8 +71,6 @@ static u64 zswap_reject_compress_poor;
 static u64 zswap_reject_alloc_fail;
 /* Store failed because the entry metadata could not be allocated (rare) */
 static u64 zswap_reject_kmemcache_fail;
-/* Duplicate store was encountered (rare) */
-static u64 zswap_duplicate_entry;
 
 /* Shrinker work queue */
 static struct workqueue_struct *shrink_wq;
@@ -141,10 +139,6 @@ static bool zswap_non_same_filled_pages_enabled = true;
 module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
                   bool, 0644);
 
-static bool zswap_exclusive_loads_enabled = IS_ENABLED(
-               CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
-module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);
-
 /* Number of zpools in zswap_pool (empirically determined for scalability) */
 #define ZSWAP_NR_ZPOOLS 32
 
@@ -168,6 +162,7 @@ struct crypto_acomp_ctx {
        struct crypto_wait wait;
        u8 *buffer;
        struct mutex mutex;
+       bool is_sleepable;
 };
 
 /*
@@ -179,18 +174,24 @@ struct crypto_acomp_ctx {
 struct zswap_pool {
        struct zpool *zpools[ZSWAP_NR_ZPOOLS];
        struct crypto_acomp_ctx __percpu *acomp_ctx;
-       struct kref kref;
+       struct percpu_ref ref;
        struct list_head list;
        struct work_struct release_work;
-       struct work_struct shrink_work;
        struct hlist_node node;
        char tfm_name[CRYPTO_MAX_ALG_NAME];
-       struct list_lru list_lru;
-       struct mem_cgroup *next_shrink;
-       struct shrinker *shrinker;
-       atomic_t nr_stored;
 };
 
+/* Global LRU lists shared by all zswap pools. */
+static struct list_lru zswap_list_lru;
+/* counter of pages stored in all zswap pools. */
+/* Counter of pages stored in all zswap pools. */
+static atomic_t zswap_nr_stored = ATOMIC_INIT(0);
+
+/* The lock protects zswap_next_shrink updates. */
+static DEFINE_SPINLOCK(zswap_shrink_lock);
+static struct mem_cgroup *zswap_next_shrink;
+static struct work_struct zswap_shrink_work;
+static struct shrinker *zswap_shrinker;
+
 /*
  * struct zswap_entry
  *
@@ -199,12 +200,6 @@ struct zswap_pool {
  *
  * rbnode - links the entry into red-black tree for the appropriate swap type
  * swpentry - associated swap entry, the offset indexes into the red-black tree
- * refcount - the number of outstanding reference to the entry. This is needed
- *            to protect against premature freeing of the entry by code
- *            concurrent calls to load, invalidate, and writeback.  The lock
- *            for the zswap_tree structure that contains the entry must
- *            be held while changing the refcount.  Since the lock must
- *            be held, there is no reason to also make refcount atomic.
  * length - the length in bytes of the compressed page data.  Needed during
  *          decompression. For a same value filled page length is 0, and both
  *          pool and lru are invalid and must be ignored.
@@ -217,7 +212,6 @@ struct zswap_pool {
 struct zswap_entry {
        struct rb_node rbnode;
        swp_entry_t swpentry;
-       int refcount;
        unsigned int length;
        struct zswap_pool *pool;
        union {
@@ -228,17 +222,13 @@ struct zswap_entry {
        struct list_head lru;
 };
 
-/*
- * The tree lock in the zswap_tree struct protects a few things:
- * - the rbtree
- * - the refcount field of each entry in the tree
- */
 struct zswap_tree {
        struct rb_root rbroot;
        spinlock_t lock;
 };
 
 static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
+static unsigned int nr_zswap_trees[MAX_SWAPFILES];
 
 /* RCU-protected iteration */
 static LIST_HEAD(zswap_pools);
@@ -265,15 +255,16 @@ static bool zswap_has_pool;
 * helpers and fwd declarations
 **********************************/
 
+static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
+{
+       return &zswap_trees[swp_type(swp)][swp_offset(swp)
+               >> SWAP_ADDRESS_SPACE_SHIFT];
+}
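
zswap_trees[type] is now an array of trees per swap device (sized in nr_zswap_trees[]), indexed by the high bits of the swap offset; each tree covers one swap address space worth of slots, cutting tree-lock contention on large swapfiles. The index math, modeled standalone (with the kernel's shift of 14 and 4K pages, that is 64M of swap per tree):

        #define SWAP_ADDRESS_SPACE_SHIFT 14     /* as in include/linux/swap.h */

        /* model: which per-type tree serves a given swap offset */
        static unsigned int tree_index(unsigned long swp_offset)
        {
                return swp_offset >> SWAP_ADDRESS_SPACE_SHIFT;
        }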
+
 #define zswap_pool_debug(msg, p)                               \
        pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
                 zpool_get_type((p)->zpools[0]))
 
-static int zswap_writeback_entry(struct zswap_entry *entry,
-                                struct zswap_tree *tree);
-static int zswap_pool_get(struct zswap_pool *pool);
-static void zswap_pool_put(struct zswap_pool *pool);
-
 static bool zswap_is_full(void)
 {
        return totalram_pages() * zswap_max_pool_percent / 100 <
@@ -313,385 +304,631 @@ static void zswap_update_total_size(void)
        zswap_pool_total_size = total;
 }
 
-/* should be called under RCU */
-#ifdef CONFIG_MEMCG
-static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
+/*********************************
+* pool functions
+**********************************/
+static void __zswap_pool_empty(struct percpu_ref *ref);
+
+static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 {
-       return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL;
+       int i;
+       struct zswap_pool *pool;
+       char name[38]; /* 'zswap' + 32 char (max) num + \0 */
+       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+       int ret;
+
+       if (!zswap_has_pool) {
+               /* if either are unset, pool initialization failed, and we
+                * need both params to be set correctly before trying to
+                * create a pool.
+                */
+               if (!strcmp(type, ZSWAP_PARAM_UNSET))
+                       return NULL;
+               if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
+                       return NULL;
+       }
+
+       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+       if (!pool)
+               return NULL;
+
+       for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
+               /* unique name for each pool specifically required by zsmalloc */
+               snprintf(name, 38, "zswap%x",
+                        atomic_inc_return(&zswap_pools_count));
+
+               pool->zpools[i] = zpool_create_pool(type, name, gfp);
+               if (!pool->zpools[i]) {
+                       pr_err("%s zpool not available\n", type);
+                       goto error;
+               }
+       }
+       pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
+
+       strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
+
+       pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
+       if (!pool->acomp_ctx) {
+               pr_err("percpu alloc failed\n");
+               goto error;
+       }
+
+       ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
+                                      &pool->node);
+       if (ret)
+               goto error;
+
+       /* being the current pool takes 1 ref; this func expects the
+        * caller to always add the new pool as the current pool
+        */
+       ret = percpu_ref_init(&pool->ref, __zswap_pool_empty,
+                             PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
+       if (ret)
+               goto ref_fail;
+       INIT_LIST_HEAD(&pool->list);
+
+       zswap_pool_debug("created", pool);
+
+       return pool;
+
+ref_fail:
+       cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
+error:
+       if (pool->acomp_ctx)
+               free_percpu(pool->acomp_ctx);
+       while (i--)
+               zpool_destroy_pool(pool->zpools[i]);
+       kfree(pool);
+       return NULL;
 }
-#else
-static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
+
+static struct zswap_pool *__zswap_pool_create_fallback(void)
 {
-       return NULL;
+       bool has_comp, has_zpool;
+
+       has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
+       if (!has_comp && strcmp(zswap_compressor,
+                               CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
+               pr_err("compressor %s not available, using default %s\n",
+                      zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
+               param_free_charp(&zswap_compressor);
+               zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
+               has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
+       }
+       if (!has_comp) {
+               pr_err("default compressor %s not available\n",
+                      zswap_compressor);
+               param_free_charp(&zswap_compressor);
+               zswap_compressor = ZSWAP_PARAM_UNSET;
+       }
+
+       has_zpool = zpool_has_pool(zswap_zpool_type);
+       if (!has_zpool && strcmp(zswap_zpool_type,
+                                CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
+               pr_err("zpool %s not available, using default %s\n",
+                      zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
+               param_free_charp(&zswap_zpool_type);
+               zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
+               has_zpool = zpool_has_pool(zswap_zpool_type);
+       }
+       if (!has_zpool) {
+               pr_err("default zpool %s not available\n",
+                      zswap_zpool_type);
+               param_free_charp(&zswap_zpool_type);
+               zswap_zpool_type = ZSWAP_PARAM_UNSET;
+       }
+
+       if (!has_comp || !has_zpool)
+               return NULL;
+
+       return zswap_pool_create(zswap_zpool_type, zswap_compressor);
 }
-#endif
 
-static inline int entry_to_nid(struct zswap_entry *entry)
+static void zswap_pool_destroy(struct zswap_pool *pool)
 {
-       return page_to_nid(virt_to_page(entry));
+       int i;
+
+       zswap_pool_debug("destroying", pool);
+
+       cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
+       free_percpu(pool->acomp_ctx);
+
+       for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
+               zpool_destroy_pool(pool->zpools[i]);
+       kfree(pool);
 }
 
-void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
+static void __zswap_pool_release(struct work_struct *work)
 {
-       struct zswap_pool *pool;
+       struct zswap_pool *pool = container_of(work, typeof(*pool),
+                                               release_work);
 
-       /* lock out zswap pools list modification */
-       spin_lock(&zswap_pools_lock);
-       list_for_each_entry(pool, &zswap_pools, list) {
-               if (pool->next_shrink == memcg)
-                       pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
-       }
-       spin_unlock(&zswap_pools_lock);
+       synchronize_rcu();
+
+       /* nobody should have been able to get a ref... */
+       WARN_ON(!percpu_ref_is_zero(&pool->ref));
+       percpu_ref_exit(&pool->ref);
+
+       /* pool is now off zswap_pools list and has no references. */
+       zswap_pool_destroy(pool);
 }
 
-/*********************************
-* zswap entry functions
-**********************************/
-static struct kmem_cache *zswap_entry_cache;
+static struct zswap_pool *zswap_pool_current(void);
 
-static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
+static void __zswap_pool_empty(struct percpu_ref *ref)
 {
-       struct zswap_entry *entry;
-       entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
-       if (!entry)
-               return NULL;
-       entry->refcount = 1;
-       RB_CLEAR_NODE(&entry->rbnode);
-       return entry;
+       struct zswap_pool *pool;
+
+       pool = container_of(ref, typeof(*pool), ref);
+
+       spin_lock_bh(&zswap_pools_lock);
+
+       WARN_ON(pool == zswap_pool_current());
+
+       list_del_rcu(&pool->list);
+
+       INIT_WORK(&pool->release_work, __zswap_pool_release);
+       schedule_work(&pool->release_work);
+
+       spin_unlock_bh(&zswap_pools_lock);
 }
 
-static void zswap_entry_cache_free(struct zswap_entry *entry)
+static int __must_check zswap_pool_get(struct zswap_pool *pool)
 {
-       kmem_cache_free(zswap_entry_cache, entry);
+       if (!pool)
+               return 0;
+
+       return percpu_ref_tryget(&pool->ref);
 }
 
-/*********************************
-* zswap lruvec functions
-**********************************/
-void zswap_lruvec_state_init(struct lruvec *lruvec)
+static void zswap_pool_put(struct zswap_pool *pool)
 {
-       atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
+       percpu_ref_put(&pool->ref);
 }
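
Replacing the kref gives the pool the standard percpu_ref kill/tryget lifecycle: percpu_ref_kill() drops the initial reference and makes every later tryget fail, __zswap_pool_empty() fires once the count reaches zero, and PERCPU_REF_ALLOW_REINIT lets a retired pool be resurrected if the params switch back to it. The flow, pieced together from the functions above (sketch, not a literal call sequence):

        percpu_ref_init(&pool->ref, __zswap_pool_empty,
                        PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
        ...
        if (percpu_ref_tryget(&pool->ref)) {    /* zswap_pool_get() */
                /* use the pool */
                percpu_ref_put(&pool->ref);     /* zswap_pool_put() */
        }
        ...
        percpu_ref_kill(&pool->ref);            /* retire: trygets now fail */
        /* __zswap_pool_empty() runs at zero and schedules the release */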
 
-void zswap_folio_swapin(struct folio *folio)
+static struct zswap_pool *__zswap_pool_current(void)
 {
-       struct lruvec *lruvec;
+       struct zswap_pool *pool;
 
-       VM_WARN_ON_ONCE(!folio_test_locked(folio));
-       lruvec = folio_lruvec(folio);
-       atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
+       pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
+       WARN_ONCE(!pool && zswap_has_pool,
+                 "%s: no page storage pool!\n", __func__);
+
+       return pool;
 }
 
-/*********************************
-* lru functions
-**********************************/
-static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
+static struct zswap_pool *zswap_pool_current(void)
 {
-       atomic_long_t *nr_zswap_protected;
-       unsigned long lru_size, old, new;
-       int nid = entry_to_nid(entry);
-       struct mem_cgroup *memcg;
-       struct lruvec *lruvec;
-
-       /*
-        * Note that it is safe to use rcu_read_lock() here, even in the face of
-        * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection
-        * used in list_lru lookup, only two scenarios are possible:
-        *
-        * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The
-        *    new entry will be reparented to memcg's parent's list_lru.
-        * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The
-        *    new entry will be added directly to memcg's parent's list_lru.
-        *
-        * Similar reasoning holds for list_lru_del() and list_lru_putback().
-        */
-       rcu_read_lock();
-       memcg = mem_cgroup_from_entry(entry);
-       /* will always succeed */
-       list_lru_add(list_lru, &entry->lru, nid, memcg);
+       assert_spin_locked(&zswap_pools_lock);
 
-       /* Update the protection area */
-       lru_size = list_lru_count_one(list_lru, nid, memcg);
-       lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
-       nr_zswap_protected = &lruvec->zswap_lruvec_state.nr_zswap_protected;
-       old = atomic_long_inc_return(nr_zswap_protected);
-       /*
-        * Decay to avoid overflow and adapt to changing workloads.
-        * This is based on LRU reclaim cost decaying heuristics.
-        */
-       do {
-               new = old > lru_size / 4 ? old / 2 : old;
-       } while (!atomic_long_try_cmpxchg(nr_zswap_protected, &old, new));
-       rcu_read_unlock();
+       return __zswap_pool_current();
 }
 
-static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry)
+static struct zswap_pool *zswap_pool_current_get(void)
 {
-       int nid = entry_to_nid(entry);
-       struct mem_cgroup *memcg;
+       struct zswap_pool *pool;
 
        rcu_read_lock();
-       memcg = mem_cgroup_from_entry(entry);
-       /* will always succeed */
-       list_lru_del(list_lru, &entry->lru, nid, memcg);
+
+       pool = __zswap_pool_current();
+       if (!zswap_pool_get(pool))
+               pool = NULL;
+
        rcu_read_unlock();
+
+       return pool;
 }
 
-static void zswap_lru_putback(struct list_lru *list_lru,
-               struct zswap_entry *entry)
+/* type and compressor must be null-terminated */
+static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
 {
-       int nid = entry_to_nid(entry);
-       spinlock_t *lock = &list_lru->node[nid].lock;
-       struct mem_cgroup *memcg;
-       struct lruvec *lruvec;
+       struct zswap_pool *pool;
 
-       rcu_read_lock();
-       memcg = mem_cgroup_from_entry(entry);
-       spin_lock(lock);
-       /* we cannot use list_lru_add here, because it increments node's lru count */
-       list_lru_putback(list_lru, &entry->lru, nid, memcg);
-       spin_unlock(lock);
+       assert_spin_locked(&zswap_pools_lock);
 
-       lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(entry_to_nid(entry)));
-       /* increment the protection area to account for the LRU rotation. */
-       atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
-       rcu_read_unlock();
+       list_for_each_entry_rcu(pool, &zswap_pools, list) {
+               if (strcmp(pool->tfm_name, compressor))
+                       continue;
+               /* all zpools share the same type */
+               if (strcmp(zpool_get_type(pool->zpools[0]), type))
+                       continue;
+               /* if we can't get it, it's about to be destroyed */
+               if (!zswap_pool_get(pool))
+                       continue;
+               return pool;
+       }
+
+       return NULL;
 }
 
 /*********************************
-* rbtree functions
+* param callbacks
 **********************************/
-static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
-{
-       struct rb_node *node = root->rb_node;
-       struct zswap_entry *entry;
-       pgoff_t entry_offset;
 
-       while (node) {
-               entry = rb_entry(node, struct zswap_entry, rbnode);
-               entry_offset = swp_offset(entry->swpentry);
-               if (entry_offset > offset)
-                       node = node->rb_left;
-               else if (entry_offset < offset)
-                       node = node->rb_right;
-               else
-                       return entry;
-       }
-       return NULL;
+static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
+{
+       /* no change required */
+       if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
+               return false;
+       return true;
 }
 
-/*
- * In the case that a entry with the same offset is found, a pointer to
- * the existing entry is stored in dupentry and the function returns -EEXIST
- */
-static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
-                       struct zswap_entry **dupentry)
+/* val must be a null-terminated string */
+static int __zswap_param_set(const char *val, const struct kernel_param *kp,
+                            char *type, char *compressor)
 {
-       struct rb_node **link = &root->rb_node, *parent = NULL;
-       struct zswap_entry *myentry;
-       pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry);
+       struct zswap_pool *pool, *put_pool = NULL;
+       char *s = strstrip((char *)val);
+       int ret = 0;
+       bool new_pool = false;
 
-       while (*link) {
-               parent = *link;
-               myentry = rb_entry(parent, struct zswap_entry, rbnode);
-               myentry_offset = swp_offset(myentry->swpentry);
-               if (myentry_offset > entry_offset)
-                       link = &(*link)->rb_left;
-               else if (myentry_offset < entry_offset)
-                       link = &(*link)->rb_right;
-               else {
-                       *dupentry = myentry;
-                       return -EEXIST;
-               }
+       mutex_lock(&zswap_init_lock);
+       switch (zswap_init_state) {
+       case ZSWAP_UNINIT:
+               /* if this is load-time (pre-init) param setting,
+                * don't create a pool; that's done during init.
+                */
+               ret = param_set_charp(s, kp);
+               break;
+       case ZSWAP_INIT_SUCCEED:
+               new_pool = zswap_pool_changed(s, kp);
+               break;
+       case ZSWAP_INIT_FAILED:
+               pr_err("can't set param, initialization failed\n");
+               ret = -ENODEV;
        }
-       rb_link_node(&entry->rbnode, parent, link);
-       rb_insert_color(&entry->rbnode, root);
-       return 0;
-}
+       mutex_unlock(&zswap_init_lock);
 
-static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
-{
-       if (!RB_EMPTY_NODE(&entry->rbnode)) {
-               rb_erase(&entry->rbnode, root);
-               RB_CLEAR_NODE(&entry->rbnode);
-               return true;
+       /* no need to create a new pool, return directly */
+       if (!new_pool)
+               return ret;
+
+       if (!type) {
+               if (!zpool_has_pool(s)) {
+                       pr_err("zpool %s not available\n", s);
+                       return -ENOENT;
+               }
+               type = s;
+       } else if (!compressor) {
+               if (!crypto_has_acomp(s, 0, 0)) {
+                       pr_err("compressor %s not available\n", s);
+                       return -ENOENT;
+               }
+               compressor = s;
+       } else {
+               WARN_ON(1);
+               return -EINVAL;
        }
-       return false;
-}
 
-static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
-{
-       int i = 0;
+       spin_lock_bh(&zswap_pools_lock);
 
-       if (ZSWAP_NR_ZPOOLS > 1)
-               i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS));
+       pool = zswap_pool_find_get(type, compressor);
+       if (pool) {
+               zswap_pool_debug("using existing", pool);
+               WARN_ON(pool == zswap_pool_current());
+               list_del_rcu(&pool->list);
+       }
 
-       return entry->pool->zpools[i];
-}
+       spin_unlock_bh(&zswap_pools_lock);
 
-/*
- * Carries out the common pattern of freeing and entry's zpool allocation,
- * freeing the entry itself, and decrementing the number of stored pages.
- */
-static void zswap_free_entry(struct zswap_entry *entry)
-{
-       if (!entry->length)
-               atomic_dec(&zswap_same_filled_pages);
+       if (!pool)
+               pool = zswap_pool_create(type, compressor);
        else {
-               zswap_lru_del(&entry->pool->list_lru, entry);
-               zpool_free(zswap_find_zpool(entry), entry->handle);
-               atomic_dec(&entry->pool->nr_stored);
-               zswap_pool_put(entry->pool);
+               /*
+                * Restore the initial ref dropped by percpu_ref_kill()
+                * when the pool was decommissioned and switch it again
+                * to percpu mode.
+                */
+               percpu_ref_resurrect(&pool->ref);
+
+               /* Drop the ref from zswap_pool_find_get(). */
+               zswap_pool_put(pool);
        }
-       if (entry->objcg) {
-               obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
-               obj_cgroup_put(entry->objcg);
+
+       if (pool)
+               ret = param_set_charp(s, kp);
+       else
+               ret = -EINVAL;
+
+       spin_lock_bh(&zswap_pools_lock);
+
+       if (!ret) {
+               put_pool = zswap_pool_current();
+               list_add_rcu(&pool->list, &zswap_pools);
+               zswap_has_pool = true;
+       } else if (pool) {
+               /* add the possibly pre-existing pool to the end of the pools
+                * list; if it's new (and empty) then it'll be removed and
+                * destroyed by the put after we drop the lock
+                */
+               list_add_tail_rcu(&pool->list, &zswap_pools);
+               put_pool = pool;
        }
-       zswap_entry_cache_free(entry);
-       atomic_dec(&zswap_stored_pages);
-       zswap_update_total_size();
+
+       spin_unlock_bh(&zswap_pools_lock);
+
+       if (!zswap_has_pool && !pool) {
+               /* if initial pool creation failed, and this pool creation also
+                * failed, maybe both compressor and zpool params were bad.
+                * Allow changing this param, so pool creation will succeed
+                * when the other param is changed. We already verified this
+                * param is ok in the zpool_has_pool() or crypto_has_acomp()
+                * checks above.
+                */
+               ret = param_set_charp(s, kp);
+       }
+
+       /* drop the ref from either the old current pool,
+        * or the new pool we failed to add
+        */
+       if (put_pool)
+               percpu_ref_kill(&put_pool->ref);
+
+       return ret;
 }
 
-/* caller must hold the tree lock */
-static void zswap_entry_get(struct zswap_entry *entry)
+static int zswap_compressor_param_set(const char *val,
+                                     const struct kernel_param *kp)
 {
-       entry->refcount++;
+       return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
 }
 
-/* caller must hold the tree lock
-* remove from the tree and free it, if nobody reference the entry
-*/
-static void zswap_entry_put(struct zswap_tree *tree,
-                       struct zswap_entry *entry)
+static int zswap_zpool_param_set(const char *val,
+                                const struct kernel_param *kp)
 {
-       int refcount = --entry->refcount;
-
-       WARN_ON_ONCE(refcount < 0);
-       if (refcount == 0) {
-               WARN_ON_ONCE(!RB_EMPTY_NODE(&entry->rbnode));
-               zswap_free_entry(entry);
-       }
+       return __zswap_param_set(val, kp, NULL, zswap_compressor);
 }
 
-/* caller must hold the tree lock */
-static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
-                               pgoff_t offset)
+static int zswap_enabled_param_set(const char *val,
+                                  const struct kernel_param *kp)
 {
-       struct zswap_entry *entry;
+       int ret = -ENODEV;
 
-       entry = zswap_rb_search(root, offset);
-       if (entry)
-               zswap_entry_get(entry);
+       /* if this is load-time (pre-init) param setting, only set param. */
+       if (system_state != SYSTEM_RUNNING)
+               return param_set_bool(val, kp);
 
-       return entry;
+       mutex_lock(&zswap_init_lock);
+       switch (zswap_init_state) {
+       case ZSWAP_UNINIT:
+               if (zswap_setup())
+                       break;
+               fallthrough;
+       case ZSWAP_INIT_SUCCEED:
+               if (!zswap_has_pool)
+                       pr_err("can't enable, no pool configured\n");
+               else
+                       ret = param_set_bool(val, kp);
+               break;
+       case ZSWAP_INIT_FAILED:
+               pr_err("can't enable, initialization failed\n");
+       }
+       mutex_unlock(&zswap_init_lock);
+
+       return ret;
 }
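
The switch above initializes zswap lazily the first time the enabled knob is
flipped at runtime. A standalone sketch of the same three-state machine
(do_setup() and has_pool are hypothetical stand-ins for zswap_setup() and
zswap_has_pool):

    #include <stdbool.h>

    enum init_state { UNINIT, INIT_SUCCEEDED, INIT_FAILED };

    static enum init_state state = UNINIT;
    static bool has_pool;

    /* hypothetical; stands in for zswap_setup() */
    static bool do_setup(void) { return true; }

    /* returns true if enabling is allowed */
    static bool try_enable(void)
    {
            switch (state) {
            case UNINIT:
                    state = do_setup() ? INIT_SUCCEEDED : INIT_FAILED;
                    if (state == INIT_FAILED)
                            return false;
                    /* fall through */
            case INIT_SUCCEEDED:
                    return has_pool;        /* no pool, nothing to enable */
            case INIT_FAILED:
            default:
                    return false;
            }
    }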
 
 /*********************************
-* shrinker functions
+* lru functions
 **********************************/
-static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
-                                      spinlock_t *lock, void *arg);
 
-static unsigned long zswap_shrinker_scan(struct shrinker *shrinker,
-               struct shrink_control *sc)
+/* should be called under RCU */
+#ifdef CONFIG_MEMCG
+static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
 {
-       struct lruvec *lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid));
-       unsigned long shrink_ret, nr_protected, lru_size;
-       struct zswap_pool *pool = shrinker->private_data;
-       bool encountered_page_in_swapcache = false;
-
-       if (!zswap_shrinker_enabled ||
-                       !mem_cgroup_zswap_writeback_enabled(sc->memcg)) {
-               sc->nr_scanned = 0;
-               return SHRINK_STOP;
-       }
+       return entry->objcg ? obj_cgroup_memcg(entry->objcg) : NULL;
+}
+#else
+static inline struct mem_cgroup *mem_cgroup_from_entry(struct zswap_entry *entry)
+{
+       return NULL;
+}
+#endif
 
-       nr_protected =
-               atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
-       lru_size = list_lru_shrink_count(&pool->list_lru, sc);
+static inline int entry_to_nid(struct zswap_entry *entry)
+{
+       return page_to_nid(virt_to_page(entry));
+}
 
-       /*
-        * Abort if we are shrinking into the protected region.
-        *
-        * This short-circuiting is necessary because if we have too many multiple
-        * concurrent reclaimers getting the freeable zswap object counts at the
-        * same time (before any of them made reasonable progress), the total
-        * number of reclaimed objects might be more than the number of unprotected
-        * objects (i.e the reclaimers will reclaim into the protected area of the
-        * zswap LRU).
+static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
+{
+       atomic_long_t *nr_zswap_protected;
+       unsigned long lru_size, old, new;
+       int nid = entry_to_nid(entry);
+       struct mem_cgroup *memcg;
+       struct lruvec *lruvec;
+
+       /*
+        * Note that it is safe to use rcu_read_lock() here, even in the face of
+        * concurrent memcg offlining. Thanks to the memcg->kmemcg_id indirection
+        * used in list_lru lookup, only two scenarios are possible:
+        *
+        * 1. list_lru_add() is called before memcg->kmemcg_id is updated. The
+        *    new entry will be reparented to memcg's parent's list_lru.
+        * 2. list_lru_add() is called after memcg->kmemcg_id is updated. The
+        *    new entry will be added directly to memcg's parent's list_lru.
+        *
+        * Similar reasoning holds for list_lru_del().
         */
-       if (nr_protected >= lru_size - sc->nr_to_scan) {
-               sc->nr_scanned = 0;
-               return SHRINK_STOP;
+       rcu_read_lock();
+       memcg = mem_cgroup_from_entry(entry);
+       /* will always succeed */
+       list_lru_add(list_lru, &entry->lru, nid, memcg);
+
+       /* Update the protection area */
+       lru_size = list_lru_count_one(list_lru, nid, memcg);
+       lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+       nr_zswap_protected = &lruvec->zswap_lruvec_state.nr_zswap_protected;
+       old = atomic_long_inc_return(nr_zswap_protected);
+       /*
+        * Decay to avoid overflow and adapt to changing workloads.
+        * This is based on LRU reclaim cost decaying heuristics.
+        */
+       do {
+               new = old > lru_size / 4 ? old / 2 : old;
+       } while (!atomic_long_try_cmpxchg(nr_zswap_protected, &old, new));
+       rcu_read_unlock();
+}
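
The cmpxchg loop above is the protection decay: every insertion bumps
nr_zswap_protected, but once the counter exceeds a quarter of the LRU it is
halved, so the protected window adapts to the workload instead of growing
without bound. The same update rule as a standalone C11 sketch (a
hypothetical wrapper, not the kernel API):

    #include <stdatomic.h>

    static void protect_one(atomic_long *nr_protected, long lru_size)
    {
            long old = atomic_fetch_add(nr_protected, 1) + 1;
            long new;

            do {
                    /* decay: halve once past a quarter of the LRU */
                    new = old > lru_size / 4 ? old / 2 : old;
                    /* on CAS failure, 'old' is refreshed automatically */
            } while (!atomic_compare_exchange_weak(nr_protected, &old, new));
    }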
+
+static void zswap_lru_del(struct list_lru *list_lru, struct zswap_entry *entry)
+{
+       int nid = entry_to_nid(entry);
+       struct mem_cgroup *memcg;
+
+       rcu_read_lock();
+       memcg = mem_cgroup_from_entry(entry);
+       /* will always succeed */
+       list_lru_del(list_lru, &entry->lru, nid, memcg);
+       rcu_read_unlock();
+}
+
+void zswap_lruvec_state_init(struct lruvec *lruvec)
+{
+       atomic_long_set(&lruvec->zswap_lruvec_state.nr_zswap_protected, 0);
+}
+
+void zswap_folio_swapin(struct folio *folio)
+{
+       struct lruvec *lruvec;
+
+       if (folio) {
+               lruvec = folio_lruvec(folio);
+               atomic_long_inc(&lruvec->zswap_lruvec_state.nr_zswap_protected);
        }
+}
 
-       shrink_ret = list_lru_shrink_walk(&pool->list_lru, sc, &shrink_memcg_cb,
-               &encountered_page_in_swapcache);
+void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
+{
+       /* lock out zswap shrinker walking memcg tree */
+       spin_lock(&zswap_shrink_lock);
+       if (zswap_next_shrink == memcg)
+               zswap_next_shrink = mem_cgroup_iter(NULL, zswap_next_shrink, NULL);
+       spin_unlock(&zswap_shrink_lock);
+}
 
-       if (encountered_page_in_swapcache)
-               return SHRINK_STOP;
+/*********************************
+* rbtree functions
+**********************************/
+static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
+{
+       struct rb_node *node = root->rb_node;
+       struct zswap_entry *entry;
+       pgoff_t entry_offset;
 
-       return shrink_ret ? shrink_ret : SHRINK_STOP;
+       while (node) {
+               entry = rb_entry(node, struct zswap_entry, rbnode);
+               entry_offset = swp_offset(entry->swpentry);
+               if (entry_offset > offset)
+                       node = node->rb_left;
+               else if (entry_offset < offset)
+                       node = node->rb_right;
+               else
+                       return entry;
+       }
+       return NULL;
 }
 
-static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
-               struct shrink_control *sc)
+/*
+ * If an entry with the same offset is found, a pointer to the existing
+ * entry is stored in dupentry and the function returns -EEXIST.
+ */
+static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
+                       struct zswap_entry **dupentry)
 {
-       struct zswap_pool *pool = shrinker->private_data;
-       struct mem_cgroup *memcg = sc->memcg;
-       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid));
-       unsigned long nr_backing, nr_stored, nr_freeable, nr_protected;
+       struct rb_node **link = &root->rb_node, *parent = NULL;
+       struct zswap_entry *myentry;
+       pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry);
 
-       if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg))
-               return 0;
+       while (*link) {
+               parent = *link;
+               myentry = rb_entry(parent, struct zswap_entry, rbnode);
+               myentry_offset = swp_offset(myentry->swpentry);
+               if (myentry_offset > entry_offset)
+                       link = &(*link)->rb_left;
+               else if (myentry_offset < entry_offset)
+                       link = &(*link)->rb_right;
+               else {
+                       *dupentry = myentry;
+                       return -EEXIST;
+               }
+       }
+       rb_link_node(&entry->rbnode, parent, link);
+       rb_insert_color(&entry->rbnode, root);
+       return 0;
+}
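
The -EEXIST/dupentry contract is consumed by zswap_store() later in this
patch: the caller invalidates the stale duplicate and re-inserts, which can
then only succeed:

    if (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
            zswap_invalidate_entry(tree, dupentry);
            WARN_ON(zswap_rb_insert(&tree->rbroot, entry, &dupentry));
    }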
 
-#ifdef CONFIG_MEMCG_KMEM
-       mem_cgroup_flush_stats(memcg);
-       nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
-       nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
-#else
-       /* use pool stats instead of memcg stats */
-       nr_backing = get_zswap_pool_size(pool) >> PAGE_SHIFT;
-       nr_stored = atomic_read(&pool->nr_stored);
-#endif
+static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
+{
+       rb_erase(&entry->rbnode, root);
+       RB_CLEAR_NODE(&entry->rbnode);
+}
 
-       if (!nr_stored)
-               return 0;
+/*********************************
+* zswap entry functions
+**********************************/
+static struct kmem_cache *zswap_entry_cache;
 
-       nr_protected =
-               atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
-       nr_freeable = list_lru_shrink_count(&pool->list_lru, sc);
-       /*
-        * Subtract the lru size by an estimate of the number of pages
-        * that should be protected.
-        */
-       nr_freeable = nr_freeable > nr_protected ? nr_freeable - nr_protected : 0;
+static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
+{
+       struct zswap_entry *entry;
+       entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
+       if (!entry)
+               return NULL;
+       RB_CLEAR_NODE(&entry->rbnode);
+       return entry;
+}
 
-       /*
-        * Scale the number of freeable pages by the memory saving factor.
-        * This ensures that the better zswap compresses memory, the fewer
-        * pages we will evict to swap (as it will otherwise incur IO for
-        * relatively small memory saving).
-        */
-       return mult_frac(nr_freeable, nr_backing, nr_stored);
+static void zswap_entry_cache_free(struct zswap_entry *entry)
+{
+       kmem_cache_free(zswap_entry_cache, entry);
 }
 
-static void zswap_alloc_shrinker(struct zswap_pool *pool)
+static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
 {
-       pool->shrinker =
-               shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap");
-       if (!pool->shrinker)
-               return;
+       int i = 0;
+
+       if (ZSWAP_NR_ZPOOLS > 1)
+               i = hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS));
+
+       return entry->pool->zpools[i];
+}
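
hash_ptr() spreads entries evenly across the ZSWAP_NR_ZPOOLS zpools (a power
of two), so unrelated entries tend to contend on different zpool locks. A
userspace model of the same idea, assuming four zpools and a golden-ratio
multiplicative hash (a stand-in, not the kernel's hash_ptr()):

    #include <stdint.h>

    #define NR_ZPOOLS       4       /* must be a power of two */
    #define NR_ZPOOL_BITS   2       /* ilog2(NR_ZPOOLS) */

    static unsigned int pick_zpool(const void *entry)
    {
            /* multiply by the 32-bit golden-ratio constant and keep
             * the top bits, which are the best mixed */
            uint32_t h = (uint32_t)(uintptr_t)entry * 0x61C88647u;

            return h >> (32 - NR_ZPOOL_BITS);   /* 0 .. NR_ZPOOLS - 1 */
    }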
+
+/*
+ * Carries out the common pattern of freeing an entry's zpool allocation,
+ * freeing the entry itself, and decrementing the number of stored pages.
+ */
+static void zswap_entry_free(struct zswap_entry *entry)
+{
+       if (!entry->length)
+               atomic_dec(&zswap_same_filled_pages);
+       else {
+               zswap_lru_del(&zswap_list_lru, entry);
+               zpool_free(zswap_find_zpool(entry), entry->handle);
+               atomic_dec(&zswap_nr_stored);
+               zswap_pool_put(entry->pool);
+       }
+       if (entry->objcg) {
+               obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
+               obj_cgroup_put(entry->objcg);
+       }
+       zswap_entry_cache_free(entry);
+       atomic_dec(&zswap_stored_pages);
+       zswap_update_total_size();
+}
 
-       pool->shrinker->private_data = pool;
-       pool->shrinker->scan_objects = zswap_shrinker_scan;
-       pool->shrinker->count_objects = zswap_shrinker_count;
-       pool->shrinker->batch = 0;
-       pool->shrinker->seeks = DEFAULT_SEEKS;
+/*
+ * The caller holds the tree lock and has already looked the entry up in
+ * the tree, so it must be there: remove it from the tree and free it.
+ */
+static void zswap_invalidate_entry(struct zswap_tree *tree,
+                                  struct zswap_entry *entry)
+{
+       zswap_rb_erase(&tree->rbroot, entry);
+       zswap_entry_free(entry);
 }
 
 /*********************************
-* per-cpu code
+* compressed storage functions
 **********************************/
 static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
 {
@@ -715,6 +952,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
                goto acomp_fail;
        }
        acomp_ctx->acomp = acomp;
+       acomp_ctx->is_sleepable = acomp_is_async(acomp);
 
        req = acomp_request_alloc(acomp_ctx->acomp);
        if (!req) {
@@ -759,134 +997,252 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
        return 0;
 }
 
-/*********************************
-* pool functions
-**********************************/
-
-static struct zswap_pool *__zswap_pool_current(void)
+static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
 {
-       struct zswap_pool *pool;
+       struct crypto_acomp_ctx *acomp_ctx;
+       struct scatterlist input, output;
+       int comp_ret = 0, alloc_ret = 0;
+       unsigned int dlen = PAGE_SIZE;
+       unsigned long handle;
+       struct zpool *zpool;
+       char *buf;
+       gfp_t gfp;
+       u8 *dst;
 
-       pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
-       WARN_ONCE(!pool && zswap_has_pool,
-                 "%s: no page storage pool!\n", __func__);
+       acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
 
-       return pool;
-}
+       mutex_lock(&acomp_ctx->mutex);
 
-static struct zswap_pool *zswap_pool_current(void)
-{
-       assert_spin_locked(&zswap_pools_lock);
+       dst = acomp_ctx->buffer;
+       sg_init_table(&input, 1);
+       sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
 
-       return __zswap_pool_current();
-}
+       /*
+        * We need PAGE_SIZE * 2 here since there may be an over-compression
+        * case, and hardware accelerators may not check the dst buffer size,
+        * so give the dst buffer enough length to avoid a buffer overflow.
+        */
+       sg_init_one(&output, dst, PAGE_SIZE * 2);
+       acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
 
-static struct zswap_pool *zswap_pool_current_get(void)
-{
-       struct zswap_pool *pool;
+       /*
+        * It may look a little silly that we send an asynchronous request
+        * and then wait for its completion synchronously; in effect, the
+        * process is synchronous.
+        * Theoretically, acomp lets users submit multiple requests to one
+        * acomp instance and have them completed concurrently. But zswap
+        * stores and loads page by page, so within a single thread there
+        * is no way to send a second page before the first page is done.
+        * Threads running on different CPUs use different acomp instances,
+        * however, so multiple threads can still (de)compress in parallel.
+        */
+       comp_ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
+       dlen = acomp_ctx->req->dlen;
+       if (comp_ret)
+               goto unlock;
 
-       rcu_read_lock();
+       zpool = zswap_find_zpool(entry);
+       gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
+       if (zpool_malloc_support_movable(zpool))
+               gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
+       alloc_ret = zpool_malloc(zpool, dlen, gfp, &handle);
+       if (alloc_ret)
+               goto unlock;
 
-       pool = __zswap_pool_current();
-       if (!zswap_pool_get(pool))
-               pool = NULL;
+       buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
+       memcpy(buf, dst, dlen);
+       zpool_unmap_handle(zpool, handle);
 
-       rcu_read_unlock();
+       entry->handle = handle;
+       entry->length = dlen;
 
-       return pool;
+unlock:
+       if (comp_ret == -ENOSPC || alloc_ret == -ENOSPC)
+               zswap_reject_compress_poor++;
+       else if (comp_ret)
+               zswap_reject_compress_fail++;
+       else if (alloc_ret)
+               zswap_reject_alloc_fail++;
+
+       mutex_unlock(&acomp_ctx->mutex);
+       return comp_ret == 0 && alloc_ret == 0;
 }
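
The boolean return folds all three failure modes into a single caller
contract: on success the entry owns the zpool allocation described by
entry->handle and entry->length; on failure the matching reject counter has
already been bumped, so the caller only unwinds its own state, as
zswap_store() does later in this patch:

    if (!zswap_compress(folio, entry))
            goto put_pool;      /* nothing of ours to free here */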
 
-static struct zswap_pool *zswap_pool_last_get(void)
+static void zswap_decompress(struct zswap_entry *entry, struct page *page)
 {
-       struct zswap_pool *pool, *last = NULL;
+       struct zpool *zpool = zswap_find_zpool(entry);
+       struct scatterlist input, output;
+       struct crypto_acomp_ctx *acomp_ctx;
+       u8 *src;
 
-       rcu_read_lock();
+       acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+       mutex_lock(&acomp_ctx->mutex);
 
-       list_for_each_entry_rcu(pool, &zswap_pools, list)
-               last = pool;
-       WARN_ONCE(!last && zswap_has_pool,
-                 "%s: no page storage pool!\n", __func__);
-       if (!zswap_pool_get(last))
-               last = NULL;
+       src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
+       if (acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) {
+               memcpy(acomp_ctx->buffer, src, entry->length);
+               src = acomp_ctx->buffer;
+               zpool_unmap_handle(zpool, entry->handle);
+       }
 
-       rcu_read_unlock();
+       sg_init_one(&input, src, entry->length);
+       sg_init_table(&output, 1);
+       sg_set_page(&output, page, PAGE_SIZE, 0);
+       acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
+       BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
+       BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
+       mutex_unlock(&acomp_ctx->mutex);
 
-       return last;
+       if (!acomp_ctx->is_sleepable || zpool_can_sleep_mapped(zpool))
+               zpool_unmap_handle(zpool, entry->handle);
 }
 
-/* type and compressor must be null-terminated */
-static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
+/*********************************
+* writeback code
+**********************************/
+/*
+ * Attempts to free an entry by adding a folio to the swap cache,
+ * decompressing the entry data into the folio, and issuing a
+ * bio write to write the folio back to the swap device.
+ *
+ * This can be thought of as a "resumed writeback" of the folio
+ * to the swap device.  We are basically resuming the same swap
+ * writeback path that was intercepted with the zswap_store()
+ * in the first place.  After the folio has been decompressed into
+ * the swap cache, the compressed version stored by zswap can be
+ * freed.
+ */
+static int zswap_writeback_entry(struct zswap_entry *entry,
+                                swp_entry_t swpentry)
 {
-       struct zswap_pool *pool;
+       struct zswap_tree *tree;
+       struct folio *folio;
+       struct mempolicy *mpol;
+       bool folio_was_allocated;
+       struct writeback_control wbc = {
+               .sync_mode = WB_SYNC_NONE,
+       };
 
-       assert_spin_locked(&zswap_pools_lock);
+       /* try to allocate swap cache folio */
+       mpol = get_task_policy(current);
+       folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
+                               NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
+       if (!folio)
+               return -ENOMEM;
 
-       list_for_each_entry_rcu(pool, &zswap_pools, list) {
-               if (strcmp(pool->tfm_name, compressor))
-                       continue;
-               /* all zpools share the same type */
-               if (strcmp(zpool_get_type(pool->zpools[0]), type))
-                       continue;
-               /* if we can't get it, it's about to be destroyed */
-               if (!zswap_pool_get(pool))
-                       continue;
-               return pool;
+       /*
+        * Found an existing folio: we raced with swapin or a concurrent
+        * shrinker. We generally write back cold folios from zswap, and
+        * swapin means the folio just became hot, so skip this folio.
+        * In the unlikely concurrent shrinker case, the folio will be
+        * unlinked and freed when invalidated by that shrinker anyway.
+        */
+       if (!folio_was_allocated) {
+               folio_put(folio);
+               return -EEXIST;
        }
 
-       return NULL;
-}
+       /*
+        * The folio is locked, and the swapcache is now secured against
+        * concurrent swapping to and from the slot, and against concurrent
+        * swapoff, so we can safely dereference the zswap tree here.
+        * Verify that the swap entry hasn't been invalidated and recycled
+        * behind our backs, to avoid overwriting a new swap folio with
+        * old compressed data. Only when this is successful can the entry
+        * be dereferenced.
+        */
+       tree = swap_zswap_tree(swpentry);
+       spin_lock(&tree->lock);
+       if (zswap_rb_search(&tree->rbroot, swp_offset(swpentry)) != entry) {
+               spin_unlock(&tree->lock);
+               delete_from_swap_cache(folio);
+               folio_unlock(folio);
+               folio_put(folio);
+               return -ENOMEM;
+       }
 
-/*
- * If the entry is still valid in the tree, drop the initial ref and remove it
- * from the tree. This function must be called with an additional ref held,
- * otherwise it may race with another invalidation freeing the entry.
- */
-static void zswap_invalidate_entry(struct zswap_tree *tree,
-                                  struct zswap_entry *entry)
-{
-       if (zswap_rb_erase(&tree->rbroot, entry))
-               zswap_entry_put(tree, entry);
+       /* Safe to deref entry after the entry is verified above. */
+       zswap_rb_erase(&tree->rbroot, entry);
+       spin_unlock(&tree->lock);
+
+       zswap_decompress(entry, &folio->page);
+
+       count_vm_event(ZSWPWB);
+       if (entry->objcg)
+               count_objcg_event(entry->objcg, ZSWPWB);
+
+       zswap_entry_free(entry);
+
+       /* folio is up to date */
+       folio_mark_uptodate(folio);
+
+       /* move it to the tail of the inactive list after end_writeback */
+       folio_set_reclaim(folio);
+
+       /* start writeback */
+       __swap_writepage(folio, &wbc);
+       folio_put(folio);
+
+       return 0;
 }
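
Note the ordering this function depends on: the folio lock pins the swap
slot first, and only then is the entry revalidated under the tree lock, by
pointer identity rather than by key alone. The same shape distilled into a
userspace fragment (hypothetical types and helpers):

    /* 'hint' arrived without a reference and may already be freed; it
     * is dereferenced only after being found again, by identity, in
     * the tree that owns it - while the tree lock is held */
    pthread_mutex_lock(&tree->lock);
    if (tree_lookup(tree, key) != hint) {
            pthread_mutex_unlock(&tree->lock);
            return -ENOMEM;             /* raced with invalidation */
    }
    tree_erase(tree, hint);             /* now exclusively ours */
    pthread_mutex_unlock(&tree->lock);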
 
+/*********************************
+* shrinker functions
+**********************************/
 static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_one *l,
                                       spinlock_t *lock, void *arg)
 {
        struct zswap_entry *entry = container_of(item, struct zswap_entry, lru);
        bool *encountered_page_in_swapcache = (bool *)arg;
-       struct zswap_tree *tree;
-       pgoff_t swpoffset;
+       swp_entry_t swpentry;
        enum lru_status ret = LRU_REMOVED_RETRY;
        int writeback_result;
 
+       /*
+        * As soon as we drop the LRU lock, the entry can be freed by
+        * a concurrent invalidation. This means the following:
+        *
+        * 1. We extract the swp_entry_t to the stack, allowing
+        *    zswap_writeback_entry() to pin the swap entry and
+        *    then validate the zswap entry against that swap entry's
+        *    tree using pointer value comparison. Only when that
+        *    is successful can the entry be dereferenced.
+        *
+        * 2. Usually, objects are taken off the LRU for reclaim. In
+        *    this case this isn't possible, because if reclaim fails
+        *    for whatever reason, we have no means of knowing if the
+        *    entry is alive to put it back on the LRU.
+        *
+        *    So rotate it before dropping the lock. If the entry is
+        *    written back or invalidated, the free path will unlink
+        *    it. For failures, rotation is the right thing as well.
+        *
+        *    Temporary failures, where the same entry should be tried
+        *    again immediately, almost never happen for this shrinker.
+        *    We don't do any trylocking; -ENOMEM comes closest,
+        *    but that's extremely rare and doesn't happen spuriously
+        *    either. Don't bother distinguishing this case.
+        */
+       list_move_tail(item, &l->list);
+
        /*
         * Once the lru lock is dropped, the entry might get freed. The
-        * swpoffset is copied to the stack, and entry isn't deref'd again
+        * swpentry is copied to the stack, and entry isn't deref'd again
         * until the entry is verified to still be alive in the tree.
         */
-       swpoffset = swp_offset(entry->swpentry);
-       tree = zswap_trees[swp_type(entry->swpentry)];
-       list_lru_isolate(l, item);
+       swpentry = entry->swpentry;
+
        /*
         * It's safe to drop the lock here because we return either
         * LRU_REMOVED_RETRY or LRU_RETRY.
         */
        spin_unlock(lock);
 
-       /* Check for invalidate() race */
-       spin_lock(&tree->lock);
-       if (entry != zswap_rb_search(&tree->rbroot, swpoffset))
-               goto unlock;
-
-       /* Hold a reference to prevent a free during writeback */
-       zswap_entry_get(entry);
-       spin_unlock(&tree->lock);
-
-       writeback_result = zswap_writeback_entry(entry, tree);
+       writeback_result = zswap_writeback_entry(entry, swpentry);
 
-       spin_lock(&tree->lock);
        if (writeback_result) {
                zswap_reject_reclaim_fail++;
-               zswap_lru_putback(&entry->pool->list_lru, entry);
                ret = LRU_RETRY;
 
                /*
@@ -894,570 +1250,194 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
                 * into the warmer region. We should terminate shrinking (if we're in the dynamic
                 * shrinker context).
                 */
-               if (writeback_result == -EEXIST && encountered_page_in_swapcache)
+               if (writeback_result == -EEXIST && encountered_page_in_swapcache) {
+                       ret = LRU_STOP;
                        *encountered_page_in_swapcache = true;
-
-               goto put_unlock;
+               }
+       } else {
+               zswap_written_back_pages++;
        }
-       zswap_written_back_pages++;
-
-       if (entry->objcg)
-               count_objcg_event(entry->objcg, ZSWPWB);
-
-       count_vm_event(ZSWPWB);
-       /*
-        * Writeback started successfully, the page now belongs to the
-        * swapcache. Drop the entry from zswap - unless invalidate already
-        * took it out while we had the tree->lock released for IO.
-        */
-       zswap_invalidate_entry(tree, entry);
 
-put_unlock:
-       /* Drop local reference */
-       zswap_entry_put(tree, entry);
-unlock:
-       spin_unlock(&tree->lock);
        spin_lock(lock);
        return ret;
 }
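
The rotation replaces the old isolate/putback dance: since the walk no
longer holds a reference across the unlock, it copies the swp_entry_t to the
stack and rotates the entry to the LRU tail up front, so the list is already
in a sane order no matter how reclaim turns out. The policy in miniature
(userspace fragment, hypothetical list API):

    pthread_mutex_lock(&lru->lock);
    victim = list_first(lru);
    key = victim->key;                  /* copy what we need to the stack */
    list_move_tail(lru, victim);        /* rotate instead of isolating */
    pthread_mutex_unlock(&lru->lock);

    /* 'victim' may be freed from here on; revalidate via 'key' only */
    err = write_back(key);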
 
-static int shrink_memcg(struct mem_cgroup *memcg)
+static unsigned long zswap_shrinker_scan(struct shrinker *shrinker,
+               struct shrink_control *sc)
 {
-       struct zswap_pool *pool;
-       int nid, shrunk = 0;
+       struct lruvec *lruvec = mem_cgroup_lruvec(sc->memcg, NODE_DATA(sc->nid));
+       unsigned long shrink_ret, nr_protected, lru_size;
+       bool encountered_page_in_swapcache = false;
 
-       if (!mem_cgroup_zswap_writeback_enabled(memcg))
-               return -EINVAL;
+       if (!zswap_shrinker_enabled ||
+                       !mem_cgroup_zswap_writeback_enabled(sc->memcg)) {
+               sc->nr_scanned = 0;
+               return SHRINK_STOP;
+       }
+
+       nr_protected =
+               atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
+       lru_size = list_lru_shrink_count(&zswap_list_lru, sc);
 
        /*
-        * Skip zombies because their LRUs are reparented and we would be
-        * reclaiming from the parent instead of the dead memcg.
+        * Abort if we are shrinking into the protected region.
+        *
+        * This short-circuiting is necessary because if we have too many
+        * concurrent reclaimers getting the freeable zswap object counts at the
+        * same time (before any of them made reasonable progress), the total
+        * number of reclaimed objects might be more than the number of unprotected
+        * objects (i.e. the reclaimers will reclaim into the protected area of the
+        * zswap LRU).
         */
-       if (memcg && !mem_cgroup_online(memcg))
-               return -ENOENT;
+       if (nr_protected >= lru_size - sc->nr_to_scan) {
+               sc->nr_scanned = 0;
+               return SHRINK_STOP;
+       }
 
-       pool = zswap_pool_current_get();
-       if (!pool)
-               return -EINVAL;
+       shrink_ret = list_lru_shrink_walk(&zswap_list_lru, sc, &shrink_memcg_cb,
+               &encountered_page_in_swapcache);
 
-       for_each_node_state(nid, N_NORMAL_MEMORY) {
-               unsigned long nr_to_walk = 1;
+       if (encountered_page_in_swapcache)
+               return SHRINK_STOP;
 
-               shrunk += list_lru_walk_one(&pool->list_lru, nid, memcg,
-                                           &shrink_memcg_cb, NULL, &nr_to_walk);
-       }
-       zswap_pool_put(pool);
-       return shrunk ? 0 : -EAGAIN;
+       return shrink_ret ? shrink_ret : SHRINK_STOP;
 }
 
-static void shrink_worker(struct work_struct *w)
+static unsigned long zswap_shrinker_count(struct shrinker *shrinker,
+               struct shrink_control *sc)
 {
-       struct zswap_pool *pool = container_of(w, typeof(*pool),
-                                               shrink_work);
-       struct mem_cgroup *memcg;
-       int ret, failures = 0;
-
-       /* global reclaim will select cgroup in a round-robin fashion. */
-       do {
-               spin_lock(&zswap_pools_lock);
-               pool->next_shrink = mem_cgroup_iter(NULL, pool->next_shrink, NULL);
-               memcg = pool->next_shrink;
-
-               /*
-                * We need to retry if we have gone through a full round trip, or if we
-                * got an offline memcg (or else we risk undoing the effect of the
-                * zswap memcg offlining cleanup callback). This is not catastrophic
-                * per se, but it will keep the now offlined memcg hostage for a while.
-                *
-                * Note that if we got an online memcg, we will keep the extra
-                * reference in case the original reference obtained by mem_cgroup_iter
-                * is dropped by the zswap memcg offlining callback, ensuring that the
-                * memcg is not killed when we are reclaiming.
-                */
-               if (!memcg) {
-                       spin_unlock(&zswap_pools_lock);
-                       if (++failures == MAX_RECLAIM_RETRIES)
-                               break;
-
-                       goto resched;
-               }
-
-               if (!mem_cgroup_tryget_online(memcg)) {
-                       /* drop the reference from mem_cgroup_iter() */
-                       mem_cgroup_iter_break(NULL, memcg);
-                       pool->next_shrink = NULL;
-                       spin_unlock(&zswap_pools_lock);
+       struct mem_cgroup *memcg = sc->memcg;
+       struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(sc->nid));
+       unsigned long nr_backing, nr_stored, nr_freeable, nr_protected;
 
-                       if (++failures == MAX_RECLAIM_RETRIES)
-                               break;
+       if (!zswap_shrinker_enabled || !mem_cgroup_zswap_writeback_enabled(memcg))
+               return 0;
 
-                       goto resched;
-               }
-               spin_unlock(&zswap_pools_lock);
+#ifdef CONFIG_MEMCG_KMEM
+       mem_cgroup_flush_stats(memcg);
+       nr_backing = memcg_page_state(memcg, MEMCG_ZSWAP_B) >> PAGE_SHIFT;
+       nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED);
+#else
+       /* use pool stats instead of memcg stats */
+       nr_backing = zswap_pool_total_size >> PAGE_SHIFT;
+       nr_stored = atomic_read(&zswap_nr_stored);
+#endif
 
-               ret = shrink_memcg(memcg);
-               /* drop the extra reference */
-               mem_cgroup_put(memcg);
+       if (!nr_stored)
+               return 0;
 
-               if (ret == -EINVAL)
-                       break;
-               if (ret && ++failures == MAX_RECLAIM_RETRIES)
-                       break;
-
-resched:
-               cond_resched();
-       } while (!zswap_can_accept());
-       zswap_pool_put(pool);
-}
-
-static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
-{
-       int i;
-       struct zswap_pool *pool;
-       char name[38]; /* 'zswap' + 32 char (max) num + \0 */
-       gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
-       int ret;
-
-       if (!zswap_has_pool) {
-               /* if either are unset, pool initialization failed, and we
-                * need both params to be set correctly before trying to
-                * create a pool.
-                */
-               if (!strcmp(type, ZSWAP_PARAM_UNSET))
-                       return NULL;
-               if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
-                       return NULL;
-       }
-
-       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
-       if (!pool)
-               return NULL;
-
-       for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
-               /* unique name for each pool specifically required by zsmalloc */
-               snprintf(name, 38, "zswap%x",
-                        atomic_inc_return(&zswap_pools_count));
-
-               pool->zpools[i] = zpool_create_pool(type, name, gfp);
-               if (!pool->zpools[i]) {
-                       pr_err("%s zpool not available\n", type);
-                       goto error;
-               }
-       }
-       pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
-
-       strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
-
-       pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
-       if (!pool->acomp_ctx) {
-               pr_err("percpu alloc failed\n");
-               goto error;
-       }
-
-       ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
-                                      &pool->node);
-       if (ret)
-               goto error;
-
-       zswap_alloc_shrinker(pool);
-       if (!pool->shrinker)
-               goto error;
-
-       pr_debug("using %s compressor\n", pool->tfm_name);
-
-       /* being the current pool takes 1 ref; this func expects the
-        * caller to always add the new pool as the current pool
-        */
-       kref_init(&pool->kref);
-       INIT_LIST_HEAD(&pool->list);
-       if (list_lru_init_memcg(&pool->list_lru, pool->shrinker))
-               goto lru_fail;
-       shrinker_register(pool->shrinker);
-       INIT_WORK(&pool->shrink_work, shrink_worker);
-       atomic_set(&pool->nr_stored, 0);
-
-       zswap_pool_debug("created", pool);
-
-       return pool;
-
-lru_fail:
-       list_lru_destroy(&pool->list_lru);
-       shrinker_free(pool->shrinker);
-error:
-       if (pool->acomp_ctx)
-               free_percpu(pool->acomp_ctx);
-       while (i--)
-               zpool_destroy_pool(pool->zpools[i]);
-       kfree(pool);
-       return NULL;
-}
-
-static struct zswap_pool *__zswap_pool_create_fallback(void)
-{
-       bool has_comp, has_zpool;
-
-       has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
-       if (!has_comp && strcmp(zswap_compressor,
-                               CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
-               pr_err("compressor %s not available, using default %s\n",
-                      zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
-               param_free_charp(&zswap_compressor);
-               zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
-               has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
-       }
-       if (!has_comp) {
-               pr_err("default compressor %s not available\n",
-                      zswap_compressor);
-               param_free_charp(&zswap_compressor);
-               zswap_compressor = ZSWAP_PARAM_UNSET;
-       }
-
-       has_zpool = zpool_has_pool(zswap_zpool_type);
-       if (!has_zpool && strcmp(zswap_zpool_type,
-                                CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
-               pr_err("zpool %s not available, using default %s\n",
-                      zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
-               param_free_charp(&zswap_zpool_type);
-               zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
-               has_zpool = zpool_has_pool(zswap_zpool_type);
-       }
-       if (!has_zpool) {
-               pr_err("default zpool %s not available\n",
-                      zswap_zpool_type);
-               param_free_charp(&zswap_zpool_type);
-               zswap_zpool_type = ZSWAP_PARAM_UNSET;
-       }
-
-       if (!has_comp || !has_zpool)
-               return NULL;
-
-       return zswap_pool_create(zswap_zpool_type, zswap_compressor);
-}
-
-static void zswap_pool_destroy(struct zswap_pool *pool)
-{
-       int i;
-
-       zswap_pool_debug("destroying", pool);
-
-       shrinker_free(pool->shrinker);
-       cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
-       free_percpu(pool->acomp_ctx);
-       list_lru_destroy(&pool->list_lru);
-
-       spin_lock(&zswap_pools_lock);
-       mem_cgroup_iter_break(NULL, pool->next_shrink);
-       pool->next_shrink = NULL;
-       spin_unlock(&zswap_pools_lock);
-
-       for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
-               zpool_destroy_pool(pool->zpools[i]);
-       kfree(pool);
-}
-
-static int __must_check zswap_pool_get(struct zswap_pool *pool)
-{
-       if (!pool)
-               return 0;
-
-       return kref_get_unless_zero(&pool->kref);
-}
-
-static void __zswap_pool_release(struct work_struct *work)
-{
-       struct zswap_pool *pool = container_of(work, typeof(*pool),
-                                               release_work);
-
-       synchronize_rcu();
-
-       /* nobody should have been able to get a kref... */
-       WARN_ON(kref_get_unless_zero(&pool->kref));
-
-       /* pool is now off zswap_pools list and has no references. */
-       zswap_pool_destroy(pool);
-}
-
-static void __zswap_pool_empty(struct kref *kref)
-{
-       struct zswap_pool *pool;
-
-       pool = container_of(kref, typeof(*pool), kref);
-
-       spin_lock(&zswap_pools_lock);
-
-       WARN_ON(pool == zswap_pool_current());
-
-       list_del_rcu(&pool->list);
-
-       INIT_WORK(&pool->release_work, __zswap_pool_release);
-       schedule_work(&pool->release_work);
-
-       spin_unlock(&zswap_pools_lock);
-}
-
-static void zswap_pool_put(struct zswap_pool *pool)
-{
-       kref_put(&pool->kref, __zswap_pool_empty);
-}
-
-/*********************************
-* param callbacks
-**********************************/
-
-static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
-{
-       /* no change required */
-       if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
-               return false;
-       return true;
-}
-
-/* val must be a null-terminated string */
-static int __zswap_param_set(const char *val, const struct kernel_param *kp,
-                            char *type, char *compressor)
-{
-       struct zswap_pool *pool, *put_pool = NULL;
-       char *s = strstrip((char *)val);
-       int ret = 0;
-       bool new_pool = false;
-
-       mutex_lock(&zswap_init_lock);
-       switch (zswap_init_state) {
-       case ZSWAP_UNINIT:
-               /* if this is load-time (pre-init) param setting,
-                * don't create a pool; that's done during init.
-                */
-               ret = param_set_charp(s, kp);
-               break;
-       case ZSWAP_INIT_SUCCEED:
-               new_pool = zswap_pool_changed(s, kp);
-               break;
-       case ZSWAP_INIT_FAILED:
-               pr_err("can't set param, initialization failed\n");
-               ret = -ENODEV;
-       }
-       mutex_unlock(&zswap_init_lock);
-
-       /* no need to create a new pool, return directly */
-       if (!new_pool)
-               return ret;
-
-       if (!type) {
-               if (!zpool_has_pool(s)) {
-                       pr_err("zpool %s not available\n", s);
-                       return -ENOENT;
-               }
-               type = s;
-       } else if (!compressor) {
-               if (!crypto_has_acomp(s, 0, 0)) {
-                       pr_err("compressor %s not available\n", s);
-                       return -ENOENT;
-               }
-               compressor = s;
-       } else {
-               WARN_ON(1);
-               return -EINVAL;
-       }
-
-       spin_lock(&zswap_pools_lock);
-
-       pool = zswap_pool_find_get(type, compressor);
-       if (pool) {
-               zswap_pool_debug("using existing", pool);
-               WARN_ON(pool == zswap_pool_current());
-               list_del_rcu(&pool->list);
-       }
-
-       spin_unlock(&zswap_pools_lock);
-
-       if (!pool)
-               pool = zswap_pool_create(type, compressor);
-
-       if (pool)
-               ret = param_set_charp(s, kp);
-       else
-               ret = -EINVAL;
-
-       spin_lock(&zswap_pools_lock);
-
-       if (!ret) {
-               put_pool = zswap_pool_current();
-               list_add_rcu(&pool->list, &zswap_pools);
-               zswap_has_pool = true;
-       } else if (pool) {
-               /* add the possibly pre-existing pool to the end of the pools
-                * list; if it's new (and empty) then it'll be removed and
-                * destroyed by the put after we drop the lock
-                */
-               list_add_tail_rcu(&pool->list, &zswap_pools);
-               put_pool = pool;
-       }
-
-       spin_unlock(&zswap_pools_lock);
-
-       if (!zswap_has_pool && !pool) {
-               /* if initial pool creation failed, and this pool creation also
-                * failed, maybe both compressor and zpool params were bad.
-                * Allow changing this param, so pool creation will succeed
-                * when the other param is changed. We already verified this
-                * param is ok in the zpool_has_pool() or crypto_has_acomp()
-                * checks above.
-                */
-               ret = param_set_charp(s, kp);
-       }
-
-       /* drop the ref from either the old current pool,
-        * or the new pool we failed to add
-        */
-       if (put_pool)
-               zswap_pool_put(put_pool);
-
-       return ret;
-}
-
-static int zswap_compressor_param_set(const char *val,
-                                     const struct kernel_param *kp)
-{
-       return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
-}
-
-static int zswap_zpool_param_set(const char *val,
-                                const struct kernel_param *kp)
-{
-       return __zswap_param_set(val, kp, NULL, zswap_compressor);
-}
-
-static int zswap_enabled_param_set(const char *val,
-                                  const struct kernel_param *kp)
-{
-       int ret = -ENODEV;
-
-       /* if this is load-time (pre-init) param setting, only set param. */
-       if (system_state != SYSTEM_RUNNING)
-               return param_set_bool(val, kp);
-
-       mutex_lock(&zswap_init_lock);
-       switch (zswap_init_state) {
-       case ZSWAP_UNINIT:
-               if (zswap_setup())
-                       break;
-               fallthrough;
-       case ZSWAP_INIT_SUCCEED:
-               if (!zswap_has_pool)
-                       pr_err("can't enable, no pool configured\n");
-               else
-                       ret = param_set_bool(val, kp);
-               break;
-       case ZSWAP_INIT_FAILED:
-               pr_err("can't enable, initialization failed\n");
-       }
-       mutex_unlock(&zswap_init_lock);
+       nr_protected =
+               atomic_long_read(&lruvec->zswap_lruvec_state.nr_zswap_protected);
+       nr_freeable = list_lru_shrink_count(&zswap_list_lru, sc);
+       /*
+        * Subtract from the LRU size an estimate of the number of
+        * pages that should be protected.
+        */
+       nr_freeable = nr_freeable > nr_protected ? nr_freeable - nr_protected : 0;
 
-       return ret;
+       /*
+        * Scale the number of freeable pages by the memory saving factor.
+        * This ensures that the better zswap compresses memory, the fewer
+        * pages we will evict to swap (as it will otherwise incur IO for
+        * relatively small memory saving).
+        */
+       return mult_frac(nr_freeable, nr_backing, nr_stored);
 }
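
mult_frac(a, b, c) computes a * b / c while guarding against intermediate
overflow; here it scales the freeable object count by the pool's compression
ratio. A worked example with hypothetical numbers: 1000 freeable entries
whose 1000 stored pages compressed to 250 pages of backing memory can free
only 250 pages' worth of memory, so the shrinker reports 250:

    unsigned long nr_freeable = 1000;   /* entries on the LRU */
    unsigned long nr_backing  = 250;    /* compressed footprint, in pages */
    unsigned long nr_stored   = 1000;   /* uncompressed pages stored */

    /* nr_backing / nr_stored is the compression ratio (1:4 here) */
    unsigned long freeable = nr_freeable * nr_backing / nr_stored; /* 250 */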
 
-static void __zswap_load(struct zswap_entry *entry, struct page *page)
+static struct shrinker *zswap_alloc_shrinker(void)
 {
-       struct zpool *zpool = zswap_find_zpool(entry);
-       struct scatterlist input, output;
-       struct crypto_acomp_ctx *acomp_ctx;
-       u8 *src;
-
-       acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
-       mutex_lock(&acomp_ctx->mutex);
-
-       src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
-       if (!zpool_can_sleep_mapped(zpool)) {
-               memcpy(acomp_ctx->buffer, src, entry->length);
-               src = acomp_ctx->buffer;
-               zpool_unmap_handle(zpool, entry->handle);
-       }
+       struct shrinker *shrinker;
 
-       sg_init_one(&input, src, entry->length);
-       sg_init_table(&output, 1);
-       sg_set_page(&output, page, PAGE_SIZE, 0);
-       acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
-       BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
-       BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
-       mutex_unlock(&acomp_ctx->mutex);
+       shrinker =
+               shrinker_alloc(SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE, "mm-zswap");
+       if (!shrinker)
+               return NULL;
 
-       if (zpool_can_sleep_mapped(zpool))
-               zpool_unmap_handle(zpool, entry->handle);
+       shrinker->scan_objects = zswap_shrinker_scan;
+       shrinker->count_objects = zswap_shrinker_count;
+       shrinker->batch = 0;
+       shrinker->seeks = DEFAULT_SEEKS;
+       return shrinker;
 }
 
-/*********************************
-* writeback code
-**********************************/
-/*
- * Attempts to free an entry by adding a folio to the swap cache,
- * decompressing the entry data into the folio, and issuing a
- * bio write to write the folio back to the swap device.
- *
- * This can be thought of as a "resumed writeback" of the folio
- * to the swap device.  We are basically resuming the same swap
- * writeback path that was intercepted with the zswap_store()
- * in the first place.  After the folio has been decompressed into
- * the swap cache, the compressed version stored by zswap can be
- * freed.
- */
-static int zswap_writeback_entry(struct zswap_entry *entry,
-                                struct zswap_tree *tree)
+static int shrink_memcg(struct mem_cgroup *memcg)
 {
-       swp_entry_t swpentry = entry->swpentry;
-       struct folio *folio;
-       struct mempolicy *mpol;
-       bool folio_was_allocated;
-       struct writeback_control wbc = {
-               .sync_mode = WB_SYNC_NONE,
-       };
+       int nid, shrunk = 0;
 
-       /* try to allocate swap cache folio */
-       mpol = get_task_policy(current);
-       folio = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
-                               NO_INTERLEAVE_INDEX, &folio_was_allocated, true);
-       if (!folio)
-               return -ENOMEM;
+       if (!mem_cgroup_zswap_writeback_enabled(memcg))
+               return -EINVAL;
 
        /*
-        * Found an existing folio, we raced with load/swapin. We generally
-        * writeback cold folios from zswap, and swapin means the folio just
-        * became hot. Skip this folio and let the caller find another one.
+        * Skip zombies because their LRUs are reparented and we would be
+        * reclaiming from the parent instead of the dead memcg.
         */
-       if (!folio_was_allocated) {
-               folio_put(folio);
-               return -EEXIST;
-       }
+       if (memcg && !mem_cgroup_online(memcg))
+               return -ENOENT;
 
-       /*
-        * folio is locked, and the swapcache is now secured against
-        * concurrent swapping to and from the slot. Verify that the
-        * swap entry hasn't been invalidated and recycled behind our
-        * backs (our zswap_entry reference doesn't prevent that), to
-        * avoid overwriting a new swap folio with old compressed data.
-        */
-       spin_lock(&tree->lock);
-       if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
-               spin_unlock(&tree->lock);
-               delete_from_swap_cache(folio);
-               folio_unlock(folio);
-               folio_put(folio);
-               return -ENOMEM;
+       for_each_node_state(nid, N_NORMAL_MEMORY) {
+               unsigned long nr_to_walk = 1;
+
+               shrunk += list_lru_walk_one(&zswap_list_lru, nid, memcg,
+                                           &shrink_memcg_cb, NULL, &nr_to_walk);
        }
-       spin_unlock(&tree->lock);
+       return shrunk ? 0 : -EAGAIN;
+}
 
-       __zswap_load(entry, &folio->page);
+static void shrink_worker(struct work_struct *w)
+{
+       struct mem_cgroup *memcg;
+       int ret, failures = 0;
 
-       /* folio is up to date */
-       folio_mark_uptodate(folio);
+       /* global reclaim will select cgroup in a round-robin fashion. */
+       do {
+               spin_lock(&zswap_shrink_lock);
+               zswap_next_shrink = mem_cgroup_iter(NULL, zswap_next_shrink, NULL);
+               memcg = zswap_next_shrink;
 
-       /* move it to the tail of the inactive list after end_writeback */
-       folio_set_reclaim(folio);
+               /*
+                * We need to retry if we have gone through a full round trip, or if we
+                * got an offline memcg (or else we risk undoing the effect of the
+                * zswap memcg offlining cleanup callback). This is not catastrophic
+                * per se, but it will keep the now offlined memcg hostage for a while.
+                *
+                * Note that if we got an online memcg, we will keep the extra
+                * reference in case the original reference obtained by mem_cgroup_iter
+                * is dropped by the zswap memcg offlining callback, ensuring that the
+                * memcg is not killed when we are reclaiming.
+                */
+               if (!memcg) {
+                       spin_unlock(&zswap_shrink_lock);
+                       if (++failures == MAX_RECLAIM_RETRIES)
+                               break;
 
-       /* start writeback */
-       __swap_writepage(folio, &wbc);
-       folio_put(folio);
+                       goto resched;
+               }
 
-       return 0;
+               if (!mem_cgroup_tryget_online(memcg)) {
+                       /* drop the reference from mem_cgroup_iter() */
+                       mem_cgroup_iter_break(NULL, memcg);
+                       zswap_next_shrink = NULL;
+                       spin_unlock(&zswap_shrink_lock);
+
+                       if (++failures == MAX_RECLAIM_RETRIES)
+                               break;
+
+                       goto resched;
+               }
+               spin_unlock(&zswap_shrink_lock);
+
+               ret = shrink_memcg(memcg);
+               /* drop the extra reference */
+               mem_cgroup_put(memcg);
+
+               if (ret == -EINVAL)
+                       break;
+               if (ret && ++failures == MAX_RECLAIM_RETRIES)
+                       break;
+
+resched:
+               cond_resched();
+       } while (!zswap_can_accept());
 }
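
The loop above walks the memcg tree round-robin under zswap_shrink_lock,
tolerating up to MAX_RECLAIM_RETRIES misses; a full round trip, a dead
memcg, and a failed reclaim each burn one retry. Its shape as a standalone
sketch, with hypothetical stand-ins for the kernel primitives and the
iterator-reference subtleties left out:

    #include <pthread.h>
    #include <stdbool.h>

    #define MAX_RETRIES 5

    struct group;                                   /* ~ struct mem_cgroup */
    extern struct group *iter_next(struct group *); /* ~ mem_cgroup_iter() */
    extern bool group_tryget(struct group *);       /* ~ mem_cgroup_tryget_online() */
    extern void group_put(struct group *);
    extern int shrink_one(struct group *);          /* ~ shrink_memcg() */
    extern bool can_accept(void);                   /* ~ zswap_can_accept() */

    static pthread_mutex_t cursor_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct group *cursor;

    static void shrink_worker_model(void)
    {
            int failures = 0;

            do {
                    struct group *g;

                    pthread_mutex_lock(&cursor_lock);
                    cursor = iter_next(cursor);     /* NULL == full round trip */
                    g = cursor;
                    pthread_mutex_unlock(&cursor_lock);

                    /* a wrap or a dead group burns one retry */
                    if (!g || !group_tryget(g)) {
                            if (++failures == MAX_RETRIES)
                                    break;
                            continue;
                    }

                    if (shrink_one(g))              /* no progress made */
                            failures++;
                    group_put(g);
            } while (failures < MAX_RETRIES && !can_accept());
    }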
 
 static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
@@ -1493,23 +1473,11 @@ static void zswap_fill_page(void *ptr, unsigned long value)
 bool zswap_store(struct folio *folio)
 {
        swp_entry_t swp = folio->swap;
-       int type = swp_type(swp);
        pgoff_t offset = swp_offset(swp);
-       struct page *page = &folio->page;
-       struct zswap_tree *tree = zswap_trees[type];
+       struct zswap_tree *tree = swap_zswap_tree(swp);
        struct zswap_entry *entry, *dupentry;
-       struct scatterlist input, output;
-       struct crypto_acomp_ctx *acomp_ctx;
        struct obj_cgroup *objcg = NULL;
        struct mem_cgroup *memcg = NULL;
-       struct zswap_pool *pool;
-       struct zpool *zpool;
-       unsigned int dlen = PAGE_SIZE;
-       unsigned long handle, value;
-       char *buf;
-       u8 *src, *dst;
-       gfp_t gfp;
-       int ret;
 
        VM_WARN_ON_ONCE(!folio_test_locked(folio));
        VM_WARN_ON_ONCE(!folio_test_swapcache(folio));
@@ -1518,24 +1486,8 @@ bool zswap_store(struct folio *folio)
        if (folio_test_large(folio))
                return false;
 
-       if (!tree)
-               return false;
-
-       /*
-        * If this is a duplicate, it must be removed before attempting to store
-        * it, otherwise, if the store fails the old page won't be removed from
-        * the tree, and it might be written back overriding the new data.
-        */
-       spin_lock(&tree->lock);
-       dupentry = zswap_rb_search(&tree->rbroot, offset);
-       if (dupentry) {
-               zswap_duplicate_entry++;
-               zswap_invalidate_entry(tree, dupentry);
-       }
-       spin_unlock(&tree->lock);
-
        if (!zswap_enabled)
-               return false;
+               goto check_old;
 
        objcg = get_obj_cgroup_from_folio(folio);
        if (objcg && !obj_cgroup_may_zswap(objcg)) {
@@ -1562,17 +1514,19 @@ bool zswap_store(struct folio *folio)
        }
 
        /* allocate entry */
-       entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
+       entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
        if (!entry) {
                zswap_reject_kmemcache_fail++;
                goto reject;
        }
 
        if (zswap_same_filled_pages_enabled) {
-               src = kmap_local_page(page);
+               unsigned long value;
+               u8 *src;
+
+               src = kmap_local_folio(folio, 0);
                if (zswap_is_page_same_filled(src, &value)) {
                        kunmap_local(src);
-                       entry->swpentry = swp_entry(type, offset);
                        entry->length = 0;
                        entry->value = value;
                        atomic_inc(&zswap_same_filled_pages);
@@ -1591,74 +1545,18 @@ bool zswap_store(struct folio *folio)
 
        if (objcg) {
                memcg = get_mem_cgroup_from_objcg(objcg);
-               if (memcg_list_lru_alloc(memcg, &entry->pool->list_lru, GFP_KERNEL)) {
+               if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
                        mem_cgroup_put(memcg);
                        goto put_pool;
                }
                mem_cgroup_put(memcg);
        }
 
-       /* compress */
-       acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
-
-       mutex_lock(&acomp_ctx->mutex);
-
-       dst = acomp_ctx->buffer;
-       sg_init_table(&input, 1);
-       sg_set_page(&input, &folio->page, PAGE_SIZE, 0);
-
-       /*
-        * We need PAGE_SIZE * 2 here since there maybe over-compression case,
-        * and hardware-accelerators may won't check the dst buffer size, so
-        * giving the dst buffer with enough length to avoid buffer overflow.
-        */
-       sg_init_one(&output, dst, PAGE_SIZE * 2);
-       acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
-       /*
-        * it maybe looks a little bit silly that we send an asynchronous request,
-        * then wait for its completion synchronously. This makes the process look
-        * synchronous in fact.
-        * Theoretically, acomp supports users send multiple acomp requests in one
-        * acomp instance, then get those requests done simultaneously. but in this
-        * case, zswap actually does store and load page by page, there is no
-        * existing method to send the second page before the first page is done
-        * in one thread doing zwap.
-        * but in different threads running on different cpu, we have different
-        * acomp instance, so multiple threads can do (de)compression in parallel.
-        */
-       ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
-       dlen = acomp_ctx->req->dlen;
-
-       if (ret) {
-               zswap_reject_compress_fail++;
-               goto put_dstmem;
-       }
-
-       /* store */
-       zpool = zswap_find_zpool(entry);
-       gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
-       if (zpool_malloc_support_movable(zpool))
-               gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
-       ret = zpool_malloc(zpool, dlen, gfp, &handle);
-       if (ret == -ENOSPC) {
-               zswap_reject_compress_poor++;
-               goto put_dstmem;
-       }
-       if (ret) {
-               zswap_reject_alloc_fail++;
-               goto put_dstmem;
-       }
-       buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
-       memcpy(buf, dst, dlen);
-       zpool_unmap_handle(zpool, handle);
-       mutex_unlock(&acomp_ctx->mutex);
-
-       /* populate entry */
-       entry->swpentry = swp_entry(type, offset);
-       entry->handle = handle;
-       entry->length = dlen;
+       if (!zswap_compress(folio, entry))
+               goto put_pool;
 
 insert_entry:
+       entry->swpentry = swp;
        entry->objcg = objcg;
        if (objcg) {
                obj_cgroup_charge_zswap(objcg, entry->length);
@@ -1669,20 +1567,17 @@ insert_entry:
        /* map */
        spin_lock(&tree->lock);
        /*
-        * A duplicate entry should have been removed at the beginning of this
-        * function. Since the swap entry should be pinned, if a duplicate is
-        * found again here it means that something went wrong in the swap
-        * cache.
+        * The folio may have been dirtied again; invalidate the
+        * possibly stale entry before inserting the new entry.
         */
-       while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
-               WARN_ON(1);
-               zswap_duplicate_entry++;
+       if (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
                zswap_invalidate_entry(tree, dupentry);
+               WARN_ON(zswap_rb_insert(&tree->rbroot, entry, &dupentry));
        }
        if (entry->length) {
                INIT_LIST_HEAD(&entry->lru);
-               zswap_lru_add(&entry->pool->list_lru, entry);
-               atomic_inc(&entry->pool->nr_stored);
+               zswap_lru_add(&zswap_list_lru, entry);
+               atomic_inc(&zswap_nr_stored);
        }
        spin_unlock(&tree->lock);
 
@@ -1693,8 +1588,6 @@ insert_entry:
 
        return true;
 
-put_dstmem:
-       mutex_unlock(&acomp_ctx->mutex);
 put_pool:
        zswap_pool_put(entry->pool);
 freepage:
@@ -1702,38 +1595,46 @@ freepage:
 reject:
        if (objcg)
                obj_cgroup_put(objcg);
+check_old:
+       /*
+        * If the zswap store fails or zswap is disabled, we must invalidate the
+        * possibly stale entry which was previously stored at this offset.
+        * Otherwise, writeback could overwrite the new data in the swapfile.
+        */
+       spin_lock(&tree->lock);
+       entry = zswap_rb_search(&tree->rbroot, offset);
+       if (entry)
+               zswap_invalidate_entry(tree, entry);
+       spin_unlock(&tree->lock);
        return false;
 
 shrink:
-       pool = zswap_pool_last_get();
-       if (pool && !queue_work(shrink_wq, &pool->shrink_work))
-               zswap_pool_put(pool);
+       queue_work(shrink_wq, &zswap_shrink_work);
        goto reject;
 }
 
 bool zswap_load(struct folio *folio)
 {
        swp_entry_t swp = folio->swap;
-       int type = swp_type(swp);
        pgoff_t offset = swp_offset(swp);
        struct page *page = &folio->page;
-       struct zswap_tree *tree = zswap_trees[type];
+       struct zswap_tree *tree = swap_zswap_tree(swp);
        struct zswap_entry *entry;
        u8 *dst;
 
        VM_WARN_ON_ONCE(!folio_test_locked(folio));
 
-       /* find */
        spin_lock(&tree->lock);
-       entry = zswap_entry_find_get(&tree->rbroot, offset);
+       entry = zswap_rb_search(&tree->rbroot, offset);
        if (!entry) {
                spin_unlock(&tree->lock);
                return false;
        }
+       zswap_rb_erase(&tree->rbroot, entry);
        spin_unlock(&tree->lock);
 
        if (entry->length)
-               __zswap_load(entry, page);
+               zswap_decompress(entry, page);
        else {
                dst = kmap_local_page(page);
                zswap_fill_page(dst, entry->value);
@@ -1744,67 +1645,63 @@ bool zswap_load(struct folio *folio)
        if (entry->objcg)
                count_objcg_event(entry->objcg, ZSWPIN);
 
-       spin_lock(&tree->lock);
-       if (zswap_exclusive_loads_enabled) {
-               zswap_invalidate_entry(tree, entry);
-               folio_mark_dirty(folio);
-       } else if (entry->length) {
-               zswap_lru_del(&entry->pool->list_lru, entry);
-               zswap_lru_add(&entry->pool->list_lru, entry);
-       }
-       zswap_entry_put(tree, entry);
-       spin_unlock(&tree->lock);
+       zswap_entry_free(entry);
+
+       folio_mark_dirty(folio);
 
        return true;
 }
 
-void zswap_invalidate(int type, pgoff_t offset)
+void zswap_invalidate(swp_entry_t swp)
 {
-       struct zswap_tree *tree = zswap_trees[type];
+       pgoff_t offset = swp_offset(swp);
+       struct zswap_tree *tree = swap_zswap_tree(swp);
        struct zswap_entry *entry;
 
-       /* find */
        spin_lock(&tree->lock);
        entry = zswap_rb_search(&tree->rbroot, offset);
-       if (!entry) {
-               /* entry was written back */
-               spin_unlock(&tree->lock);
-               return;
-       }
-       zswap_invalidate_entry(tree, entry);
+       if (entry)
+               zswap_invalidate_entry(tree, entry);
        spin_unlock(&tree->lock);
 }
 
-void zswap_swapon(int type)
+int zswap_swapon(int type, unsigned long nr_pages)
 {
-       struct zswap_tree *tree;
+       struct zswap_tree *trees, *tree;
+       unsigned int nr, i;
 
-       tree = kzalloc(sizeof(*tree), GFP_KERNEL);
-       if (!tree) {
+       nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
+       trees = kvcalloc(nr, sizeof(*tree), GFP_KERNEL);
+       if (!trees) {
                pr_err("alloc failed, zswap disabled for swap type %d\n", type);
-               return;
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < nr; i++) {
+               tree = trees + i;
+               tree->rbroot = RB_ROOT;
+               spin_lock_init(&tree->lock);
        }
 
-       tree->rbroot = RB_ROOT;
-       spin_lock_init(&tree->lock);
-       zswap_trees[type] = tree;
+       nr_zswap_trees[type] = nr;
+       zswap_trees[type] = trees;
+       return 0;
 }
 
 void zswap_swapoff(int type)
 {
-       struct zswap_tree *tree = zswap_trees[type];
-       struct zswap_entry *entry, *n;
+       struct zswap_tree *trees = zswap_trees[type];
+       unsigned int i;
 
-       if (!tree)
+       if (!trees)
                return;
 
-       /* walk the tree and free everything */
-       spin_lock(&tree->lock);
-       rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
-               zswap_free_entry(entry);
-       tree->rbroot = RB_ROOT;
-       spin_unlock(&tree->lock);
-       kfree(tree);
+       /* try_to_unuse() invalidated all the entries already */
+       for (i = 0; i < nr_zswap_trees[type]; i++)
+               WARN_ON_ONCE(!RB_EMPTY_ROOT(&trees[i].rbroot));
+
+       kvfree(trees);
+       nr_zswap_trees[type] = 0;
        zswap_trees[type] = NULL;
 }
 
@@ -1837,8 +1734,6 @@ static int zswap_debugfs_init(void)
                           zswap_debugfs_root, &zswap_reject_compress_poor);
        debugfs_create_u64("written_back_pages", 0444,
                           zswap_debugfs_root, &zswap_written_back_pages);
-       debugfs_create_u64("duplicate_entry", 0444,
-                          zswap_debugfs_root, &zswap_duplicate_entry);
        debugfs_create_u64("pool_total_size", 0444,
                           zswap_debugfs_root, &zswap_pool_total_size);
        debugfs_create_atomic_t("stored_pages", 0444,
@@ -1876,6 +1771,20 @@ static int zswap_setup(void)
        if (ret)
                goto hp_fail;
 
+       shrink_wq = alloc_workqueue("zswap-shrink",
+                       WQ_UNBOUND|WQ_MEM_RECLAIM, 1);
+       if (!shrink_wq)
+               goto shrink_wq_fail;
+
+       zswap_shrinker = zswap_alloc_shrinker();
+       if (!zswap_shrinker)
+               goto shrinker_fail;
+       if (list_lru_init_memcg(&zswap_list_lru, zswap_shrinker))
+               goto lru_fail;
+       shrinker_register(zswap_shrinker);
+
+       INIT_WORK(&zswap_shrink_work, shrink_worker);
+
        pool = __zswap_pool_create_fallback();
        if (pool) {
                pr_info("loaded using pool %s/%s\n", pool->tfm_name,
@@ -1887,18 +1796,17 @@ static int zswap_setup(void)
                zswap_enabled = false;
        }
 
-       shrink_wq = create_workqueue("zswap-shrink");
-       if (!shrink_wq)
-               goto fallback_fail;
-
        if (zswap_debugfs_init())
                pr_warn("debugfs initialization failed\n");
        zswap_init_state = ZSWAP_INIT_SUCCEED;
        return 0;
 
-fallback_fail:
-       if (pool)
-               zswap_pool_destroy(pool);
+lru_fail:
+       shrinker_free(zswap_shrinker);
+shrinker_fail:
+       destroy_workqueue(shrink_wq);
+shrink_wq_fail:
+       cpuhp_remove_multi_state(CPUHP_MM_ZSWP_POOL_PREPARE);
 hp_fail:
        kmem_cache_destroy(zswap_entry_cache);
 cache_fail:
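
The zswap hunks above replace the old single rb-tree per swap type with an
array of trees, one tree per SWAP_ADDRESS_SPACE_PAGES worth of swap slots,
which is why zswap_swapon() now sizes an array with DIV_ROUND_UP() and all
lookups go through swap_zswap_tree(). The helper itself is not part of
these hunks; a minimal sketch of what the call sites assume, using the
names visible in the diff (SWAP_ADDRESS_SPACE_SHIFT comes from the swap
cache code):

        /* Sketch only: map a swap entry to its range tree. Assumes
         * zswap_trees[type] points at the kvcalloc()'d array built in
         * zswap_swapon() above, one tree per 2^SWAP_ADDRESS_SPACE_SHIFT
         * consecutive swap slots. */
        static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp)
        {
                return &zswap_trees[swp_type(swp)]
                        [swp_offset(swp) >> SWAP_ADDRESS_SPACE_SHIFT];
        }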
index 2d7b7324295885e7a5ee70dd63b5dffd9a9a8968..9a1cb5079a7a0758b241f8425ea4153045e72d07 100644 (file)
@@ -5053,7 +5053,7 @@ void br_multicast_uninit_stats(struct net_bridge *br)
        free_percpu(br->mcast_stats);
 }
 
-/* noinline for https://bugs.llvm.org/show_bug.cgi?id=45802#c9 */
+/* noinline for https://llvm.org/pr45802#c9 */
 static noinline_for_stack void mcast_stats_add_dir(u64 *dst, u64 *src)
 {
        dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX];
index cbb2b4bb0dfac5656bcb81474fc470633f36122a..3757fd93523f0f3ecad7ba187c240c7a36606d93 100644 (file)
@@ -217,5 +217,5 @@ module_init(gre_init);
 module_exit(gre_exit);
 
 MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
-MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_AUTHOR("D. Kozlov <xeb@mail.ru>");
 MODULE_LICENSE("GPL");
index 5e97e0aa8e07d0bb12615734b78d7ecb8700b5df..ca7e77e842835a6d153891fdca7dc8f196e0a2ba 100644 (file)
@@ -2405,7 +2405,7 @@ static void __exit ip6gre_fini(void)
 module_init(ip6gre_init);
 module_exit(ip6gre_fini);
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_AUTHOR("D. Kozlov <xeb@mail.ru>");
 MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
 MODULE_ALIAS_RTNL_LINK("ip6gre");
 MODULE_ALIAS_RTNL_LINK("ip6gretap");
index 5b56ae6612dd11f218241a9735e4435fd517b0bf..4d8ee3607b53df3efd1bf69452682a466b7798c4 100644 (file)
@@ -1904,6 +1904,6 @@ static void __exit iucv_exit(void)
 subsys_initcall(iucv_init);
 module_exit(iucv_exit);
 
-MODULE_AUTHOR("(C) 2001 IBM Corp. by Fritz Elfert (felfert@millenux.com)");
+MODULE_AUTHOR("(C) 2001 IBM Corp. by Fritz Elfert <felfert@millenux.com>");
 MODULE_DESCRIPTION("Linux for S/390 IUCV lowlevel driver");
 MODULE_LICENSE("GPL");
index 45d1e6a157fc7d14352f06dd23d677f50b0441b1..34ab659f541e0187f5372b5938d182879d7b8657 100644 (file)
@@ -109,5 +109,5 @@ module_init(mpls_gso_init);
 module_exit(mpls_gso_exit);
 
 MODULE_DESCRIPTION("MPLS GSO support");
-MODULE_AUTHOR("Simon Horman (horms@verge.net.au)");
+MODULE_AUTHOR("Simon Horman <horms@verge.net.au>");
 MODULE_LICENSE("GPL");
index d435bffc6199978500dc9106ee63cad8a158f87e..97ff11973c49377cbff0ad36f4b266cc54b1c045 100644 (file)
@@ -284,10 +284,10 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags)
        }
 
        if (snprintf(portbuf, sizeof(portbuf),
-                    ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf))
+                    ".%u.%u", port >> 8, port & 0xff) >= (int)sizeof(portbuf))
                return NULL;
 
-       if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf))
+       if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) >= sizeof(addrbuf))
                return NULL;
 
        return kstrdup(addrbuf, gfp_flags);
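
Both comparisons above are off-by-one fixes: snprintf() returns the length
the formatted string would have needed (excluding the terminating NUL), and
strlcat() returns the total length of the string it tried to create, so
truncation has already happened when the return value equals the buffer
size, not only when it exceeds it. A self-contained userspace illustration
(hypothetical buffer, not from this patch):

        #include <stdio.h>

        int main(void)
        {
                char buf[4];

                /* "1234" needs 4 chars plus a NUL; snprintf() returns 4,
                 * equal to sizeof(buf), and buf ends up holding only
                 * "123". The old '>' test misses exactly this case; the
                 * '>=' test catches it. */
                if (snprintf(buf, sizeof(buf), "%s", "1234") >= (int)sizeof(buf))
                        printf("truncated to \"%s\"\n", buf);
                return 0;
        }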
index cda0935a68c9db9462a663de3baad3ed786c0385..28f3749f6dc6cb7393d61c8fc07d9980a374f28b 100644 (file)
@@ -405,7 +405,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
        clnt->cl_maxproc  = version->nrprocs;
        clnt->cl_prog     = args->prognumber ? : program->number;
        clnt->cl_vers     = version->number;
-       clnt->cl_stats    = program->stats;
+       clnt->cl_stats    = args->stats ? : program->stats;
        clnt->cl_metrics  = rpc_alloc_iostats(clnt);
        rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
        err = -ENOMEM;
@@ -691,6 +691,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
                .version        = clnt->cl_vers,
                .authflavor     = clnt->cl_auth->au_flavor,
                .cred           = clnt->cl_cred,
+               .stats          = clnt->cl_stats,
        };
        return __rpc_clone_client(&args, clnt);
 }
@@ -713,6 +714,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
                .version        = clnt->cl_vers,
                .authflavor     = flavor,
                .cred           = clnt->cl_cred,
+               .stats          = clnt->cl_stats,
        };
        return __rpc_clone_client(&args, clnt);
 }
@@ -1068,6 +1070,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
                .version        = vers,
                .authflavor     = old->cl_auth->au_flavor,
                .cred           = old->cl_cred,
+               .stats          = old->cl_stats,
        };
        struct rpc_clnt *clnt;
        int err;
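
The "args->stats ? : program->stats" assignment above uses the GNU "?:"
shorthand: "a ? : b" yields a when a is non-zero (non-NULL) and b
otherwise, evaluating a only once. Together with the three clone paths
that now carry ".stats" forward, a caller-supplied stats structure takes
precedence over the program default and survives cloning. A one-line
illustration of the operator (hypothetical names, nothing RPC-specific):

        /* GNU extension: p ?: q is p ? p : q, with p evaluated once. */
        static const char *pick(const char *preferred, const char *fallback)
        {
                return preferred ?: fallback;
        }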
index af13fdfa6672146daf1b572aa29a8d4bd3731b1d..09f245cda5262a572c450237419c80b183a83568 100644 (file)
@@ -1398,6 +1398,12 @@ xprt_request_dequeue_transmit_locked(struct rpc_task *task)
        if (!test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
                return;
        if (!list_empty(&req->rq_xmit)) {
+               struct rpc_xprt *xprt = req->rq_xprt;
+
+               if (list_is_first(&req->rq_xmit, &xprt->xmit_queue) &&
+                   xprt->ops->abort_send_request)
+                       xprt->ops->abort_send_request(req);
+
                list_del(&req->rq_xmit);
                if (!list_empty(&req->rq_xmit2)) {
                        struct rpc_rqst *next = list_first_entry(&req->rq_xmit2,
@@ -1541,6 +1547,9 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
        int is_retrans = RPC_WAS_SENT(task);
        int status;
 
+       if (test_bit(XPRT_CLOSE_WAIT, &xprt->state))
+               return -ENOTCONN;
+
        if (!req->rq_bytes_sent) {
                if (xprt_request_data_received(task)) {
                        status = 0;
index d92c13e78a56cf584772fb68783866be75b9bf79..bb9b747d58a1afac3619b80cff3e8196e36d96c3 100644 (file)
@@ -62,6 +62,7 @@
 #include "sunrpc.h"
 
 static void xs_close(struct rpc_xprt *xprt);
+static void xs_reset_srcport(struct sock_xprt *transport);
 static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock);
 static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
                struct socket *sock);
@@ -883,6 +884,17 @@ static int xs_stream_prepare_request(struct rpc_rqst *req, struct xdr_buf *buf)
        return xdr_alloc_bvec(buf, rpc_task_gfp_mask());
 }
 
+static void xs_stream_abort_send_request(struct rpc_rqst *req)
+{
+       struct rpc_xprt *xprt = req->rq_xprt;
+       struct sock_xprt *transport =
+               container_of(xprt, struct sock_xprt, xprt);
+
+       if (transport->xmit.offset != 0 &&
+           !test_bit(XPRT_CLOSE_WAIT, &xprt->state))
+               xprt_force_disconnect(xprt);
+}
+
 /*
  * Determine if the previous message in the stream was aborted before it
  * could complete transmission.
@@ -1565,8 +1577,10 @@ static void xs_tcp_state_change(struct sock *sk)
                break;
        case TCP_CLOSE:
                if (test_and_clear_bit(XPRT_SOCK_CONNECTING,
-                                       &transport->sock_state))
+                                      &transport->sock_state)) {
+                       xs_reset_srcport(transport);
                        xprt_clear_connecting(xprt);
+               }
                clear_bit(XPRT_CLOSING, &xprt->state);
                /* Trigger the socket release */
                xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
@@ -1722,6 +1736,11 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
        xs_update_peer_port(xprt);
 }
 
+static void xs_reset_srcport(struct sock_xprt *transport)
+{
+       transport->srcport = 0;
+}
+
 static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
 {
        if (transport->srcport == 0 && transport->xprt.reuseport)
@@ -3012,6 +3031,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
        .buf_free               = rpc_free,
        .prepare_request        = xs_stream_prepare_request,
        .send_request           = xs_local_send_request,
+       .abort_send_request     = xs_stream_abort_send_request,
        .wait_for_reply_request = xprt_wait_for_reply_request_def,
        .close                  = xs_close,
        .destroy                = xs_destroy,
@@ -3059,6 +3079,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
        .buf_free               = rpc_free,
        .prepare_request        = xs_stream_prepare_request,
        .send_request           = xs_tcp_send_request,
+       .abort_send_request     = xs_stream_abort_send_request,
        .wait_for_reply_request = xprt_wait_for_reply_request_def,
        .close                  = xs_tcp_shutdown,
        .destroy                = xs_destroy,

index 93405264ff2337870e8c0693a0759540424a2f81..9062598ea03de322c19b9358fc1bdc44da6d53e7 100644 (file)
@@ -133,7 +133,9 @@ static struct mdev_type *mbochs_mdev_types[] = {
 };
 
 static dev_t           mbochs_devt;
-static struct class    *mbochs_class;
+static const struct class mbochs_class = {
+       .name = MBOCHS_CLASS_NAME,
+};
 static struct cdev     mbochs_cdev;
 static struct device   mbochs_dev;
 static struct mdev_parent mbochs_parent;
@@ -1422,13 +1424,10 @@ static int __init mbochs_dev_init(void)
        if (ret)
                goto err_cdev;
 
-       mbochs_class = class_create(MBOCHS_CLASS_NAME);
-       if (IS_ERR(mbochs_class)) {
-               pr_err("Error: failed to register mbochs_dev class\n");
-               ret = PTR_ERR(mbochs_class);
+       ret = class_register(&mbochs_class);
+       if (ret)
                goto err_driver;
-       }
-       mbochs_dev.class = mbochs_class;
+       mbochs_dev.class = &mbochs_class;
        mbochs_dev.release = mbochs_device_release;
        dev_set_name(&mbochs_dev, "%s", MBOCHS_NAME);
 
@@ -1448,7 +1447,7 @@ err_device:
        device_del(&mbochs_dev);
 err_put:
        put_device(&mbochs_dev);
-       class_destroy(mbochs_class);
+       class_unregister(&mbochs_class);
 err_driver:
        mdev_unregister_driver(&mbochs_driver);
 err_cdev:
@@ -1466,8 +1465,7 @@ static void __exit mbochs_dev_exit(void)
        mdev_unregister_driver(&mbochs_driver);
        cdev_del(&mbochs_cdev);
        unregister_chrdev_region(mbochs_devt, MINORMASK + 1);
-       class_destroy(mbochs_class);
-       mbochs_class = NULL;
+       class_unregister(&mbochs_class);
 }
 
 MODULE_IMPORT_NS(DMA_BUF);
index 72ea5832c927166111b96fd3e40dbd789b895afc..27795501de6ec5acddf4354d7b0fb170fe24ed6b 100644 (file)
@@ -84,7 +84,9 @@ static struct mdev_type *mdpy_mdev_types[] = {
 };
 
 static dev_t           mdpy_devt;
-static struct class    *mdpy_class;
+static const struct class mdpy_class = {
+       .name = MDPY_CLASS_NAME,
+};
 static struct cdev     mdpy_cdev;
 static struct device   mdpy_dev;
 static struct mdev_parent mdpy_parent;
@@ -709,13 +711,10 @@ static int __init mdpy_dev_init(void)
        if (ret)
                goto err_cdev;
 
-       mdpy_class = class_create(MDPY_CLASS_NAME);
-       if (IS_ERR(mdpy_class)) {
-               pr_err("Error: failed to register mdpy_dev class\n");
-               ret = PTR_ERR(mdpy_class);
+       ret = class_register(&mdpy_class);
+       if (ret)
                goto err_driver;
-       }
-       mdpy_dev.class = mdpy_class;
+       mdpy_dev.class = &mdpy_class;
        mdpy_dev.release = mdpy_device_release;
        dev_set_name(&mdpy_dev, "%s", MDPY_NAME);
 
@@ -735,7 +734,7 @@ err_device:
        device_del(&mdpy_dev);
 err_put:
        put_device(&mdpy_dev);
-       class_destroy(mdpy_class);
+       class_unregister(&mdpy_class);
 err_driver:
        mdev_unregister_driver(&mdpy_driver);
 err_cdev:
@@ -753,8 +752,7 @@ static void __exit mdpy_dev_exit(void)
        mdev_unregister_driver(&mdpy_driver);
        cdev_del(&mdpy_cdev);
        unregister_chrdev_region(mdpy_devt, MINORMASK + 1);
-       class_destroy(mdpy_class);
-       mdpy_class = NULL;
+       class_unregister(&mdpy_class);
 }
 
 module_param_named(count, mdpy_driver.max_instances, int, 0444);
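
The mbochs and mdpy hunks apply the same conversion: instead of a class
allocated at runtime by class_create(), which can fail and hands back an
ERR_PTR, each sample now declares a const struct class at file scope and
pairs class_register() with class_unregister(). A minimal sketch of the
pattern under hypothetical names:

        static const struct class sample_class = {
                .name = "sample",       /* hypothetical class name */
        };

        static int __init sample_init(void)
        {
                return class_register(&sample_class);
        }

        static void __exit sample_exit(void)
        {
                class_unregister(&sample_class);
        }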
index 4f163e0bf6a48719c522ee9d6cd63a5b96c5d1c6..20274c63e7451a8722137e7077f4456f293f4e54 100755 (executable)
@@ -8,7 +8,7 @@
 # Example invocation:
 #      scripts/check-sysctl-docs -vtable="kernel" \
 #              Documentation/admin-guide/sysctl/kernel.rst \
-#              $(git grep -l register_sysctl_)
+#              $(git grep -l register_sysctl)
 #
 # Specify -vdebug=1 to see debugging information
 
@@ -20,14 +20,10 @@ BEGIN {
 }
 
 # The following globals are used:
-# children: maps ctl_table names and procnames to child ctl_table names
 # documented: maps documented entries (each key is an entry)
 # entries: maps ctl_table names and procnames to counts (so
 #          enumerating the subkeys for a given ctl_table lists its
 #          procnames)
-# files: maps procnames to source file names
-# paths: maps ctl_path names to paths
-# curpath: the name of the current ctl_path struct
 # curtable: the name of the current ctl_table struct
 # curentry: the name of the current proc entry (procname when parsing
 #           a ctl_table, constructed path when parsing a ctl_path)
@@ -94,42 +90,23 @@ FNR == NR {
 
 # Stage 2: process each file and find all sysctl tables
 BEGINFILE {
-    delete children
     delete entries
-    delete paths
-    curpath = ""
     curtable = ""
     curentry = ""
+    delete vars
     if (debug) print "Processing file " FILENAME
 }
 
-/^static struct ctl_path/ {
-    match($0, /static struct ctl_path ([^][]+)/, tables)
-    curpath = tables[1]
-    if (debug) print "Processing path " curpath
-}
-
-/^static struct ctl_table/ {
-    match($0, /static struct ctl_table ([^][]+)/, tables)
-    curtable = tables[1]
+/^static( const)? struct ctl_table/ {
+    match($0, /static( const)? struct ctl_table ([^][]+)/, tables)
+    curtable = tables[2]
     if (debug) print "Processing table " curtable
 }
 
 /^};$/ {
-    curpath = ""
     curtable = ""
     curentry = ""
-}
-
-curpath && /\.procname[\t ]*=[\t ]*".+"/ {
-    match($0, /.procname[\t ]*=[\t ]*"([^"]+)"/, names)
-    if (curentry) {
-       curentry = curentry "/" names[1]
-    } else {
-       curentry = names[1]
-    }
-    if (debug) print "Setting path " curpath " to " curentry
-    paths[curpath] = curentry
+    delete vars
 }
 
 curtable && /\.procname[\t ]*=[\t ]*".+"/ {
@@ -140,10 +117,32 @@ curtable && /\.procname[\t ]*=[\t ]*".+"/ {
     file[curentry] = FILENAME
 }
 
-/\.child[\t ]*=/ {
-    child = trimpunct($NF)
-    if (debug) print "Linking child " child " to table " curtable " entry " curentry
-    children[curtable][curentry] = child
+/register_sysctl.*/ {
+    match($0, /register_sysctl(|_init|_sz)\("([^"]+)" *, *([^,)]+)/, tables)
+    if (debug) print "Registering table " tables[3] " at " tables[2]
+    if (tables[2] == table) {
+        for (entry in entries[tables[3]]) {
+            printentry(entry)
+        }
+    }
+}
+
+/kmemdup.*/ {
+    match($0, /([^ \t]+) *= *kmemdup\(([^,]+) *,/, names)
+    if (debug) print "Found variable " names[1] " for table " names[2]
+    if (names[2] in entries) {
+        vars[names[1]] = names[2]
+    }
+}
+
+/__register_sysctl_table.*/ {
+    match($0, /__register_sysctl_table\([^,]+, *"([^"]+)" *, *([^,]+)/, tables)
+    if (debug) print "Registering variable table " tables[2] " at " tables[1]
+    if (tables[1] == table && tables[2] in vars) {
+        for (entry in entries[vars[tables[2]]]) {
+            printentry(entry)
+        }
+    }
 }
 
 END {
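
With the ".child"-based tree walk gone, the script keys directly on the
modern registration helpers. For reference, the new rules above are written
to match call sites of roughly this shape (illustrative C, not taken from
this patch):

        static struct ctl_table kern_table[] = { /* ... */ };

        /* matched by the register_sysctl(|_init|_sz) rule: */
        register_sysctl("kernel", kern_table);
        register_sysctl_sz("kernel", kern_table, ARRAY_SIZE(kern_table));

        /* matched by the kmemdup rule plus the __register_sysctl_table rule: */
        tbl = kmemdup(kern_table, sizeof(kern_table), GFP_KERNEL);
        __register_sysctl_table(set, "kernel", tbl, ARRAY_SIZE(kern_table));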
index 188412aa275795321a9be77e33d94ee9130b60b6..fa96cfd16e998dc6fba20dd899b50f0ecd0685f4 100644 (file)
@@ -2,11 +2,13 @@ acpi_dock_ops
 address_space_operations
 backlight_ops
 block_device_operations
+bus_type
 clk_ops
 comedi_lrange
 component_ops
 dentry_operations
 dev_pm_ops
+device_type
 dma_map_ops
 driver_info
 drm_connector_funcs
index 10fadc2387194e1cb1fc19d29c92e8a8fa1f8c6d..fd6bd69c5096ac540efb93979d5eb03cb410deb5 100644 (file)
@@ -130,7 +130,11 @@ LX_CONFIG(CONFIG_X86_MCE_THRESHOLD)
 LX_CONFIG(CONFIG_X86_MCE_AMD)
 LX_CONFIG(CONFIG_X86_MCE)
 LX_CONFIG(CONFIG_X86_IO_APIC)
-LX_CONFIG(CONFIG_HAVE_KVM)
+/*
+ * CONFIG_KVM can be "m" but it affects common code too.  Use CONFIG_KVM_COMMON
+ * as a proxy for IS_ENABLED(CONFIG_KVM).
+ */
+LX_CONFIG_KVM = IS_BUILTIN(CONFIG_KVM_COMMON)
 LX_CONFIG(CONFIG_NUMA)
 LX_CONFIG(CONFIG_ARM64)
 LX_CONFIG(CONFIG_ARM64_4K_PAGES)
index ef478e273791f359edc722b28c4d35f55bc8e2ee..66ae5c7690cf1750eb0bbaf3a0942fafa3fe9868 100644 (file)
@@ -151,7 +151,7 @@ def x86_show_interupts(prec):
         if cnt is not None:
             text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
 
-    if constants.LX_CONFIG_HAVE_KVM:
+    if constants.LX_CONFIG_KVM:
         text += x86_show_irqstat(prec, "PIN", 'kvm_posted_intr_ipis', 'Posted-interrupt notification event')
         text += x86_show_irqstat(prec, "NPI", 'kvm_posted_intr_nested_ipis', 'Nested posted-interrupt event')
         text += x86_show_irqstat(prec, "PIW", 'kvm_posted_intr_wakeup_ipis', 'Posted-interrupt wakeup event')
index d3c8a0274d1edaed141e674b4517a4fb79afe790..803f1737105289577717984789a13ed6d312d47d 100644 (file)
@@ -29,32 +29,34 @@ class LxVmallocInfo(gdb.Command):
         if not constants.LX_CONFIG_MMU:
             raise gdb.GdbError("Requires MMU support")
 
-        vmap_area_list = gdb.parse_and_eval('vmap_area_list')
-        for vmap_area in lists.list_for_each_entry(vmap_area_list, vmap_area_ptr_type, "list"):
-            if not vmap_area['vm']:
-                gdb.write("0x%x-0x%x %10d vm_map_ram\n" % (vmap_area['va_start'], vmap_area['va_end'],
-                    vmap_area['va_end'] - vmap_area['va_start']))
-                continue
-            v = vmap_area['vm']
-            gdb.write("0x%x-0x%x %10d" % (v['addr'], v['addr'] + v['size'], v['size']))
-            if v['caller']:
-                gdb.write(" %s" % str(v['caller']).split(' ')[-1])
-            if v['nr_pages']:
-                gdb.write(" pages=%d" % v['nr_pages'])
-            if v['phys_addr']:
-                gdb.write(" phys=0x%x" % v['phys_addr'])
-            if v['flags'] & constants.LX_VM_IOREMAP:
-                gdb.write(" ioremap")
-            if v['flags'] & constants.LX_VM_ALLOC:
-                gdb.write(" vmalloc")
-            if v['flags'] & constants.LX_VM_MAP:
-                gdb.write(" vmap")
-            if v['flags'] & constants.LX_VM_USERMAP:
-                gdb.write(" user")
-            if v['flags'] & constants.LX_VM_DMA_COHERENT:
-                gdb.write(" dma-coherent")
-            if is_vmalloc_addr(v['pages']):
-                gdb.write(" vpages")
-            gdb.write("\n")
+        nr_vmap_nodes = gdb.parse_and_eval('nr_vmap_nodes')
+        for i in range(0, nr_vmap_nodes):
+            vn = gdb.parse_and_eval('&vmap_nodes[%d]' % i)
+            for vmap_area in lists.list_for_each_entry(vn['busy']['head'], vmap_area_ptr_type, "list"):
+                if not vmap_area['vm']:
+                    gdb.write("0x%x-0x%x %10d vm_map_ram\n" % (vmap_area['va_start'], vmap_area['va_end'],
+                        vmap_area['va_end'] - vmap_area['va_start']))
+                    continue
+                v = vmap_area['vm']
+                gdb.write("0x%x-0x%x %10d" % (v['addr'], v['addr'] + v['size'], v['size']))
+                if v['caller']:
+                    gdb.write(" %s" % str(v['caller']).split(' ')[-1])
+                if v['nr_pages']:
+                    gdb.write(" pages=%d" % v['nr_pages'])
+                if v['phys_addr']:
+                    gdb.write(" phys=0x%x" % v['phys_addr'])
+                if v['flags'] & constants.LX_VM_IOREMAP:
+                    gdb.write(" ioremap")
+                if v['flags'] & constants.LX_VM_ALLOC:
+                    gdb.write(" vmalloc")
+                if v['flags'] & constants.LX_VM_MAP:
+                    gdb.write(" vmap")
+                if v['flags'] & constants.LX_VM_USERMAP:
+                    gdb.write(" user")
+                if v['flags'] & constants.LX_VM_DMA_COHERENT:
+                    gdb.write(" dma-coherent")
+                if is_vmalloc_addr(v['pages']):
+                    gdb.write(" vpages")
+                gdb.write("\n")
 
 LxVmallocInfo()
index e217683b10d6fabb06430917e4980664f752c35d..5927cc6b7de3383a4ee5b50b51a3da47c350fede 100755 (executable)
@@ -29,7 +29,7 @@ llvm)
        elif [ "$SRCARCH" = loongarch ]; then
                echo 18.0.0
        else
-               echo 11.0.0
+               echo 13.0.1
        fi
        ;;
 rustc)
index f84df9e383fd0acf75b9afb87422aff9c088e3a8..0871b2e92584b2a73f4cb89ae34ebc9c2b15ff17 100755 (executable)
@@ -352,7 +352,7 @@ if ($arch eq "x86_64") {
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
 } elsif ($arch eq "riscv") {
     $function_regex = "^([0-9a-fA-F]+)\\s+<([^.0-9][0-9a-zA-Z_\\.]+)>:";
-    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL(_PLT)?\\s_?mcount\$";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL(_PLT)?\\s_mcount\$";
     $type = ".quad";
     $alignment = 2;
 } elsif ($arch eq "csky") {
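
The RISC-V tightening above drops the optional leading underscore: the
ftrace stub on RISC-V is _mcount, so "_?mcount" was needlessly permissive.
Reading the fixed $mcount_regex back, it accepts relocation lines of this
shape (derived from the regex itself, not captured tool output):

        8: R_RISCV_CALL_PLT _mcount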
index 52c9af08ad35d3d31995337d4643b26c015f75ca..412e76f1575d0db570027bdfdcafedd0029a5c62 100644 (file)
@@ -142,8 +142,6 @@ config HARDENED_USERCOPY
 config FORTIFY_SOURCE
        bool "Harden common str/mem functions against buffer overflows"
        depends on ARCH_HAS_FORTIFY_SOURCE
-       # https://bugs.llvm.org/show_bug.cgi?id=41459
-       depends on !CC_IS_CLANG || CLANG_VERSION >= 120001
        # https://github.com/llvm/llvm-project/issues/53645
        depends on !CC_IS_CLANG || !X86_32
        help
index 3f49a9e28bfc5509742822fff70a2d2b3eb74ee7..b8ff5cccd0c811fd76050be0f9cf50efa899a3cc 100644 (file)
@@ -365,15 +365,13 @@ static int i2sbus_probe(struct macio_dev* dev, const struct of_device_id *match)
        return 0;
 }
 
-static int i2sbus_remove(struct macio_dev* dev)
+static void i2sbus_remove(struct macio_dev *dev)
 {
        struct i2sbus_control *control = dev_get_drvdata(&dev->ofdev.dev);
        struct i2sbus_dev *i2sdev, *tmp;
 
        list_for_each_entry_safe(i2sdev, tmp, &control->list, item)
                soundbus_remove_one(&i2sdev->sound);
-
-       return 0;
 }
 
 #ifdef CONFIG_PM
diff --git a/sound/core/.kunitconfig b/sound/core/.kunitconfig
new file mode 100644 (file)
index 0000000..440f974
--- /dev/null
@@ -0,0 +1,5 @@
+CONFIG_KUNIT=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_PCM=y
+CONFIG_SND_CORE_TEST=y
index 15b07d09c4b72064d1f4eaff7d41cd40c9a29d01..4d2ee99c12a3fb2a5af3dcebc4bc9a77efadcccb 100644 (file)
@@ -409,7 +409,7 @@ static void snd_timer_close_locked(struct snd_timer_instance *timeri,
        struct snd_timer *timer = timeri->timer;
 
        if (timer) {
-               guard(spinlock)(&timer->lock);
+               guard(spinlock_irq)(&timer->lock);
                timeri->flags |= SNDRV_TIMER_IFLG_DEAD;
        }
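
The one-line change above is a locking fix: timer->lock is also taken from
interrupt context, so holding it with interrupts enabled can deadlock
against the timer IRQ path, and the scope-based guard must be the _irq
variant. The guard() helpers from linux/cleanup.h drop the lock
automatically at end of scope; the fixed block behaves roughly like:

        spin_lock_irq(&timer->lock);
        timeri->flags |= SNDRV_TIMER_IFLG_DEAD;
        spin_unlock_irq(&timer->lock);  /* emitted implicitly at scope exit */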
 
index b6cd13b1775d93f5704989cd9bf1c4f4c0cc1147..e904f62e195267faab2a1ed67dcab4fa75f2d8f8 100644 (file)
@@ -6891,6 +6891,13 @@ static void yoga7_14arb7_fixup_i2c(struct hda_codec *cdc,
        comp_generic_fixup(cdc, action, "i2c", "INT8866", "-%s:00", 1);
 }
 
+static void alc256_fixup_acer_sfg16_micmute_led(struct hda_codec *codec,
+       const struct hda_fixup *fix, int action)
+{
+       alc_fixup_hp_gpio_led(codec, action, 0, 0x04);
+}
+
 /* for alc295_fixup_hp_top_speakers */
 #include "hp_x360_helper.c"
 
@@ -7360,6 +7367,7 @@ enum {
        ALC287_FIXUP_LEGION_16ITHG6,
        ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK,
        ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN,
+       ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN,
        ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS,
        ALC236_FIXUP_DELL_DUAL_CODECS,
        ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI,
@@ -7373,6 +7381,7 @@ enum {
        ALC289_FIXUP_DELL_CS35L41_SPI_2,
        ALC294_FIXUP_CS35L41_I2C_2,
        ALC245_FIXUP_CS35L56_SPI_4_HP_GPIO_LED,
+       ALC256_FIXUP_ACER_SFG16_MICMUTE_LED,
 };
 
 /* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -9490,6 +9499,12 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK,
        },
+       [ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc287_fixup_yoga9_14iap7_bass_spk_pin,
+               .chained = true,
+               .chain_id = ALC287_FIXUP_CS35L41_I2C_2,
+       },
        [ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc295_fixup_dell_inspiron_top_speakers,
@@ -9562,6 +9577,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC285_FIXUP_HP_GPIO_LED,
        },
+       [ALC256_FIXUP_ACER_SFG16_MICMUTE_LED] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc256_fixup_acer_sfg16_micmute_led,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -9605,6 +9624,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC),
        SND_PCI_QUIRK(0x1025, 0x1534, "Acer Predator PH315-54", ALC255_FIXUP_ACER_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1025, 0x169a, "Acer Swift SFG16", ALC256_FIXUP_ACER_SFG16_MICMUTE_LED),
        SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
        SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X),
        SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
@@ -10270,6 +10290,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x38c3, "Y980 DUAL", ALC287_FIXUP_TAS2781_I2C),
        SND_PCI_QUIRK(0x17aa, 0x38cb, "Y790 YG DUAL", ALC287_FIXUP_TAS2781_I2C),
        SND_PCI_QUIRK(0x17aa, 0x38cd, "Y790 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
+       SND_PCI_QUIRK(0x17aa, 0x38d2, "Lenovo Yoga 9 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN),
+       SND_PCI_QUIRK(0x17aa, 0x38d7, "Lenovo Yoga 9 14IMH9", ALC287_FIXUP_YOGA9_14IMH9_BASS_SPK_PIN),
        SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
        SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
        SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
index 81256ab56835ede6fafe4bc325526fa68943291e..409fc11646948e40ec6b03be6840c150c04db8b7 100644 (file)
@@ -1652,34 +1652,6 @@ static const struct usb_feature_control_info *get_feature_control_info(int contr
        return NULL;
 }
 
-static int feature_unit_mutevol_ctl_name(struct usb_mixer_interface *mixer,
-                                        struct snd_kcontrol *kctl,
-                                        struct usb_audio_term *iterm,
-                                        struct usb_audio_term *oterm)
-{
-       struct usb_audio_term *aterm, *bterm;
-       bool output_first;
-       int len = 0;
-
-       /*
-        * If the input terminal is USB Streaming, we try getting the name of
-        * the output terminal first in hopes of getting something more
-        * descriptive than "PCM".
-        */
-       output_first = iterm && !(iterm->type >> 16) && (iterm->type & 0xff00) == 0x0100;
-
-       aterm = output_first ? oterm : iterm;
-       bterm = output_first ? iterm : oterm;
-
-       if (aterm)
-               len = get_term_name(mixer->chip, aterm, kctl->id.name,
-                                   sizeof(kctl->id.name), 1);
-       if (!len && bterm)
-               len = get_term_name(mixer->chip, bterm, kctl->id.name,
-                                   sizeof(kctl->id.name), 1);
-       return len;
-}
-
 static void __build_feature_ctl(struct usb_mixer_interface *mixer,
                                const struct usbmix_name_map *imap,
                                unsigned int ctl_mask, int control,
@@ -1761,15 +1733,22 @@ static void __build_feature_ctl(struct usb_mixer_interface *mixer,
        case UAC_FU_MUTE:
        case UAC_FU_VOLUME:
                /*
-                * Determine the control name:
-                * - If a name id is given in descriptor, use it.
-                * - If input and output terminals are present, try to derive
-                *   the name from either of these.
-                * - Otherwise, make up a name using the feature unit ID.
+                * determine the control name.  the rule is:
+                * - if a name id is given in descriptor, use it.
+                * - if the connected input can be determined, then use the name
+                *   of terminal type.
+                * - if the connected output can be determined, use it.
+                * - otherwise, anonymous name.
                 */
                if (!len) {
-                       len = feature_unit_mutevol_ctl_name(mixer, kctl, iterm,
-                                                           oterm);
+                       if (iterm)
+                               len = get_term_name(mixer->chip, iterm,
+                                                   kctl->id.name,
+                                                   sizeof(kctl->id.name), 1);
+                       if (!len && oterm)
+                               len = get_term_name(mixer->chip, oterm,
+                                                   kctl->id.name,
+                                                   sizeof(kctl->id.name), 1);
                        if (!len)
                                snprintf(kctl->id.name, sizeof(kctl->id.name),
                                         "Feature %d", unitid);
diff --git a/tools/arch/riscv/include/asm/csr.h b/tools/arch/riscv/include/asm/csr.h
new file mode 100644 (file)
index 0000000..0dfc092
--- /dev/null
@@ -0,0 +1,541 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015 Regents of the University of California
+ */
+
+#ifndef _ASM_RISCV_CSR_H
+#define _ASM_RISCV_CSR_H
+
+#include <linux/bits.h>
+
+/* Status register flags */
+#define SR_SIE         _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
+#define SR_MIE         _AC(0x00000008, UL) /* Machine Interrupt Enable */
+#define SR_SPIE                _AC(0x00000020, UL) /* Previous Supervisor IE */
+#define SR_MPIE                _AC(0x00000080, UL) /* Previous Machine IE */
+#define SR_SPP         _AC(0x00000100, UL) /* Previously Supervisor */
+#define SR_MPP         _AC(0x00001800, UL) /* Previously Machine */
+#define SR_SUM         _AC(0x00040000, UL) /* Supervisor User Memory Access */
+
+#define SR_FS          _AC(0x00006000, UL) /* Floating-point Status */
+#define SR_FS_OFF      _AC(0x00000000, UL)
+#define SR_FS_INITIAL  _AC(0x00002000, UL)
+#define SR_FS_CLEAN    _AC(0x00004000, UL)
+#define SR_FS_DIRTY    _AC(0x00006000, UL)
+
+#define SR_VS          _AC(0x00000600, UL) /* Vector Status */
+#define SR_VS_OFF      _AC(0x00000000, UL)
+#define SR_VS_INITIAL  _AC(0x00000200, UL)
+#define SR_VS_CLEAN    _AC(0x00000400, UL)
+#define SR_VS_DIRTY    _AC(0x00000600, UL)
+
+#define SR_XS          _AC(0x00018000, UL) /* Extension Status */
+#define SR_XS_OFF      _AC(0x00000000, UL)
+#define SR_XS_INITIAL  _AC(0x00008000, UL)
+#define SR_XS_CLEAN    _AC(0x00010000, UL)
+#define SR_XS_DIRTY    _AC(0x00018000, UL)
+
+#define SR_FS_VS       (SR_FS | SR_VS) /* Vector and Floating-Point Unit */
+
+#ifndef CONFIG_64BIT
+#define SR_SD          _AC(0x80000000, UL) /* FS/VS/XS dirty */
+#else
+#define SR_SD          _AC(0x8000000000000000, UL) /* FS/VS/XS dirty */
+#endif
+
+#ifdef CONFIG_64BIT
+#define SR_UXL         _AC(0x300000000, UL) /* XLEN mask for U-mode */
+#define SR_UXL_32      _AC(0x100000000, UL) /* XLEN = 32 for U-mode */
+#define SR_UXL_64      _AC(0x200000000, UL) /* XLEN = 64 for U-mode */
+#endif
+
+/* SATP flags */
+#ifndef CONFIG_64BIT
+#define SATP_PPN       _AC(0x003FFFFF, UL)
+#define SATP_MODE_32   _AC(0x80000000, UL)
+#define SATP_MODE_SHIFT        31
+#define SATP_ASID_BITS 9
+#define SATP_ASID_SHIFT        22
+#define SATP_ASID_MASK _AC(0x1FF, UL)
+#else
+#define SATP_PPN       _AC(0x00000FFFFFFFFFFF, UL)
+#define SATP_MODE_39   _AC(0x8000000000000000, UL)
+#define SATP_MODE_48   _AC(0x9000000000000000, UL)
+#define SATP_MODE_57   _AC(0xa000000000000000, UL)
+#define SATP_MODE_SHIFT        60
+#define SATP_ASID_BITS 16
+#define SATP_ASID_SHIFT        44
+#define SATP_ASID_MASK _AC(0xFFFF, UL)
+#endif
+
+/* Exception cause high bit - is an interrupt if set */
+#define CAUSE_IRQ_FLAG         (_AC(1, UL) << (__riscv_xlen - 1))
+
+/* Interrupt causes (minus the high bit) */
+#define IRQ_S_SOFT             1
+#define IRQ_VS_SOFT            2
+#define IRQ_M_SOFT             3
+#define IRQ_S_TIMER            5
+#define IRQ_VS_TIMER           6
+#define IRQ_M_TIMER            7
+#define IRQ_S_EXT              9
+#define IRQ_VS_EXT             10
+#define IRQ_M_EXT              11
+#define IRQ_S_GEXT             12
+#define IRQ_PMU_OVF            13
+#define IRQ_LOCAL_MAX          (IRQ_PMU_OVF + 1)
+#define IRQ_LOCAL_MASK         GENMASK((IRQ_LOCAL_MAX - 1), 0)
+
+/* Exception causes */
+#define EXC_INST_MISALIGNED    0
+#define EXC_INST_ACCESS                1
+#define EXC_INST_ILLEGAL       2
+#define EXC_BREAKPOINT         3
+#define EXC_LOAD_MISALIGNED    4
+#define EXC_LOAD_ACCESS                5
+#define EXC_STORE_MISALIGNED   6
+#define EXC_STORE_ACCESS       7
+#define EXC_SYSCALL            8
+#define EXC_HYPERVISOR_SYSCALL 9
+#define EXC_SUPERVISOR_SYSCALL 10
+#define EXC_INST_PAGE_FAULT    12
+#define EXC_LOAD_PAGE_FAULT    13
+#define EXC_STORE_PAGE_FAULT   15
+#define EXC_INST_GUEST_PAGE_FAULT      20
+#define EXC_LOAD_GUEST_PAGE_FAULT      21
+#define EXC_VIRTUAL_INST_FAULT         22
+#define EXC_STORE_GUEST_PAGE_FAULT     23
+
+/* PMP configuration */
+#define PMP_R                  0x01
+#define PMP_W                  0x02
+#define PMP_X                  0x04
+#define PMP_A                  0x18
+#define PMP_A_TOR              0x08
+#define PMP_A_NA4              0x10
+#define PMP_A_NAPOT            0x18
+#define PMP_L                  0x80
+
+/* HSTATUS flags */
+#ifdef CONFIG_64BIT
+#define HSTATUS_VSXL           _AC(0x300000000, UL)
+#define HSTATUS_VSXL_SHIFT     32
+#endif
+#define HSTATUS_VTSR           _AC(0x00400000, UL)
+#define HSTATUS_VTW            _AC(0x00200000, UL)
+#define HSTATUS_VTVM           _AC(0x00100000, UL)
+#define HSTATUS_VGEIN          _AC(0x0003f000, UL)
+#define HSTATUS_VGEIN_SHIFT    12
+#define HSTATUS_HU             _AC(0x00000200, UL)
+#define HSTATUS_SPVP           _AC(0x00000100, UL)
+#define HSTATUS_SPV            _AC(0x00000080, UL)
+#define HSTATUS_GVA            _AC(0x00000040, UL)
+#define HSTATUS_VSBE           _AC(0x00000020, UL)
+
+/* HGATP flags */
+#define HGATP_MODE_OFF         _AC(0, UL)
+#define HGATP_MODE_SV32X4      _AC(1, UL)
+#define HGATP_MODE_SV39X4      _AC(8, UL)
+#define HGATP_MODE_SV48X4      _AC(9, UL)
+#define HGATP_MODE_SV57X4      _AC(10, UL)
+
+#define HGATP32_MODE_SHIFT     31
+#define HGATP32_VMID_SHIFT     22
+#define HGATP32_VMID           GENMASK(28, 22)
+#define HGATP32_PPN            GENMASK(21, 0)
+
+#define HGATP64_MODE_SHIFT     60
+#define HGATP64_VMID_SHIFT     44
+#define HGATP64_VMID           GENMASK(57, 44)
+#define HGATP64_PPN            GENMASK(43, 0)
+
+#define HGATP_PAGE_SHIFT       12
+
+#ifdef CONFIG_64BIT
+#define HGATP_PPN              HGATP64_PPN
+#define HGATP_VMID_SHIFT       HGATP64_VMID_SHIFT
+#define HGATP_VMID             HGATP64_VMID
+#define HGATP_MODE_SHIFT       HGATP64_MODE_SHIFT
+#else
+#define HGATP_PPN              HGATP32_PPN
+#define HGATP_VMID_SHIFT       HGATP32_VMID_SHIFT
+#define HGATP_VMID             HGATP32_VMID
+#define HGATP_MODE_SHIFT       HGATP32_MODE_SHIFT
+#endif
+
+/* VSIP & HVIP relation */
+#define VSIP_TO_HVIP_SHIFT     (IRQ_VS_SOFT - IRQ_S_SOFT)
+#define VSIP_VALID_MASK                ((_AC(1, UL) << IRQ_S_SOFT) | \
+                                (_AC(1, UL) << IRQ_S_TIMER) | \
+                                (_AC(1, UL) << IRQ_S_EXT))
+
+/* AIA CSR bits */
+#define TOPI_IID_SHIFT         16
+#define TOPI_IID_MASK          GENMASK(11, 0)
+#define TOPI_IPRIO_MASK                GENMASK(7, 0)
+#define TOPI_IPRIO_BITS                8
+
+#define TOPEI_ID_SHIFT         16
+#define TOPEI_ID_MASK          GENMASK(10, 0)
+#define TOPEI_PRIO_MASK                GENMASK(10, 0)
+
+#define ISELECT_IPRIO0         0x30
+#define ISELECT_IPRIO15                0x3f
+#define ISELECT_MASK           GENMASK(8, 0)
+
+#define HVICTL_VTI             BIT(30)
+#define HVICTL_IID             GENMASK(27, 16)
+#define HVICTL_IID_SHIFT       16
+#define HVICTL_DPR             BIT(9)
+#define HVICTL_IPRIOM          BIT(8)
+#define HVICTL_IPRIO           GENMASK(7, 0)
+
+/* xENVCFG flags */
+#define ENVCFG_STCE                    (_AC(1, ULL) << 63)
+#define ENVCFG_PBMTE                   (_AC(1, ULL) << 62)
+#define ENVCFG_CBZE                    (_AC(1, UL) << 7)
+#define ENVCFG_CBCFE                   (_AC(1, UL) << 6)
+#define ENVCFG_CBIE_SHIFT              4
+#define ENVCFG_CBIE                    (_AC(0x3, UL) << ENVCFG_CBIE_SHIFT)
+#define ENVCFG_CBIE_ILL                        _AC(0x0, UL)
+#define ENVCFG_CBIE_FLUSH              _AC(0x1, UL)
+#define ENVCFG_CBIE_INV                        _AC(0x3, UL)
+#define ENVCFG_FIOM                    _AC(0x1, UL)
+
+/* Smstateen bits */
+#define SMSTATEEN0_AIA_IMSIC_SHIFT     58
+#define SMSTATEEN0_AIA_IMSIC           (_ULL(1) << SMSTATEEN0_AIA_IMSIC_SHIFT)
+#define SMSTATEEN0_AIA_SHIFT           59
+#define SMSTATEEN0_AIA                 (_ULL(1) << SMSTATEEN0_AIA_SHIFT)
+#define SMSTATEEN0_AIA_ISEL_SHIFT      60
+#define SMSTATEEN0_AIA_ISEL            (_ULL(1) << SMSTATEEN0_AIA_ISEL_SHIFT)
+#define SMSTATEEN0_HSENVCFG_SHIFT      62
+#define SMSTATEEN0_HSENVCFG            (_ULL(1) << SMSTATEEN0_HSENVCFG_SHIFT)
+#define SMSTATEEN0_SSTATEEN0_SHIFT     63
+#define SMSTATEEN0_SSTATEEN0           (_ULL(1) << SMSTATEEN0_SSTATEEN0_SHIFT)
+
+/* symbolic CSR names: */
+#define CSR_CYCLE              0xc00
+#define CSR_TIME               0xc01
+#define CSR_INSTRET            0xc02
+#define CSR_HPMCOUNTER3                0xc03
+#define CSR_HPMCOUNTER4                0xc04
+#define CSR_HPMCOUNTER5                0xc05
+#define CSR_HPMCOUNTER6                0xc06
+#define CSR_HPMCOUNTER7                0xc07
+#define CSR_HPMCOUNTER8                0xc08
+#define CSR_HPMCOUNTER9                0xc09
+#define CSR_HPMCOUNTER10       0xc0a
+#define CSR_HPMCOUNTER11       0xc0b
+#define CSR_HPMCOUNTER12       0xc0c
+#define CSR_HPMCOUNTER13       0xc0d
+#define CSR_HPMCOUNTER14       0xc0e
+#define CSR_HPMCOUNTER15       0xc0f
+#define CSR_HPMCOUNTER16       0xc10
+#define CSR_HPMCOUNTER17       0xc11
+#define CSR_HPMCOUNTER18       0xc12
+#define CSR_HPMCOUNTER19       0xc13
+#define CSR_HPMCOUNTER20       0xc14
+#define CSR_HPMCOUNTER21       0xc15
+#define CSR_HPMCOUNTER22       0xc16
+#define CSR_HPMCOUNTER23       0xc17
+#define CSR_HPMCOUNTER24       0xc18
+#define CSR_HPMCOUNTER25       0xc19
+#define CSR_HPMCOUNTER26       0xc1a
+#define CSR_HPMCOUNTER27       0xc1b
+#define CSR_HPMCOUNTER28       0xc1c
+#define CSR_HPMCOUNTER29       0xc1d
+#define CSR_HPMCOUNTER30       0xc1e
+#define CSR_HPMCOUNTER31       0xc1f
+#define CSR_CYCLEH             0xc80
+#define CSR_TIMEH              0xc81
+#define CSR_INSTRETH           0xc82
+#define CSR_HPMCOUNTER3H       0xc83
+#define CSR_HPMCOUNTER4H       0xc84
+#define CSR_HPMCOUNTER5H       0xc85
+#define CSR_HPMCOUNTER6H       0xc86
+#define CSR_HPMCOUNTER7H       0xc87
+#define CSR_HPMCOUNTER8H       0xc88
+#define CSR_HPMCOUNTER9H       0xc89
+#define CSR_HPMCOUNTER10H      0xc8a
+#define CSR_HPMCOUNTER11H      0xc8b
+#define CSR_HPMCOUNTER12H      0xc8c
+#define CSR_HPMCOUNTER13H      0xc8d
+#define CSR_HPMCOUNTER14H      0xc8e
+#define CSR_HPMCOUNTER15H      0xc8f
+#define CSR_HPMCOUNTER16H      0xc90
+#define CSR_HPMCOUNTER17H      0xc91
+#define CSR_HPMCOUNTER18H      0xc92
+#define CSR_HPMCOUNTER19H      0xc93
+#define CSR_HPMCOUNTER20H      0xc94
+#define CSR_HPMCOUNTER21H      0xc95
+#define CSR_HPMCOUNTER22H      0xc96
+#define CSR_HPMCOUNTER23H      0xc97
+#define CSR_HPMCOUNTER24H      0xc98
+#define CSR_HPMCOUNTER25H      0xc99
+#define CSR_HPMCOUNTER26H      0xc9a
+#define CSR_HPMCOUNTER27H      0xc9b
+#define CSR_HPMCOUNTER28H      0xc9c
+#define CSR_HPMCOUNTER29H      0xc9d
+#define CSR_HPMCOUNTER30H      0xc9e
+#define CSR_HPMCOUNTER31H      0xc9f
+
+#define CSR_SSCOUNTOVF         0xda0
+
+#define CSR_SSTATUS            0x100
+#define CSR_SIE                        0x104
+#define CSR_STVEC              0x105
+#define CSR_SCOUNTEREN         0x106
+#define CSR_SENVCFG            0x10a
+#define CSR_SSTATEEN0          0x10c
+#define CSR_SSCRATCH           0x140
+#define CSR_SEPC               0x141
+#define CSR_SCAUSE             0x142
+#define CSR_STVAL              0x143
+#define CSR_SIP                        0x144
+#define CSR_SATP               0x180
+
+#define CSR_STIMECMP           0x14D
+#define CSR_STIMECMPH          0x15D
+
+/* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */
+#define CSR_SISELECT           0x150
+#define CSR_SIREG              0x151
+
+/* Supervisor-Level Interrupts (AIA) */
+#define CSR_STOPEI             0x15c
+#define CSR_STOPI              0xdb0
+
+/* Supervisor-Level High-Half CSRs (AIA) */
+#define CSR_SIEH               0x114
+#define CSR_SIPH               0x154
+
+#define CSR_VSSTATUS           0x200
+#define CSR_VSIE               0x204
+#define CSR_VSTVEC             0x205
+#define CSR_VSSCRATCH          0x240
+#define CSR_VSEPC              0x241
+#define CSR_VSCAUSE            0x242
+#define CSR_VSTVAL             0x243
+#define CSR_VSIP               0x244
+#define CSR_VSATP              0x280
+#define CSR_VSTIMECMP          0x24D
+#define CSR_VSTIMECMPH         0x25D
+
+#define CSR_HSTATUS            0x600
+#define CSR_HEDELEG            0x602
+#define CSR_HIDELEG            0x603
+#define CSR_HIE                        0x604
+#define CSR_HTIMEDELTA         0x605
+#define CSR_HCOUNTEREN         0x606
+#define CSR_HGEIE              0x607
+#define CSR_HENVCFG            0x60a
+#define CSR_HTIMEDELTAH                0x615
+#define CSR_HENVCFGH           0x61a
+#define CSR_HTVAL              0x643
+#define CSR_HIP                        0x644
+#define CSR_HVIP               0x645
+#define CSR_HTINST             0x64a
+#define CSR_HGATP              0x680
+#define CSR_HGEIP              0xe12
+
+/* Virtual Interrupts and Interrupt Priorities (H-extension with AIA) */
+#define CSR_HVIEN              0x608
+#define CSR_HVICTL             0x609
+#define CSR_HVIPRIO1           0x646
+#define CSR_HVIPRIO2           0x647
+
+/* VS-Level Window to Indirectly Accessed Registers (H-extension with AIA) */
+#define CSR_VSISELECT          0x250
+#define CSR_VSIREG             0x251
+
+/* VS-Level Interrupts (H-extension with AIA) */
+#define CSR_VSTOPEI            0x25c
+#define CSR_VSTOPI             0xeb0
+
+/* Hypervisor and VS-Level High-Half CSRs (H-extension with AIA) */
+#define CSR_HIDELEGH           0x613
+#define CSR_HVIENH             0x618
+#define CSR_HVIPH              0x655
+#define CSR_HVIPRIO1H          0x656
+#define CSR_HVIPRIO2H          0x657
+#define CSR_VSIEH              0x214
+#define CSR_VSIPH              0x254
+
+/* Hypervisor stateen CSRs */
+#define CSR_HSTATEEN0          0x60c
+#define CSR_HSTATEEN0H         0x61c
+
+#define CSR_MSTATUS            0x300
+#define CSR_MISA               0x301
+#define CSR_MIDELEG            0x303
+#define CSR_MIE                        0x304
+#define CSR_MTVEC              0x305
+#define CSR_MENVCFG            0x30a
+#define CSR_MENVCFGH           0x31a
+#define CSR_MSCRATCH           0x340
+#define CSR_MEPC               0x341
+#define CSR_MCAUSE             0x342
+#define CSR_MTVAL              0x343
+#define CSR_MIP                        0x344
+#define CSR_PMPCFG0            0x3a0
+#define CSR_PMPADDR0           0x3b0
+#define CSR_MVENDORID          0xf11
+#define CSR_MARCHID            0xf12
+#define CSR_MIMPID             0xf13
+#define CSR_MHARTID            0xf14
+
+/* Machine-Level Window to Indirectly Accessed Registers (AIA) */
+#define CSR_MISELECT           0x350
+#define CSR_MIREG              0x351
+
+/* Machine-Level Interrupts (AIA) */
+#define CSR_MTOPEI             0x35c
+#define CSR_MTOPI              0xfb0
+
+/* Virtual Interrupts for Supervisor Level (AIA) */
+#define CSR_MVIEN              0x308
+#define CSR_MVIP               0x309
+
+/* Machine-Level High-Half CSRs (AIA) */
+#define CSR_MIDELEGH           0x313
+#define CSR_MIEH               0x314
+#define CSR_MVIENH             0x318
+#define CSR_MVIPH              0x319
+#define CSR_MIPH               0x354
+
+#define CSR_VSTART             0x8
+#define CSR_VCSR               0xf
+#define CSR_VL                 0xc20
+#define CSR_VTYPE              0xc21
+#define CSR_VLENB              0xc22
+
+#ifdef CONFIG_RISCV_M_MODE
+# define CSR_STATUS    CSR_MSTATUS
+# define CSR_IE                CSR_MIE
+# define CSR_TVEC      CSR_MTVEC
+# define CSR_SCRATCH   CSR_MSCRATCH
+# define CSR_EPC       CSR_MEPC
+# define CSR_CAUSE     CSR_MCAUSE
+# define CSR_TVAL      CSR_MTVAL
+# define CSR_IP                CSR_MIP
+
+# define CSR_IEH               CSR_MIEH
+# define CSR_ISELECT   CSR_MISELECT
+# define CSR_IREG      CSR_MIREG
+# define CSR_IPH               CSR_MIPH
+# define CSR_TOPEI     CSR_MTOPEI
+# define CSR_TOPI      CSR_MTOPI
+
+# define SR_IE         SR_MIE
+# define SR_PIE                SR_MPIE
+# define SR_PP         SR_MPP
+
+# define RV_IRQ_SOFT           IRQ_M_SOFT
+# define RV_IRQ_TIMER  IRQ_M_TIMER
+# define RV_IRQ_EXT            IRQ_M_EXT
+#else /* CONFIG_RISCV_M_MODE */
+# define CSR_STATUS    CSR_SSTATUS
+# define CSR_IE                CSR_SIE
+# define CSR_TVEC      CSR_STVEC
+# define CSR_SCRATCH   CSR_SSCRATCH
+# define CSR_EPC       CSR_SEPC
+# define CSR_CAUSE     CSR_SCAUSE
+# define CSR_TVAL      CSR_STVAL
+# define CSR_IP                CSR_SIP
+
+# define CSR_IEH               CSR_SIEH
+# define CSR_ISELECT   CSR_SISELECT
+# define CSR_IREG      CSR_SIREG
+# define CSR_IPH               CSR_SIPH
+# define CSR_TOPEI     CSR_STOPEI
+# define CSR_TOPI      CSR_STOPI
+
+# define SR_IE         SR_SIE
+# define SR_PIE                SR_SPIE
+# define SR_PP         SR_SPP
+
+# define RV_IRQ_SOFT           IRQ_S_SOFT
+# define RV_IRQ_TIMER  IRQ_S_TIMER
+# define RV_IRQ_EXT            IRQ_S_EXT
+# define RV_IRQ_PMU    IRQ_PMU_OVF
+# define SIP_LCOFIP     (_AC(0x1, UL) << IRQ_PMU_OVF)
+
+#endif /* !CONFIG_RISCV_M_MODE */
+
+/* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
+#define IE_SIE         (_AC(0x1, UL) << RV_IRQ_SOFT)
+#define IE_TIE         (_AC(0x1, UL) << RV_IRQ_TIMER)
+#define IE_EIE         (_AC(0x1, UL) << RV_IRQ_EXT)
+
+#ifdef __ASSEMBLY__
+#define __ASM_STR(x)    x
+#else
+#define __ASM_STR(x)    #x
+#endif
+
+#ifndef __ASSEMBLY__
+
+#define csr_swap(csr, val)                                     \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrrw %0, " __ASM_STR(csr) ", %1"\
+                             : "=r" (__v) : "rK" (__v)         \
+                             : "memory");                      \
+       __v;                                                    \
+})
+
+#define csr_read(csr)                                          \
+({                                                             \
+       register unsigned long __v;                             \
+       __asm__ __volatile__ ("csrr %0, " __ASM_STR(csr)        \
+                             : "=r" (__v) :                    \
+                             : "memory");                      \
+       __v;                                                    \
+})
+
+#define csr_write(csr, val)                                    \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrw " __ASM_STR(csr) ", %0"     \
+                             : : "rK" (__v)                    \
+                             : "memory");                      \
+})
+
+#define csr_read_set(csr, val)                                 \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrrs %0, " __ASM_STR(csr) ", %1"\
+                             : "=r" (__v) : "rK" (__v)         \
+                             : "memory");                      \
+       __v;                                                    \
+})
+
+#define csr_set(csr, val)                                      \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrs " __ASM_STR(csr) ", %0"     \
+                             : : "rK" (__v)                    \
+                             : "memory");                      \
+})
+
+#define csr_read_clear(csr, val)                               \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrrc %0, " __ASM_STR(csr) ", %1"\
+                             : "=r" (__v) : "rK" (__v)         \
+                             : "memory");                      \
+       __v;                                                    \
+})
+
+#define csr_clear(csr, val)                                    \
+({                                                             \
+       unsigned long __v = (unsigned long)(val);               \
+       __asm__ __volatile__ ("csrc " __ASM_STR(csr) ", %0"     \
+                             : : "rK" (__v)                    \
+                             : "memory");                      \
+})
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_RISCV_CSR_H */
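The csr_* accessors above each expand to a single CSR instruction
(csrr/csrw/csrrw/csrrs/csrrc), with the CSR number pasted into the asm
string via __ASM_STR(). A minimal usage sketch, not part of the patch
(RISC-V only; the function name is illustrative):

	/* Mask the timer interrupt around a critical section. */
	static void timer_masked_section(void)
	{
		/* csrrc: returns the old IE value and clears the TIE bit */
		unsigned long old = csr_read_clear(CSR_IE, IE_TIE);

		/* ... work with the timer interrupt masked ... */

		if (old & IE_TIE)
			csr_set(CSR_IE, IE_TIE);	/* csrs: restore TIE */
	}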
diff --git a/tools/arch/riscv/include/asm/vdso/processor.h b/tools/arch/riscv/include/asm/vdso/processor.h
new file mode 100644 (file)
index 0000000..662aca0
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm-generic/barrier.h>
+
+static inline void cpu_relax(void)
+{
+#ifdef __riscv_muldiv
+       int dummy;
+       /* In lieu of a halt instruction, induce a long-latency stall. */
+       __asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
+#endif
+
+#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE
+       /*
+        * Reduce instruction retirement.
+        * This assumes the PC changes.
+        */
+       __asm__ __volatile__ ("pause");
+#else
+       /* Encoding of the pause instruction */
+       __asm__ __volatile__ (".4byte 0x100000F");
+#endif
+       barrier();
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
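cpu_relax() is intended for busy-wait loops: it issues the pause hint
(encoded as a raw .4byte when the toolchain lacks Zihintpause) and ends in
barrier(), so the awaited memory is re-read on every iteration. A hedged
sketch of the usual pattern (the flag and function name are illustrative):

	static void spin_until_set(volatile int *flag)
	{
		while (!*flag)
			cpu_relax();	/* pause hint plus compiler barrier */
	}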
index 3a19904c2db6935fda03c0a7c9eeaa47e62f823c..3f73ac3ed3a0709a700ae927bca01069a1910665 100644 (file)
@@ -84,7 +84,7 @@
 #define HYPERVISOR_CALLBACK_VECTOR     0xf3
 
 /* Vector for KVM to deliver posted interrupt IPI */
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
 #define POSTED_INTR_VECTOR             0xf2
 #define POSTED_INTR_WAKEUP_VECTOR      0xf1
 #define POSTED_INTR_NESTED_VECTOR      0xf0
index 64df118376df66d6ddeb4895054eb12c7c40942b..1e2ab148d5dbe69adde3ece4b56f29f7fccafce5 100644 (file)
@@ -87,6 +87,7 @@ FEATURE_TESTS_EXTRA :=                  \
          gtk2-infobar                   \
          hello                          \
          libbabeltrace                  \
+         libcapstone                    \
          libbfd-liberty                 \
          libbfd-liberty-z               \
          libopencsd                     \
@@ -134,6 +135,7 @@ FEATURE_DISPLAY ?=              \
          libcrypto              \
          libunwind              \
          libdw-dwarf-unwind     \
+         libcapstone            \
          zlib                   \
          lzma                   \
          get_cpuid              \
index 37722e509eb9f1924380e65542b55937f3e0cc9e..ed54cef450f58b162eb10e6daaea6f0e4718faaf 100644 (file)
@@ -54,6 +54,7 @@ FILES=                                          \
          test-timerfd.bin                       \
          test-libdw-dwarf-unwind.bin            \
          test-libbabeltrace.bin                 \
+         test-libcapstone.bin                  \
          test-compile-32.bin                    \
          test-compile-x32.bin                   \
          test-zlib.bin                          \
@@ -286,6 +287,9 @@ $(OUTPUT)test-libdw-dwarf-unwind.bin:
 $(OUTPUT)test-libbabeltrace.bin:
        $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace)
 
+$(OUTPUT)test-libcapstone.bin:
+       $(BUILD) # -lcapstone provided by $(FEATURE_CHECK_LDFLAGS-libcapstone)
+
 $(OUTPUT)test-compile-32.bin:
        $(CC) -m32 -o $@ test-compile.c
 
index 6f4bf386a3b5c4b0255d6e261146ab701cec9462..dd0a18c2ef8fc0809b2f45b12842eeec4396677b 100644 (file)
 #undef main
 #endif
 
+#define main main_test_libcapstone
+# include "test-libcapstone.c"
+#undef main
+
 #define main main_test_lzma
 # include "test-lzma.c"
 #undef main
diff --git a/tools/build/feature/test-libcapstone.c b/tools/build/feature/test-libcapstone.c
new file mode 100644 (file)
index 0000000..fbe8dba
--- /dev/null
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <capstone/capstone.h>
+
+int main(void)
+{
+       csh handle;
+
+       cs_open(CS_ARCH_X86, CS_MODE_64, &handle);
+       return 0;
+}
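The feature test above only proves that the capstone header and -lcapstone
link; perf uses the library to back the new disassembled output. As a rough
illustration of the same API (a sketch under stated assumptions, not part
of the patch), a complete disassembly loop looks like:

	#include <inttypes.h>
	#include <stdio.h>
	#include <capstone/capstone.h>

	int main(void)
	{
		/* push %rbp; mov %rsp,%rbp: a typical x86-64 prologue */
		const uint8_t code[] = { 0x55, 0x48, 0x89, 0xe5 };
		cs_insn *insn;
		csh handle;
		size_t i, n;

		if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK)
			return 1;
		n = cs_disasm(handle, code, sizeof(code), 0x1000, 0, &insn);
		for (i = 0; i < n; i++)
			printf("0x%" PRIx64 ":\t%s %s\n", insn[i].address,
			       insn[i].mnemonic, insn[i].op_str);
		if (n)
			cs_free(insn, n);
		cs_close(&handle);
		return 0;
	}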
index 79de3638a01abeef61f733d148704684091b4549..bb0e671be96d57bf53bd0b91c2ac55f8f92ab101 100755 (executable)
@@ -138,12 +138,14 @@ class TestInvalidSignature(DynamicBoostControlTest):
 
     def test_authenticated_nonce(self) -> None:
         """fetch authenticated nonce"""
+        get_nonce(self.d, None)
         with self.assertRaises(OSError) as error:
             get_nonce(self.d, self.signature)
-        self.assertEqual(error.exception.errno, 1)
+        self.assertEqual(error.exception.errno, 22)
 
     def test_set_uid(self) -> None:
         """set uid"""
+        get_nonce(self.d, None)
         with self.assertRaises(OSError) as error:
             set_uid(self.d, self.uid, self.signature)
         self.assertEqual(error.exception.errno, 1)
@@ -152,13 +154,13 @@ class TestInvalidSignature(DynamicBoostControlTest):
         """fetch a parameter"""
         with self.assertRaises(OSError) as error:
             process_param(self.d, PARAM_GET_SOC_PWR_CUR, self.signature)
-        self.assertEqual(error.exception.errno, 1)
+        self.assertEqual(error.exception.errno, 11)
 
     def test_set_param(self) -> None:
         """set a parameter"""
         with self.assertRaises(OSError) as error:
             process_param(self.d, PARAM_SET_PWR_CAP, self.signature, 1000)
-        self.assertEqual(error.exception.errno, 1)
+        self.assertEqual(error.exception.errno, 11)
 
 
 class TestUnFusedSystem(DynamicBoostControlTest):
index 058e3ff10f9b2849fd25b35164472ec9f949adca..c6d67fc9e57ef0554675bb29bdfeaf5c116fd1df 100644 (file)
@@ -248,10 +248,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist)
 
 static void perf_evlist__id_hash(struct perf_evlist *evlist,
                                 struct perf_evsel *evsel,
-                                int cpu, int thread, u64 id)
+                                int cpu_map_idx, int thread, u64 id)
 {
        int hash;
-       struct perf_sample_id *sid = SID(evsel, cpu, thread);
+       struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread);
 
        sid->id = id;
        sid->evsel = evsel;
@@ -269,21 +269,27 @@ void perf_evlist__reset_id_hash(struct perf_evlist *evlist)
 
 void perf_evlist__id_add(struct perf_evlist *evlist,
                         struct perf_evsel *evsel,
-                        int cpu, int thread, u64 id)
+                        int cpu_map_idx, int thread, u64 id)
 {
-       perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
+       if (!SID(evsel, cpu_map_idx, thread))
+               return;
+
+       perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id);
        evsel->id[evsel->ids++] = id;
 }
 
 int perf_evlist__id_add_fd(struct perf_evlist *evlist,
                           struct perf_evsel *evsel,
-                          int cpu, int thread, int fd)
+                          int cpu_map_idx, int thread, int fd)
 {
        u64 read_data[4] = { 0, };
        int id_idx = 1; /* The first entry is the counter value */
        u64 id;
        int ret;
 
+       if (!SID(evsel, cpu_map_idx, thread))
+               return -1;
+
        ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
        if (!ret)
                goto add;
@@ -312,7 +318,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
        id = read_data[id_idx];
 
 add:
-       perf_evlist__id_add(evlist, evsel, cpu, thread, id);
+       perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id);
        return 0;
 }
 
index d86ffe8ed483e231e4f107decb2cdab74d91b0e1..f43bdb9b6227ca7f0a7a1f8ddca4e63f08024720 100644 (file)
@@ -126,11 +126,11 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist);
 
 void perf_evlist__id_add(struct perf_evlist *evlist,
                         struct perf_evsel *evsel,
-                        int cpu, int thread, u64 id);
+                        int cpu_map_idx, int thread, u64 id);
 
 int perf_evlist__id_add_fd(struct perf_evlist *evlist,
                           struct perf_evsel *evsel,
-                          int cpu, int thread, int fd);
+                          int cpu_map_idx, int thread, int fd);
 
 void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
 
index 5cdac21625322bda990163e51e43236e9f9bb89b..d435eb42354bfe263e57cdce0fa8ecc4b9a7729b 100644 (file)
@@ -122,6 +122,8 @@ int start_command(struct child_process *cmd)
                }
                if (cmd->preexec_cb)
                        cmd->preexec_cb();
+               if (cmd->no_exec_cmd)
+                       exit(cmd->no_exec_cmd(cmd));
                if (cmd->exec_cmd) {
                        execv_cmd(cmd->argv);
                } else {
index 17d969c6add3bb7948bcea1ec4c346fa46298765..d794138a797f4aec5929a967c1526cded56fc665 100644 (file)
@@ -47,6 +47,8 @@ struct child_process {
        unsigned exec_cmd:1; /* if this is to be external sub-command */
        unsigned stdout_to_stderr:1;
        void (*preexec_cb)(void);
+       /* If set, call function in child rather than doing an exec. */
+       int (*no_exec_cmd)(struct child_process *process);
 };
 
 int start_command(struct child_process *);
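The new callback runs a function in the forked child under
start_command()'s existing pipe/fd plumbing instead of exec'ing a binary. A
hedged sketch of the call pattern (run_in_child and do_child_work are
illustrative names, not from the patch):

	static int run_in_child(struct child_process *process)
	{
		/* Runs in the forked child; the return value is its exit code. */
		return do_child_work();
	}

	...
	struct child_process cmd = {};

	cmd.no_exec_cmd = run_in_child;	/* fork, then call back, no execv */
	if (start_command(&cmd))
		return -1;
	/* ... parent talks to the child here ... */
	return finish_command(&cmd);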
index 1c5606cc33346bd29b523deb466cc739a0c6bb33..7bb03606b9eaa2e3763962f240dfb653d2b6b8f7 100644 (file)
@@ -3,7 +3,8 @@
 #
 include ../scripts/Makefile.include
 
-TARGETS=page-types slabinfo page_owner_sort
+BUILD_TARGETS=page-types slabinfo page_owner_sort
+INSTALL_TARGETS = $(BUILD_TARGETS) thpmaps
 
 LIB_DIR = ../lib/api
 LIBS = $(LIB_DIR)/libapi.a
@@ -11,9 +12,9 @@ LIBS = $(LIB_DIR)/libapi.a
 CFLAGS += -Wall -Wextra -I../lib/ -pthread
 LDFLAGS += $(LIBS) -pthread
 
-all: $(TARGETS)
+all: $(BUILD_TARGETS)
 
-$(TARGETS): $(LIBS)
+$(BUILD_TARGETS): $(LIBS)
 
 $(LIBS):
        make -C $(LIB_DIR)
@@ -29,4 +30,4 @@ sbindir ?= /usr/sbin
 
 install: all
        install -d $(DESTDIR)$(sbindir)
-       install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir)
+       install -m 755 -p $(INSTALL_TARGETS) $(DESTDIR)$(sbindir)
diff --git a/tools/mm/thpmaps b/tools/mm/thpmaps
new file mode 100644 (file)
index 0000000..803e031
--- /dev/null
@@ -0,0 +1,675 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2024 ARM Ltd.
+#
+# Utility providing smaps-like output detailing transparent hugepage usage.
+# For more info, run:
+# ./thpmaps --help
+#
+# Requires numpy:
+# pip3 install numpy
+
+
+import argparse
+import collections
+import math
+import os
+import re
+import resource
+import shutil
+import sys
+import textwrap
+import time
+import numpy as np
+
+
+with open('/sys/kernel/mm/transparent_hugepage/hpage_pmd_size') as f:
+    PAGE_SIZE = resource.getpagesize()
+    PAGE_SHIFT = int(math.log2(PAGE_SIZE))
+    PMD_SIZE = int(f.read())
+    PMD_ORDER = int(math.log2(PMD_SIZE / PAGE_SIZE))
+
+
+def align_forward(v, a):
+    return (v + (a - 1)) & ~(a - 1)
+
+
+def align_offset(v, a):
+    return v & (a - 1)
+
+
+def kbnr(kb):
+    # Convert KB to number of pages.
+    return (kb << 10) >> PAGE_SHIFT
+
+
+def nrkb(nr):
+    # Convert number of pages to KB.
+    return (nr << PAGE_SHIFT) >> 10
+
+
+def odkb(order):
+    # Convert page order to KB.
+    return (PAGE_SIZE << order) >> 10
+
+
+def cont_ranges_all(search, index):
+    # Given a list of arrays, find the ranges over which the values increment
+    # contiguously (by exactly 1) in all arrays. All arrays in search and index
+    # must be the same size.
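+    # Illustrative example (not from the patch): for search == index ==
+    # [array([5, 6, 7, 10, 11])], the runs are 5..7 and 10..11, so the result
+    # is [array([[5, 7], [10, 11]])] (inclusive start/end index pairs).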
+    sz = len(search[0])
+    r = np.full(sz, 2)
+    d = np.diff(search[0]) == 1
+    for dd in [np.diff(arr) == 1 for arr in search[1:]]:
+        d &= dd
+    r[1:] -= d
+    r[:-1] -= d
+    return [np.repeat(arr, r).reshape(-1, 2) for arr in index]
+
+
+class ArgException(Exception):
+    pass
+
+
+class FileIOException(Exception):
+    pass
+
+
+class BinArrayFile:
+    # Base class used to read /proc/<pid>/pagemap and /proc/kpageflags into a
+    # numpy array. Use an inherited class in a with clause to ensure the file is
+    # closed when it goes out of scope.
+    def __init__(self, filename, element_size):
+        self.element_size = element_size
+        self.filename = filename
+        self.fd = os.open(self.filename, os.O_RDONLY)
+
+    def cleanup(self):
+        os.close(self.fd)
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.cleanup()
+
+    def _readin(self, offset, buffer):
+        length = os.preadv(self.fd, (buffer,), offset)
+        if len(buffer) != length:
+            raise FileIOException('error: {} failed to read {} bytes at {:x}'
+                            .format(self.filename, len(buffer), offset))
+
+    def _toarray(self, buf):
+        assert(self.element_size == 8)
+        return np.frombuffer(buf, dtype=np.uint64)
+
+    def getv(self, vec):
+        vec *= self.element_size
+        offsets = vec[:, 0]
+        lengths = (np.diff(vec) + self.element_size).reshape(len(vec))
+        buf = bytearray(int(np.sum(lengths)))
+        view = memoryview(buf)
+        pos = 0
+        for offset, length in zip(offsets, lengths):
+            offset = int(offset)
+            length = int(length)
+            self._readin(offset, view[pos:pos+length])
+            pos += length
+        return self._toarray(buf)
+
+    def get(self, index, nr=1):
+        offset = index * self.element_size
+        length = nr * self.element_size
+        buf = bytearray(length)
+        self._readin(offset, buf)
+        return self._toarray(buf)
+
+
+PM_PAGE_PRESENT = 1 << 63
+PM_PFN_MASK = (1 << 55) - 1
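+# Bit layout per Documentation/admin-guide/mm/pagemap.rst: bits 0-54 hold the
+# PFN when bit 63 (the page-present flag) is set.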
+
+class PageMap(BinArrayFile):
+    # Read ranges of a given pid's pagemap into a numpy array.
+    def __init__(self, pid='self'):
+        super().__init__(f'/proc/{pid}/pagemap', 8)
+
+
+KPF_ANON = 1 << 12
+KPF_COMPOUND_HEAD = 1 << 15
+KPF_COMPOUND_TAIL = 1 << 16
+KPF_THP = 1 << 22
+
+class KPageFlags(BinArrayFile):
+    # Read ranges of /proc/kpageflags into a numpy array.
+    def __init__(self):
+        super().__init__('/proc/kpageflags', 8)
+
+
+vma_all_stats = set([
+    "Size",
+    "Rss",
+    "Pss",
+    "Pss_Dirty",
+    "Shared_Clean",
+    "Shared_Dirty",
+    "Private_Clean",
+    "Private_Dirty",
+    "Referenced",
+    "Anonymous",
+    "KSM",
+    "LazyFree",
+    "AnonHugePages",
+    "ShmemPmdMapped",
+    "FilePmdMapped",
+    "Shared_Hugetlb",
+    "Private_Hugetlb",
+    "Swap",
+    "SwapPss",
+    "Locked",
+])
+
+vma_min_stats = set([
+    "Rss",
+    "Anonymous",
+    "AnonHugePages",
+    "ShmemPmdMapped",
+    "FilePmdMapped",
+])
+
+VMA = collections.namedtuple('VMA', [
+    'name',
+    'start',
+    'end',
+    'read',
+    'write',
+    'execute',
+    'private',
+    'pgoff',
+    'major',
+    'minor',
+    'inode',
+    'stats',
+])
+
+class VMAList:
+    # A container for VMAs, parsed from /proc/<pid>/smaps. Iterate over the
+    # instance to receive VMAs.
+    def __init__(self, pid='self', stats=[]):
+        self.vmas = []
+        with open(f'/proc/{pid}/smaps', 'r') as file:
+            for line in file:
+                elements = line.split()
+                if '-' in elements[0]:
+                    start, end = map(lambda x: int(x, 16), elements[0].split('-'))
+                    major, minor = map(lambda x: int(x, 16), elements[3].split(':'))
+                    self.vmas.append(VMA(
+                        name=elements[5] if len(elements) == 6 else '',
+                        start=start,
+                        end=end,
+                        read=elements[1][0] == 'r',
+                        write=elements[1][1] == 'w',
+                        execute=elements[1][2] == 'x',
+                        private=elements[1][3] == 'p',
+                        pgoff=int(elements[2], 16),
+                        major=major,
+                        minor=minor,
+                        inode=int(elements[4], 16),
+                        stats={},
+                    ))
+                else:
+                    param = elements[0][:-1]
+                    if param in stats:
+                        value = int(elements[1])
+                        self.vmas[-1].stats[param] = {'type': None, 'value': value}
+
+    def __iter__(self):
+        yield from self.vmas
+
+
+def thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads):
+    # Given 4 same-sized arrays representing a range within a page table backed
+    # by THPs (vfns: virtual frame numbers, pfns: physical frame numbers, anons:
+    # True if page is anonymous, heads: True if page is head of a THP), return a
+    # dictionary of statistics describing the mapped THPs.
+    stats = {
+        'file': {
+            'partial': 0,
+            'aligned': [0] * (PMD_ORDER + 1),
+            'unaligned': [0] * (PMD_ORDER + 1),
+        },
+        'anon': {
+            'partial': 0,
+            'aligned': [0] * (PMD_ORDER + 1),
+            'unaligned': [0] * (PMD_ORDER + 1),
+        },
+    }
+
+    for rindex, rpfn in zip(ranges[0], ranges[2]):
+        index_next = int(rindex[0])
+        index_end = int(rindex[1]) + 1
+        pfn_end = int(rpfn[1]) + 1
+
+        folios = indexes[index_next:index_end][heads[index_next:index_end]]
+
+        # Account pages for any partially mapped THP at the front. In that case,
+        # the first page of the range is a tail.
+        nr = (int(folios[0]) if len(folios) else index_end) - index_next
+        stats['anon' if anons[index_next] else 'file']['partial'] += nr
+
+        # Account pages for any partially mapped THP at the back. In that case,
+        # the next page after the range is a tail.
+        if len(folios):
+            flags = int(kpageflags.get(pfn_end)[0])
+            if flags & KPF_COMPOUND_TAIL:
+                nr = index_end - int(folios[-1])
+                folios = folios[:-1]
+                index_end -= nr
+                stats['anon' if anons[index_end - 1] else 'file']['partial'] += nr
+
+        # Account fully mapped THPs in the middle of the range.
+        if len(folios):
+            folio_nrs = np.append(np.diff(folios), np.uint64(index_end - folios[-1]))
+            folio_orders = np.log2(folio_nrs).astype(np.uint64)
+            for index, order in zip(folios, folio_orders):
+                index = int(index)
+                order = int(order)
+                nr = 1 << order
+                vfn = int(vfns[index])
+                align = 'aligned' if align_forward(vfn, nr) == vfn else 'unaligned'
+                anon = 'anon' if anons[index] else 'file'
+                stats[anon][align][order] += nr
+
+    # Account PMD-mapped THPs separately, so filter them out of the stats. There
+    # is a race between acquiring the smaps stats and reading pagemap, where
+    # memory could be deallocated. So clamp to zero in case it would have gone
+    # negative.
+    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
+    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
+                      vma.stats['FilePmdMapped']['value']
+    stats['anon']['aligned'][PMD_ORDER] = max(0, stats['anon']['aligned'][PMD_ORDER] - kbnr(anon_pmd_mapped))
+    stats['file']['aligned'][PMD_ORDER] = max(0, stats['file']['aligned'][PMD_ORDER] - kbnr(file_pmd_mapped))
+
+    rstats = {
+        f"anon-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
+        f"file-thp-pmd-aligned-{odkb(PMD_ORDER)}kB": {'type': 'file', 'value': file_pmd_mapped},
+    }
+
+    def flatten_sub(type, subtype, stats):
+        param = f"{type}-thp-pte-{subtype}-{{}}kB"
+        for od, nr in enumerate(stats[2:], 2):
+            rstats[param.format(odkb(od))] = {'type': type, 'value': nrkb(nr)}
+
+    def flatten_type(type, stats):
+        flatten_sub(type, 'aligned', stats['aligned'])
+        flatten_sub(type, 'unaligned', stats['unaligned'])
+        rstats[f"{type}-thp-pte-partial"] = {'type': type, 'value': nrkb(stats['partial'])}
+
+    flatten_type('anon', stats['anon'])
+    flatten_type('file', stats['file'])
+
+    return rstats
+
+
+def cont_parse(vma, order, ranges, anons, heads):
+    # Given the ranges (indexes, vfns, pfns) for a VMA, plus the per-page anons
+    # (True if page is anonymous) and heads (True if page is head of a THP)
+    # arrays, return a dictionary of statistics describing the aligned,
+    # contiguous blocks of 2^order pages.
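+    # Illustrative example (assuming 4K base pages): order == 4 gives
+    # nr_cont == 16 pages, i.e. 64kB blocks, matching arm64's contpte size.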
+    nr_cont = 1 << order
+    nr_anon = 0
+    nr_file = 0
+
+    for rindex, rvfn, rpfn in zip(*ranges):
+        index_next = int(rindex[0])
+        index_end = int(rindex[1]) + 1
+        vfn_start = int(rvfn[0])
+        pfn_start = int(rpfn[0])
+
+        if align_offset(pfn_start, nr_cont) != align_offset(vfn_start, nr_cont):
+            continue
+
+        off = align_forward(vfn_start, nr_cont) - vfn_start
+        index_next += off
+
+        while index_next + nr_cont <= index_end:
+            folio_boundary = heads[index_next+1:index_next+nr_cont].any()
+            if not folio_boundary:
+                if anons[index_next]:
+                    nr_anon += nr_cont
+                else:
+                    nr_file += nr_cont
+            index_next += nr_cont
+
+    # Account blocks that are PMD-mapped separately, so filter them out of the
+    # stats. There is a race between acquiring the smaps stats and reading
+    # pagemap, where memory could be deallocated. So clamp to zero in case it
+    # would have gone negative.
+    anon_pmd_mapped = vma.stats['AnonHugePages']['value']
+    file_pmd_mapped = vma.stats['ShmemPmdMapped']['value'] + \
+                      vma.stats['FilePmdMapped']['value']
+    nr_anon = max(0, nr_anon - kbnr(anon_pmd_mapped))
+    nr_file = max(0, nr_file - kbnr(file_pmd_mapped))
+
+    rstats = {
+        f"anon-cont-pmd-aligned-{nrkb(nr_cont)}kB": {'type': 'anon', 'value': anon_pmd_mapped},
+        f"file-cont-pmd-aligned-{nrkb(nr_cont)}kB": {'type': 'file', 'value': file_pmd_mapped},
+    }
+
+    rstats[f"anon-cont-pte-aligned-{nrkb(nr_cont)}kB"] = {'type': 'anon', 'value': nrkb(nr_anon)}
+    rstats[f"file-cont-pte-aligned-{nrkb(nr_cont)}kB"] = {'type': 'file', 'value': nrkb(nr_file)}
+
+    return rstats
+
+
+def vma_print(vma, pid):
+    # Prints a VMA instance in a format similar to smaps. The main difference is
+    # that the pid is included as the first value.
+    print("{:010d}: {:016x}-{:016x} {}{}{}{} {:08x} {:02x}:{:02x} {:08x} {}"
+        .format(
+            pid, vma.start, vma.end,
+            'r' if vma.read else '-', 'w' if vma.write else '-',
+            'x' if vma.execute else '-', 'p' if vma.private else 's',
+            vma.pgoff, vma.major, vma.minor, vma.inode, vma.name
+        ))
+
+
+def stats_print(stats, tot_anon, tot_file, inc_empty):
+    # Print a statistics dictionary.
+    label_field = 32
+    for label, stat in stats.items():
+        type = stat['type']
+        value = stat['value']
+        if value or inc_empty:
+            pad = max(0, label_field - len(label) - 1)
+            if type == 'anon' and tot_anon > 0:
+                percent = f' ({value / tot_anon:3.0%})'
+            elif type == 'file' and tot_file > 0:
+                percent = f' ({value / tot_file:3.0%})'
+            else:
+                percent = ''
+            print(f"{label}:{' ' * pad}{value:8} kB{percent}")
+
+
+def vma_parse(vma, pagemap, kpageflags, contorders):
+    # Generate thp and cont statistics for a single VMA.
+    start = vma.start >> PAGE_SHIFT
+    end = vma.end >> PAGE_SHIFT
+
+    pmes = pagemap.get(start, end - start)
+    present = pmes & PM_PAGE_PRESENT != 0
+    pfns = pmes & PM_PFN_MASK
+    pfns = pfns[present]
+    vfns = np.arange(start, end, dtype=np.uint64)
+    vfns = vfns[present]
+
+    pfn_vec = cont_ranges_all([pfns], [pfns])[0]
+    flags = kpageflags.getv(pfn_vec)
+    anons = flags & KPF_ANON != 0
+    heads = flags & KPF_COMPOUND_HEAD != 0
+    thps = flags & KPF_THP != 0
+
+    vfns = vfns[thps]
+    pfns = pfns[thps]
+    anons = anons[thps]
+    heads = heads[thps]
+
+    indexes = np.arange(len(vfns), dtype=np.uint64)
+    ranges = cont_ranges_all([vfns, pfns], [indexes, vfns, pfns])
+
+    thpstats = thp_parse(vma, kpageflags, ranges, indexes, vfns, pfns, anons, heads)
+    contstats = [cont_parse(vma, order, ranges, anons, heads) for order in contorders]
+
+    tot_anon = vma.stats['Anonymous']['value']
+    tot_file = vma.stats['Rss']['value'] - tot_anon
+
+    return {
+        **thpstats,
+        **{k: v for s in contstats for k, v in s.items()}
+    }, tot_anon, tot_file
+
+
+def do_main(args):
+    pids = set()
+    rollup = {}
+    rollup_anon = 0
+    rollup_file = 0
+
+    if args.cgroup:
+        strict = False
+        for walk_info in os.walk(args.cgroup):
+            cgroup = walk_info[0]
+            with open(f'{cgroup}/cgroup.procs') as pidfile:
+                for line in pidfile.readlines():
+                    pids.add(int(line.strip()))
+    elif args.pid:
+        strict = True
+        pids = pids.union(args.pid)
+    else:
+        strict = False
+        for pid in os.listdir('/proc'):
+            if pid.isdigit():
+                pids.add(int(pid))
+
+    if not args.rollup:
+        print("       PID             START              END PROT   OFFSET   DEV    INODE OBJECT")
+
+    for pid in pids:
+        try:
+            with PageMap(pid) as pagemap:
+                with KPageFlags() as kpageflags:
+                    for vma in VMAList(pid, vma_all_stats if args.inc_smaps else vma_min_stats):
+                        if (vma.read or vma.write or vma.execute) and vma.stats['Rss']['value'] > 0:
+                            stats, vma_anon, vma_file = vma_parse(vma, pagemap, kpageflags, args.cont)
+                        else:
+                            stats = {}
+                            vma_anon = 0
+                            vma_file = 0
+                        if args.inc_smaps:
+                            stats = {**vma.stats, **stats}
+                        if args.rollup:
+                            for k, v in stats.items():
+                                if k in rollup:
+                                    assert(rollup[k]['type'] == v['type'])
+                                    rollup[k]['value'] += v['value']
+                                else:
+                                    rollup[k] = v
+                            rollup_anon += vma_anon
+                            rollup_file += vma_file
+                        else:
+                            vma_print(vma, pid)
+                            stats_print(stats, vma_anon, vma_file, args.inc_empty)
+        except (FileNotFoundError, ProcessLookupError, FileIOException):
+            if strict:
+                raise
+
+    if args.rollup:
+        stats_print(rollup, rollup_anon, rollup_file, args.inc_empty)
+
+
+def main():
+    docs_width = shutil.get_terminal_size().columns
+    docs_width -= 2
+    docs_width = min(80, docs_width)
+
+    def format(string):
+        text = re.sub(r'\s+', ' ', string)
+        text = re.sub(r'\s*\\n\s*', '\n', text)
+        paras = text.split('\n')
+        paras = [textwrap.fill(p, width=docs_width) for p in paras]
+        return '\n'.join(paras)
+
+    def formatter(prog):
+        return argparse.RawDescriptionHelpFormatter(prog, width=docs_width)
+
+    def size2order(human):
+        units = {
+            "K": 2**10, "M": 2**20, "G": 2**30,
+            "k": 2**10, "m": 2**20, "g": 2**30,
+        }
+        unit = 1
+        if human[-1] in units:
+            unit = units[human[-1]]
+            human = human[:-1]
+        try:
+            size = int(human)
+        except ValueError:
+            raise ArgException('error: --cont value must be integer size with optional KMG unit')
+        size *= unit
+        order = int(math.log2(size / PAGE_SIZE))
+        if order < 1:
+            raise ArgException('error: --cont value must be size of at least 2 pages')
+        if (1 << order) * PAGE_SIZE != size:
+            raise ArgException('error: --cont value must be size of power-of-2 pages')
+        if order > PMD_ORDER:
+            raise ArgException('error: --cont value must be less than or equal to PMD order')
+        return order
+
+    parser = argparse.ArgumentParser(formatter_class=formatter,
+        description=format("""Prints information about how transparent huge
+                    pages are mapped, either system-wide, or for a specified
+                    process or cgroup.\\n
+                    \\n
+                    When run with --pid, the user explicitly specifies the set
+                    of pids to scan. e.g. "--pid 10 [--pid 134 ...]". When run
+                    with --cgroup, the user passes either a v1 or v2 cgroup and
+                    all pids that belong to the cgroup subtree are scanned. When
+                    run with neither --pid nor --cgroup, the full set of pids on
+                    the system is gathered from /proc and scanned as if the user
+                    had provided "--pid 1 --pid 2 ...".\\n
+                    \\n
+                    A default set of statistics is always generated for THP
+                    mappings. However, it is also possible to generate
+                    additional statistics for "contiguous block mappings" where
+                    the block size is user-defined.\\n
+                    \\n
+                    Statistics are maintained independently for anonymous and
+                    file-backed (pagecache) memory and are shown both in kB and
+                    as a percentage of either total anonymous or total
+                    file-backed memory as appropriate.\\n
+                    \\n
+                    THP Statistics\\n
+                    --------------\\n
+                    \\n
+                    Statistics are always generated for fully- and
+                    contiguously-mapped THPs whose mapping address is aligned to
+                    their size, for each <size> supported by the system.
+                    Separate counters describe THPs mapped by PTE vs those
+                    mapped by PMD. (Although note a THP can only be mapped by
+                    PMD if it is PMD-sized):\\n
+                    \\n
+                    - anon-thp-pte-aligned-<size>kB\\n
+                    - file-thp-pte-aligned-<size>kB\\n
+                    - anon-thp-pmd-aligned-<size>kB\\n
+                    - file-thp-pmd-aligned-<size>kB\\n
+                    \\n
+                    Similarly, statistics are always generated for fully- and
+                    contiguously-mapped THPs whose mapping address is *not*
+                    aligned to their size, for each <size> supported by the
+                    system. Due to the unaligned mapping, it is impossible to
+                    map by PMD, so there are only PTE counters for this case:\\n
+                    \\n
+                    - anon-thp-pte-unaligned-<size>kB\\n
+                    - file-thp-pte-unaligned-<size>kB\\n
+                    \\n
+                    Statistics are also always generated for mapped pages that
+                    belong to a THP but where the THP is *not* fully- and
+                    contiguously-mapped. These "partial" mappings are all
+                    counted in the same counter regardless of the size of the
+                    THP that is partially mapped:\\n
+                    \\n
+                    - anon-thp-pte-partial\\n
+                    - file-thp-pte-partial\\n
+                    \\n
+                    Contiguous Block Statistics\\n
+                    ---------------------------\\n
+                    \\n
+                    An optional, additional set of statistics is generated for
+                    every contiguous block size specified with `--cont <size>`.
+                    These statistics show how much memory is mapped in
+                    contiguous blocks of <size> and also aligned to <size>. A
+                    given contiguous block must all belong to the same THP, but
+                    there is no requirement for it to be the *whole* THP.
+                    Separate counters describe contiguous blocks mapped by PTE
+                    vs those mapped by PMD:\\n
+                    \\n
+                    - anon-cont-pte-aligned-<size>kB\\n
+                    - file-cont-pte-aligned-<size>kB\\n
+                    - anon-cont-pmd-aligned-<size>kB\\n
+                    - file-cont-pmd-aligned-<size>kB\\n
+                    \\n
+                    As an example, if monitoring 64K contiguous blocks (--cont
+                    64K), there are a number of sources that could provide such
+                    blocks: a fully- and contiguously-mapped 64K THP that is
+                    aligned to a 64K boundary would provide 1 block. A fully-
+                    and contiguously-mapped 128K THP that is aligned to at least
+                    a 64K boundary would provide 2 blocks. Or a 128K THP that
+                    maps its first 100K, but contiguously and starting at a 64K
+                    boundary would provide 1 block. A fully- and
+                    contiguously-mapped 2M THP would provide 32 blocks. There
+                    are many other possible permutations.\\n"""),
+        epilog=format("""Requires root privilege to access pagemap and
+                    kpageflags."""))
+
+    group = parser.add_mutually_exclusive_group(required=False)
+    group.add_argument('--pid',
+        metavar='pid', required=False, type=int, default=[], action='append',
+        help="""Process id of the target process. Maybe issued multiple times to
+            scan multiple processes. --pid and --cgroup are mutually exclusive.
+            If neither are provided, all processes are scanned to provide
+            system-wide information.""")
+
+    group.add_argument('--cgroup',
+        metavar='path', required=False,
+        help="""Path to the target cgroup in sysfs. Iterates over every pid in
+            the cgroup and its children. --pid and --cgroup are mutually
+            exclusive. If neither are provided, all processes are scanned to
+            provide system-wide information.""")
+
+    parser.add_argument('--rollup',
+        required=False, default=False, action='store_true',
+        help="""Sum the per-vma statistics to provide a summary over the whole
+            system, process or cgroup.""")
+
+    parser.add_argument('--cont',
+        metavar='size[KMG]', required=False, default=[], action='append',
+        help="""Adds stats for memory that is mapped in contiguous blocks of
+            <size> and also aligned to <size>. May be issued multiple times to
+            track multiple sized blocks. Useful to infer e.g. arm64 contpte and
+            hpa mappings. Size must be a power-of-2 number of pages.""")
+
+    parser.add_argument('--inc-smaps',
+        required=False, default=False, action='store_true',
+        help="""Include all numerical, additive /proc/<pid>/smaps stats in the
+            output.""")
+
+    parser.add_argument('--inc-empty',
+        required=False, default=False, action='store_true',
+        help="""Show all statistics including those whose value is 0.""")
+
+    parser.add_argument('--periodic',
+        metavar='sleep_ms', required=False, type=int,
+        help="""Run in a loop, polling every sleep_ms milliseconds.""")
+
+    args = parser.parse_args()
+
+    try:
+        args.cont = [size2order(cont) for cont in args.cont]
+    except ArgException as e:
+        parser.print_usage()
+        raise
+
+    if args.periodic:
+        while True:
+            do_main(args)
+            print()
+            time.sleep(args.periodic / 1000)
+    else:
+        do_main(args)
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except Exception as e:
+        prog = os.path.basename(sys.argv[0])
+        print(f'{prog}: {e}')
+        exit(1)
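As a usage illustration (root is required, per the epilog; the pid is an
example):

       sudo ./thpmaps --rollup --cont 64K
       sudo ./thpmaps --pid 1234 --inc-smaps --inc-empty

The first prints a single system-wide summary that includes 64K
contiguous-block counters; the second dumps per-VMA output for one process.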
index 3a301696f005127622b63a86165deaf5e66a4cae..7ebf29c9118491193a0fa51bc33b78860b8c6e12 100644 (file)
@@ -13,7 +13,6 @@ NORETURN(__reiserfs_panic)
 NORETURN(__stack_chk_fail)
 NORETURN(__tdx_hypercall_failed)
 NORETURN(__ubsan_handle_builtin_unreachable)
-NORETURN(arch_call_rest_init)
 NORETURN(arch_cpu_idle_dead)
 NORETURN(bch2_trans_in_restart_error)
 NORETURN(bch2_trans_restart_error)
index 2109690b0d5fd5b122ac498a8bafd91e82faa4ac..59ab1ff9d75f7e3a3d35c5d6df2ac20569fd4d6d 100644 (file)
@@ -115,9 +115,13 @@ toggle respectively.
 
 perf script also supports higher level ways to dump instruction traces:
 
+       perf script --insn-trace=disasm
+
+or to use the xed disassembler, which requires installing the xed tool
+(see XED below):
+
        perf script --insn-trace --xed
 
-Dump all instructions. This requires installing the xed tool (see XED below)
 Dumping all instructions in a long trace can be fairly slow. It is usually better
 to start with higher level decoding, like
 
@@ -130,12 +134,12 @@ or
 and then select a time range of interest. The time range can then be examined
 in detail with
 
-       perf script --time starttime,stoptime --insn-trace --xed
+       perf script --time starttime,stoptime --insn-trace=disasm
 
 While examining the trace it's also useful to filter on specific CPUs using
 the -C option
 
-       perf script --time starttime,stoptime --insn-trace --xed -C 1
+       perf script --time starttime,stoptime --insn-trace=disasm -C 1
 
 Dump all instructions in time range on CPU 1.
 
@@ -1306,7 +1310,7 @@ Without timestamps, --per-thread must be specified to distinguish threads.
 
 perf script can be used to provide an instruction trace
 
- $ perf script --guestkallsyms $KALLSYMS --insn-trace --xed -F+ipc | grep -C10 vmresume | head -21
+ $ perf script --guestkallsyms $KALLSYMS --insn-trace=disasm -F+ipc | grep -C10 vmresume | head -21
        CPU 0/KVM  1440  ffffffff82133cdd __vmx_vcpu_run+0x3d ([kernel.kallsyms])                movq  0x48(%rax), %r9
        CPU 0/KVM  1440  ffffffff82133ce1 __vmx_vcpu_run+0x41 ([kernel.kallsyms])                movq  0x50(%rax), %r10
        CPU 0/KVM  1440  ffffffff82133ce5 __vmx_vcpu_run+0x45 ([kernel.kallsyms])                movq  0x58(%rax), %r11
@@ -1407,7 +1411,7 @@ There were none.
 
 'perf script' can be used to provide an instruction trace showing timestamps
 
- $ perf script -i perf.data.kvm --guestkallsyms $KALLSYMS --insn-trace --xed -F+ipc | grep -C10 vmresume | head -21
+ $ perf script -i perf.data.kvm --guestkallsyms $KALLSYMS --insn-trace=disasm -F+ipc | grep -C10 vmresume | head -21
        CPU 1/KVM 17006 [001] 11500.262865593:  ffffffff82133cdd __vmx_vcpu_run+0x3d ([kernel.kallsyms])                 movq  0x48(%rax), %r9
        CPU 1/KVM 17006 [001] 11500.262865593:  ffffffff82133ce1 __vmx_vcpu_run+0x41 ([kernel.kallsyms])                 movq  0x50(%rax), %r10
        CPU 1/KVM 17006 [001] 11500.262865593:  ffffffff82133ce5 __vmx_vcpu_run+0x45 ([kernel.kallsyms])                 movq  0x58(%rax), %r11
index 38f59ac064f7d4615daf5e1bba57a7045ba4c597..d8b863e01fe082fc283300edf15627535a6b7661 100644 (file)
@@ -531,8 +531,35 @@ include::itrace.txt[]
 --raw-trace::
        When displaying traceevent output, do not use print fmt or plugins.
 
+-H::
 --hierarchy::
-       Enable hierarchical output.
+       Enable hierarchical output.  In the hierarchy mode, each sort key groups
+       samples based on its criteria and then sub-divides them using the lower
+       level sort key.
+
+       For example:
+       In normal output:
+
+         perf report -s dso,sym
+         # Overhead  Shared Object      Symbol
+             50.00%  [kernel.kallsyms]  [k] kfunc1
+             20.00%  perf               [.] foo
+             15.00%  [kernel.kallsyms]  [k] kfunc2
+             10.00%  perf               [.] bar
+              5.00%  libc.so            [.] libcall
+
+       In hierarchy output:
+
+         perf report -s dso,sym --hierarchy
+         #   Overhead  Shared Object / Symbol
+             65.00%    [kernel.kallsyms]
+               50.00%    [k] kfunc1
+               15.00%    [k] kfunc2
+             30.00%    perf
+               20.00%    [.] foo
+               10.00%    [.] bar
+              5.00%    libc.so
+                5.00%    [.] libcall
 
 --inline::
        If a callgraph address belongs to an inlined function, the inline stack
index 6a8581012e1620f6fb709b83bcf5307c6b43588f..13e37e9385ee421fe89427420932f8f1f6d61d3f 100644 (file)
@@ -642,8 +642,8 @@ SUPPORTED FIELDS
 
 Currently supported fields:
 
-ev_name, comm, pid, tid, cpu, ip, time, period, phys_addr, addr,
-symbol, symoff, dso, time_enabled, time_running, values, callchain,
+ev_name, comm, id, stream_id, pid, tid, cpu, ip, time, period, phys_addr,
+addr, symbol, symoff, dso, time_enabled, time_running, values, callchain,
 brstack, brstacksym, datasrc, datasrc_decode, iregs, uregs,
 weight, transaction, raw_buf, attr, cpumode.
 
index ff9a52e4468842060d1b24700e77f555b8a3af5c..005e51df855e7cda1451abfbca8930a140138bf5 100644 (file)
@@ -132,9 +132,10 @@ OPTIONS
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, dsoff, addr, symoff,
         srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
-        brstackinsn, brstackinsnlen, brstackoff, callindent, insn, insnlen, synth,
-        phys_addr, metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat,
-        machine_pid, vcpu, cgroup, retire_lat.
+        brstackinsn, brstackinsnlen, brstackoff, callindent, insn, disasm,
+        insnlen, synth, phys_addr, metric, misc, srccode, ipc, data_page_size,
+        code_page_size, ins_lat, machine_pid, vcpu, cgroup, retire_lat.
+
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
@@ -217,9 +218,9 @@ OPTIONS
        Instruction Trace decoding. For calls and returns, it will display the
        name of the symbol indented with spaces to reflect the stack depth.
 
-       When doing instruction trace decoding insn and insnlen give the
-       instruction bytes and the instruction length of the current
-       instruction.
+       When doing instruction trace decoding, insn, disasm and insnlen give the
+       instruction bytes, disassembled instructions (requires libcapstone support)
+       and the instruction length of the current instruction respectively.
 
        The synth field is used by synthesized events which may be created when
        Instruction Trace decoding.
@@ -441,9 +442,10 @@ include::itrace.txt[]
        will be printed. Each entry has function name and file/line. Enabled by
        default, disable with --no-inline.
 
---insn-trace::
-       Show instruction stream for intel_pt traces. Combine with --xed to
-       show disassembly.
+--insn-trace[=<raw|disasm>]::
+       Show instruction stream in bytes (raw) or disassembled (disasm)
+       for intel_pt traces. The default is 'raw'. To use xed, combine
+       'raw' with --xed to show disassembly done by xed.
 
 --xed::
        Run xed disassembler on output. Requires installing the xed disassembler.
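For illustration, on a perf built with libcapstone the new disassembled
output should be reachable either through the insn-trace shorthand or by
adding the field explicitly (the second form assumes the usual -F +field
syntax documented above):

       perf script --insn-trace=disasm
       perf script -F +disasm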
index 5af2e432b54fb51a5e5371cffdfd22d162e0c915..29756a87ab6fa696d0966a965f1d4bf36bccde50 100644 (file)
@@ -308,6 +308,14 @@ use --per-die in addition to -a. (system-wide).  The output includes the
 die number and the number of online processors on that die. This is
 useful to gauge the amount of aggregation.
 
+--per-cluster::
+Aggregate counts per processor cluster for system-wide mode measurement.  This
+is a useful mode to detect imbalance between clusters.  To enable this mode,
+use --per-cluster in addition to -a. (system-wide).  The output includes the
+cluster number and the number of online processors on that cluster. This is
+useful to gauge the amount of aggregation. The cluster ID and its associated
+CPUs can be read from /sys/devices/system/cpu/cpuX/topology/cluster_{id, cpus}.
+
 --per-cache::
 Aggregate counts per cache instance for system-wide mode measurements.  By
 default, the aggregation happens for the cache level at the highest index
@@ -396,6 +404,9 @@ Aggregate counts per processor socket for system-wide mode measurements.
 --per-die::
 Aggregate counts per processor die for system-wide mode measurements.
 
+--per-cluster::
+Aggregate counts per processor cluster for system-wide mode measurements.
+
 --per-cache::
 Aggregate counts per cache instance for system-wide mode measurements.  By
 default, the aggregation happens for the cache level at the highest index
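As a usage illustration of the new option documented above (the workload is
an example):

       perf stat -a --per-cluster -e cycles -- sleep 1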
index 3c202ec080ba2817dcb643e8d3941fe46068c1d7..a754875fa5bbbf2b53481ae98ec2d63e7327a8a8 100644 (file)
@@ -261,8 +261,38 @@ Default is to monitor all CPUS.
 --raw-trace::
        When displaying traceevent output, do not use print fmt or plugins.
 
+-H::
 --hierarchy::
-       Enable hierarchy output.
+       Enable hierarchical output.  In the hierarchy mode, each sort key groups
+       samples based on its criteria and then sub-divides them using the lower
+       level sort key.
+
+       For example, in normal output:
+
+         perf report -s dso,sym
+         #
+         # Overhead  Shared Object      Symbol
+         # ........  .................  ...........
+             50.00%  [kernel.kallsyms]  [k] kfunc1
+             20.00%  perf               [.] foo
+             15.00%  [kernel.kallsyms]  [k] kfunc2
+             10.00%  perf               [.] bar
+              5.00%  libc.so            [.] libcall
+
+       In hierarchy output:
+
+         perf report -s dso,sym --hierarchy
+         #
+         #   Overhead  Shared Object / Symbol
+         # ..........  ......................
+             65.00%    [kernel.kallsyms]
+               50.00%    [k] kfunc1
+               15.00%    [k] kfunc2
+             30.00%    perf
+               20.00%    [.] foo
+               10.00%    [.] bar
+              5.00%    libc.so
+                5.00%    [.] libcall
 
 --overwrite::
        Enable this to use just the most recent records, which helps in high core count
index a7cf7bc2f9689dcdfea6cbe8daaaf01205e76be3..09f516f3fdfb051f639b09e5afb35fa53a894c88 100644 (file)
@@ -63,6 +63,8 @@ OPTIONS
                              in browser mode
           perf-event-open  - Print perf_event_open() arguments and
                              return value
+          kmaps            - Print kernel and module maps (perf script
+                             and perf report without browser)
 
 --debug-file::
        Write debug output to a specified file.
index 825745a645c11578c7ea1a3d1a07b340134147c9..67b326ba00407a7a6f7a01a67908ab9e33ad8ac7 100644 (file)
@@ -2,6 +2,7 @@ For a higher level overview, try: perf report --sort comm,dso
 Sample related events with: perf record -e '{cycles,instructions}:S'
 Compare performance results with: perf diff [<old file> <new file>]
 Boolean options have negative forms, e.g.: perf report --no-children
+To not accumulate CPU time of child symbols add --no-children
 Customize output of perf script with: perf script -F event,ip,sym
 Generate a script for your data: perf script -g <lang>
 Save output of perf stat using: perf stat record <target workload>
@@ -12,32 +13,52 @@ List events using substring match: perf list <keyword>
 To see list of saved events and attributes: perf evlist -v
 Use --symfs <dir> if your symbol files are in non-standard locations
 To see callchains in a more compact form: perf report -g folded
+To see call chains by final symbol taking CPU time (bottom up) use perf report -G
 Show individual samples with: perf script
 Limit to show entries above 5% only: perf report --percent-limit 5
 Profiling branch (mis)predictions with: perf record -b / perf report
-To show assembler sample contexts use perf record -b / perf script -F +brstackinsn --xed
-Treat branches as callchains: perf report --branch-history
-To count events in every 1000 msec: perf stat -I 1000
-Print event counts in CSV format with: perf stat -x,
+To show assembler sample context control flow use perf record -b / perf report --samples 10 and then browse context
+To adjust path to source files to local file system use perf report --prefix=... --prefix-strip=...
+Treat branches as callchains: perf record -b ... ; perf report --branch-history
+Show estimated cycles per function and IPC in annotate use perf record -b ... ; perf report --total-cycles
+To count events every 1000 msec: perf stat -I 1000
+Print event counts in machine readable CSV format with: perf stat -x\;
 If you have debuginfo enabled, try: perf report -s sym,srcline
 For memory address profiling, try: perf mem record / perf mem report
 For tracepoint events, try: perf report -s trace_fields
 To record callchains for each sample: perf record -g
+If call chains don't work try perf record --call-graph dwarf or --call-graph lbr
 To record every process run by a user: perf record -u <user>
+To show inline functions in call traces add --inline to perf report
+To not record events from perf itself add --exclude-perf
 Skip collecting build-id when recording: perf record -B
 To change sampling frequency to 100 Hz: perf record -F 100
+To show information about system the samples were collected on use perf report --header
+To only collect call graph on one event use perf record -e cpu/cpu-cycles,callgraph=1/,branches ; perf report --show-ref-call-graph
+To set sampling period of individual events use perf record -e cpu/cpu-cycles,period=100001/,cpu/branches,period=10001/ ...
+To group events which need to be collected together for accuracy use {}: perf record -e '{cycles,branches}' ...
+To compute metrics for samples use perf record -e '{cycles,instructions}' ... ; perf script -F +metric
 See assembly instructions with percentage: perf annotate <symbol>
 If you prefer Intel style assembly, try: perf annotate -M intel
+When collecting LBR backtraces use --stitch-lbr to handle more than 32 deep entries: perf record --call-graph lbr ; perf report --stitch-lbr
 For hierarchical output, try: perf report --hierarchy
 Order by the overhead of source file name and line number: perf report -s srcline
 System-wide collection from all CPUs: perf record -a
 Show current config key-value pairs: perf config --list
+To collect Processor Trace with samples use perf record -e '{intel_pt//,cycles}' ; perf script --call-trace or --insn-trace --xed -F +ipc (remove --xed if no xed)
+To trace calls using Processor Trace use perf record -e intel_pt// ... ; perf script --call-trace. Then use perf script --time A-B --insn-trace to look at region of interest.
+To measure approximate function latency with Processor Trace use perf record -e intel_pt// ... ; perf script --call-ret-trace
+To trace only single function with Processor Trace use perf record --filter 'filter func @ program' -e intel_pt//u ./program ; perf script --insn-trace
 Show user configuration overrides: perf config --user --list
 To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node`
-To report cacheline events from previous recording: perf c2c report
+To analyze cache line scalability issues use perf c2c record ... ; perf c2c report
 To browse sample contexts use perf report --sample 10 and select in context menu
 To separate samples by time use perf report --sort time,overhead,sym
+To filter subset of samples with report or script add --time X-Y or --cpu A,B,C or --socket-filter ...
 To set sample time separation other than 100ms with --sort time use --time-quantum
 Add -I to perf record to sample register values, which will be visible in perf report sample context.
 To show IPC for sampling periods use perf record -e '{cycles,instructions}:S' and then browse context
 To show context switches in perf report sample context add --switch-events to perf record.
+To show time in nanoseconds in record/report add --ns
+To compare hot regions in two workloads use perf record -b -o file ... ; perf diff --stream file1 file2
+To compare scalability of two workload samples use perf diff -c ratio file1 file2
index aa55850fbc213b939df67bb1df68f776ca555006..1fe8df97fe8840d0587f002f3cb6dc9e4595766f 100644 (file)
@@ -28,8 +28,6 @@ include $(srctree)/tools/scripts/Makefile.arch
 
 $(call detected_var,SRCARCH)
 
-NO_PERF_REGS := 1
-
 ifneq ($(NO_SYSCALL_TABLE),1)
   NO_SYSCALL_TABLE := 1
 
@@ -50,7 +48,6 @@ endif
 
 # Additional ARCH settings for ppc
 ifeq ($(SRCARCH),powerpc)
-  NO_PERF_REGS := 0
   CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
   LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
 endif
@@ -66,49 +63,31 @@ ifeq ($(SRCARCH),x86)
   else
     LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
   endif
-  NO_PERF_REGS := 0
 endif
 
 ifeq ($(SRCARCH),arm)
-  NO_PERF_REGS := 0
   LIBUNWIND_LIBS = -lunwind -lunwind-arm
 endif
 
 ifeq ($(SRCARCH),arm64)
-  NO_PERF_REGS := 0
   CFLAGS += -I$(OUTPUT)arch/arm64/include/generated
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
 
 ifeq ($(SRCARCH),loongarch)
-  NO_PERF_REGS := 0
   CFLAGS += -I$(OUTPUT)arch/loongarch/include/generated
   LIBUNWIND_LIBS = -lunwind -lunwind-loongarch64
 endif
 
-ifeq ($(SRCARCH),riscv)
-  NO_PERF_REGS := 0
-endif
-
-ifeq ($(SRCARCH),csky)
-  NO_PERF_REGS := 0
-endif
-
 ifeq ($(ARCH),s390)
-  NO_PERF_REGS := 0
   CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated
 endif
 
 ifeq ($(ARCH),mips)
-  NO_PERF_REGS := 0
   CFLAGS += -I$(OUTPUT)arch/mips/include/generated
   LIBUNWIND_LIBS = -lunwind -lunwind-mips
 endif
 
-ifeq ($(NO_PERF_REGS),0)
-  $(call detected,CONFIG_PERF_REGS)
-endif
-
 # So far there's only x86 and arm libdw unwind support merged in perf.
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
@@ -165,10 +144,6 @@ endif
 FEATURE_CHECK_CFLAGS-libopencsd := $(LIBOPENCSD_CFLAGS)
 FEATURE_CHECK_LDFLAGS-libopencsd := $(LIBOPENCSD_LDFLAGS) $(OPENCSDLIBS)
 
-ifeq ($(NO_PERF_REGS),0)
-  CFLAGS += -DHAVE_PERF_REGS_SUPPORT
-endif
-
 # for linking with debug library, run like:
 # make DEBUG=1 LIBDW_DIR=/opt/libdw/
 ifdef LIBDW_DIR
@@ -191,6 +166,15 @@ endif
 FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
 FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
 
+# for linking with debug library, run like:
+# make DEBUG=1 LIBCAPSTONE_DIR=/opt/capstone/
+ifdef LIBCAPSTONE_DIR
+  LIBCAPSTONE_CFLAGS  := -I$(LIBCAPSTONE_DIR)/include
+  LIBCAPSTONE_LDFLAGS := -L$(LIBCAPSTONE_DIR)/
+endif
+FEATURE_CHECK_CFLAGS-libcapstone := $(LIBCAPSTONE_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libcapstone := $(LIBCAPSTONE_LDFLAGS) -lcapstone
+
 ifdef LIBZSTD_DIR
   LIBZSTD_CFLAGS  := -I$(LIBZSTD_DIR)/lib
   LIBZSTD_LDFLAGS := -L$(LIBZSTD_DIR)/lib
@@ -209,11 +193,17 @@ endif
 include $(srctree)/tools/scripts/utilities.mak
 
 ifeq ($(call get-executable,$(FLEX)),)
-  dummy := $(error Error: $(FLEX) is missing on this system, please install it)
+  $(error Error: $(FLEX) is missing on this system, please install it)
 endif
 
 ifeq ($(call get-executable,$(BISON)),)
-  dummy := $(error Error: $(BISON) is missing on this system, please install it)
+  $(error Error: $(BISON) is missing on this system, please install it)
+endif
+
+ifneq ($(NO_LIBTRACEEVENT),1)
+  ifeq ($(call get-executable,$(PKG_CONFIG)),)
+    $(error Error: $(PKG_CONFIG) needed by libtraceevent is missing on this system, please install it)
+  endif
 endif
 
 ifneq ($(OUTPUT),)
@@ -438,46 +428,46 @@ else
       LIBC_SUPPORT := 1
     endif
     ifeq ($(LIBC_SUPPORT),1)
-      msg := $(error ERROR: No libelf found. Disables 'probe' tool, jvmti and BPF support. Please install libelf-dev, libelf-devel, elfutils-libelf-devel or build with NO_LIBELF=1.)
+      $(error ERROR: No libelf found. Disables 'probe' tool, jvmti and BPF support. Please install libelf-dev, libelf-devel, elfutils-libelf-devel or build with NO_LIBELF=1.)
     else
       ifneq ($(filter s% -fsanitize=address%,$(EXTRA_CFLAGS),),)
         ifneq ($(shell ldconfig -p | grep libasan >/dev/null 2>&1; echo $$?), 0)
-          msg := $(error No libasan found, please install libasan);
+          $(error No libasan found, please install libasan)
         endif
       endif
 
       ifneq ($(filter s% -fsanitize=undefined%,$(EXTRA_CFLAGS),),)
         ifneq ($(shell ldconfig -p | grep libubsan >/dev/null 2>&1; echo $$?), 0)
-          msg := $(error No libubsan found, please install libubsan);
+          $(error No libubsan found, please install libubsan)
         endif
       endif
 
       ifneq ($(filter s% -static%,$(LDFLAGS),),)
-        msg := $(error No static glibc found, please install glibc-static);
+        $(error No static glibc found, please install glibc-static)
       else
-        msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]);
+        $(error No gnu/libc-version.h found, please install glibc-dev[el])
       endif
     endif
   else
     ifndef NO_LIBDW_DWARF_UNWIND
       ifneq ($(feature-libdw-dwarf-unwind),1)
         NO_LIBDW_DWARF_UNWIND := 1
-        msg := $(warning No libdw DWARF unwind found, Please install elfutils-devel/libdw-dev >= 0.158 and/or set LIBDW_DIR);
+        $(warning No libdw DWARF unwind found, Please install elfutils-devel/libdw-dev >= 0.158 and/or set LIBDW_DIR)
       endif
     endif
     ifneq ($(feature-dwarf), 1)
       ifndef NO_DWARF
-        msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
+        $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev)
         NO_DWARF := 1
       endif
     else
       ifneq ($(feature-dwarf_getlocations), 1)
-        msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157);
+        $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157)
       else
         CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT
       endif # dwarf_getlocations
       ifneq ($(feature-dwarf_getcfi), 1)
-        msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.142);
+        $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.142)
       else
         CFLAGS += -DHAVE_DWARF_CFI_SUPPORT
       endif # dwarf_getcfi
@@ -525,7 +515,7 @@ ifdef CORESIGHT
       endif
     endif
   else
-    dummy := $(error Error: No libopencsd library found or the version is not up-to-date. Please install recent libopencsd to build with CORESIGHT=1)
+    $(error Error: No libopencsd library found or the version is not up-to-date. Please install recent libopencsd to build with CORESIGHT=1)
   endif
 endif
 
@@ -551,7 +541,7 @@ ifndef NO_LIBELF
   ifeq ($(feature-libelf-gelf_getnote), 1)
     CFLAGS += -DHAVE_GELF_GETNOTE_SUPPORT
   else
-    msg := $(warning gelf_getnote() not found on libelf, SDT support disabled);
+    $(warning gelf_getnote() not found on libelf, SDT support disabled)
   endif
 
   ifeq ($(feature-libelf-getshdrstrndx), 1)
@@ -568,7 +558,7 @@ ifndef NO_LIBELF
 
   ifndef NO_DWARF
     ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
-      msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
+      $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled)
       NO_DWARF := 1
     else
       CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
@@ -590,11 +580,11 @@ ifndef NO_LIBELF
           $(call detected,CONFIG_LIBBPF)
           $(call detected,CONFIG_LIBBPF_DYNAMIC)
         else
-          dummy := $(error Error: No libbpf devel library found or older than v1.0, please install/update libbpf-devel);
+          $(error Error: No libbpf devel library found or older than v1.0, please install/update libbpf-devel)
         endif
       else
         ifeq ($(NO_ZLIB), 1)
-          dummy := $(warning Warning: Statically building libbpf not possible as zlib is missing)
+          $(warning Warning: Statically building libbpf not possible as zlib is missing)
           NO_LIBBPF := 1
         else
           # Libbpf will be built as a static library from tools/lib/bpf.
@@ -609,7 +599,7 @@ endif # NO_LIBELF
 
 ifndef NO_SDT
   ifneq ($(feature-sdt), 1)
-    msg := $(warning No sys/sdt.h found, no SDT events are defined, please install systemtap-sdt-devel or systemtap-sdt-dev);
+    $(warning No sys/sdt.h found, no SDT events are defined, please install systemtap-sdt-devel or systemtap-sdt-dev)
     NO_SDT := 1;
   else
     CFLAGS += -DHAVE_SDT_EVENT
@@ -651,13 +641,13 @@ ifndef NO_LIBUNWIND
     have_libunwind = 1
     $(call feature_check,libunwind-debug-frame-aarch64)
     ifneq ($(feature-libunwind-debug-frame-aarch64), 1)
-      msg := $(warning No debug_frame support found in libunwind-aarch64);
+      $(warning No debug_frame support found in libunwind-aarch64)
       CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME_AARCH64
     endif
   endif
 
   ifneq ($(feature-libunwind), 1)
-    msg := $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR);
+    $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR)
     NO_LOCAL_LIBUNWIND := 1
   else
     have_libunwind := 1
@@ -673,7 +663,7 @@ endif
 
 ifndef NO_LIBBPF
   ifneq ($(feature-bpf), 1)
-    msg := $(warning BPF API too old. Please install recent kernel headers. BPF support in 'perf record' is disabled.)
+    $(warning BPF API too old. Please install recent kernel headers. BPF support in 'perf record' is disabled.)
     NO_LIBBPF := 1
   endif
 endif
@@ -686,28 +676,28 @@ endif
 
 ifeq ($(BUILD_BPF_SKEL),1)
   ifeq ($(filter -DHAVE_LIBELF_SUPPORT, $(CFLAGS)),)
-    dummy := $(warning Warning: Disabled BPF skeletons as libelf is required by bpftool)
+    $(warning Warning: Disabled BPF skeletons as libelf is required by bpftool)
     BUILD_BPF_SKEL := 0
   else ifeq ($(filter -DHAVE_ZLIB_SUPPORT, $(CFLAGS)),)
-    dummy := $(warning Warning: Disabled BPF skeletons as zlib is required by bpftool)
+    $(warning Warning: Disabled BPF skeletons as zlib is required by bpftool)
     BUILD_BPF_SKEL := 0
   else ifeq ($(filter -DHAVE_LIBBPF_SUPPORT, $(CFLAGS)),)
-    dummy := $(warning Warning: Disabled BPF skeletons as libbpf is required)
+    $(warning Warning: Disabled BPF skeletons as libbpf is required)
     BUILD_BPF_SKEL := 0
   else ifeq ($(call get-executable,$(CLANG)),)
-    dummy := $(warning Warning: Disabled BPF skeletons as clang ($(CLANG)) is missing)
+    $(warning Warning: Disabled BPF skeletons as clang ($(CLANG)) is missing)
     BUILD_BPF_SKEL := 0
   else
     CLANG_VERSION := $(shell $(CLANG) --version | head -1 | sed 's/.*clang version \([[:digit:]]\+.[[:digit:]]\+.[[:digit:]]\+\).*/\1/g')
     ifeq ($(call version-lt3,$(CLANG_VERSION),12.0.1),1)
-      dummy := $(warning Warning: Disabled BPF skeletons as reliable BTF generation needs at least $(CLANG) version 12.0.1)
+      $(warning Warning: Disabled BPF skeletons as reliable BTF generation needs at least $(CLANG) version 12.0.1)
       BUILD_BPF_SKEL := 0
     endif
   endif
   ifeq ($(BUILD_BPF_SKEL),1)
     $(call feature_check,clang-bpf-co-re)
     ifeq ($(feature-clang-bpf-co-re), 0)
-      dummy := $(warning Warning: Disabled BPF skeletons as clang is too old)
+      $(warning Warning: Disabled BPF skeletons as clang is too old)
       BUILD_BPF_SKEL := 0
     endif
   endif
@@ -727,7 +717,7 @@ dwarf-post-unwind-text := BUG
 # setup DWARF post unwinder
 ifdef NO_LIBUNWIND
   ifdef NO_LIBDW_DWARF_UNWIND
-    msg := $(warning Disabling post unwind, no support found.);
+    $(warning Disabling post unwind, no support found.)
     dwarf-post-unwind := 0
   else
     dwarf-post-unwind-text := libdw
@@ -753,7 +743,7 @@ ifndef NO_LOCAL_LIBUNWIND
   ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
     $(call feature_check,libunwind-debug-frame)
     ifneq ($(feature-libunwind-debug-frame), 1)
-      msg := $(warning No debug_frame support found in libunwind);
+      $(warning No debug_frame support found in libunwind)
       CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
     endif
   else
@@ -782,7 +772,7 @@ ifneq ($(NO_LIBTRACEEVENT),1)
     ifndef NO_LIBAUDIT
       $(call feature_check,libaudit)
       ifneq ($(feature-libaudit), 1)
-        msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
+        $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev)
         NO_LIBAUDIT := 1
       else
         CFLAGS += -DHAVE_LIBAUDIT_SUPPORT
@@ -795,7 +785,7 @@ endif
 
 ifndef NO_LIBCRYPTO
   ifneq ($(feature-libcrypto), 1)
-    msg := $(warning No libcrypto.h found, disables jitted code injection, please install openssl-devel or libssl-dev);
+    $(warning No libcrypto.h found, disables jitted code injection, please install openssl-devel or libssl-dev)
     NO_LIBCRYPTO := 1
   else
     CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT
@@ -807,7 +797,7 @@ endif
 ifndef NO_SLANG
   ifneq ($(feature-libslang), 1)
     ifneq ($(feature-libslang-include-subdir), 1)
-      msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev);
+      $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev)
       NO_SLANG := 1
     else
       CFLAGS += -DHAVE_SLANG_INCLUDE_SUBDIR
@@ -825,7 +815,7 @@ ifdef GTK2
   FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
   $(call feature_check,gtk2)
   ifneq ($(feature-gtk2), 1)
-    msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
+    $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev)
     NO_GTK2 := 1
   else
     $(call feature_check,gtk2-infobar)
@@ -854,7 +844,7 @@ else
   ifneq ($(feature-libperl), 1)
     CFLAGS += -DNO_LIBPERL
     NO_LIBPERL := 1
-    msg := $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev);
+    $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev)
   else
     LDFLAGS += $(PERL_EMBED_LDFLAGS)
     EXTLIBS += $(PERL_EMBED_LIBADD)
@@ -869,7 +859,7 @@ endif
 ifeq ($(feature-timerfd), 1)
   CFLAGS += -DHAVE_TIMERFD_SUPPORT
 else
-  msg := $(warning No timerfd support. Disables 'perf kvm stat live');
+  $(warning No timerfd support. Disables 'perf kvm stat live')
 endif
 
 disable-python = $(eval $(disable-python_code))
@@ -903,7 +893,7 @@ else
            PYTHON_EXTENSION_SUFFIX := $(shell $(PYTHON) -c 'from importlib import machinery; print(machinery.EXTENSION_SUFFIXES[0])')
            LANG_BINDINGS += $(obj-perf)python/perf$(PYTHON_EXTENSION_SUFFIX)
         else
-           msg := $(warning Missing python setuptools, the python binding won't be built, please install python3-setuptools or equivalent);
+           $(warning Missing python setuptools, the python binding won't be built, please install python3-setuptools or equivalent)
          endif
          CFLAGS += -DHAVE_LIBPYTHON_SUPPORT
          $(call detected,CONFIG_LIBPYTHON)
@@ -962,7 +952,7 @@ ifdef BUILD_NONDISTRO
   ifeq ($(feature-libbfd-buildid), 1)
     CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT
   else
-    msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available);
+    $(warning Old version of libbfd/binutils things like PE executable profiling will not be available)
   endif
 endif
 
@@ -994,7 +984,7 @@ ifndef NO_LZMA
     EXTLIBS += -llzma
     $(call detected,CONFIG_LZMA)
   else
-    msg := $(warning No liblzma found, disables xz kernel module decompression, please install xz-devel/liblzma-dev);
+    $(warning No liblzma found, disables xz kernel module decompression, please install xz-devel/liblzma-dev)
     NO_LZMA := 1
   endif
 endif
@@ -1007,7 +997,7 @@ ifndef NO_LIBZSTD
     EXTLIBS += -lzstd
     $(call detected,CONFIG_ZSTD)
   else
-    msg := $(warning No libzstd found, disables trace compression, please install libzstd-dev[el] and/or set LIBZSTD_DIR);
+    $(warning No libzstd found, disables trace compression, please install libzstd-dev[el] and/or set LIBZSTD_DIR)
     NO_LIBZSTD := 1
   endif
 endif
@@ -1018,7 +1008,7 @@ ifndef NO_LIBCAP
     EXTLIBS += -lcap
     $(call detected,CONFIG_LIBCAP)
   else
-    msg := $(warning No libcap found, disables capability support, please install libcap-devel/libcap-dev);
+    $(warning No libcap found, disables capability support, please install libcap-devel/libcap-dev)
     NO_LIBCAP := 1
   endif
 endif
@@ -1031,11 +1021,11 @@ endif
 
 ifndef NO_LIBNUMA
   ifeq ($(feature-libnuma), 0)
-    msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev);
+    $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev)
     NO_LIBNUMA := 1
   else
     ifeq ($(feature-numa_num_possible_cpus), 0)
-      msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8);
+      $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8)
       NO_LIBNUMA := 1
     else
       CFLAGS += -DHAVE_LIBNUMA_SUPPORT
@@ -1090,14 +1080,26 @@ ifndef NO_LIBBABELTRACE
     EXTLIBS += -lbabeltrace-ctf
     $(call detected,CONFIG_LIBBABELTRACE)
   else
-    msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev);
+    $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev)
+  endif
+endif
+
+ifndef NO_CAPSTONE
+  $(call feature_check,libcapstone)
+  ifeq ($(feature-libcapstone), 1)
+    CFLAGS += -DHAVE_LIBCAPSTONE_SUPPORT $(LIBCAPSTONE_CFLAGS)
+    LDFLAGS += $(LIBCAPSTONE_LDFLAGS)
+    EXTLIBS += -lcapstone
+    $(call detected,CONFIG_LIBCAPSTONE)
+  else
+    $(warning No libcapstone found, disables disasm engine support for 'perf script', please install libcapstone-dev/capstone-devel)
   endif
 endif
 
 ifndef NO_AUXTRACE
   ifeq ($(SRCARCH),x86)
     ifeq ($(feature-get_cpuid), 0)
-      msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
+      $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc)
       NO_AUXTRACE := 1
     endif
   endif
@@ -1155,7 +1157,7 @@ ifndef NO_LIBPFM4
     ASCIIDOC_EXTRA = -aHAVE_LIBPFM=1
     $(call detected,CONFIG_LIBPFM4)
   else
-    msg := $(warning libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev);
+    $(warning libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev)
   endif
 endif
 
@@ -1173,7 +1175,7 @@ ifneq ($(NO_LIBTRACEEVENT),1)
     CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP)
     $(call detected,CONFIG_LIBTRACEEVENT)
   else
-    dummy := $(error ERROR: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel or build with NO_LIBTRACEEVENT=1)
+    $(error ERROR: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel or build with NO_LIBTRACEEVENT=1)
   endif
 
   $(call feature_check,libtracefs)
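
The new libcapstone block above follows the usual feature-check pattern: probe for the library, then gate -DHAVE_LIBCAPSTONE_SUPPORT so that 'perf script' can offer a disassembly engine. As a rough, self-contained sketch of the capstone API the check links against (the byte buffer and load address below are purely illustrative):

    #include <stdio.h>
    #include <inttypes.h>
    #include <capstone/capstone.h>

    int main(void)
    {
            /* push rbp; mov rbp,rsp -- illustrative x86-64 bytes */
            const uint8_t code[] = { 0x55, 0x48, 0x89, 0xe5 };
            csh handle;
            cs_insn *insn = NULL;
            size_t i, count;

            if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK)
                    return 1;

            /* count == 0 disassembles everything in the buffer */
            count = cs_disasm(handle, code, sizeof(code), 0x1000, 0, &insn);
            for (i = 0; i < count; i++)
                    printf("0x%" PRIx64 ":\t%s\t%s\n", insn[i].address,
                           insn[i].mnemonic, insn[i].op_str);

            if (count > 0)
                    cs_free(insn, count);
            cs_close(&handle);
            return 0;
    }

Building this needs exactly the -lcapstone that FEATURE_CHECK_LDFLAGS-libcapstone adds for the probe.
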
index f8774a9b1377a3e98b98543a66b4f8aea6fb6837..04d89d2ed209b4dc9189d218e6c76c55b4d96fda 100644 (file)
@@ -84,6 +84,9 @@ include ../scripts/utilities.mak
 # Define NO_LIBBABELTRACE if you do not want libbabeltrace support
 # for CTF data format.
 #
+# Define NO_CAPSTONE if you do not want libcapstone support
+# for disasm engine.
+#
 # Define NO_LZMA if you do not want to support compressed (xz) kernel modules
 #
 # Define NO_AUXTRACE if you do not want AUX area tracing support
@@ -380,7 +383,7 @@ python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT
 ifeq ($(CONFIG_LIBTRACEEVENT),y)
   PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
 else
-  PYTHON_EXT_SRCS := $(shell grep -v ^\#\\\|util/trace-event.c util/python-ext-sources)
+  PYTHON_EXT_SRCS := $(shell grep -v ^\#\\\|util/trace-event.c\\\|util/trace-event-parse.c util/python-ext-sources)
 endif
 
 PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI)
@@ -482,7 +485,7 @@ drm_hdr_dir := $(srctree)/tools/include/uapi/drm
 drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh
 
 # Create output directory if not already present
-_dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)')
+$(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)')
 
 $(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl)
        $(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@
@@ -672,7 +675,7 @@ tests-coresight-targets-clean:
 all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) tests-coresight-targets
 
 # Create python binding output directory if not already present
-_dummy := $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python')
+$(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python')
 
 $(OUTPUT)python/perf$(PYTHON_EXTENSION_SUFFIX): $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBPERF) $(LIBSUBCMD)
        $(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
@@ -1048,6 +1051,11 @@ install-tests: all install-gtk
                $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
                $(INSTALL) tests/shell/lib/*.sh -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
                $(INSTALL) tests/shell/lib/*.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
+               $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/common'; \
+               $(INSTALL) tests/shell/common/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/common'; \
+               $(INSTALL) tests/shell/common/*.pl '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/common'; \
+               $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \
+               $(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \
                $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' ; \
                $(INSTALL) tests/shell/coresight/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight'
        $(Q)$(MAKE) -C tests/shell/coresight install-tests
@@ -1142,7 +1150,7 @@ ifeq ($(VMLINUX_H),)
   endif
 endif
 
-$(SKEL_OUT)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
+$(SKEL_OUT)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) $(VMLINUX_H)
 ifeq ($(VMLINUX_H),)
        $(QUIET_GEN)$(BPFTOOL) btf dump file $< format c > $@
 else
@@ -1167,7 +1175,7 @@ bpf-skel:
 endif # CONFIG_PERF_BPF_SKEL
 
 bpf-skel-clean:
-       $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
+       $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) $(SKEL_OUT)/vmlinux.h
 
 clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean arm64-sysreg-defs-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean
        $(call QUIET_CLEAN, core-objs)  $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS)
index 2c56e8b56ddf4b91cb6addaac90e1ae5e9be725d..f94a0210c7b74bf934126c905a082fba274d59d4 100644 (file)
@@ -2,7 +2,7 @@
 #include "perf_regs.h"
 #include "../../../util/perf_regs.h"
 
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
 
@@ -15,3 +15,8 @@ uint64_t arch__user_reg_mask(void)
 {
        return PERF_REGS_MASK;
 }
+
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+       return sample_reg_masks;
+}
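
This hunk repeats across every architecture below: the per-arch sample_reg_masks[] table becomes static and is reached through the new arch__sample_reg_masks() accessor, so generic code no longer depends on an extern array that some architectures never defined -- the NO_PERF_REGS/HAVE_PERF_REGS_SUPPORT machinery removed from the build config above existed to paper over exactly that. A hedged sketch of the consuming side, relying on the existing SMPL_REG_END ({ .name = NULL }) terminator convention:

    /* generic code: iterate whatever table the arch provides */
    const struct sample_reg *r;

    for (r = arch__sample_reg_masks(); r->name != NULL; r++)
            printf("%s: 0x%" PRIx64 "\n", r->name, r->mask);

Since every arch now provides the accessor, even when its table is just SMPL_REG_END, the per-arch CONFIG_PERF_REGS switch is no longer needed.
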
index 7f3af3b97f3bac2a35faddc2a0a0ad180198a618..8b7cb68ba1a8a24ec86593570756304aa3fdfc43 100644 (file)
@@ -13,6 +13,7 @@
 #include "hisi-ptt.h"
 #include "../../../util/pmu.h"
 #include "../../../util/cs-etm.h"
+#include "../../arm64/util/mem-events.h"
 
 void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
 {
@@ -26,6 +27,8 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
                pmu->selectable = true;
                pmu->is_uncore = false;
                pmu->perf_event_attr_init_default = arm_spe_pmu_default_config;
+               if (!strcmp(pmu->name, "arm_spe_0"))
+                       pmu->mem_events = perf_mem_events_arm;
        } else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
                pmu->selectable = true;
 #endif
index fab3095fb5d08215b7ba5f5f42e262fa9e3a9610..5735ed4479bb9b1a710ac2ad5fb5716ba7d1462f 100644 (file)
@@ -18,7 +18,7 @@ sysprf := $(srctree)/tools/perf/arch/arm64/entry/syscalls/
 systbl := $(sysprf)/mksyscalltbl
 
 # Create output directory if not already present
-_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sysdef) $(systbl)
        $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
index ba1144366e85a5077b46efabefd2017e1132ffb8..aab1cc2bc283b79db1b6eb67d6617371317ca491 100644 (file)
@@ -12,5 +12,7 @@
 
 void arch__add_leaf_frame_record_opts(struct record_opts *opts)
 {
+       const struct sample_reg *sample_reg_masks = arch__sample_reg_masks();
+
        opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
 }
index 3bcc5c7035c21429eb1f55ae8891ac642249f6e2..9f8da7937255cc9b14c3c58a1119b40bd0c76f6b 100644 (file)
@@ -1,37 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "map_symbol.h"
+#include "util/map_symbol.h"
+#include "util/mem-events.h"
 #include "mem-events.h"
 
-#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a }
 
-static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
-       E("spe-load",   "arm_spe_0/ts_enable=1,pa_enable=1,load_filter=1,store_filter=0,min_latency=%u/",       "arm_spe_0"),
-       E("spe-store",  "arm_spe_0/ts_enable=1,pa_enable=1,load_filter=0,store_filter=1/",                      "arm_spe_0"),
-       E("spe-ldst",   "arm_spe_0/ts_enable=1,pa_enable=1,load_filter=1,store_filter=1,min_latency=%u/",       "arm_spe_0"),
+struct perf_mem_event perf_mem_events_arm[PERF_MEM_EVENTS__MAX] = {
+       E("spe-load",   "%s/ts_enable=1,pa_enable=1,load_filter=1,store_filter=0,min_latency=%u/",      NULL,   true,   0),
+       E("spe-store",  "%s/ts_enable=1,pa_enable=1,load_filter=0,store_filter=1/",                     NULL,   false,  0),
+       E("spe-ldst",   "%s/ts_enable=1,pa_enable=1,load_filter=1,store_filter=1,min_latency=%u/",      NULL,   true,   0),
 };
-
-static char mem_ev_name[100];
-
-struct perf_mem_event *perf_mem_events__ptr(int i)
-{
-       if (i >= PERF_MEM_EVENTS__MAX)
-               return NULL;
-
-       return &perf_mem_events[i];
-}
-
-const char *perf_mem_events__name(int i, const char *pmu_name __maybe_unused)
-{
-       struct perf_mem_event *e = perf_mem_events__ptr(i);
-
-       if (i >= PERF_MEM_EVENTS__MAX)
-               return NULL;
-
-       if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE)
-               scnprintf(mem_ev_name, sizeof(mem_ev_name),
-                         e->name, perf_mem_events__loads_ldlat);
-       else /* PERF_MEM_EVENTS__STORE */
-               scnprintf(mem_ev_name, sizeof(mem_ev_name), e->name);
-
-       return mem_ev_name;
-}
diff --git a/tools/perf/arch/arm64/util/mem-events.h b/tools/perf/arch/arm64/util/mem-events.h
new file mode 100644 (file)
index 0000000..5fc50be
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARM64_MEM_EVENTS_H
+#define _ARM64_MEM_EVENTS_H
+
+extern struct perf_mem_event perf_mem_events_arm[PERF_MEM_EVENTS__MAX];
+
+#endif /* _ARM64_MEM_EVENTS_H */
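
The widened E() initializer shows the new shape of struct perf_mem_event: the old sysfs_name is replaced by an event_name used to probe the PMU, plus an ldlat flag and an aux_event leader config. Field names below are taken from the initializers in this series; the exact member order, and extra flags such as record/supported used by the builtins further down, are assumptions:

    struct perf_mem_event {
            bool            record;         /* selected for recording (assumption) */
            bool            ldlat;          /* name template takes a ldlat=%u argument */
            u32             aux_event;      /* leader config, e.g. MEM_LOADS_AUX on Intel */
            const char      *tag;           /* user-visible name: "spe-load", "ldlat-loads", ... */
            const char      *name;          /* "%s/.../" template expanded with the PMU name */
            const char      *event_name;    /* sysfs event probed for support, e.g. "mem-loads" */
    };

Storing "%s" templates instead of hard-coded PMU names ("arm_spe_0", "cpu") is what lets a single table serve hybrid systems with several core PMUs.
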
index 1b79d8eab22f51c38a6a04416b08e6b03396cedb..09308665e28a9ea91c4660b7828d69fd160491e8 100644 (file)
@@ -16,7 +16,7 @@
 #define HWCAP_SVE      (1 << 22)
 #endif
 
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
        SMPL_REG(x0, PERF_REG_ARM64_X0),
        SMPL_REG(x1, PERF_REG_ARM64_X1),
        SMPL_REG(x2, PERF_REG_ARM64_X2),
@@ -175,3 +175,8 @@ uint64_t arch__user_reg_mask(void)
        }
        return PERF_REGS_MASK;
 }
+
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+       return sample_reg_masks;
+}
index c0877c264d49e167be05f554837af018a1bdf7d3..6b1665f4118071d5b3d6f7634ac9440bc0c6eeaf 100644 (file)
@@ -2,7 +2,7 @@
 #include "perf_regs.h"
 #include "../../util/perf_regs.h"
 
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
 
@@ -15,3 +15,8 @@ uint64_t arch__user_reg_mask(void)
 {
        return PERF_REGS_MASK;
 }
+
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+       return sample_reg_masks;
+}
index c392e7af474332e2c6ebd5a2e108207fc08e4786..3992a67a87d9c6419bc72291a1d3e05d0ac29f30 100644 (file)
@@ -17,7 +17,7 @@ sysprf := $(srctree)/tools/perf/arch/loongarch/entry/syscalls/
 systbl := $(sysprf)/mksyscalltbl
 
 # Create output directory if not already present
-_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sysdef) $(systbl)
        $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
index 2c56e8b56ddf4b91cb6addaac90e1ae5e9be725d..f94a0210c7b74bf934126c905a082fba274d59d4 100644 (file)
@@ -2,7 +2,7 @@
 #include "perf_regs.h"
 #include "../../../util/perf_regs.h"
 
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
 
@@ -15,3 +15,8 @@ uint64_t arch__user_reg_mask(void)
 {
        return PERF_REGS_MASK;
 }
+
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+       return sample_reg_masks;
+}
index 8bc09072e3d6b8aa0723c432bbc03c00ee993ba8..cd0b011b3be5f5e6101e191d9326505f2f48cd7f 100644 (file)
@@ -11,7 +11,7 @@ sysdef := $(sysprf)/syscall_n64.tbl
 systbl := $(sysprf)/mksyscalltbl
 
 # Create output directory if not already present
-_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sysdef) $(systbl)
        $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
index c0877c264d49e167be05f554837af018a1bdf7d3..6b1665f4118071d5b3d6f7634ac9440bc0c6eeaf 100644 (file)
@@ -2,7 +2,7 @@
 #include "perf_regs.h"
 #include "../../util/perf_regs.h"
 
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
 
@@ -15,3 +15,8 @@ uint64_t arch__user_reg_mask(void)
 {
        return PERF_REGS_MASK;
 }
+
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+       return sample_reg_masks;
+}
index 840ea0e59287dbb0ed37866cd7f809d2288579d5..bf6d323574f66df3d411516f02edf234fbfebcb6 100644 (file)
@@ -19,7 +19,7 @@ sysdef := $(sysprf)/syscall.tbl
 systbl := $(sysprf)/mksyscalltbl
 
 # Create output directory if not already present
-_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header64): $(sysdef) $(systbl)
        $(Q)$(SHELL) '$(systbl)' '64' $(sysdef) > $@
index 9889245c555c4cfb9730997d2ae5b1cdfbf1b3f9..1d323f3a3322bc394a22380c0cef41cd57905f6e 100644 (file)
@@ -2,6 +2,7 @@ perf-y += header.o
 perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
+perf-y += pmu.o
 perf-y += sym-handling.o
 perf-y += evsel.o
 perf-y += event.o
index d9a0ac1cdf30208301e5874b8db1aa1067f41102..c8357b571ccffff3e82cf16f7124c417143c1f64 100644 (file)
@@ -114,7 +114,7 @@ static int is_tracepoint_available(const char *str, struct evlist *evlist)
 
        parse_events_error__init(&err);
        ret = parse_events(evlist, str, &err);
-       if (err.str)
+       if (ret)
                parse_events_error__print(&err, "tracepoint");
        parse_events_error__exit(&err);
        return ret;
index 78b986e5268d95ec7f80a9b2a91a0596041a4608..765d4a054b0a42362f34af60ede150e0eacdca28 100644 (file)
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "map_symbol.h"
+#include "util/map_symbol.h"
+#include "util/mem-events.h"
 #include "mem-events.h"
 
-/* PowerPC does not support 'ldlat' parameter. */
-const char *perf_mem_events__name(int i, const char *pmu_name __maybe_unused)
-{
-       if (i == PERF_MEM_EVENTS__LOAD)
-               return "cpu/mem-loads/";
+#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a }
 
-       return "cpu/mem-stores/";
-}
+struct perf_mem_event perf_mem_events_power[PERF_MEM_EVENTS__MAX] = {
+       E("ldlat-loads",        "%s/mem-loads/",        "mem-loads",    false,  0),
+       E("ldlat-stores",       "%s/mem-stores/",       "mem-stores",   false,  0),
+       E(NULL,                 NULL,                   NULL,           false,  0),
+};
diff --git a/tools/perf/arch/powerpc/util/mem-events.h b/tools/perf/arch/powerpc/util/mem-events.h
new file mode 100644 (file)
index 0000000..6acc3d1
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _POWER_MEM_EVENTS_H
+#define _POWER_MEM_EVENTS_H
+
+extern struct perf_mem_event perf_mem_events_power[PERF_MEM_EVENTS__MAX];
+
+#endif /* _POWER_MEM_EVENTS_H */
index b38aa056eea053f9306c91e5387691ffcfd3074f..e8e6e6fc6f176a57aa2db840e777d7b00ef79921 100644 (file)
@@ -17,7 +17,7 @@
 #define PVR_POWER9             0x004E
 #define PVR_POWER10            0x0080
 
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
        SMPL_REG(r0, PERF_REG_POWERPC_R0),
        SMPL_REG(r1, PERF_REG_POWERPC_R1),
        SMPL_REG(r2, PERF_REG_POWERPC_R2),
@@ -232,3 +232,8 @@ uint64_t arch__user_reg_mask(void)
 {
        return PERF_REGS_MASK;
 }
+
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+       return sample_reg_masks;
+}
diff --git a/tools/perf/arch/powerpc/util/pmu.c b/tools/perf/arch/powerpc/util/pmu.c
new file mode 100644 (file)
index 0000000..554675d
--- /dev/null
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include "../../../util/pmu.h"
+#include "mem-events.h"
+
+void perf_pmu__arch_init(struct perf_pmu *pmu)
+{
+       if (pmu->is_core)
+               pmu->mem_events = perf_mem_events_power;
+}
index c0877c264d49e167be05f554837af018a1bdf7d3..6b1665f4118071d5b3d6f7634ac9440bc0c6eeaf 100644 (file)
@@ -2,7 +2,7 @@
 #include "perf_regs.h"
 #include "../../util/perf_regs.h"
 
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
 
@@ -15,3 +15,8 @@ uint64_t arch__user_reg_mask(void)
 {
        return PERF_REGS_MASK;
 }
+
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+       return sample_reg_masks;
+}
index 74bffbea03e20cd495c3b57e6fc90fcb5fa3e6df..56994e63b43ae289d719de1b2465b8913ea51695 100644 (file)
@@ -17,7 +17,7 @@ sysdef := $(sysprf)/syscall.tbl
 systbl := $(sysprf)/mksyscalltbl
 
 # Create output directory if not already present
-_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sysdef) $(systbl)
        $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
index c0877c264d49e167be05f554837af018a1bdf7d3..6b1665f4118071d5b3d6f7634ac9440bc0c6eeaf 100644 (file)
@@ -2,7 +2,7 @@
 #include "perf_regs.h"
 #include "../../util/perf_regs.h"
 
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
        SMPL_REG_END
 };
 
@@ -15,3 +15,8 @@ uint64_t arch__user_reg_mask(void)
 {
        return PERF_REGS_MASK;
 }
+
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+       return sample_reg_masks;
+}
index 5a9f9a7bf07d1750c4a868cd6e19f33632c30d6e..8952e00f9b60203a65d09eaf2234d1a21f9a171d 100644 (file)
@@ -17,7 +17,7 @@ sys       := $(srctree)/tools/perf/arch/x86/entry/syscalls
 systbl    := $(sys)/syscalltbl.sh
 
 # Create output directory if not already present
-_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
 
 $(header): $(sys)/syscall_64.tbl $(systbl)
        $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
index 5bfec3345d59fbf959631999cdb49ce25b7b96ec..c05c0a85dad419fc4a31665e1094414683d4fed9 100644 (file)
@@ -34,6 +34,7 @@ static int sample_ustack(struct perf_sample *sample,
        }
 
        stack_size = map__end(map) - sp;
+       map__put(map);
        stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
 
        memcpy(buf, (void *) sp, stack_size);
index 40f5d17fedab6955c89042837eddb13227b04c58..e221ea10417406e9f824da9d77f16787842c178d 100644 (file)
@@ -259,11 +259,10 @@ static int test_event(const struct evlist_test *e)
        parse_events_error__init(&err);
        ret = parse_events(evlist, e->name, &err);
        if (ret) {
-               pr_debug("failed to parse event '%s', err %d, str '%s'\n",
-                        e->name, ret, err.str);
+               pr_debug("failed to parse event '%s', err %d\n", e->name, ret);
                parse_events_error__print(&err, e->name);
                ret = TEST_FAIL;
-               if (strstr(err.str, "can't access trace events"))
+               if (parse_events_error__contains(&err, "can't access trace events"))
                        ret = TEST_SKIP;
        } else {
                ret = e->check(evlist);
index 191b372f9a2d3630a849a32a52e49c10cd9fd353..62df03e91c7e1ce535089138476442b53f04ddbc 100644 (file)
@@ -1,93 +1,28 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "util/pmu.h"
-#include "util/pmus.h"
-#include "util/env.h"
-#include "map_symbol.h"
-#include "mem-events.h"
 #include "linux/string.h"
-#include "env.h"
+#include "util/map_symbol.h"
+#include "util/mem-events.h"
+#include "mem-events.h"
 
-static char mem_loads_name[100];
-static bool mem_loads_name__init;
-static char mem_stores_name[100];
 
 #define MEM_LOADS_AUX          0x8203
-#define MEM_LOADS_AUX_NAME     "{%s/mem-loads-aux/,%s/mem-loads,ldlat=%u/}:P"
 
-#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a }
 
-static struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = {
-       E("ldlat-loads",        "%s/mem-loads,ldlat=%u/P",      "%s/events/mem-loads"),
-       E("ldlat-stores",       "%s/mem-stores/P",              "%s/events/mem-stores"),
-       E(NULL,                 NULL,                           NULL),
+struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = {
+       E("ldlat-loads",        "%s/mem-loads,ldlat=%u/P",      "mem-loads",    true,   0),
+       E("ldlat-stores",       "%s/mem-stores/P",              "mem-stores",   false,  0),
+       E(NULL,                 NULL,                           NULL,           false,  0),
 };
 
-static struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
-       E(NULL,         NULL,           NULL),
-       E(NULL,         NULL,           NULL),
-       E("mem-ldst",   "ibs_op//",     "ibs_op"),
+struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX] = {
+       E("ldlat-loads",        "{%s/mem-loads-aux/,%s/mem-loads,ldlat=%u/}:P", "mem-loads",    true,   MEM_LOADS_AUX),
+       E("ldlat-stores",       "%s/mem-stores/P",              "mem-stores",   false,  0),
+       E(NULL,                 NULL,                           NULL,           false,  0),
 };
 
-struct perf_mem_event *perf_mem_events__ptr(int i)
-{
-       if (i >= PERF_MEM_EVENTS__MAX)
-               return NULL;
-
-       if (x86__is_amd_cpu())
-               return &perf_mem_events_amd[i];
-
-       return &perf_mem_events_intel[i];
-}
-
-bool is_mem_loads_aux_event(struct evsel *leader)
-{
-       struct perf_pmu *pmu = perf_pmus__find("cpu");
-
-       if (!pmu)
-               pmu = perf_pmus__find("cpu_core");
-
-       if (pmu && !perf_pmu__have_event(pmu, "mem-loads-aux"))
-               return false;
-
-       return leader->core.attr.config == MEM_LOADS_AUX;
-}
-
-const char *perf_mem_events__name(int i, const char *pmu_name)
-{
-       struct perf_mem_event *e = perf_mem_events__ptr(i);
-
-       if (!e)
-               return NULL;
-
-       if (i == PERF_MEM_EVENTS__LOAD) {
-               if (mem_loads_name__init && !pmu_name)
-                       return mem_loads_name;
-
-               if (!pmu_name) {
-                       mem_loads_name__init = true;
-                       pmu_name = "cpu";
-               }
-
-               if (perf_pmus__have_event(pmu_name, "mem-loads-aux")) {
-                       scnprintf(mem_loads_name, sizeof(mem_loads_name),
-                                 MEM_LOADS_AUX_NAME, pmu_name, pmu_name,
-                                 perf_mem_events__loads_ldlat);
-               } else {
-                       scnprintf(mem_loads_name, sizeof(mem_loads_name),
-                                 e->name, pmu_name,
-                                 perf_mem_events__loads_ldlat);
-               }
-               return mem_loads_name;
-       }
-
-       if (i == PERF_MEM_EVENTS__STORE) {
-               if (!pmu_name)
-                       pmu_name = "cpu";
-
-               scnprintf(mem_stores_name, sizeof(mem_stores_name),
-                         e->name, pmu_name);
-               return mem_stores_name;
-       }
-
-       return e->name;
-}
+struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
+       E(NULL,         NULL,           NULL,   false,  0),
+       E(NULL,         NULL,           NULL,   false,  0),
+       E("mem-ldst",   "%s//",         NULL,   false,  0),
+};
diff --git a/tools/perf/arch/x86/util/mem-events.h b/tools/perf/arch/x86/util/mem-events.h
new file mode 100644 (file)
index 0000000..f55c8d3
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_MEM_EVENTS_H
+#define _X86_MEM_EVENTS_H
+
+extern struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX];
+
+extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX];
+
+#endif /* _X86_MEM_EVENTS_H */
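
With the per-arch perf_mem_events__name() implementations gone, one generic formatter can expand the stored templates for whichever PMU owns the table. A hedged sketch of that expansion (the real helper presumably lives in util/mem-events.c and may differ in detail; passing surplus varargs that a format string does not consume is well-defined in C):

    static const char *mem_event_name(char *buf, size_t sz,
                                      const struct perf_mem_event *e,
                                      const char *pmu_name, unsigned int ldlat)
    {
            if (e->aux_event)       /* two "%s" slots, e.g. the Intel aux variant */
                    scnprintf(buf, sz, e->name, pmu_name, pmu_name, ldlat);
            else
                    scnprintf(buf, sz, e->name, pmu_name, ldlat);
            return buf;
    }

The AMD entry becomes "%s//" rather than a hard-coded "ibs_op//" for the same reason: the PMU name is supplied by whoever owns pmu->mem_events.
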
index b813502a272757605fa2f35d7bec38ca572f82ca..12fd93f04802287379c4ea393bc6f2f6afb19053 100644 (file)
@@ -13,7 +13,7 @@
 #include "../../../util/pmu.h"
 #include "../../../util/pmus.h"
 
-const struct sample_reg sample_reg_masks[] = {
+static const struct sample_reg sample_reg_masks[] = {
        SMPL_REG(AX, PERF_REG_X86_AX),
        SMPL_REG(BX, PERF_REG_X86_BX),
        SMPL_REG(CX, PERF_REG_X86_CX),
@@ -276,6 +276,11 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
        return SDT_ARG_VALID;
 }
 
+const struct sample_reg *arch__sample_reg_masks(void)
+{
+       return sample_reg_masks;
+}
+
 uint64_t arch__intr_reg_mask(void)
 {
        struct perf_event_attr attr = {
index 469555ae9b3c2d77328e11ba27e575948611eeaa..c3d89d6ba1bf03ad1b13d1a81ebdf3351b109392 100644 (file)
@@ -15,6 +15,7 @@
 #include "../../../util/pmu.h"
 #include "../../../util/fncache.h"
 #include "../../../util/pmus.h"
+#include "mem-events.h"
 #include "env.h"
 
 void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
@@ -30,14 +31,14 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
                pmu->selectable = true;
        }
 #endif
-}
-
-int perf_pmus__num_mem_pmus(void)
-{
-       /* AMD uses IBS OP pmu and not a core PMU for perf mem/c2c */
-       if (x86__is_amd_cpu())
-               return 1;
 
-       /* Intel uses core pmus for perf mem/c2c */
-       return perf_pmus__num_core_pmus();
+       if (x86__is_amd_cpu()) {
+               if (!strcmp(pmu->name, "ibs_op"))
+                       pmu->mem_events = perf_mem_events_amd;
+       } else if (pmu->is_core) {
+               if (perf_pmu__have_event(pmu, "mem-loads-aux"))
+                       pmu->mem_events = perf_mem_events_intel_aux;
+               else
+                       pmu->mem_events = perf_mem_events_intel;
+       }
 }
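
perf_pmus__num_mem_pmus() disappears because selection is now data-driven: each architecture's perf_pmu__arch_init() attaches the right table to the PMUs it recognizes, and generic code simply walks the PMUs that have pmu->mem_events set. Wiring up a new architecture reduces to something like the following, mirroring the powerpc hook added above (perf_mem_events_newarch is an illustrative name, not a real symbol):

    void perf_pmu__arch_init(struct perf_pmu *pmu)
    {
            /* hypothetical arch hook: hand the core PMU its event table */
            if (pmu->is_core)
                    pmu->mem_events = perf_mem_events_newarch;
    }
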
index 9b99f48b923cf41188ef395b644e849f2f10605e..e2d6cfe21057bd8ac30563245b44fed5d9a3bd58 100644 (file)
@@ -33,7 +33,7 @@ static double cpuinfo_tsc_freq(void)
 
        cpuinfo = fopen("/proc/cpuinfo", "r");
        if (!cpuinfo) {
-               pr_err("Failed to read /proc/cpuinfo for TSC frequency");
+               pr_err("Failed to read /proc/cpuinfo for TSC frequency\n");
                return NAN;
        }
        while (getline(&line, &len, cpuinfo) > 0) {
@@ -48,7 +48,7 @@ static double cpuinfo_tsc_freq(void)
        }
 out:
        if (fpclassify(result) == FP_ZERO)
-               pr_err("Failed to find TSC frequency in /proc/cpuinfo");
+               pr_err("Failed to find TSC frequency in /proc/cpuinfo\n");
 
        free(line);
        fclose(cpuinfo);
index f78eea9e21539352e96c68f37c4b0001c84054e4..16b40f5d43db06509c95db7b3747a4c52132aca0 100644 (file)
@@ -3215,12 +3215,19 @@ static int parse_record_events(const struct option *opt,
                               const char *str, int unset __maybe_unused)
 {
        bool *event_set = (bool *) opt->value;
+       struct perf_pmu *pmu;
+
+       pmu = perf_mem_events_find_pmu();
+       if (!pmu) {
+               pr_err("failed: there is no PMU that supports perf c2c\n");
+               exit(-1);
+       }
 
        if (!strcmp(str, "list")) {
-               perf_mem_events__list();
+               perf_pmu__mem_events_list(pmu);
                exit(0);
        }
-       if (perf_mem_events__parse(str))
+       if (perf_pmu__mem_events_parse(pmu, str))
                exit(-1);
 
        *event_set = true;
@@ -3238,13 +3245,13 @@ static const char * const *record_mem_usage = __usage_record;
 
 static int perf_c2c__record(int argc, const char **argv)
 {
-       int rec_argc, i = 0, j, rec_tmp_nr = 0;
+       int rec_argc, i = 0, j;
        const char **rec_argv;
-       char **rec_tmp;
        int ret;
        bool all_user = false, all_kernel = false;
        bool event_set = false;
        struct perf_mem_event *e;
+       struct perf_pmu *pmu;
        struct option options[] = {
        OPT_CALLBACK('e', "event", &event_set, "event",
                     "event selector. Use 'perf c2c record -e list' to list available events",
@@ -3256,7 +3263,13 @@ static int perf_c2c__record(int argc, const char **argv)
        OPT_END()
        };
 
-       if (perf_mem_events__init()) {
+       pmu = perf_mem_events_find_pmu();
+       if (!pmu) {
+               pr_err("failed: no PMU supports the memory events\n");
+               return -1;
+       }
+
+       if (perf_pmu__mem_events_init(pmu)) {
                pr_err("failed: memory events not supported\n");
                return -1;
        }
@@ -3265,22 +3278,16 @@ static int perf_c2c__record(int argc, const char **argv)
                             PARSE_OPT_KEEP_UNKNOWN);
 
        /* Max number of arguments multiplied by number of PMUs that can support them. */
-       rec_argc = argc + 11 * perf_pmus__num_mem_pmus();
+       rec_argc = argc + 11 * (perf_pmu__mem_events_num_mem_pmus(pmu) + 1);
 
        rec_argv = calloc(rec_argc + 1, sizeof(char *));
        if (!rec_argv)
                return -1;
 
-       rec_tmp = calloc(rec_argc + 1, sizeof(char *));
-       if (!rec_tmp) {
-               free(rec_argv);
-               return -1;
-       }
-
        rec_argv[i++] = "record";
 
        if (!event_set) {
-               e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE);
+               e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD_STORE);
                /*
                 * The load and store operations are required, use the event
                 * PERF_MEM_EVENTS__LOAD_STORE if it is supported.
@@ -3289,15 +3296,15 @@ static int perf_c2c__record(int argc, const char **argv)
                        e->record = true;
                        rec_argv[i++] = "-W";
                } else {
-                       e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+                       e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD);
                        e->record = true;
 
-                       e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
+                       e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__STORE);
                        e->record = true;
                }
        }
 
-       e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+       e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD);
        if (e->record)
                rec_argv[i++] = "-W";
 
@@ -3305,7 +3312,7 @@ static int perf_c2c__record(int argc, const char **argv)
        rec_argv[i++] = "--phys-data";
        rec_argv[i++] = "--sample-cpu";
 
-       ret = perf_mem_events__record_args(rec_argv, &i, rec_tmp, &rec_tmp_nr);
+       ret = perf_mem_events__record_args(rec_argv, &i);
        if (ret)
                goto out;
 
@@ -3332,10 +3339,6 @@ static int perf_c2c__record(int argc, const char **argv)
 
        ret = cmd_record(i, rec_argv);
 out:
-       for (i = 0; i < rec_tmp_nr; i++)
-               free(rec_tmp[i]);
-
-       free(rec_tmp);
        free(rec_argv);
        return ret;
 }
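
perf mem (next hunk) gets the identical treatment, so the setup both builtins now share is worth spelling out. The rec_tmp bookkeeping can go because the formatted event strings are owned by the mem-events code rather than strdup'd per invocation. A condensed sketch of the common sequence, grounded in the two diffs:

    struct perf_pmu *pmu = perf_mem_events_find_pmu();

    if (!pmu) {
            pr_err("failed: no PMU supports the memory events\n");
            return -1;
    }
    if (perf_pmu__mem_events_init(pmu)) {
            pr_err("failed: memory events not supported\n");
            return -1;
    }
    /* every later lookup goes through the same pmu */
    e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD_STORE);
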
index e27a1b1288c29ffe96ce871bc5bab76c8a67c8b7..02bf608d585ee604d269486feb4c2fdca350c008 100644 (file)
@@ -208,17 +208,24 @@ static void default_print_metric(void *ps,
        if (!print_state->last_metricgroups ||
            strcmp(print_state->last_metricgroups, group ?: "")) {
                if (group && print_state->metricgroups) {
-                       if (print_state->name_only)
+                       if (print_state->name_only) {
                                fprintf(fp, "%s ", group);
-                       else if (print_state->metrics) {
-                               const char *gdesc = describe_metricgroup(group);
+                       } else {
+                               const char *gdesc = print_state->desc
+                                       ? describe_metricgroup(group)
+                                       : NULL;
+                               const char *print_colon = "";
+
+                               if (print_state->metrics) {
+                                       print_colon = ":";
+                                       fputc('\n', fp);
+                               }
 
                                if (gdesc)
-                                       fprintf(fp, "\n%s: [%s]\n", group, gdesc);
+                                       fprintf(fp, "%s%s [%s]\n", group, print_colon, gdesc);
                                else
-                                       fprintf(fp, "\n%s:\n", group);
-                       } else
-                               fprintf(fp, "%s\n", group);
+                                       fprintf(fp, "%s%s\n", group, print_colon);
+                       }
                }
                zfree(&print_state->last_metricgroups);
                print_state->last_metricgroups = strdup(group ?: "");
index 51499c20da01e82239f2fa5f8a3da9b7696c83a5..5b851e64e4a1afb4a04fc40753142befd23df62a 100644 (file)
@@ -43,12 +43,19 @@ static int parse_record_events(const struct option *opt,
                               const char *str, int unset __maybe_unused)
 {
        struct perf_mem *mem = *(struct perf_mem **)opt->value;
+       struct perf_pmu *pmu;
+
+       pmu = perf_mem_events_find_pmu();
+       if (!pmu) {
+               pr_err("failed: there is no PMU that supports perf mem\n");
+               exit(-1);
+       }
 
        if (!strcmp(str, "list")) {
-               perf_mem_events__list();
+               perf_pmu__mem_events_list(pmu);
                exit(0);
        }
-       if (perf_mem_events__parse(str))
+       if (perf_pmu__mem_events_parse(pmu, str))
                exit(-1);
 
        mem->operation = 0;
@@ -65,13 +72,13 @@ static const char * const *record_mem_usage = __usage;
 
 static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 {
-       int rec_argc, i = 0, j, tmp_nr = 0;
+       int rec_argc, i = 0, j;
        int start, end;
        const char **rec_argv;
-       char **rec_tmp;
        int ret;
        bool all_user = false, all_kernel = false;
        struct perf_mem_event *e;
+       struct perf_pmu *pmu;
        struct option options[] = {
        OPT_CALLBACK('e', "event", &mem, "event",
                     "event selector. use 'perf mem record -e list' to list available events",
@@ -84,7 +91,13 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
        OPT_END()
        };
 
-       if (perf_mem_events__init()) {
+       pmu = perf_mem_events_find_pmu();
+       if (!pmu) {
+               pr_err("failed: no PMU supports the memory events\n");
+               return -1;
+       }
+
+       if (perf_pmu__mem_events_init(pmu)) {
                pr_err("failed: memory events not supported\n");
                return -1;
        }
@@ -93,7 +106,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
                             PARSE_OPT_KEEP_UNKNOWN);
 
        /* Max number of arguments multiplied by number of PMUs that can support them. */
-       rec_argc = argc + 9 * perf_pmus__num_mem_pmus();
+       rec_argc = argc + 9 * (perf_pmu__mem_events_num_mem_pmus(pmu) + 1);
 
        if (mem->cpu_list)
                rec_argc += 2;
@@ -102,18 +115,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
        if (!rec_argv)
                return -1;
 
-       /*
-        * Save the allocated event name strings.
-        */
-       rec_tmp = calloc(rec_argc + 1, sizeof(char *));
-       if (!rec_tmp) {
-               free(rec_argv);
-               return -1;
-       }
-
        rec_argv[i++] = "record";
 
-       e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE);
+       e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD_STORE);
 
        /*
         * The load and store operations are required, use the event
@@ -126,17 +130,17 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
                rec_argv[i++] = "-W";
        } else {
                if (mem->operation & MEM_OPERATION_LOAD) {
-                       e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+                       e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD);
                        e->record = true;
                }
 
                if (mem->operation & MEM_OPERATION_STORE) {
-                       e = perf_mem_events__ptr(PERF_MEM_EVENTS__STORE);
+                       e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__STORE);
                        e->record = true;
                }
        }
 
-       e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD);
+       e = perf_pmu__mem_events_ptr(pmu, PERF_MEM_EVENTS__LOAD);
        if (e->record)
                rec_argv[i++] = "-W";
 
@@ -149,7 +153,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
                rec_argv[i++] = "--data-page-size";
 
        start = i;
-       ret = perf_mem_events__record_args(rec_argv, &i, rec_tmp, &tmp_nr);
+       ret = perf_mem_events__record_args(rec_argv, &i);
        if (ret)
                goto out;
        end = i;
@@ -179,10 +183,6 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 
        ret = cmd_record(i, rec_argv);
 out:
-       for (i = 0; i < tmp_nr; i++)
-               free(rec_tmp[i]);
-
-       free(rec_tmp);
        free(rec_argv);
        return ret;
 }
index 86c91012517267c5355d7fedebdeed42e9cfb675..ff7e1d6cfcd2e70b0331a449da79639635db2117 100644 (file)
@@ -1773,8 +1773,11 @@ record__finish_output(struct record *rec)
        struct perf_data *data = &rec->data;
        int fd = perf_data__fd(data);
 
-       if (data->is_pipe)
+       if (data->is_pipe) {
+               /* Just to display approx. size */
+               data->file.size = rec->bytes_written;
                return;
+       }
 
        rec->session->header.data_size += rec->bytes_written;
        data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
@@ -1830,8 +1833,8 @@ static int
 record__switch_output(struct record *rec, bool at_exit)
 {
        struct perf_data *data = &rec->data;
+       char *new_filename = NULL;
        int fd, err;
-       char *new_filename;
 
        /* Same Size:      "2015122520103046"*/
        char timestamp[] = "InvalidTimestamp";
@@ -1853,16 +1856,17 @@ record__switch_output(struct record *rec, bool at_exit)
        }
 
        fd = perf_data__switch(data, timestamp,
-                                   rec->session->header.data_offset,
-                                   at_exit, &new_filename);
+                              rec->session->header.data_offset,
+                              at_exit, &new_filename);
        if (fd >= 0 && !at_exit) {
                rec->bytes_written = 0;
                rec->session->header.data_size = 0;
        }
 
-       if (!quiet)
+       if (!quiet) {
                fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
                        data->path, timestamp);
+       }
 
        if (rec->switch_output.num_files) {
                int n = rec->switch_output.cur_file + 1;
@@ -2472,6 +2476,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
        if (data->is_pipe && rec->evlist->core.nr_entries == 1)
                rec->opts.sample_id = true;
 
+       if (rec->timestamp_filename && perf_data__is_pipe(data)) {
+               rec->timestamp_filename = false;
+               pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
+       }
+
        evlist__uniquify_name(rec->evlist);
 
        /* Debug message used by test scripts */
index f2ed2b7e80a32649095f123b63b3ed8ecaf42cc9..dcd93ee5fc24e3389f6e154171fd9faa499706dd 100644 (file)
@@ -59,6 +59,7 @@
 #include <linux/ctype.h>
 #include <signal.h>
 #include <linux/bitmap.h>
+#include <linux/list_sort.h>
 #include <linux/string.h>
 #include <linux/stringify.h>
 #include <linux/time64.h>
@@ -828,35 +829,6 @@ static void tasks_setup(struct report *rep)
        rep->tool.no_warn = true;
 }
 
-struct task {
-       struct thread           *thread;
-       struct list_head         list;
-       struct list_head         children;
-};
-
-static struct task *tasks_list(struct task *task, struct machine *machine)
-{
-       struct thread *parent_thread, *thread = task->thread;
-       struct task   *parent_task;
-
-       /* Already listed. */
-       if (!list_empty(&task->list))
-               return NULL;
-
-       /* Last one in the chain. */
-       if (thread__ppid(thread) == -1)
-               return task;
-
-       parent_thread = machine__find_thread(machine, -1, thread__ppid(thread));
-       if (!parent_thread)
-               return ERR_PTR(-ENOENT);
-
-       parent_task = thread__priv(parent_thread);
-       thread__put(parent_thread);
-       list_add_tail(&task->list, &parent_task->children);
-       return tasks_list(parent_task, machine);
-}
-
 struct maps__fprintf_task_args {
        int indent;
        FILE *fp;
@@ -900,89 +872,156 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
        return args.printed;
 }
 
-static void task__print_level(struct task *task, FILE *fp, int level)
+static int thread_level(struct machine *machine, const struct thread *thread)
 {
-       struct thread *thread = task->thread;
-       struct task *child;
-       int comm_indent = fprintf(fp, "  %8d %8d %8d |%*s",
-                                 thread__pid(thread), thread__tid(thread),
-                                 thread__ppid(thread), level, "");
+       struct thread *parent_thread;
+       int res;
 
-       fprintf(fp, "%s\n", thread__comm_str(thread));
+       if (thread__tid(thread) <= 0)
+               return 0;
 
-       maps__fprintf_task(thread__maps(thread), comm_indent, fp);
+       if (thread__ppid(thread) <= 0)
+               return 1;
 
-       if (!list_empty(&task->children)) {
-               list_for_each_entry(child, &task->children, list)
-                       task__print_level(child, fp, level + 1);
+       parent_thread = machine__find_thread(machine, -1, thread__ppid(thread));
+       if (!parent_thread) {
+               pr_err("Missing parent thread of %d\n", thread__tid(thread));
+               return 0;
        }
+       res = 1 + thread_level(machine, parent_thread);
+       thread__put(parent_thread);
+       return res;
 }
 
-static int tasks_print(struct report *rep, FILE *fp)
+static void task__print_level(struct machine *machine, struct thread *thread, FILE *fp)
 {
-       struct perf_session *session = rep->session;
-       struct machine      *machine = &session->machines.host;
-       struct task *tasks, *task;
-       unsigned int nr = 0, itask = 0, i;
-       struct rb_node *nd;
-       LIST_HEAD(list);
+       int level = thread_level(machine, thread);
+       int comm_indent = fprintf(fp, "  %8d %8d %8d |%*s",
+                                 thread__pid(thread), thread__tid(thread),
+                                 thread__ppid(thread), level, "");
 
-       /*
-        * No locking needed while accessing machine->threads,
-        * because --tasks is single threaded command.
-        */
+       fprintf(fp, "%s\n", thread__comm_str(thread));
 
-       /* Count all the threads. */
-       for (i = 0; i < THREADS__TABLE_SIZE; i++)
-               nr += machine->threads[i].nr;
+       maps__fprintf_task(thread__maps(thread), comm_indent, fp);
+}
 
-       tasks = malloc(sizeof(*tasks) * nr);
-       if (!tasks)
-               return -ENOMEM;
+/*
+ * Sort two thread list nodes such that they form a tree. The first node is
+ * the root of the tree; its children are ordered numerically after it. If a
+ * child has children itself, they appear immediately after their parent. For
+ * example, four threads in the order they'd appear in the list:
+ * - init, with a TID of 1 and a parent of 0
+ * - systemd, with a TID of 3000 and a parent of init/1
+ * - a systemd child thread, with a TID of 4000 and a parent of 3000
+ * - NetworkManager, a child of init, with a TID of 3500.
+ */
+static int task_list_cmp(void *priv, const struct list_head *la, const struct list_head *lb)
+{
+       struct machine *machine = priv;
+       struct thread_list *task_a = list_entry(la, struct thread_list, list);
+       struct thread_list *task_b = list_entry(lb, struct thread_list, list);
+       struct thread *a = task_a->thread;
+       struct thread *b = task_b->thread;
+       int level_a, level_b, res;
+
+       /* Same thread? */
+       if (thread__tid(a) == thread__tid(b))
+               return 0;
 
-       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
-               struct threads *threads = &machine->threads[i];
+       /* Compare a and b to root. */
+       if (thread__tid(a) == 0)
+               return -1;
 
-               for (nd = rb_first_cached(&threads->entries); nd;
-                    nd = rb_next(nd)) {
-                       task = tasks + itask++;
+       if (thread__tid(b) == 0)
+               return 1;
 
-                       task->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
-                       INIT_LIST_HEAD(&task->children);
-                       INIT_LIST_HEAD(&task->list);
-                       thread__set_priv(task->thread, task);
-               }
-       }
+       /* If parents match sort by tid. */
+       if (thread__ppid(a) == thread__ppid(b))
+               return thread__tid(a) < thread__tid(b) ? -1 : 1;
 
        /*
-        * Iterate every task down to the unprocessed parent
-        * and link all in task children list. Task with no
-        * parent is added into 'list'.
+        * Walk a and b up until, if one is an ancestor of the other, their
+        * tids match; otherwise a and b end at a common parent with distinct
+        * tids to sort by. First make the depths of the threads match.
         */
-       for (itask = 0; itask < nr; itask++) {
-               task = tasks + itask;
-
-               if (!list_empty(&task->list))
-                       continue;
-
-               task = tasks_list(task, machine);
-               if (IS_ERR(task)) {
-                       pr_err("Error: failed to process tasks\n");
-                       free(tasks);
-                       return PTR_ERR(task);
+       level_a = thread_level(machine, a);
+       level_b = thread_level(machine, b);
+       a = thread__get(a);
+       b = thread__get(b);
+       for (int i = level_a; i > level_b; i--) {
+               struct thread *parent = machine__find_thread(machine, -1, thread__ppid(a));
+
+               thread__put(a);
+               if (!parent) {
+                       pr_err("Missing parent thread of %d\n", thread__tid(a));
+                       thread__put(b);
+                       return -1;
                }
+               a = parent;
+       }
+       for (int i = level_b; i > level_a; i--) {
+               struct thread *parent = machine__find_thread(machine, -1, thread__ppid(b));
 
-               if (task)
-                       list_add_tail(&task->list, &list);
+               thread__put(b);
+               if (!parent) {
+                       pr_err("Missing parent thread of %d\n", thread__tid(b));
+                       thread__put(a);
+                       return 1;
+               }
+               b = parent;
+       }
+       /* Search up to a common parent. */
+       while (thread__ppid(a) != thread__ppid(b)) {
+               struct thread *parent;
+
+               parent = machine__find_thread(machine, -1, thread__ppid(a));
+               thread__put(a);
+               if (!parent)
+                       pr_err("Missing parent thread of %d\n", thread__tid(a));
+               a = parent;
+               parent = machine__find_thread(machine, -1, thread__ppid(b));
+               thread__put(b);
+               if (!parent)
+                       pr_err("Missing parent thread of %d\n", thread__tid(b));
+               b = parent;
+               if (!a || !b) {
+                       /* Handle missing parent (unexpected) with some sanity. */
+                       thread__put(a);
+                       thread__put(b);
+                       return !a && !b ? 0 : (!a ? -1 : 1);
+               }
+       }
+       if (thread__tid(a) == thread__tid(b)) {
+               /* a is a child of b or vice-versa, deeper levels appear later. */
+               res = level_a < level_b ? -1 : (level_a > level_b ? 1 : 0);
+       } else {
+               /* Sort by tid now the parent is the same. */
+               res = thread__tid(a) < thread__tid(b) ? -1 : 1;
        }
+       thread__put(a);
+       thread__put(b);
+       return res;
+}
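
Given the four threads from the comment above, the comparator yields the
following order; each child sorts immediately after its parent and siblings
sort by tid, which is why TID 4000 precedes TID 3500:

            tid   ppid  comm
              1      0  init
           3000      1  systemd
           4000   3000  systemd (child thread)
           3500      1  NetworkManager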
 
-       fprintf(fp, "# %8s %8s %8s  %s\n", "pid", "tid", "ppid", "comm");
+static int tasks_print(struct report *rep, FILE *fp)
+{
+       struct machine *machine = &rep->session->machines.host;
+       LIST_HEAD(tasks);
+       int ret;
 
-       list_for_each_entry(task, &list, list)
-               task__print_level(task, fp, 0);
+       ret = machine__thread_list(machine, &tasks);
+       if (!ret) {
+               struct thread_list *task;
 
-       free(tasks);
-       return 0;
+               list_sort(machine, &tasks, task_list_cmp);
+
+               fprintf(fp, "# %8s %8s %8s  %s\n", "pid", "tid", "ppid", "comm");
+
+               list_for_each_entry(task, &tasks, list)
+                       task__print_level(machine, task->thread, fp);
+       }
+       thread_list__delete(&tasks);
+       return ret;
 }
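
tasks_print() above has the shape every list_sort() user takes: build a list,
sort it with a three-way comparator, walk it, delete it. A stripped-down sketch
of that pattern follows; thread_list and the machine/thread helpers come from
this patch, while by_tid and print_tids are illustrative names only.

        static int by_tid(void *priv __maybe_unused,
                          const struct list_head *la, const struct list_head *lb)
        {
                struct thread_list *a = list_entry(la, struct thread_list, list);
                struct thread_list *b = list_entry(lb, struct thread_list, list);

                /* list_sort() wants <0 / 0 / >0 and is a stable merge sort. */
                return thread__tid(a->thread) < thread__tid(b->thread) ? -1 :
                       (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0);
        }

        static int print_tids(struct machine *machine, FILE *fp)
        {
                LIST_HEAD(tasks);
                int ret = machine__thread_list(machine, &tasks);

                if (!ret) {
                        struct thread_list *pos;

                        list_sort(/*priv=*/NULL, &tasks, by_tid);
                        list_for_each_entry(pos, &tasks, list)
                                fprintf(fp, "%d\n", thread__tid(pos->thread));
                }
                thread_list__delete(&tasks);
                return ret;
        }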
 
 static int __cmd_report(struct report *rep)
@@ -1410,7 +1449,7 @@ int cmd_report(int argc, const char **argv)
                    "only show processor socket that match with this filter"),
        OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
                    "Show raw trace event output (do not use print fmt or plugins)"),
-       OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+       OPT_BOOLEAN('H', "hierarchy", &symbol_conf.report_hierarchy,
                    "Show entries in a hierarchy"),
        OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
                             "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
@@ -1766,6 +1805,8 @@ repeat:
        } else
                ret = 0;
 
+       if (!use_browser && (verbose > 2 || debug_kmaps))
+               perf_session__dump_kmaps(session);
 error:
        if (report.ptime_range) {
                itrace_synth_opts__clear_time_range(&itrace_synth_opts);
index dd6065afbbaf6fd41084b78219a4948d8acd9379..b248c433529a8feb4131c1dfd6938a3447269dd5 100644 (file)
@@ -92,24 +92,6 @@ struct sched_atom {
        struct task_desc        *wakee;
 };
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
-
-/* task state bitmask, copied from include/linux/sched.h */
-#define TASK_RUNNING           0
-#define TASK_INTERRUPTIBLE     1
-#define TASK_UNINTERRUPTIBLE   2
-#define __TASK_STOPPED         4
-#define __TASK_TRACED          8
-/* in tsk->exit_state */
-#define EXIT_DEAD              16
-#define EXIT_ZOMBIE            32
-#define EXIT_TRACE             (EXIT_ZOMBIE | EXIT_DEAD)
-/* in tsk->state again */
-#define TASK_DEAD              64
-#define TASK_WAKEKILL          128
-#define TASK_WAKING            256
-#define TASK_PARKED            512
-
 enum thread_state {
        THREAD_SLEEPING = 0,
        THREAD_WAIT_CPU,
@@ -266,7 +248,7 @@ struct thread_runtime {
        u64 total_preempt_time;
        u64 total_delay_time;
 
-       int last_state;
+       char last_state;
 
        char shortname[3];
        bool comm_changed;
@@ -436,7 +418,7 @@ static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *t
 }
 
 static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task,
-                                 u64 timestamp, u64 task_state __maybe_unused)
+                                 u64 timestamp, const char task_state __maybe_unused)
 {
        struct sched_atom *event = get_new_event(task, timestamp);
 
@@ -860,7 +842,7 @@ static int replay_switch_event(struct perf_sched *sched,
                   *next_comm  = evsel__strval(evsel, sample, "next_comm");
        const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
                  next_pid = evsel__intval(evsel, sample, "next_pid");
-       const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
+       const char prev_state = evsel__taskstate(evsel, sample, "prev_state");
        struct task_desc *prev, __maybe_unused *next;
        u64 timestamp0, timestamp = sample->time;
        int cpu = sample->cpu;
@@ -1050,13 +1032,6 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
        return 0;
 }
 
-static char sched_out_state(u64 prev_state)
-{
-       const char *str = TASK_STATE_TO_CHAR_STR;
-
-       return str[prev_state];
-}
-
 static int
 add_sched_out_event(struct work_atoms *atoms,
                    char run_state,
@@ -1132,7 +1107,7 @@ static int latency_switch_event(struct perf_sched *sched,
 {
        const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
                  next_pid = evsel__intval(evsel, sample, "next_pid");
-       const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
+       const char prev_state = evsel__taskstate(evsel, sample, "prev_state");
        struct work_atoms *out_events, *in_events;
        struct thread *sched_out, *sched_in;
        u64 timestamp0, timestamp = sample->time;
@@ -1168,7 +1143,7 @@ static int latency_switch_event(struct perf_sched *sched,
                        goto out_put;
                }
        }
-       if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
+       if (add_sched_out_event(out_events, prev_state, timestamp))
                return -1;
 
        in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
@@ -2033,24 +2008,12 @@ static void timehist_header(struct perf_sched *sched)
        printf("\n");
 }
 
-static char task_state_char(struct thread *thread, int state)
-{
-       static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
-       unsigned bit = state ? ffs(state) : 0;
-
-       /* 'I' for idle */
-       if (thread__tid(thread) == 0)
-               return 'I';
-
-       return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
-}
-
 static void timehist_print_sample(struct perf_sched *sched,
                                  struct evsel *evsel,
                                  struct perf_sample *sample,
                                  struct addr_location *al,
                                  struct thread *thread,
-                                 u64 t, int state)
+                                 u64 t, const char state)
 {
        struct thread_runtime *tr = thread__priv(thread);
        const char *next_comm = evsel__strval(evsel, sample, "next_comm");
@@ -2091,7 +2054,7 @@ static void timehist_print_sample(struct perf_sched *sched,
        print_sched_time(tr->dt_run, 6);
 
        if (sched->show_state)
-               printf(" %5c ", task_state_char(thread, state));
+               printf(" %5c ", thread__tid(thread) == 0 ? 'I' : state);
 
        if (sched->show_next) {
                snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid);
@@ -2163,9 +2126,9 @@ static void timehist_update_runtime_stats(struct thread_runtime *r,
                else if (r->last_time) {
                        u64 dt_wait = tprev - r->last_time;
 
-                       if (r->last_state == TASK_RUNNING)
+                       if (r->last_state == 'R')
                                r->dt_preempt = dt_wait;
-                       else if (r->last_state == TASK_UNINTERRUPTIBLE)
+                       else if (r->last_state == 'D')
                                r->dt_iowait = dt_wait;
                        else
                                r->dt_sleep = dt_wait;
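
With prev_state now a single character from evsel__taskstate() rather than a
raw bitmask, the classification above reduces to a small table:

        'R' (formerly TASK_RUNNING)          -> dt_preempt (preempted while runnable)
        'D' (formerly TASK_UNINTERRUPTIBLE)  -> dt_iowait
        any other state character            -> dt_sleep
        tid 0                                -> printed as 'I' (idle), as in
                                                timehist_print_sample() above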
@@ -2590,7 +2553,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
        struct thread_runtime *tr = NULL;
        u64 tprev, t = sample->time;
        int rc = 0;
-       int state = evsel__intval(evsel, sample, "prev_state");
+       const char state = evsel__taskstate(evsel, sample, "prev_state");
 
        addr_location__init(&al);
        if (machine__resolve(machine, &al, sample) < 0) {
@@ -3204,14 +3167,44 @@ static void perf_sched__merge_lat(struct perf_sched *sched)
        }
 }
 
+static int setup_cpus_switch_event(struct perf_sched *sched)
+{
+       unsigned int i;
+
+       sched->cpu_last_switched = calloc(MAX_CPUS, sizeof(*(sched->cpu_last_switched)));
+       if (!sched->cpu_last_switched)
+               return -1;
+
+       sched->curr_pid = malloc(MAX_CPUS * sizeof(*(sched->curr_pid)));
+       if (!sched->curr_pid) {
+               zfree(&sched->cpu_last_switched);
+               return -1;
+       }
+
+       for (i = 0; i < MAX_CPUS; i++)
+               sched->curr_pid[i] = -1;
+
+       return 0;
+}
+
+static void free_cpus_switch_event(struct perf_sched *sched)
+{
+       zfree(&sched->curr_pid);
+       zfree(&sched->cpu_last_switched);
+}
+
 static int perf_sched__lat(struct perf_sched *sched)
 {
+       int rc = -1;
        struct rb_node *next;
 
        setup_pager();
 
+       if (setup_cpus_switch_event(sched))
+               return rc;
+
        if (perf_sched__read_events(sched))
-               return -1;
+               goto out_free_cpus_switch_event;
 
        perf_sched__merge_lat(sched);
        perf_sched__sort_lat(sched);
@@ -3240,13 +3233,15 @@ static int perf_sched__lat(struct perf_sched *sched)
        print_bad_events(sched);
        printf("\n");
 
-       return 0;
+       rc = 0;
+
+out_free_cpus_switch_event:
+       free_cpus_switch_event(sched);
+       return rc;
 }
 
 static int setup_map_cpus(struct perf_sched *sched)
 {
-       struct perf_cpu_map *map;
-
        sched->max_cpu.cpu  = sysconf(_SC_NPROCESSORS_CONF);
 
        if (sched->map.comp) {
@@ -3255,16 +3250,15 @@ static int setup_map_cpus(struct perf_sched *sched)
                        return -1;
        }
 
-       if (!sched->map.cpus_str)
-               return 0;
-
-       map = perf_cpu_map__new(sched->map.cpus_str);
-       if (!map) {
-               pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
-               return -1;
+       if (sched->map.cpus_str) {
+               sched->map.cpus = perf_cpu_map__new(sched->map.cpus_str);
+               if (!sched->map.cpus) {
+                       pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
+                       zfree(&sched->map.comp_cpus);
+                       return -1;
+               }
        }
 
-       sched->map.cpus = map;
        return 0;
 }
 
@@ -3304,33 +3298,69 @@ static int setup_color_cpus(struct perf_sched *sched)
 
 static int perf_sched__map(struct perf_sched *sched)
 {
+       int rc = -1;
+
+       sched->curr_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_thread)));
+       if (!sched->curr_thread)
+               return rc;
+
+       if (setup_cpus_switch_event(sched))
+               goto out_free_curr_thread;
+
        if (setup_map_cpus(sched))
-               return -1;
+               goto out_free_cpus_switch_event;
 
        if (setup_color_pids(sched))
-               return -1;
+               goto out_put_map_cpus;
 
        if (setup_color_cpus(sched))
-               return -1;
+               goto out_put_color_pids;
 
        setup_pager();
        if (perf_sched__read_events(sched))
-               return -1;
+               goto out_put_color_cpus;
+
+       rc = 0;
        print_bad_events(sched);
-       return 0;
+
+out_put_color_cpus:
+       perf_cpu_map__put(sched->map.color_cpus);
+
+out_put_color_pids:
+       perf_thread_map__put(sched->map.color_pids);
+
+out_put_map_cpus:
+       zfree(&sched->map.comp_cpus);
+       perf_cpu_map__put(sched->map.cpus);
+
+out_free_cpus_switch_event:
+       free_cpus_switch_event(sched);
+
+out_free_curr_thread:
+       zfree(&sched->curr_thread);
+       return rc;
 }
 
 static int perf_sched__replay(struct perf_sched *sched)
 {
+       int ret;
        unsigned long i;
 
+       mutex_init(&sched->start_work_mutex);
+       mutex_init(&sched->work_done_wait_mutex);
+
+       ret = setup_cpus_switch_event(sched);
+       if (ret)
+               goto out_mutex_destroy;
+
        calibrate_run_measurement_overhead(sched);
        calibrate_sleep_measurement_overhead(sched);
 
        test_calibrations(sched);
 
-       if (perf_sched__read_events(sched))
-               return -1;
+       ret = perf_sched__read_events(sched);
+       if (ret)
+               goto out_free_cpus_switch_event;
 
        printf("nr_run_events:        %ld\n", sched->nr_run_events);
        printf("nr_sleep_events:      %ld\n", sched->nr_sleep_events);
@@ -3355,7 +3385,14 @@ static int perf_sched__replay(struct perf_sched *sched)
 
        sched->thread_funcs_exit = true;
        destroy_tasks(sched);
-       return 0;
+
+out_free_cpus_switch_event:
+       free_cpus_switch_event(sched);
+
+out_mutex_destroy:
+       mutex_destroy(&sched->start_work_mutex);
+       mutex_destroy(&sched->work_done_wait_mutex);
+       return ret;
 }
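
The cleanup labels added to perf_sched__map() and perf_sched__replay() above
unwind in strict reverse order of setup, the usual goto-ladder idiom; for the
map case:

        setup:    curr_thread -> cpus_switch_event -> map_cpus -> color_pids -> color_cpus
        teardown: color_cpus -> color_pids -> map_cpus -> cpus_switch_event -> curr_thread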
 
 static void setup_sorting(struct perf_sched *sched, const struct option *options,
@@ -3590,28 +3627,7 @@ int cmd_sched(int argc, const char **argv)
                .switch_event       = replay_switch_event,
                .fork_event         = replay_fork_event,
        };
-       unsigned int i;
-       int ret = 0;
-
-       mutex_init(&sched.start_work_mutex);
-       mutex_init(&sched.work_done_wait_mutex);
-       sched.curr_thread = calloc(MAX_CPUS, sizeof(*sched.curr_thread));
-       if (!sched.curr_thread) {
-               ret = -ENOMEM;
-               goto out;
-       }
-       sched.cpu_last_switched = calloc(MAX_CPUS, sizeof(*sched.cpu_last_switched));
-       if (!sched.cpu_last_switched) {
-               ret = -ENOMEM;
-               goto out;
-       }
-       sched.curr_pid = malloc(MAX_CPUS * sizeof(*sched.curr_pid));
-       if (!sched.curr_pid) {
-               ret = -ENOMEM;
-               goto out;
-       }
-       for (i = 0; i < MAX_CPUS; i++)
-               sched.curr_pid[i] = -1;
+       int ret;
 
        argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
                                        sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
@@ -3622,9 +3638,9 @@ int cmd_sched(int argc, const char **argv)
         * Aliased to 'perf script' for now:
         */
        if (!strcmp(argv[0], "script")) {
-               ret = cmd_script(argc, argv);
+               return cmd_script(argc, argv);
        } else if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
-               ret = __cmd_record(argc, argv);
+               return __cmd_record(argc, argv);
        } else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) {
                sched.tp_handler = &lat_ops;
                if (argc > 1) {
@@ -3633,7 +3649,7 @@ int cmd_sched(int argc, const char **argv)
                                usage_with_options(latency_usage, latency_options);
                }
                setup_sorting(&sched, latency_options, latency_usage);
-               ret = perf_sched__lat(&sched);
+               return perf_sched__lat(&sched);
        } else if (!strcmp(argv[0], "map")) {
                if (argc) {
                        argc = parse_options(argc, argv, map_options, map_usage, 0);
@@ -3642,7 +3658,7 @@ int cmd_sched(int argc, const char **argv)
                }
                sched.tp_handler = &map_ops;
                setup_sorting(&sched, latency_options, latency_usage);
-               ret = perf_sched__map(&sched);
+               return perf_sched__map(&sched);
        } else if (strlen(argv[0]) > 2 && strstarts("replay", argv[0])) {
                sched.tp_handler = &replay_ops;
                if (argc) {
@@ -3650,7 +3666,7 @@ int cmd_sched(int argc, const char **argv)
                        if (argc)
                                usage_with_options(replay_usage, replay_options);
                }
-               ret = perf_sched__replay(&sched);
+               return perf_sched__replay(&sched);
        } else if (!strcmp(argv[0], "timehist")) {
                if (argc) {
                        argc = parse_options(argc, argv, timehist_options,
@@ -3666,24 +3682,16 @@ int cmd_sched(int argc, const char **argv)
                                parse_options_usage(NULL, timehist_options, "w", true);
                        if (sched.show_next)
                                parse_options_usage(NULL, timehist_options, "n", true);
-                       ret = -EINVAL;
-                       goto out;
+                       return -EINVAL;
                }
                ret = symbol__validate_sym_arguments();
                if (ret)
-                       goto out;
+                       return ret;
 
-               ret = perf_sched__timehist(&sched);
+               return perf_sched__timehist(&sched);
        } else {
                usage_with_options(sched_usage, sched_options);
        }
 
-out:
-       free(sched.curr_pid);
-       free(sched.cpu_last_switched);
-       free(sched.curr_thread);
-       mutex_destroy(&sched.start_work_mutex);
-       mutex_destroy(&sched.work_done_wait_mutex);
-
-       return ret;
+       return 0;
 }
index b1f57401ff23940d4be38d4a02dbe08223b800fc..37088cc0ff1b5fb9f1d8b8d39b415af6562f7179 100644 (file)
@@ -34,6 +34,7 @@
 #include "util/event.h"
 #include "ui/ui.h"
 #include "print_binary.h"
+#include "print_insn.h"
 #include "archinsn.h"
 #include <linux/bitmap.h>
 #include <linux/kernel.h>
@@ -134,6 +135,7 @@ enum perf_output_field {
        PERF_OUTPUT_CGROUP          = 1ULL << 39,
        PERF_OUTPUT_RETIRE_LAT      = 1ULL << 40,
        PERF_OUTPUT_DSOFF           = 1ULL << 41,
+       PERF_OUTPUT_DISASM          = 1ULL << 42,
 };
 
 struct perf_script {
@@ -189,6 +191,7 @@ struct output_option {
        {.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
        {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
        {.str = "insn", .field = PERF_OUTPUT_INSN},
+       {.str = "disasm", .field = PERF_OUTPUT_DISASM},
        {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
        {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
        {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
@@ -1162,7 +1165,8 @@ out:
 static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
                            struct perf_insn *x, u8 *inbuf, int len,
                            int insn, FILE *fp, int *total_cycles,
-                           struct perf_event_attr *attr)
+                           struct perf_event_attr *attr,
+                           struct thread *thread)
 {
        int ilen = 0;
        int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t", ip,
@@ -1171,6 +1175,16 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
        if (PRINT_FIELD(BRSTACKINSNLEN))
                printed += fprintf(fp, "ilen: %d\t", ilen);
 
+       if (PRINT_FIELD(SRCLINE)) {
+               struct addr_location al;
+
+               addr_location__init(&al);
+               thread__find_map(thread, x->cpumode, ip, &al);
+               printed += map__fprintf_srcline(al.map, al.addr, " srcline: ", fp);
+               printed += fprintf(fp, "\t");
+               addr_location__exit(&al);
+       }
+
        printed += fprintf(fp, "#%s%s%s%s",
                              en->flags.predicted ? " PRED" : "",
                              en->flags.mispred ? " MISPRED" : "",
@@ -1182,6 +1196,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
                if (insn)
                        printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
        }
+
        return printed + fprintf(fp, "\n");
 }
 
@@ -1260,7 +1275,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
                                           x.cpumode, x.cpu, &lastsym, attr, fp);
                printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1],
                                            &x, buffer, len, 0, fp, &total_cycles,
-                                           attr);
+                                           attr, thread);
                if (PRINT_FIELD(SRCCODE))
                        printed += print_srccode(thread, x.cpumode, entries[nr - 1].from);
        }
@@ -1291,7 +1306,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
                        printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
                        if (ip == end) {
                                printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp,
-                                                           &total_cycles, attr);
+                                                           &total_cycles, attr, thread);
                                if (PRINT_FIELD(SRCCODE))
                                        printed += print_srccode(thread, x.cpumode, ip);
                                break;
@@ -1511,11 +1526,12 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
        if (PRINT_FIELD(INSNLEN))
                printed += fprintf(fp, " ilen: %d", sample->insn_len);
        if (PRINT_FIELD(INSN) && sample->insn_len) {
-               int i;
-
-               printed += fprintf(fp, " insn:");
-               for (i = 0; i < sample->insn_len; i++)
-                       printed += fprintf(fp, " %02x", (unsigned char)sample->insn[i]);
+               printed += fprintf(fp, " insn: ");
+               printed += sample__fprintf_insn_raw(sample, fp);
+       }
+       if (PRINT_FIELD(DISASM) && sample->insn_len) {
+               printed += fprintf(fp, "\t\t");
+               printed += sample__fprintf_insn_asm(sample, thread, machine, fp);
        }
        if (PRINT_FIELD(BRSTACKINSN) || PRINT_FIELD(BRSTACKINSNLEN))
                printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
@@ -3108,6 +3124,13 @@ parse:
                        rc = -EINVAL;
                        goto out;
                }
+#ifndef HAVE_LIBCAPSTONE_SUPPORT
+               if (change != REMOVE && strcmp(tok, "disasm") == 0) {
+                       fprintf(stderr, "Field \"disasm\" requires perf to be built with libcapstone support.\n");
+                       rc = -EINVAL;
+                       goto out;
+               }
+#endif
 
                if (type == -1) {
                        /* add user option to all events types for
@@ -3765,10 +3788,24 @@ static int perf_script__process_auxtrace_info(struct perf_session *session,
 #endif
 
 static int parse_insn_trace(const struct option *opt __maybe_unused,
-                           const char *str __maybe_unused,
-                           int unset __maybe_unused)
+                           const char *str, int unset __maybe_unused)
 {
-       parse_output_fields(NULL, "+insn,-event,-period", 0);
+       const char *fields = "+insn,-event,-period";
+       int ret;
+
+       if (str) {
+               if (strcmp(str, "disasm") == 0)
+                       fields = "+disasm,-event,-period";
+               else if (strlen(str) != 0 && strcmp(str, "raw") != 0) {
+                       fprintf(stderr, "Only accept raw|disasm\n");
+                       return -EINVAL;
+               }
+       }
+
+       ret = parse_output_fields(NULL, fields, 0);
+       if (ret < 0)
+               return ret;
+
        itrace_parse_synth_opts(opt, "i0ns", 0);
        symbol_conf.nanosecs = true;
        return 0;
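
The option now takes an optional mode, so plausible invocations look like the
following (disasm additionally requires a libcapstone-enabled build, per the
check added above):

        perf script --insn-trace              # default: raw instruction bytes (+insn)
        perf script --insn-trace=raw          # same as the default, spelled out
        perf script --insn-trace=disasm       # disassembled text (+disasm)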
@@ -3902,7 +3939,7 @@ int cmd_script(int argc, const char **argv)
                     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,dsoff,"
                     "addr,symoff,srcline,period,iregs,uregs,brstack,"
                     "brstacksym,flags,data_src,weight,bpf-output,brstackinsn,"
-                    "brstackinsnlen,brstackoff,callindent,insn,insnlen,synth,"
+                    "brstackinsnlen,brstackoff,callindent,insn,disasm,insnlen,synth,"
                     "phys_addr,metric,misc,srccode,ipc,tod,data_page_size,"
                     "code_page_size,ins_lat,machine_pid,vcpu,cgroup,retire_lat",
                     parse_output_fields),
@@ -3914,7 +3951,7 @@ int cmd_script(int argc, const char **argv)
                   "only consider these symbols"),
        OPT_INTEGER(0, "addr-range", &symbol_conf.addr_range,
                    "Use with -S to list traced records within address range"),
-       OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL,
+       OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, "raw|disasm",
                        "Decode instructions from itrace", parse_insn_trace),
        OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
                        "Run xed disassembler on output", parse_xed),
@@ -4366,6 +4403,9 @@ script_found:
 
        flush_scripting();
 
+       if (verbose > 2 || debug_kmaps)
+               perf_session__dump_kmaps(session);
+
 out_delete:
        if (script.ptime_range) {
                itrace_synth_opts__clear_time_range(&itrace_synth_opts);
index 5fe9abc6a52418f3b5612c8e5e38d4d052c31f98..6bba1a89d03015525b1f6232b223d1d9acb6d6ad 100644 (file)
@@ -1238,6 +1238,8 @@ static struct option stat_options[] = {
                     "aggregate counts per processor socket", AGGR_SOCKET),
        OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
                     "aggregate counts per processor die", AGGR_DIE),
+       OPT_SET_UINT(0, "per-cluster", &stat_config.aggr_mode,
+                    "aggregate counts per processor cluster", AGGR_CLUSTER),
        OPT_CALLBACK_OPTARG(0, "per-cache", &stat_config.aggr_mode, &stat_config.aggr_level,
                            "cache level", "aggregate count at this cache level (Default: LLC)",
                            parse_cache_level),
@@ -1428,6 +1430,7 @@ static struct aggr_cpu_id aggr_cpu_id__cache(struct perf_cpu cpu, void *data)
 static const char *const aggr_mode__string[] = {
        [AGGR_CORE] = "core",
        [AGGR_CACHE] = "cache",
+       [AGGR_CLUSTER] = "cluster",
        [AGGR_DIE] = "die",
        [AGGR_GLOBAL] = "global",
        [AGGR_NODE] = "node",
@@ -1455,6 +1458,12 @@ static struct aggr_cpu_id perf_stat__get_cache_id(struct perf_stat_config *confi
        return aggr_cpu_id__cache(cpu, /*data=*/NULL);
 }
 
+static struct aggr_cpu_id perf_stat__get_cluster(struct perf_stat_config *config __maybe_unused,
+                                                struct perf_cpu cpu)
+{
+       return aggr_cpu_id__cluster(cpu, /*data=*/NULL);
+}
+
 static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
                                              struct perf_cpu cpu)
 {
@@ -1507,6 +1516,12 @@ static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *con
        return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
 }
 
+static struct aggr_cpu_id perf_stat__get_cluster_cached(struct perf_stat_config *config,
+                                                       struct perf_cpu cpu)
+{
+       return perf_stat__get_aggr(config, perf_stat__get_cluster, cpu);
+}
+
 static struct aggr_cpu_id perf_stat__get_cache_id_cached(struct perf_stat_config *config,
                                                         struct perf_cpu cpu)
 {
@@ -1544,6 +1559,8 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
                return aggr_cpu_id__socket;
        case AGGR_DIE:
                return aggr_cpu_id__die;
+       case AGGR_CLUSTER:
+               return aggr_cpu_id__cluster;
        case AGGR_CACHE:
                return aggr_cpu_id__cache;
        case AGGR_CORE:
@@ -1569,6 +1586,8 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
                return perf_stat__get_socket_cached;
        case AGGR_DIE:
                return perf_stat__get_die_cached;
+       case AGGR_CLUSTER:
+               return perf_stat__get_cluster_cached;
        case AGGR_CACHE:
                return perf_stat__get_cache_id_cached;
        case AGGR_CORE:
@@ -1737,6 +1756,21 @@ static struct aggr_cpu_id perf_env__get_cache_aggr_by_cpu(struct perf_cpu cpu,
        return id;
 }
 
+static struct aggr_cpu_id perf_env__get_cluster_aggr_by_cpu(struct perf_cpu cpu,
+                                                           void *data)
+{
+       struct perf_env *env = data;
+       struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+       if (cpu.cpu != -1) {
+               id.socket = env->cpu[cpu.cpu].socket_id;
+               id.die = env->cpu[cpu.cpu].die_id;
+               id.cluster = env->cpu[cpu.cpu].cluster_id;
+       }
+
+       return id;
+}
+
 static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
 {
        struct perf_env *env = data;
@@ -1744,12 +1778,12 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, vo
 
        if (cpu.cpu != -1) {
                /*
-                * core_id is relative to socket and die,
-                * we need a global id. So we set
-                * socket, die id and core id
+                * core_id is relative to socket, die and cluster, we need a
+                * global id. So we set socket, die id, cluster id and core id.
                 */
                id.socket = env->cpu[cpu.cpu].socket_id;
                id.die = env->cpu[cpu.cpu].die_id;
+               id.cluster = env->cpu[cpu.cpu].cluster_id;
                id.core = env->cpu[cpu.cpu].core_id;
        }
 
@@ -1805,6 +1839,12 @@ static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *confi
        return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
 }
 
+static struct aggr_cpu_id perf_stat__get_cluster_file(struct perf_stat_config *config __maybe_unused,
+                                                     struct perf_cpu cpu)
+{
+       return perf_env__get_cluster_aggr_by_cpu(cpu, &perf_stat.session->header.env);
+}
+
 static struct aggr_cpu_id perf_stat__get_cache_file(struct perf_stat_config *config __maybe_unused,
                                                    struct perf_cpu cpu)
 {
@@ -1842,6 +1882,8 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
                return perf_env__get_socket_aggr_by_cpu;
        case AGGR_DIE:
                return perf_env__get_die_aggr_by_cpu;
+       case AGGR_CLUSTER:
+               return perf_env__get_cluster_aggr_by_cpu;
        case AGGR_CACHE:
                return perf_env__get_cache_aggr_by_cpu;
        case AGGR_CORE:
@@ -1867,6 +1909,8 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
                return perf_stat__get_socket_file;
        case AGGR_DIE:
                return perf_stat__get_die_file;
+       case AGGR_CLUSTER:
+               return perf_stat__get_cluster_file;
        case AGGR_CACHE:
                return perf_stat__get_cache_file;
        case AGGR_CORE:
@@ -2398,6 +2442,8 @@ static int __cmd_report(int argc, const char **argv)
                     "aggregate counts per processor socket", AGGR_SOCKET),
        OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
                     "aggregate counts per processor die", AGGR_DIE),
+       OPT_SET_UINT(0, "per-cluster", &perf_stat.aggr_mode,
+                    "aggregate counts perf processor cluster", AGGR_CLUSTER),
        OPT_CALLBACK_OPTARG(0, "per-cache", &perf_stat.aggr_mode, &perf_stat.aggr_level,
                            "cache level",
                            "aggregate count at this cache level (Default: LLC)",
index 5301d1badd435906ddf152511e6935b73236c034..5ac6dcc64cef7539be794fde9334fc7f311aee64 100644 (file)
@@ -1573,7 +1573,7 @@ int cmd_top(int argc, const char **argv)
                    "add last branch records to call history"),
        OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
                    "Show raw trace event output (do not use print fmt or plugins)"),
-       OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+       OPT_BOOLEAN('H', "hierarchy", &symbol_conf.report_hierarchy,
                    "Show entries in a hierarchy"),
        OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite,
                    "Use a backward ring buffer, default: no"),
index 109b8e64fe69ae32fee0ee7b6937d260d32b6203..90eaff8c0f6e3aacef735a1aa0ad0107504fb89e 100644 (file)
@@ -74,6 +74,7 @@
 #include <linux/err.h>
 #include <linux/filter.h>
 #include <linux/kernel.h>
+#include <linux/list_sort.h>
 #include <linux/random.h>
 #include <linux/stringify.h>
 #include <linux/time64.h>
@@ -4312,34 +4313,38 @@ static unsigned long thread__nr_events(struct thread_trace *ttrace)
        return ttrace ? ttrace->nr_events : 0;
 }
 
-DEFINE_RESORT_RB(threads,
-               (thread__nr_events(thread__priv(a->thread)) <
-                thread__nr_events(thread__priv(b->thread))),
-       struct thread *thread;
-)
+static int trace_nr_events_cmp(void *priv __maybe_unused,
+                              const struct list_head *la,
+                              const struct list_head *lb)
 {
-       entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
+       struct thread_list *a = list_entry(la, struct thread_list, list);
+       struct thread_list *b = list_entry(lb, struct thread_list, list);
+       unsigned long a_nr_events = thread__nr_events(thread__priv(a->thread));
+       unsigned long b_nr_events = thread__nr_events(thread__priv(b->thread));
+
+       if (a_nr_events != b_nr_events)
+               return a_nr_events < b_nr_events ? -1 : 1;
+
+       /* Identical number of events; place the smaller tid first. */
+       return thread__tid(a->thread) < thread__tid(b->thread)
+               ? -1
+               : (thread__tid(a->thread) > thread__tid(b->thread) ? 1 : 0);
 }
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 {
        size_t printed = trace__fprintf_threads_header(fp);
-       struct rb_node *nd;
-       int i;
-
-       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
-               DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
+       LIST_HEAD(threads);
 
-               if (threads == NULL) {
-                       fprintf(fp, "%s", "Error sorting output by nr_events!\n");
-                       return 0;
-               }
+       if (machine__thread_list(trace->host, &threads) == 0) {
+               struct thread_list *pos;
 
-               resort_rb__for_each_entry(nd, threads)
-                       printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+               list_sort(NULL, &threads, trace_nr_events_cmp);
 
-               resort_rb__delete(threads);
+               list_for_each_entry(pos, &threads, list)
+                       printed += trace__fprintf_thread(fp, pos->thread, trace);
        }
+       thread_list__delete(&threads);
        return printed;
 }
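
trace_nr_events_cmp() sorts ascending by event count with tid as the tie-break,
so the busiest threads land at the bottom of the summary. For example, threads
with (tid, nr_events) of (42, 10), (9, 3) and (7, 3) print in the order:

        7 (3 events), 9 (3 events), 42 (10 events)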
 
index ac20c2b9bbc21b681aedbf5583b890952051d184..398aa53e9e2ef0e435093767ddbc93a60ed946d9 100644 (file)
@@ -73,6 +73,7 @@ static void library_status(void)
        STATUS(HAVE_LIBCRYPTO_SUPPORT, libcrypto);
        STATUS(HAVE_LIBUNWIND_SUPPORT, libunwind);
        STATUS(HAVE_DWARF_SUPPORT, libdw-dwarf-unwind);
+       STATUS(HAVE_LIBCAPSTONE_SUPPORT, libcapstone);
        STATUS(HAVE_ZLIB_SUPPORT, zlib);
        STATUS(HAVE_LZMA_SUPPORT, lzma);
        STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid);
@@ -82,6 +83,8 @@ static void library_status(void)
        STATUS(HAVE_LIBPFM, libpfm4);
        STATUS(HAVE_LIBTRACEEVENT, libtraceevent);
        STATUS(HAVE_BPF_SKEL, bpf_skeletons);
+       STATUS(HAVE_DWARF_UNWIND_SUPPORT, dwarf-unwind-support);
+       STATUS(HAVE_CSTRACE_SUPPORT, libopencsd);
 }
 
 int cmd_version(int argc, const char **argv)
index 599a588dbeb40070fec7e3dd2c721edcb73dbb09..4d5e9138d4cc27144e5316feb5f332ff4a3ddb5c 100644 (file)
@@ -15,3 +15,4 @@
 0x0066[[:xdigit:]]{4},1,power8,core
 0x004e[[:xdigit:]]{4},1,power9,core
 0x0080[[:xdigit:]]{4},1,power10,core
+0x0082[[:xdigit:]]{4},1,power10,core
index c2b10ec1c6e01e0a7aa568b8122ddb729c7f7026..02cce3a629cb9fe63bc0468b2afebc77eb1b1e45 100644 (file)
                "Unit": "CPU-M-CF",
                "EventCode": "145",
                "EventName": "DCW_REQ",
-               "BriefDescription": "Directory Write Level 1 Data Cache from Cache",
+               "BriefDescription": "Directory Write Level 1 Data Cache from L2-Cache",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the requestors Level-2 cache."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "146",
                "EventName": "DCW_REQ_IV",
-               "BriefDescription": "Directory Write Level 1 Data Cache from Cache with Intervention",
+               "BriefDescription": "Directory Write Level 1 Data Cache from L2-Cache with Intervention",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the requestors Level-2 cache with intervention."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "147",
                "EventName": "DCW_REQ_CHIP_HIT",
-               "BriefDescription": "Directory Write Level 1 Data Cache from Cache with Chip HP Hit",
+               "BriefDescription": "Directory Write Level 1 Data Cache from L2-Cache with Chip HP Hit",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the requestors Level-2 cache after using chip level horizontal persistence, Chip-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "148",
                "EventName": "DCW_REQ_DRAWER_HIT",
-               "BriefDescription": "Directory Write Level 1 Data Cache from Cache with Drawer HP Hit",
+               "BriefDescription": "Directory Write Level 1 Data Cache from L2-Cache with Drawer HP Hit",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the requestors Level-2 cache after using drawer level horizontal persistence, Drawer-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "149",
                "EventName": "DCW_ON_CHIP",
-               "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip Cache",
+               "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip L2-Cache",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-2 cache."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "150",
                "EventName": "DCW_ON_CHIP_IV",
-               "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip Cache with Intervention",
+               "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip L2-Cache with Intervention",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-2 cache with intervention."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "151",
                "EventName": "DCW_ON_CHIP_CHIP_HIT",
-               "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip Cache with Chip HP Hit",
+               "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip L2-Cache with Chip HP Hit",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-2 cache after using chip level horizontal persistence, Chip-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "152",
                "EventName": "DCW_ON_CHIP_DRAWER_HIT",
-               "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip Cache with Drawer HP Hit",
+               "BriefDescription": "Directory Write Level 1 Data Cache from On-Chip L2-Cache with Drawer HP Hit",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-2 cache using drawer level horizontal persistence, Drawer-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "153",
                "EventName": "DCW_ON_MODULE",
-               "BriefDescription": "Directory Write Level 1 Data Cache from On-Module Cache",
+               "BriefDescription": "Directory Write Level 1 Data Cache from On-Module L2-Cache",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Module Level-2 cache."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "154",
                "EventName": "DCW_ON_DRAWER",
-               "BriefDescription": "Directory Write Level 1 Data Cache from On-Drawer Cache",
+               "BriefDescription": "Directory Write Level 1 Data Cache from On-Drawer L2-Cache",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-2 cache."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "155",
                "EventName": "DCW_OFF_DRAWER",
-               "BriefDescription": "Directory Write Level 1 Data Cache from Off-Drawer Cache",
+               "BriefDescription": "Directory Write Level 1 Data Cache from Off-Drawer L2-Cache",
                "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-2 cache."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "160",
                "EventName": "IDCW_ON_MODULE_IV",
-               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Module Memory Cache with Intervention",
+               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Module Memory L2-Cache with Intervention",
                "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from an On-Module Level-2 cache with intervention."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "161",
                "EventName": "IDCW_ON_MODULE_CHIP_HIT",
-               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Module Memory Cache with Chip Hit",
+               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Module Memory L2-Cache with Chip Hit",
                "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from an On-Module Level-2 cache using chip horizontal persistence, Chip-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "162",
                "EventName": "IDCW_ON_MODULE_DRAWER_HIT",
-               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Module Memory Cache with Drawer Hit",
+               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Module Memory L2-Cache with Drawer Hit",
                "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from an On-Module Level-2 cache using drawer level horizontal persistence, Drawer-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "163",
                "EventName": "IDCW_ON_DRAWER_IV",
-               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Drawer Cache with Intervention",
+               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Drawer L2-Cache with Intervention",
                "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-2 cache with intervention."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "164",
                "EventName": "IDCW_ON_DRAWER_CHIP_HIT",
-               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Drawer Cache with Chip Hit",
+               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Drawer L2-Cache with Chip Hit",
                "PublicDescription": "A directory write to the Level-1 Data or Level-1 instruction cache directory where the returned cache line was sourced from an On-Drawer Level-2 cache using chip level horizontal persistence, Chip-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "165",
                "EventName": "IDCW_ON_DRAWER_DRAWER_HIT",
-               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Drawer Cache with Drawer Hit",
+               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from On-Drawer L2-Cache with Drawer Hit",
                "PublicDescription": "A directory write to the Level-1 Data or Level-1 instruction cache directory where the returned cache line was sourced from an On-Drawer Level-2 cache using drawer level horizontal persistence, Drawer-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "166",
                "EventName": "IDCW_OFF_DRAWER_IV",
-               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from Off-Drawer Cache with Intervention",
+               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from Off-Drawer L2-Cache with Intervention",
                "PublicDescription": "A directory write to the Level-1 Data or Level-1 instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-2 cache with intervention."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "167",
                "EventName": "IDCW_OFF_DRAWER_CHIP_HIT",
-               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from Off-Drawer Cache with Chip Hit",
+               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from Off-Drawer L2-Cache with Chip Hit",
                "PublicDescription": "A directory write to the Level-1 Data or Level-1 instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-2 cache using chip level horizontal persistence, Chip-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "168",
                "EventName": "IDCW_OFF_DRAWER_DRAWER_HIT",
-               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from Off-Drawer Cache with Drawer Hit",
+               "BriefDescription": "Directory Write Level 1 Instruction and Data Cache from Off-Drawer L2-Cache with Drawer Hit",
                "PublicDescription": "A directory write to the Level-1 Data or Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-2 cache using drawer level horizontal persistence, Drawer-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "169",
                "EventName": "ICW_REQ",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from Cache",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from L2-Cache",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced the requestors Level-2 cache."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "170",
                "EventName": "ICW_REQ_IV",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from Cache with Intervention",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from L2-Cache with Intervention",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the requestors Level-2 cache with intervention."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "171",
                "EventName": "ICW_REQ_CHIP_HIT",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from Cache with Chip HP Hit",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from L2-Cache with Chip HP Hit",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the requestors Level-2 cache using chip level horizontal persistence, Chip-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "172",
                "EventName": "ICW_REQ_DRAWER_HIT",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from Cache with Drawer HP Hit",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from L2-Cache with Drawer HP Hit",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the requestors Level-2 cache using drawer level horizontal persistence, Drawer-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "173",
                "EventName": "ICW_ON_CHIP",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip Cache",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip L2-Cache",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-2 cache."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "174",
                "EventName": "ICW_ON_CHIP_IV",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip Cache with Intervention",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip L2-Cache with Intervention",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced an On-Chip Level-2 cache with intervention."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "175",
                "EventName": "ICW_ON_CHIP_CHIP_HIT",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip Cache with Chip HP Hit",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip L2-Cache with Chip HP Hit",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-2 cache using chip level horizontal persistence, Chip-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "176",
                "EventName": "ICW_ON_CHIP_DRAWER_HIT",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip Cache with Drawer HP Hit",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Chip L2-Cache with Drawer HP Hit",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip level 2 cache using drawer level horizontal persistence, Drawer-HP hit."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "177",
                "EventName": "ICW_ON_MODULE",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Module Cache",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Module L2-Cache",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Module Level-2 cache."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "178",
                "EventName": "ICW_ON_DRAWER",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Drawer Cache",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from On-Drawer L2-Cache",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced an On-Drawer Level-2 cache."
        },
        {
                "Unit": "CPU-M-CF",
                "EventCode": "179",
                "EventName": "ICW_OFF_DRAWER",
-               "BriefDescription": "Directory Write Level 1 Instruction Cache from Off-Drawer Cache",
+               "BriefDescription": "Directory Write Level 1 Instruction Cache from Off-Drawer L2-Cache",
                "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced an Off-Drawer Level-2 cache."
        },
        {
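
Each entry in these s390 CPU-Measurement Facility files is a flat JSON object with the same four fields: Unit, EventCode, EventName, and a pair of descriptions. A minimal sketch of how such a file can be inspected offline (the filename below is hypothetical; the real files live under tools/perf/pmu-events/arch/s390/ in the kernel tree):

    import json

    # Hypothetical local copy of one of the s390 event files above.
    with open("cf_z16_extended.json") as f:
        events = json.load(f)

    # List every directory-write event whose description was just
    # reworded to name the Level-2 cache explicitly.
    for ev in events:
        if ev.get("Unit") == "CPU-M-CF" and "L2-Cache" in ev.get("BriefDescription", ""):
            print(ev["EventCode"], ev["EventName"], "-", ev["BriefDescription"])
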
index bbfa3883e53384f563427e1b7567e679e1d1465f..b72c0e2cb94621273efb48a83fbbc697ac226955 100644
         "MetricExpr": "(cpu_core@UOPS_DISPATCHED.PORT_0@ + cpu_core@UOPS_DISPATCHED.PORT_1@ + cpu_core@UOPS_DISPATCHED.PORT_5_11@ + cpu_core@UOPS_DISPATCHED.PORT_6@) / (5 * tma_info_core_core_clks)",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * cpu_core@ASSISTS.ANY\\,umask\\=0x1B@ / tma_info_thread_slots",
+        "MetricExpr": "78 * cpu_core@ASSISTS.ANY@ / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due staying in C0.1 power-performance optimized state (Faster wakeup time; Smaller power savings).",
+        "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.C01@ / tma_info_thread_clks",
+        "MetricGroup": "C0Wait;TopdownL4;tma_L4_group;tma_serializing_operation_group",
+        "MetricName": "tma_c01_wait",
+        "MetricThreshold": "tma_c01_wait > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "ScaleUnit": "100%",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due staying in C0.2 power-performance optimized state (Slower wakeup time; Larger power savings).",
+        "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.C02@ / tma_info_thread_clks",
+        "MetricGroup": "C0Wait;TopdownL4;tma_L4_group;tma_serializing_operation_group",
+        "MetricName": "tma_c02_wait",
+        "MetricThreshold": "tma_c02_wait > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "ScaleUnit": "100%",
+        "Unit": "cpu_core"
+    },
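
Both new C0-wait metrics are plain ratios: cycles spent in the optimized-wait state divided by total thread clocks. A sketch of the arithmetic with illustrative raw counts (the variable names are ours, not a perf API):

    # Illustrative counts for one measurement interval.
    c01_cycles = 2_500_000        # CPU_CLK_UNHALTED.C01
    thread_clks = 80_000_000      # tma_info_thread_clks

    tma_c01_wait = c01_cycles / thread_clks
    print(f"tma_c01_wait = {tma_c01_wait:.1%}")  # 3.1%; flagged when > 5%
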
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
         "MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
-        "MetricExpr": "(25 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
+        "MetricExpr": "(25 * tma_info_system_core_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_core_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_contested_accesses",
         "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
-        "MetricExpr": "24 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
+        "MetricExpr": "24 * tma_info_system_core_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_data_sharing",
         "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricExpr": "(cpu_core@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu_core@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
         "MetricName": "tma_decoder0_alone",
-        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35))",
+        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
         "MetricExpr": "(cpu_core@IDQ.DSB_CYCLES_ANY@ - cpu_core@IDQ.DSB_CYCLES_OK@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
         "MetricName": "tma_dtlb_load",
         "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
         "MetricName": "tma_dtlb_store",
         "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
-        "MetricExpr": "28 * tma_info_system_average_frequency * cpu_core@OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM@ / tma_info_thread_clks",
+        "MetricExpr": "28 * tma_info_system_core_frequency * cpu_core@OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM@ / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
         "MetricName": "tma_false_sharing",
         "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
         "MetricName": "tma_fb_full",
         "MetricThreshold": "tma_fb_full > 0.3",
-        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
         "MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions",
         "MetricExpr": "tma_light_operations * cpu_core@INST_RETIRED.MACRO_FUSED@ / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fused_instructions",
         "MetricThreshold": "tma_fused_instructions > 0.1 & tma_light_operations > 0.6",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. The instruction pairs of CMP+JCC or DEC+JCC are commonly used examples.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. CMP+JCC or DEC+JCC are common examples of legacy fusions. {([MTL] Note new MOV+OP and Load+OP fusions appear under Other_Light_Ops in MTL!)}",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. Sample with: UOPS_RETIRED.HEAVY",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .). Sample with: UOPS_RETIRED.HEAVY",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
         "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
     },
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "tma_info_bottleneck_mispredictions * tma_info_thread_slots / cpu_core@BR_MISP_RETIRED.ALL_BRANCHES@ / 100",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
         "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers",
     },
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "cpu_core@BR_MISP_RETIRED.INDIRECT_CALL\\,umask\\=0x80@ / BR_MISP_RETIRED.INDIRECT",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.INDIRECT",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3",
         "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Speculative to Retired ratio of all clears (covering mispredicts and nukes)",
+        "MetricExpr": "cpu_core@INT_MISC.CLEARS_COUNT@ / (cpu_core@BR_MISP_RETIRED.ALL_BRANCHES@ + cpu_core@MACHINE_CLEARS.COUNT@)",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "tma_info_bad_spec_spec_clears_ratio",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
         "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
         "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: ",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Total pipeline cost of \"useful operations\" - the baseline operations not covered by Branching_Overhead nor Irregular_Overhead.",
+        "MetricExpr": "100 * (tma_retiring - (cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ + cpu_core@BR_INST_RETIRED.NEAR_CALL@) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Ret",
+        "MetricName": "tma_info_bottleneck_base_non_br",
+        "MetricThreshold": "tma_info_bottleneck_base_non_br > 20",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
         "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
-        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+        "MetricGroup": "BigFootprint;Fed;Frontend;IcMiss;MemoryTLB",
         "MetricName": "tma_info_bottleneck_big_code",
         "MetricThreshold": "tma_info_bottleneck_big_code > 20",
-        "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
-        "MetricExpr": "100 * ((cpu_core@BR_INST_RETIRED.COND@ + 3 * cpu_core@BR_INST_RETIRED.NEAR_CALL@ + (cpu_core@BR_INST_RETIRED.NEAR_TAKEN@ - cpu_core@BR_INST_RETIRED.COND_TAKEN@ - 2 * cpu_core@BR_INST_RETIRED.NEAR_CALL@)) / tma_info_thread_slots)",
-        "MetricGroup": "Ret;tma_issueBC",
+        "MetricExpr": "100 * ((cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ + cpu_core@BR_INST_RETIRED.NEAR_CALL@) / tma_info_thread_slots)",
+        "MetricGroup": "Ret",
         "MetricName": "tma_info_bottleneck_branching_overhead",
-        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
-        "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code",
+        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 5",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+        "MetricName": "tma_info_bottleneck_cache_memory_bandwidth",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_bandwidth > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+        "MetricName": "tma_info_bottleneck_cache_memory_latency",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_latency > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+        "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+        "MetricGroup": "Cor;tma_issueComp",
+        "MetricName": "tma_info_bottleneck_compute_bound_est",
+        "MetricThreshold": "tma_info_bottleneck_compute_bound_est > 20",
+        "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
-        "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+        "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))) - tma_info_bottleneck_big_code",
         "MetricGroup": "Fed;FetchBW;Frontend",
         "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
         "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
-        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
-        "MetricName": "tma_info_bottleneck_memory_bandwidth",
-        "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
-        "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
+        "BriefDescription": "Total pipeline cost of irregular execution (e.g",
+        "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY\\,umask\\=1@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Bad;Cor;Ret;tma_issueMS",
+        "MetricName": "tma_info_bottleneck_irregular_overhead",
+        "MetricThreshold": "tma_info_bottleneck_irregular_overhead > 10",
+        "PublicDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments). Related metrics: tma_microcode_sequencer, tma_ms_switches",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
-        "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
         "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
         "MetricName": "tma_info_bottleneck_memory_data_tlbs",
         "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
-        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store",
+        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_synchronization",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
-        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
-        "MetricName": "tma_info_bottleneck_memory_latency",
-        "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
-        "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency",
+        "BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+        "MetricGroup": "Mem;Offcore;tma_issueTLB",
+        "MetricName": "tma_info_bottleneck_memory_synchronization",
+        "MetricThreshold": "tma_info_bottleneck_memory_synchronization > 10",
+        "PublicDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
-        "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+        "MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bottleneck_mispredictions",
         "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
         "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class)",
+        "MetricExpr": "100 - (tma_info_bottleneck_big_code + tma_info_bottleneck_instruction_fetch_bw + tma_info_bottleneck_mispredictions + tma_info_bottleneck_cache_memory_bandwidth + tma_info_bottleneck_cache_memory_latency + tma_info_bottleneck_memory_data_tlbs + tma_info_bottleneck_memory_synchronization + tma_info_bottleneck_compute_bound_est + tma_info_bottleneck_irregular_overhead + tma_info_bottleneck_branching_overhead + tma_info_bottleneck_base_non_br)",
+        "MetricGroup": "Cor;Offcore",
+        "MetricName": "tma_info_bottleneck_other_bottlenecks",
+        "MetricThreshold": "tma_info_bottleneck_other_bottlenecks > 20",
+        "PublicDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class). Examples include data-dependencies (Core Bound when Low ILP) and other unlisted memory-related stalls.",
+        "Unit": "cpu_core"
+    },
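
Other_Bottlenecks is pure bookkeeping: whatever remains of 100% after the eleven named bottleneck classes are subtracted. A sketch with made-up per-class costs:

    # Illustrative costs in percent; in practice each value comes from
    # the corresponding tma_info_bottleneck_* metric defined above.
    named_costs = [4.0, 9.0, 12.0, 15.0, 10.0, 3.0, 2.0, 8.0, 4.0, 3.0, 25.0]

    other = 100 - sum(named_costs)
    print(f"tma_info_bottleneck_other_bottlenecks = {other:.1f}%")  # 5.0%
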
     {
         "BriefDescription": "Fraction of branches that are CALL or RET",
         "MetricExpr": "(cpu_core@BR_INST_RETIRED.NEAR_CALL@ + cpu_core@BR_INST_RETIRED.NEAR_RETURN@) / BR_INST_RETIRED.ALL_BRANCHES",
     },
     {
         "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
-        "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.DISTRIBUTED@",
+        "MetricExpr": "(cpu_core@CPU_CLK_UNHALTED.DISTRIBUTED@ if #SMT_on else tma_info_thread_clks)",
         "MetricGroup": "SMT",
         "MetricName": "tma_info_core_core_clks",
         "Unit": "cpu_core"
         "MetricName": "tma_info_core_coreipc",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "uops Executed per Cycle",
+        "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / tma_info_thread_clks",
+        "MetricGroup": "Power",
+        "MetricName": "tma_info_core_epc",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_core_core_clks",
+        "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc",
         "Unit": "cpu_core"
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp",
         "Unit": "cpu_core"
         "MetricName": "tma_info_frontend_lsd_coverage",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection",
+        "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=1\\,edge@",
+        "MetricGroup": "Fed",
+        "MetricName": "tma_info_frontend_unknown_branch_cost",
+        "PublicDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection. See Unknown_Branches node.",
+        "Unit": "cpu_core"
+    },
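
The Unknown_Branch cost metric divides the total stall cycles by the number of stall episodes; the cmask=1,edge modifiers turn the same event into an episode counter (it increments once per 0-to-1 transition). A sketch with illustrative counts:

    ub_cycles = 600_000    # INT_MISC.UNKNOWN_BRANCH_CYCLES
    ub_episodes = 50_000   # same event with cmask=1,edge: one count per stall burst

    avg_cost = ub_cycles / ub_episodes
    print(f"~{avg_cost:.0f} cycles per unknown-branch detection")  # ~12
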
     {
         "BriefDescription": "Branch instructions per taken branch.",
         "MetricExpr": "cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.NEAR_TAKEN",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW.",
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW.",
         "Unit": "cpu_core"
     },
     {
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting.",
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting.",
         "Unit": "cpu_core"
     },
     {
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting.",
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting.",
         "Unit": "cpu_core"
     },
     {
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting.",
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting.",
         "Unit": "cpu_core"
     },
     {
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting.",
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting.",
         "Unit": "cpu_core"
     },
     {
     },
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10",
         "MetricThreshold": "tma_info_inst_mix_ipload < 3",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Instructions per PAUSE (lower number means higher occurrence rate)",
+        "MetricExpr": "tma_info_inst_mix_instructions / CPU_CLK_UNHALTED.PAUSE_INST",
+        "MetricGroup": "Flops;FpVector;InsType",
+        "MetricName": "tma_info_inst_mix_ippause",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
         "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_STORES",
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * cpu_core@L1D.REPLACEMENT@ / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw",
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * cpu_core@L2_LINES_IN.ALL@ / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw",
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * cpu_core@OFFCORE_REQUESTS.ALL_REQUESTS@ / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_access_bw",
         "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_core_l3_cache_access_bw",
+        "MetricName": "tma_info_memory_core_l3_cache_access_bw_2t",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * cpu_core@LONGEST_LAT_CACHE.MISS@ / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw",
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
         "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_fb_hpki",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * cpu_core@L1D.REPLACEMENT@ / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
         "MetricExpr": "1e3 * cpu_core@L2_RQSTS.ALL_DEMAND_DATA_RD@ / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki_load",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * cpu_core@L2_LINES_IN.ALL@ / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw",
+        "Unit": "cpu_core"
+    },
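
All of the *_cache_fill_bw helpers follow one pattern: a line-fill count times 64 bytes per cache line, divided by wall-clock time, scaled to GB/s. A sketch with illustrative numbers:

    l2_lines_in = 1_200_000_000   # L2_LINES_IN.ALL over the interval
    duration_time = 2.0           # seconds of wall-clock time

    # 64 bytes per filled cache line; / 1e9 converts bytes to gigabytes.
    l2_fill_bw = 64 * l2_lines_in / 1e9 / duration_time
    print(f"L2 fill bandwidth ~ {l2_fill_bw:.1f} GB/s")  # ~38.4
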
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (cpu_core@L2_RQSTS.REFERENCES@ - cpu_core@L2_RQSTS.MISS@) / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_all",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * cpu_core@L2_RQSTS.DEMAND_DATA_RD_HIT@ / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L2_MISS@ / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * cpu_core@L2_RQSTS.MISS@ / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * cpu_core@L2_RQSTS.DEMAND_DATA_RD_MISS@ / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
-        "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L3_MISS@ / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
-        "MetricName": "tma_info_memory_l3mpki",
+        "BriefDescription": "",
+        "MetricExpr": "64 * cpu_core@OFFCORE_REQUESTS.ALL_REQUESTS@ / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "tma_info_memory_l3_cache_access_bw",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / MEM_LOAD_COMPLETED.L1_MISS_ANY",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency",
+        "BriefDescription": "",
+        "MetricExpr": "64 * cpu_core@LONGEST_LAT_CACHE.MISS@ / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L3_MISS@ / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_l3mpki",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
-        "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD@ / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DATA_RD@ / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp",
+        "MetricName": "tma_info_memory_latency_data_l2_mlp",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency",
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp",
+        "MetricName": "tma_info_memory_latency_load_l2_mlp",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Average Latency for L3 cache miss demand Loads",
         "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l3_miss_latency",
+        "MetricName": "tma_info_memory_latency_load_l3_miss_latency",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t",
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / MEM_LOAD_COMPLETED.L1_MISS_ANY",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t",
+        "BriefDescription": "\"Bus lock\" per kilo instruction",
+        "MetricExpr": "1e3 * cpu_core@SQ_MISC.BUS_LOCK@ / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_bus_lock_pki",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t",
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "1e3 * cpu_core@MEM_LOAD_MISC_RETIRED.UC@ / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_uc_load_pki",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t",
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
         "Unit": "cpu_core"
     },
     {
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "",
+        "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Instructions per a microcode Assist invocation",
-        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@",
-        "MetricGroup": "Pipeline;Ret;Retire",
+        "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / ASSISTS.ANY",
+        "MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
         "MetricName": "tma_info_pipeline_ipassist",
         "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
         "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)",
     {
         "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
         "MetricExpr": "cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
-        "MetricGroup": "Pipeline;Ret",
+        "MetricGroup": "MicroSeq;Pipeline;Ret",
         "MetricName": "tma_info_pipeline_strings_cycles",
         "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Fraction of cycles the processor is waiting yet unhalted; covering legacy PAUSE instruction, as well as C0.1 / C0.2 power-performance optimized states",
+        "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.C0_WAIT@ / tma_info_thread_clks",
+        "MetricGroup": "C0Wait",
+        "MetricName": "tma_info_system_c0_wait",
+        "MetricThreshold": "tma_info_system_c0_wait > 0.05",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency",
+        "MetricName": "tma_info_system_core_frequency",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
-        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full",
+        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_mem_bandwidth, tma_sq_full",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / 1e9 / duration_time",
+        "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine.",
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width",
         "Unit": "cpu_core"
     },
     {
         "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)",
         "Unit": "cpu_core"
     },
-    {
-        "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_request_latency",
-        "Unit": "cpu_core"
-    },
     {
         "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
         "MetricExpr": "(1 - cpu_core@CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE@ / cpu_core@CPU_CLK_UNHALTED.REF_DISTRIBUTED@ if #SMT_on else 0)",
     },
     {
         "BriefDescription": "This metric represents overall Integer (Int) select operations fraction the CPU has executed (retired)",
-        "MetricExpr": "tma_int_vector_128b + tma_int_vector_256b + tma_shuffles",
+        "MetricExpr": "tma_int_vector_128b + tma_int_vector_256b",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_int_operations",
         "MetricThreshold": "tma_int_operations > 0.1 & tma_light_operations > 0.6",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired",
+        "BriefDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD/MUL or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired",
         "MetricExpr": "(cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots)",
         "MetricGroup": "Compute;IntVector;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group;tma_issue2P",
         "MetricName": "tma_int_vector_256b",
         "MetricThreshold": "tma_int_vector_256b > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD/MUL or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "cpu_core@ICACHE_TAG.STALLS@ / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
-        "MetricExpr": "9 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "MetricExpr": "9 * tma_info_system_core_frequency * (cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2)) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_cache_memory_latency, tma_mem_latency",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
         "MetricExpr": "(cpu_core@LSD.CYCLES_ACTIVE@ - cpu_core@LSD.CYCLES_OK@) / tma_info_core_core_clks / 2",
         "MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_lsd",
-        "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35)",
+        "MetricThreshold": "tma_lsd > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit.  LSD typically does well sustaining Uop supply. However; in some rare cases; optimal uop-delivery could not be reached for small loops whose size (in terms of number of uops) does not suit well the LSD structure.",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD@) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_cache_memory_latency, tma_l3_hit_latency",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
         "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
-        "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
+        "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
         "MetricName": "tma_memory_fence",
-        "MetricThreshold": "tma_memory_fence > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
+        "MetricThreshold": "tma_memory_fence > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
         "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
         "MetricName": "tma_microcode_sequencer",
         "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
-        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: UOPS_RETIRED.MS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: UOPS_RETIRED.MS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_ms_switches",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
         "MetricExpr": "(cpu_core@IDQ.MITE_CYCLES_ANY@ - cpu_core@IDQ.MITE_CYCLES_OK@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
+        "BriefDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles)",
         "MetricExpr": "160 * cpu_core@ASSISTS.SSE_AVX_MIX@ / tma_info_thread_clks",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
         "MetricName": "tma_mixing_vectors",
         "MetricThreshold": "tma_mixing_vectors > 0.05",
-        "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+        "PublicDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles). Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
-        "MetricExpr": "3 * cpu_core@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (tma_retiring * tma_info_thread_slots / cpu_core@UOPS_ISSUED.ANY@) / tma_info_thread_clks",
+        "MetricExpr": "3 * cpu_core@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (cpu_core@UOPS_RETIRED.SLOTS@ / cpu_core@UOPS_ISSUED.ANY@) / tma_info_thread_clks",
         "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
         "MetricName": "tma_ms_switches",
         "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
-        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: FRONTEND_RETIRED.MS_FLOWS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: FRONTEND_RETIRED.MS_FLOWS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused",
         "MetricExpr": "tma_light_operations * (cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ - cpu_core@INST_RETIRED.MACRO_FUSED@) / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_non_fused_branches",
         "MetricThreshold": "tma_non_fused_branches > 0.1 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused. Non-conditional branches like direct JMP or CALL would count here. Can be used to examine fusible conditional jumps that were not fused.",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
         "MetricExpr": "tma_light_operations * cpu_core@INST_RETIRED.NOP@ / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
         "MetricName": "tma_nop_instructions",
-        "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
+        "MetricThreshold": "tma_nop_instructions > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
-        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
+        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_other_light_ops",
         "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+        "MetricExpr": "max(tma_branch_mispredicts * (1 - cpu_core@BR_MISP_RETIRED.ALL_BRANCHES@ / (cpu_core@INT_MISC.CLEARS_COUNT@ - cpu_core@MACHINE_CLEARS.COUNT@)), 0.0001)",
+        "MetricGroup": "BrMispredicts;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
+        "MetricName": "tma_other_mispredicts",
+        "MetricThreshold": "tma_other_mispredicts > 0.05 & (tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+        "MetricExpr": "max(tma_machine_clears * (1 - cpu_core@MACHINE_CLEARS.MEMORY_ORDERING@ / cpu_core@MACHINE_CLEARS.COUNT@), 0.0001)",
+        "MetricGroup": "Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
+        "MetricName": "tma_other_nukes",
+        "MetricThreshold": "tma_other_nukes > 0.05 & (tma_machine_clears > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Page Faults",
         "MetricExpr": "99 * cpu_core@ASSISTS.PAGE_FAULT@ / tma_info_thread_slots",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_6@ / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
-        "MetricExpr": "((cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@) + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_thread_clks)",
+        "MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_thread_clks)",
         "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
         "MetricName": "tma_ports_utilization",
         "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@) / tma_info_thread_clks",
+        "MetricExpr": "(cpu_core@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + cpu_core@RS.EMPTY\\,umask\\=1@) / tma_info_thread_clks * (cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@) / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_0",
         "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Sample with: UOPS_EXECUTED.CYCLES_GE_3",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
-        "MetricExpr": "cpu_core@RESOURCE_STALLS.SCOREBOARD@ / tma_info_thread_clks",
-        "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
+        "MetricExpr": "cpu_core@RESOURCE_STALLS.SCOREBOARD@ / tma_info_thread_clks + tma_c02_wait",
+        "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
         "MetricName": "tma_serializing_operation",
-        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.",
-        "MetricExpr": "cpu_core@INT_VEC_RETIRED.SHUFFLES@ / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group",
-        "MetricName": "tma_shuffles",
-        "MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
+        "BriefDescription": "This metric represents fraction of slots where the CPU was retiring Shuffle operations of 256-bit vector size (FP or Integer)",
+        "MetricExpr": "tma_light_operations * cpu_core@INT_VEC_RETIRED.SHUFFLES@ / (tma_retiring * tma_info_thread_slots)",
+        "MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
+        "MetricName": "tma_shuffles_256b",
+        "MetricThreshold": "tma_shuffles_256b > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring Shuffle operations of 256-bit vector size (FP or Integer). Shuffles may incur slow cross \"vector lane\" data transfers.",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
         "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
-        "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
+        "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
         "MetricName": "tma_slow_pause",
-        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
+        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions. Sample with: CPU_CLK_UNHALTED.PAUSE_INST",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
         "MetricName": "tma_sq_full",
         "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: FRONTEND_RETIRED.UNKNOWN_BRANCH",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: FRONTEND_RETIRED.UNKNOWN_BRANCH",
         "ScaleUnit": "100%",
         "Unit": "cpu_core"
     },
index c8ba96c4a7f827b2f80498cbfaf4fe731bd6ac8b..cd291943dc081caf950dcbe8d4c86f3cc80c48e0 100644 (file)
@@ -26,7 +26,7 @@
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_0",
         "SampleAfterValue": "2000003",
@@ -34,7 +34,7 @@
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_1",
         "SampleAfterValue": "2000003",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_5",
         "SampleAfterValue": "2000003",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V0",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V2",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "EventCode": "0xc7",
index 516eb0f93f020f8bf466fd8a68574ec1dd59357e..7a03835f262c193fdd67c35d3a3d50701e9e9f26 100644 (file)
@@ -2,10 +2,11 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "C0Wait": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -26,7 +27,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -69,6 +72,7 @@
     "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
     "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
     "tma_base_group": "Metrics contributing to tma_base category",
+    "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
     "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
     "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
     "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
@@ -82,9 +86,9 @@
     "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
     "tma_int_operations_group": "Metrics contributing to tma_int_operations category",
     "tma_issue2P": "Metrics related by the issue $issue2P",
-    "tma_issueBC": "Metrics related by the issue $issueBC",
     "tma_issueBM": "Metrics related by the issue $issueBM",
     "tma_issueBW": "Metrics related by the issue $issueBW",
+    "tma_issueComp": "Metrics related by the issue $issueComp",
     "tma_issueD0": "Metrics related by the issue $issueD0",
     "tma_issueFB": "Metrics related by the issue $issueFB",
     "tma_issueFL": "Metrics related by the issue $issueFL",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
     "tma_mite_group": "Metrics contributing to tma_mite category",
     "tma_nuke_group": "Metrics contributing to tma_nuke category",
+    "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
     "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
     "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
     "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
index 1db73e02021516f8a5e526f356a95c181325ece9..5250a17d9caedea9d43743bbe21717efde5a0181 100644 (file)
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS]",
+        "Deprecated": "1",
+        "EventCode": "0xe4",
+        "EventName": "LBR_INSERTS.ANY",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.",
         "EventCode": "0xB7",
index f9876bef16da19b58e112c0bc6a24f569f286f3f..df6032e816d462961194ad36ee1ac5592da66ac7 100644 (file)
         "BriefDescription": "INST_RETIRED.MACRO_FUSED",
         "EventCode": "0xc0",
         "EventName": "INST_RETIRED.MACRO_FUSED",
+        "PEBS": "1",
         "SampleAfterValue": "2000003",
         "UMask": "0x10",
         "Unit": "cpu_core"
         "BriefDescription": "Retired NOP instructions.",
         "EventCode": "0xc0",
         "EventName": "INST_RETIRED.NOP",
+        "PEBS": "1",
         "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions",
         "SampleAfterValue": "2000003",
         "UMask": "0x2",
         "BriefDescription": "Iterations of Repeat string retired instructions.",
         "EventCode": "0xc0",
         "EventName": "INST_RETIRED.REP_ITERATION",
+        "PEBS": "1",
         "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
         "SampleAfterValue": "2000003",
         "UMask": "0x8",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. [This event is alias to LBR_INSERTS.ANY]",
+        "EventCode": "0xe4",
+        "EventName": "MISC_RETIRED.LBR_INSERTS",
+        "PEBS": "1",
+        "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. This event is PDIR on GP0 and NPEBS on all other GPs [This event is alias to LBR_INSERTS.ANY]",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Increments whenever there is an update to the LBR array.",
         "EventCode": "0xcc",
index 6336de61f628fb07e20d77170df2d7f37a9a8a5f..ccc892149dbe727eff0ff233d3e673252eab50d3 100644 (file)
@@ -1,4 +1,13 @@
 [
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS]",
+        "Deprecated": "1",
+        "EventCode": "0xe4",
+        "EventName": "LBR_INSERTS.ANY",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
     {
         "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.",
         "EventCode": "0xB7",
index 3153bab527a985657e15fd28d065c8e14f645706..846bcdafca6d8e78502d245c242db1d26fd4410e 100644 (file)
         "SampleAfterValue": "20003",
         "UMask": "0x1"
     },
+    {
+        "BriefDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. [This event is alias to LBR_INSERTS.ANY]",
+        "EventCode": "0xe4",
+        "EventName": "MISC_RETIRED.LBR_INSERTS",
+        "PEBS": "1",
+        "PublicDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL. This event is PDIR on GP0 and NPEBS on all other GPs [This event is alias to LBR_INSERTS.ANY]",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
     {
         "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
         "EventCode": "0x75",
index ecbe9660b2b31b72760609bcb771fd6e8475c856..e6d710cf3ce29849957c26bba5b322c52478c102 100644 (file)
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from DRAM in the same NUMA node.",
     "UMask": "0x01",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from DRAM in a different NUMA node.",
     "UMask": "0x02",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from another CCX's cache when the address was in the same NUMA node.",
     "UMask": "0x04",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from another CCX's cache when the address was in a different NUMA node.",
     "UMask": "0x08",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from extension memory (CXL) in the same NUMA node.",
     "UMask": "0x10",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from extension memory (CXL) in a different NUMA node.",
     "UMask": "0x20",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency from all data sources.",
     "UMask": "0x3f",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from DRAM in the same NUMA node.",
     "UMask": "0x01",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from DRAM in a different NUMA node.",
     "UMask": "0x02",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from another CCX's cache when the address was in the same NUMA node.",
     "UMask": "0x04",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from another CCX's cache when the address was in a different NUMA node.",
     "UMask": "0x08",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from extension memory (CXL) in the same NUMA node.",
     "UMask": "0x10",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from extension memory (CXL) in a different NUMA node.",
     "UMask": "0x20",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from all data sources.",
     "UMask": "0x3f",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   }
 ]
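
The L3PMC hunk above adds EnAllCores, EnAllSlices, SliceId and ThreadMask qualifiers to the sampled-latency (EventCode 0xac) and fill-request (EventCode 0xad) events, which appear to make them count across all cores, threads and L3 slices by default. One plausible use of the pair, sketched below under the assumption that both counters are read over the same interval with matching UMasks, is to derive an average fill latency by dividing the accumulated sampled latency by the number of sampled requests (the counter readings are made up):

    def avg_l3_fill_latency(sampled_latency, fill_requests):
        """Assumed post-processing: latency accumulator (EventCode 0xac)
        divided by the matching request count (EventCode 0xad), both read
        over the same interval and UMask."""
        return sampled_latency / fill_requests if fill_requests else 0.0

    # Made-up readings for the local-DRAM source (UMask 0x01):
    print(f"{avg_l3_fill_latency(1_200_000, 4_000):.1f} cycles")
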
index 55a10b0bf36f6ab3ddeb7ed25fb9e71122d45cb7..c20833fb1f5852a3c077a8ae518516fe000d44cf 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
+        "MetricExpr": "66 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
         "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
         "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "ScaleUnit": "100%"
     },
-    {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
-        "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
-        "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_mispredicts_resteers"
-    },
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * BR_MISP_EXEC.INDIRECT)",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     },
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_all"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
+    },
     {
         "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "Mem",
         "MetricName": "tma_info_memory_l3mpki"
     },
-    {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
-    },
-    {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
-    },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
-    },
-    {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
-    },
-    {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "0",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
         "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
     },
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
         "MetricName": "tma_info_system_kernel_utilization",
         "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
     },
-    {
-        "BriefDescription": "Average number of parallel requests to external memory",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_parallel_requests",
-        "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
-    },
-    {
-        "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_request_latency"
-    },
     {
         "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
         "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ITLB_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: ITLB_MISSES.WALK_COMPLETED",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_UOPS_RETIRED.L1_HIT_PS;MEM_LOAD_UOPS_RETIRED.HIT_LFB_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_SMT",
         "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%"
     },
     {
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "tma_branch_resteers - tma_mispredicts_resteers - tma_clears_resteers",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: BACLEARS.ANY",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: BACLEARS.ANY",
         "ScaleUnit": "100%"
     },
     {
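
Several of the rewritten MetricExprs above collapse the per-flavor FP_ARITH_INST_RETIRED terms into the aggregate .SCALAR and .4_FLOPS umasks while keeping the same FLOPs-per-instruction weighting: 1x scalar, 2x 128-bit packed double, 4x the 4-FLOP group, 8x 256-bit packed single. A small sketch of how tma_info_system_gflops evaluates from raw counts (the counter readings are invented):

    def gflops(counts, seconds):
        """Weighted FLOP count per the rewritten MetricExpr: SCALAR counts
        1 FLOP, 128-bit packed double 2, the 4_FLOPS group 4, and 256-bit
        packed single 8."""
        flops = (counts["FP_ARITH_INST_RETIRED.SCALAR"]
                 + 2 * counts["FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE"]
                 + 4 * counts["FP_ARITH_INST_RETIRED.4_FLOPS"]
                 + 8 * counts["FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE"])
        return flops / 1e9 / seconds

    sample = {  # made-up counter readings over a 1-second window
        "FP_ARITH_INST_RETIRED.SCALAR": 3_000_000_000,
        "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE": 500_000_000,
        "FP_ARITH_INST_RETIRED.4_FLOPS": 250_000_000,
        "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE": 125_000_000,
    }
    print(f"{gflops(sample, 1.0):.2f} GFLOPS")
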
index ac7cdb8319607f35461f84c39e98d56c5c1d3aff..b01ed47072bc70c124bb2a4c7513cf27716ba43a 100644 (file)
         "BriefDescription": "Number of times RTM abort was triggered",
         "EventCode": "0xc9",
         "EventName": "RTM_RETIRED.ABORTED",
-        "PEBS": "1",
+        "PEBS": "2",
         "PublicDescription": "Number of times RTM abort was triggered .",
         "SampleAfterValue": "2000003",
         "UMask": "0x4"
index f6a0258e3241236b97c48a6098503f40bd12e5dd..8c808347f6da4e8382af6a780730636fadcdb914 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -24,7 +24,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -94,6 +96,7 @@
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
index e1f55fcfa0d02acb1feb8ce3627cc655853231db..826357787201a3fe241d748e8a7e26c632bd8491 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
+        "MetricExpr": "66 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
         "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
         "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
         "ScaleUnit": "100%"
     },
-    {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
-        "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
-        "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_mispredicts_resteers"
-    },
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * BR_MISP_EXEC.INDIRECT)",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     },
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
     },
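
The fill-bandwidth metrics above were renamed with a _2t suffix and now point at unsuffixed helper metrics carrying the raw expression; the arithmetic is unchanged: every fill event moves one 64-byte cache line. A sketch, assuming invented counts over a 1.5 s window:

    # One cache line is 64 bytes; divide by 1e9 for GB and by wall time for
    # GB/s. L2 and L3 use L2_LINES_IN.ALL and LONGEST_LAT_CACHE.MISS instead.
    l1d_replacements = 3_000_000_000  # L1D.REPLACEMENT (hypothetical)
    duration_time    = 1.5            # seconds

    gb_per_sec = 64 * l1d_replacements / 1e9 / duration_time
    print(f"L1D fill bandwidth ~ {gb_per_sec:.0f} GB/s")  # 128 GB/s
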
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
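
The MPKI metrics regrouped here (CacheMisses becomes CacheHits) all share the misses-per-kilo-instruction form. For instance, with hypothetical counts:

    # MPKI = 1000 * miss events / retired instructions; counts invented.
    l1_miss_loads = 4_200_000    # MEM_LOAD_UOPS_RETIRED.L1_MISS
    instructions  = 900_000_000  # INST_RETIRED.ANY

    l1mpki = 1e3 * l1_miss_loads / instructions
    print(f"L1 MPKI = {l1mpki:.2f}")  # ~4.67 misses per 1000 instructions
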
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_all"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
+    },
     {
         "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "Mem",
         "MetricName": "tma_info_memory_l3mpki"
     },
-    {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
-    },
-    {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
-    },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
-    },
-    {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
-    },
-    {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "0",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
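
The two relocated metrics above derive latency and parallelism from the same occupancy counter: L1D_PEND_MISS.PENDING accumulates, each cycle, the number of outstanding L1D miss demand loads. A sketch of both formulas with invented counts:

    pending        = 50_000_000  # L1D_PEND_MISS.PENDING (occupancy sum)
    pending_cycles = 10_000_000  # L1D_PEND_MISS.PENDING_CYCLES
    l1_miss        = 2_000_000   # MEM_LOAD_UOPS_RETIRED.L1_MISS
    hit_lfb        = 500_000     # MEM_LOAD_UOPS_RETIRED.HIT_LFB

    # Average cycles each miss (or line-fill-buffer hit) stays outstanding.
    latency = pending / (l1_miss + hit_lfb)  # 20.0 core cycles
    # Average misses in flight over cycles with at least one miss pending.
    mlp = pending / pending_cycles           # 5.0
    print(f"latency ~ {latency:.1f} cycles, MLP ~ {mlp:.1f}")
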
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
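
The new tma_info_system_cpus_utilized metric scales the utilization ratio above by the number of online CPUs. With assumed values:

    # Utilization is the unhalted fraction of reference cycles; values invented.
    ref_tsc_unhalted = 3.2e9  # CPU_CLK_UNHALTED.REF_TSC
    tsc              = 6.4e9  # TSC
    num_cpus_online  = 8      # #num_cpus_online in the metric expression

    utilization = ref_tsc_unhalted / tsc  # 0.50
    print(f"CPUs utilized ~ {num_cpus_online * utilization:.1f}")  # ~4.0
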
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
         "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
     },
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ITLB_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_SMT",
         "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
         "ScaleUnit": "100%"
     },
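
The estimate above charges an assumed average L3 hit latency (the 29-cycle constant) to each retired L3-hit load, after inflating the hit count to redistribute line-fill-buffer hits across the hit/miss levels. A rough sketch under those assumptions:

    # Invented counts; 29 models the average L3 hit latency in core cycles.
    l3_hit  = 1_000_000    # MEM_LOAD_UOPS_RETIRED.L3_HIT
    lfb_adj = 1.2          # 1 + HIT_LFB / (L2 + L3 + XSNP hits + L3 misses)
    clks    = 500_000_000  # tma_info_thread_clks

    fraction = 29 * l3_hit * lfb_adj / clks
    print(f"tma_l3_hit_latency ~ {fraction:.1%} of cycles")  # ~7.0%
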
     {
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
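
Together, the two metrics above split offcore demand-read stalls by occupancy: cycles with at least four outstanding data reads (the cmask=4 term) are attributed to bandwidth, and the remaining cycles with any outstanding read to latency. A sketch with hypothetical cycle counts:

    clks        = 500_000_000  # tma_info_thread_clks
    cycles_ge_1 = 120_000_000  # OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD
    cycles_ge_4 = 45_000_000   # ALL_DATA_RD counted with cmask=4

    mem_bw  = min(clks, cycles_ge_4) / clks           # 9.0% bandwidth-bound
    mem_lat = min(clks, cycles_ge_1) / clks - mem_bw  # 15.0% latency-bound
    print(f"tma_mem_bandwidth ~ {mem_bw:.1%}, tma_mem_latency ~ {mem_lat:.1%}")
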
     {
         "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Sample with: UOPS_EXECUTED.CYCLES_GE_3",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "tma_branch_resteers - tma_mispredicts_resteers - tma_clears_resteers",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: FRONTEND_RETIRED.UNKNOWN_BRANCH",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: FRONTEND_RETIRED.UNKNOWN_BRANCH",
         "ScaleUnit": "100%"
     },
     {
index f6a0258e3241236b97c48a6098503f40bd12e5dd..8c808347f6da4e8382af6a780730636fadcdb914 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -24,7 +24,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -94,6 +96,7 @@
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
index 83d20130c21738b16b44e329f81e4b5000a09231..320aaab53a0ba49d0592de5cd6904a23c0240335 100644 (file)
         "BriefDescription": "Number of cores in C-State; C0 and C1",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+        "Filter": "occ_sel=1",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
         "BriefDescription": "Number of cores in C-State; C3",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+        "Filter": "occ_sel=2",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
         "BriefDescription": "Number of cores in C-State; C6 and C7",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+        "Filter": "occ_sel=3",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
index b319e4edc238d8cdaa963ad18168a6f37add0bf9..f2d378c9d68f7e5b664d1c12a08d5b3360a5fd4b 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
+        "MetricExpr": "66 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
         "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
         "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "ScaleUnit": "100%"
     },
-    {
-        "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
-        "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
-        "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_mispredicts_resteers"
-    },
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * BR_MISP_EXEC.INDIRECT)",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     },
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "Average Parallel L2 cache miss data reads",
+        "MetricExpr": "tma_info_memory_latency_data_l2_mlp",
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_data_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_all"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
     {
-        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
-        "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
-        "MetricName": "tma_info_memory_l3mpki"
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_l3mpki"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "MetricExpr": "tma_info_memory_load_l2_miss_latency",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "MetricExpr": "tma_info_memory_load_l2_mlp",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "BriefDescription": "Average Latency for L2 cache miss demand Loads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_miss_latency",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "BriefDescription": "Average Parallel L2 cache miss demand Loads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "0",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
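L1D_PEND_MISS.PENDING accumulates the number of outstanding L1D miss demand loads each cycle, while PENDING_CYCLES counts only the cycles with at least one such miss, so the ratio is the average overlap. Placeholder sketch:

    L1D_PEND_MISS_PENDING = 9_000_000         # per-cycle sum of in-flight misses
    L1D_PEND_MISS_PENDING_CYCLES = 3_000_000  # cycles with >= 1 miss in flight

    mlp = L1D_PEND_MISS_PENDING / L1D_PEND_MISS_PENDING_CYCLES
    print(f"tma_info_memory_mlp = {mlp:.2f}")  # ~3 misses overlapped on average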
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "tma_info_memory_tlb_page_walks_utilization",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_page_walks_utilization",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
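#num_cpus_online is a perf metric literal for the number of online CPUs; multiplying it by the utilization fraction from the previous metric gives the average number of busy CPUs. A sketch under that reading, with placeholder counts:

    NUM_CPUS_ONLINE = 16
    CPU_CLK_UNHALTED_REF_TSC = 30_000_000_000  # unhalted reference cycles, all CPUs
    TSC = 80_000_000_000                       # elapsed TSC cycles, all CPUs

    cpu_utilization = CPU_CLK_UNHALTED_REF_TSC / TSC   # fraction in [0, 1]
    cpus_utilized = NUM_CPUS_ONLINE * cpu_utilization  # average busy CPUs
    print(f"tma_info_system_cpus_utilized = {cpus_utilized:.1f}")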
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
         "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
     },
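Each UNC_M_CAS_COUNT event is one 64-byte cache-line transfer at the memory controller, which is where the factor of 64 comes from. Placeholder sketch:

    UNC_M_CAS_COUNT_RD = 500_000_000  # DRAM CAS reads, summed over IMC channels
    UNC_M_CAS_COUNT_WR = 200_000_000  # DRAM CAS writes, summed over IMC channels
    DURATION_TIME = 10.0              # seconds

    # 64 bytes per CAS, reported in GB/s.
    dram_bw = 64 * (UNC_M_CAS_COUNT_RD + UNC_M_CAS_COUNT_WR) / 1e9 / DURATION_TIME
    print(f"tma_info_system_dram_bw_use = {dram_bw:.2f} GB/s")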
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
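The rewritten expression folds the per-precision events into umask groupings: judging by the removed formula, FP_ARITH_INST_RETIRED.SCALAR covers both scalar precisions (1 FLOP each) and FP_ARITH_INST_RETIRED.4_FLOPS groups the two events worth 4 FLOPs apiece (128-bit packed single and 256-bit packed double). A sketch of the weighting under that reading:

    SCALAR = 1_000_000_000           # scalar single + double, 1 FLOP each
    PACKED_128B_DOUBLE = 50_000_000  # 2 FLOPs each
    FLOPS_4 = 100_000_000            # 128b packed single + 256b packed double
    PACKED_256B_SINGLE = 25_000_000  # 8 FLOPs each
    DURATION_TIME = 10.0             # seconds

    gflops = (SCALAR + 2 * PACKED_128B_DOUBLE + 4 * FLOPS_4
              + 8 * PACKED_256B_SINGLE) / 1e9 / DURATION_TIME
    print(f"tma_info_system_gflops = {gflops:.2f}")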
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
         "MetricGroup": "Power",
         "MetricName": "tma_info_system_turbo_utilization"
     },
+    {
+        "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+        "MetricExpr": "tma_info_system_socket_clks / 1e9 / duration_time",
+        "MetricGroup": "SoC",
+        "MetricName": "tma_info_system_uncore_frequency"
+    },
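tma_info_system_socket_clks counts uncore clock ticks for the socket, so normalizing by elapsed time gives the average uncore clock in GHz. Placeholder sketch:

    SOCKET_CLKS = 24_000_000_000  # uncore clockticks over the window
    DURATION_TIME = 10.0          # seconds

    uncore_ghz = SOCKET_CLKS / 1e9 / DURATION_TIME
    print(f"tma_info_system_uncore_frequency = {uncore_ghz:.2f} GHz")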
     {
         "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7 * ITLB_MISSES.WALK_COMPLETED) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: ITLB_MISSES.WALK_COMPLETED",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_UOPS_RETIRED.L1_HIT_PS;MEM_LOAD_UOPS_RETIRED.HIT_LFB_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_SMT",
         "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_MISS / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "41 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
         "MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_local_dram",
-        "MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_local_mem",
+        "MetricThreshold": "tma_local_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory. Caching will improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM_PS",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "310 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
         "MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_remote_dram",
-        "MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_remote_mem",
+        "MetricThreshold": "tma_remote_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory. This is caused often due to non-optimal NUMA allocations. #link to NUMA article. Sample with: MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM_PS",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "tma_branch_resteers - tma_mispredicts_resteers - tma_clears_resteers",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: BACLEARS.ANY",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: BACLEARS.ANY",
         "ScaleUnit": "100%"
     },
     {
index f6a0258e3241236b97c48a6098503f40bd12e5dd..8c808347f6da4e8382af6a780730636fadcdb914 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -24,7 +24,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -94,6 +96,7 @@
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
index 83d20130c21738b16b44e329f81e4b5000a09231..320aaab53a0ba49d0592de5cd6904a23c0240335 100644 (file)
         "BriefDescription": "Number of cores in C-State; C0 and C1",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+        "Filter": "occ_sel=1",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
         "BriefDescription": "Number of cores in C-State; C3",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+        "Filter": "occ_sel=2",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
         "BriefDescription": "Number of cores in C-State; C6 and C7",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+        "Filter": "occ_sel=3",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
index 8bc6c07078566e0f2baa317041fa45dc95c0c60f..7f88b156f73b6f08ec35101a69491471b98c31bc 100644 (file)
         "MetricName": "llc_miss_remote_memory_bandwidth_read",
         "ScaleUnit": "1MB/s"
     },
+    {
+        "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to remote memory.",
+        "MetricExpr": "UNC_CHA_REQUESTS.WRITES_REMOTE * 64 / 1e6 / duration_time",
+        "MetricName": "llc_miss_remote_memory_bandwidth_write",
+        "ScaleUnit": "1MB/s"
+    },
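Like the read-side metric just above it, the new write-side metric scales CHA request counts by the 64-byte line size and normalizes to MB/s. Placeholder sketch:

    UNC_CHA_REQUESTS_WRITES_REMOTE = 15_000_000  # LLC-miss writes to remote memory
    DURATION_TIME = 10.0                         # seconds

    bw_write = UNC_CHA_REQUESTS_WRITES_REMOTE * 64 / 1e6 / DURATION_TIME
    print(f"llc_miss_remote_memory_bandwidth_write = {bw_write:.1f} MB/s")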
     {
         "BriefDescription": "The ratio of number of completed memory load instructions to the total number completed instructions",
         "MetricExpr": "MEM_INST_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
         "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_thread_slots",
+        "MetricExpr": "34 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(44 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD))) + 44 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(44 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD))) + 44 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_contested_accesses",
         "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "44 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "44 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_data_sharing",
         "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
         "MetricName": "tma_decoder0_alone",
-        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35))",
+        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
         "ScaleUnit": "100%"
     },
     },
     {
         "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
-        "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
+        "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
         "MetricName": "tma_dtlb_load",
         "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
         "MetricName": "tma_dtlb_store",
         "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(110 * tma_info_system_average_frequency * (OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM + OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM) + 47.5 * tma_info_system_average_frequency * (OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE + OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE)) / tma_info_thread_clks",
+        "MetricExpr": "(110 * tma_info_system_core_frequency * (OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM + OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM) + 47.5 * tma_info_system_core_frequency * (OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE + OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE)) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
         "MetricName": "tma_false_sharing",
         "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
         "MetricName": "tma_fb_full",
         "MetricThreshold": "tma_fb_full > 0.3",
-        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "PublicDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired). Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain and FMA double-counting.",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists",
+        "MetricExpr": "34 * FP_ASSIST.ANY / tma_info_thread_slots",
+        "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
+        "MetricName": "tma_fp_assists",
+        "MetricThreshold": "tma_fp_assists > 0.1",
+        "PublicDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists. FP Assist may apply when working with very small floating point values (so-called Denormals).",
+        "ScaleUnit": "100%"
+    },
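The assist cost model drops from a flat 100 slots to 34 slots per assist, and the new tma_fp_assists child isolates the floating-point share of that cost. A placeholder sketch of both expressions:

    FP_ASSIST_ANY = 1_000_000      # FP assists (e.g. denormal handling)
    OTHER_ASSISTS_ANY = 500_000    # all remaining assists
    THREAD_SLOTS = 16_000_000_000  # tma_info_thread_slots

    tma_assists = 34 * (FP_ASSIST_ANY + OTHER_ASSISTS_ANY) / THREAD_SLOTS
    tma_fp_assists = 34 * FP_ASSIST_ANY / THREAD_SLOTS
    print(f"tma_assists={tma_assists:.4f} tma_fp_assists={tma_fp_assists:.4f}")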
     {
         "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
         "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / UOPS_RETIRED.RETIRE_SLOTS",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions",
         "MetricExpr": "tma_light_operations * UOPS_RETIRED.MACRO_FUSED / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fused_instructions",
         "MetricThreshold": "tma_fused_instructions > 0.1 & tma_light_operations > 0.6",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. The instruction pairs of CMP+JCC or DEC+JCC are commonly used examples.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. CMP+JCC or DEC+JCC are common examples of legacy fusions. {([MTL] Note new MOV+OP and Load+OP fusions appear under Other_Light_Ops in MTL!)}",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
         "MetricExpr": "(ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
     },
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "tma_info_bottleneck_mispredictions * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES / 100",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
         "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
     },
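The rewritten cost expression reuses tma_info_bottleneck_mispredictions (a percentage) instead of re-deriving the fetch-latency split inline, then converts back to slots per retired misprediction. A sketch of the arithmetic with placeholder values:

    BOTTLENECK_MISPREDICTIONS_PCT = 12.0  # tma_info_bottleneck_mispredictions, in %
    THREAD_SLOTS = 16_000_000_000         # tma_info_thread_slots
    BR_MISP_RETIRED_ALL_BRANCHES = 40_000_000

    cost = (BOTTLENECK_MISPREDICTIONS_PCT * THREAD_SLOTS
            / BR_MISP_RETIRED_ALL_BRANCHES / 100)
    print(f"slots wasted per mispredict: {cost:.0f}")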
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * BR_MISP_EXEC.INDIRECT)",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
     },
     {
         "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
-        "MetricExpr": "tma_info_core_ipmispredict",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
         "MetricGroup": "Bad;BadSpec;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmispredict",
         "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
     },
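Inlining INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES removes the indirection through tma_info_core_ipmispredict without changing the value. Placeholder sketch, including the threshold check:

    INST_RETIRED_ANY = 4_800_000_000
    BR_MISP_RETIRED_ALL_BRANCHES = 12_000_000

    ipmispredict = INST_RETIRED_ANY / BR_MISP_RETIRED_ALL_BRANCHES
    flagged = ipmispredict < 200  # MetricThreshold from the entry above
    print(f"tma_info_bad_spec_ipmispredict = {ipmispredict:.0f} flagged={flagged}")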
+    {
+        "BriefDescription": "Speculative to Retired ratio of all clears (covering mispredicts and nukes)",
+        "MetricExpr": "INT_MISC.CLEARS_COUNT / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "tma_info_bad_spec_spec_clears_ratio"
+    },
+    {
+        "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+        "MetricExpr": "(100 * (1 - tma_core_bound / (((EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) if tma_core_bound < (((EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+        "MetricGroup": "Cor;SMT;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_core_bound_likely",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck.",
+        "MetricExpr": "100 * (100 * (tma_fetch_latency * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(2 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) + tma_fetch_bandwidth * tma_mite / (tma_mite + tma_dsb)))",
+        "MetricGroup": "DSBmiss;Fed;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_dsb_misses",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck.",
+        "MetricExpr": "100 * (100 * (tma_fetch_latency * ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD) / ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(2 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD)))",
+        "MetricGroup": "Fed;FetchLat;IcMiss;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_ic_misses",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
         "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
     },
+    {
+        "BriefDescription": "Total pipeline cost of \"useful operations\" - the baseline operations not covered by Branching_Overhead nor Irregular_Overhead.",
+        "MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Ret",
+        "MetricName": "tma_info_bottleneck_base_non_br",
+        "MetricThreshold": "tma_info_bottleneck_base_non_br > 20"
+    },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
-        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+        "MetricGroup": "BigFootprint;Fed;Frontend;IcMiss;MemoryTLB",
         "MetricName": "tma_info_bottleneck_big_code",
-        "MetricThreshold": "tma_info_bottleneck_big_code > 20",
-        "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
+        "MetricThreshold": "tma_info_bottleneck_big_code > 20"
     },
     {
         "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
-        "MetricExpr": "100 * ((BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
-        "MetricGroup": "Ret;tma_issueBC",
+        "MetricExpr": "100 * ((BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots)",
+        "MetricGroup": "Ret",
         "MetricName": "tma_info_bottleneck_branching_overhead",
-        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
-        "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
+        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 5"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+        "MetricName": "tma_info_bottleneck_cache_memory_bandwidth",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_bandwidth > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+        "MetricName": "tma_info_bottleneck_cache_memory_latency",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_latency > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
+    },
+    {
+        "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+        "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+        "MetricGroup": "Cor;tma_issueComp",
+        "MetricName": "tma_info_bottleneck_compute_bound_est",
+        "MetricThreshold": "tma_info_bottleneck_compute_bound_est > 20",
+        "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
     },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+        "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
         "MetricGroup": "Fed;FetchBW;Frontend",
         "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
         "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
     },
     {
-        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
-        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
-        "MetricName": "tma_info_bottleneck_memory_bandwidth",
-        "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
-        "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+        "BriefDescription": "Total pipeline cost of irregular execution (e.g",
+        "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Bad;Cor;Ret;tma_issueMS",
+        "MetricName": "tma_info_bottleneck_irregular_overhead",
+        "MetricThreshold": "tma_info_bottleneck_irregular_overhead > 10",
+        "PublicDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments). Related metrics: tma_microcode_sequencer, tma_ms_switches"
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
         "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
         "MetricName": "tma_info_bottleneck_memory_data_tlbs",
         "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
-        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_synchronization"
     },
     {
-        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
-        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
-        "MetricName": "tma_info_bottleneck_memory_latency",
-        "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
-        "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+        "BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+        "MetricGroup": "Mem;Offcore;tma_issueTLB",
+        "MetricName": "tma_info_bottleneck_memory_synchronization",
+        "MetricThreshold": "tma_info_bottleneck_memory_synchronization > 10",
+        "PublicDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs"
     },
     {
         "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+        "MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bottleneck_mispredictions",
         "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
         "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
     },
+    {
+        "BriefDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class)",
+        "MetricExpr": "100 - (tma_info_bottleneck_big_code + tma_info_bottleneck_instruction_fetch_bw + tma_info_bottleneck_mispredictions + tma_info_bottleneck_cache_memory_bandwidth + tma_info_bottleneck_cache_memory_latency + tma_info_bottleneck_memory_data_tlbs + tma_info_bottleneck_memory_synchronization + tma_info_bottleneck_compute_bound_est + tma_info_bottleneck_irregular_overhead + tma_info_bottleneck_branching_overhead + tma_info_bottleneck_base_non_br)",
+        "MetricGroup": "Cor;Offcore",
+        "MetricName": "tma_info_bottleneck_other_bottlenecks",
+        "MetricThreshold": "tma_info_bottleneck_other_bottlenecks > 20",
+        "PublicDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class). Examples include data-dependencies (Core Bound when Low ILP) and other unlisted memory-related stalls."
+    },
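
The new other_bottlenecks metric makes the Bottlenecks class self-checking: the named entries are meant to decompose (roughly) 100% of pipeline cost, and whatever is left over is reported as "other". A minimal Python sketch of that residual idea, with invented percentages (only the subtraction mirrors the MetricExpr above):

    # Residual-bottleneck sketch; the numbers are made up for illustration.
    named = {
        "big_code": 4.0, "instruction_fetch_bw": 11.0,
        "mispredictions": 18.0, "cache_memory_bandwidth": 9.0,
        "cache_memory_latency": 13.0, "memory_data_tlbs": 3.0,
        "memory_synchronization": 2.0, "compute_bound_est": 10.0,
        "irregular_overhead": 1.0, "branching_overhead": 2.0,
        "base_non_br": 20.0,
    }
    other = 100.0 - sum(named.values())
    print(f"other_bottlenecks = {other:.1f}%")  # flagged when > 20
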
     {
         "BriefDescription": "Fraction of branches that are CALL or RET",
         "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
     {
         "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.COND - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
         "MetricGroup": "Bad;Branches",
         "MetricName": "tma_info_branches_jump"
     },
         "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
         "MetricName": "tma_info_core_coreipc"
     },
+    {
+        "BriefDescription": "uops Executed per Cycle",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / tma_info_thread_clks",
+        "MetricGroup": "Power",
+        "MetricName": "tma_info_core_epc"
+    },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
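
The FLOPC rewrite above swaps the long per-event sum for the newer umbrella events; per the old expression, SCALAR covers both scalar precisions, 4_FLOPS covers 128-bit single plus 256-bit double, and 8_FLOPS covers 256-bit single plus 512-bit double, each weighted by the FLOPs one instruction performs. A Python sketch with invented counts:

    # FLOP weighting by vector width; event counts are invented.
    ev = {
        "SCALAR": 1_000_000,            # 1 FLOP each (SP or DP scalar)
        "128B_PACKED_DOUBLE": 250_000,  # 2 FLOPs each
        "4_FLOPS": 500_000,             # 4 FLOPs each (128b SP + 256b DP)
        "8_FLOPS": 125_000,             # 8 FLOPs each (256b SP + 512b DP)
        "512B_PACKED_SINGLE": 31_250,   # 16 FLOPs each
    }
    core_clks = 2_000_000
    flops = (ev["SCALAR"] + 2 * ev["128B_PACKED_DOUBLE"] + 4 * ev["4_FLOPS"]
             + 8 * ev["8_FLOPS"] + 16 * ev["512B_PACKED_SINGLE"])
    print("FLOPc =", flops / core_clks)
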
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
-    {
-        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Bad;BadSpec;BrMispredicts;TopdownL1;tma_L1_group",
-        "MetricName": "tma_info_core_ipmispredict",
-        "MetricgroupNoGroup": "TopdownL1"
-    },
     {
         "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
         "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx512",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
         "MetricName": "tma_info_inst_mix_ipload",
         "MetricThreshold": "tma_info_inst_mix_ipload < 3"
     },
+    {
+        "BriefDescription": "Instructions per PAUSE (lower number means higher occurrence rate)",
+        "MetricExpr": "tma_info_inst_mix_instructions / ROB_MISC_EVENTS.PAUSE_INST",
+        "MetricGroup": "Flops;FpVector;InsType",
+        "MetricName": "tma_info_inst_mix_ippause"
+    },
     {
         "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
         "MetricThreshold": "tma_info_inst_mix_iptb < 9",
         "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
     },
+    {
+        "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_code_stlb_mpki",
+        "MetricGroup": "Fed;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_code_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
     },
     {
         "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_access_bw",
         "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_core_l3_cache_access_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_access_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "Average Parallel L2 cache miss data reads",
+        "MetricExpr": "tma_info_memory_latency_data_l2_mlp",
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_data_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_fb_hpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
+    },
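
All of the cache fill/access bandwidth metrics in this block share one shape: an event that moves a 64-byte cache line, scaled to GB/s over wall time (the per-core "_2t" variants wrap the same expression). A sketch with invented numbers:

    # Fill-bandwidth pattern: 64 bytes per fill event, over wall time.
    CACHE_LINE = 64                   # bytes per L1D.REPLACEMENT
    l1d_replacements = 1_500_000_000  # invented fill count
    duration_time = 2.0               # seconds, invented
    bw = CACHE_LINE * l1d_replacements / 1e9 / duration_time
    print(f"L1D fill bandwidth ~ {bw:.1f} GB/s")  # 48.0 here
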
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
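
The l1mpki entry above (and its l2/l3 and *hpki siblings below) all follow the misses-per-kilo-instruction pattern. A one-line sketch with invented counts:

    # MPKI pattern: events per thousand retired instructions (invented data).
    l1_miss = 2_400_000            # MEM_LOAD_RETIRED.L1_MISS
    inst_retired = 800_000_000     # INST_RETIRED.ANY
    print(f"L1 MPKI = {1e3 * l1_miss / inst_retired:.2f}")  # 3.00
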
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
+        "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
+        "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
+        "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
+        "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_evictions_silent_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_all"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "tma_info_memory_l3_cache_access_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l3_cache_access_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "Mem",
         "MetricName": "tma_info_memory_l3mpki"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
+        "BriefDescription": "Average Parallel L2 cache miss data reads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "MetricGroup": "Memory_BW;Offcore",
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+        "BriefDescription": "Average Latency for L2 cache miss demand Loads",
+        "MetricExpr": "tma_info_memory_load_l2_miss_latency",
+        "MetricGroup": "Memory_Lat;Offcore",
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
-        "BriefDescription": "Average Parallel L2 cache miss data reads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "BriefDescription": "Average Parallel L2 cache miss demand Loads",
+        "MetricExpr": "tma_info_memory_load_l2_mlp",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
-        "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_miss_latency",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
-        "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_load_stlb_mpki",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "tma_info_memory_uc_load_pki",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_uc_load_pki"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+    },
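
mlp above and load_miss_real_latency just before it are both occupancy-counter derivations from L1D_PEND_MISS: the PENDING count accumulates the number of in-flight L1D misses every cycle, so dividing it by completed misses gives average miss latency, while dividing by the cycles with at least one miss in flight gives average parallelism. A sketch with invented counters:

    # Occupancy math behind load_miss_real_latency and mlp (invented data).
    pending = 8_000_000          # L1D_PEND_MISS.PENDING (occupancy sum)
    pending_cycles = 1_600_000   # L1D_PEND_MISS.PENDING_CYCLES
    l1_miss, fb_hit = 150_000, 50_000   # MEM_LOAD_RETIRED.{L1_MISS,FB_HIT}
    print("latency ~", pending / (l1_miss + fb_hit), "cycles")  # 40.0
    print("MLP ~", pending / pending_cycles)                    # 5.0
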
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "tma_info_memory_tlb_page_walks_utilization",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_page_walks_utilization",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_store_stlb_mpki",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_store_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
         "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_uc_load_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
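
tma_info_pipeline_execute (like the reworked tma_info_core_ilp above) relies on the cmask=1 event modifier: total executed uops divided by the number of cycles that executed at least one uop gives uops per non-idle cycle. The same idea, expressed over a hypothetical per-cycle trace in Python:

    # cmask=1 idea: average uops/cycle over cycles with >= 1 uop executed.
    trace = [0, 4, 3, 0, 0, 2, 5, 0, 1]            # invented per-cycle uops
    total = sum(trace)                              # UOPS_EXECUTED.THREAD
    active = sum(1 for u in trace if u >= 1)        # ... with cmask=1
    print("execute ILP =", total / active)          # 15 / 5 = 3.0
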
+    {
+        "BriefDescription": "Instructions per a microcode Assist invocation",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ASSIST.ANY + OTHER_ASSISTS.ANY)",
+        "MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
+        "MetricName": "tma_info_pipeline_ipassist",
+        "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
+        "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)"
+    },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
         "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
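
cpu_utilization is the fraction of TSC time the CPU spent unhalted (at reference frequency), and the new cpus_utilized simply scales that by the online CPU count to express how many CPUs' worth of work ran. A sketch with invented cycle counts:

    # Utilization -> utilized-CPUs scaling (invented counter values).
    import os
    ref_tsc = 4.8e9                     # CPU_CLK_UNHALTED.REF_TSC
    tsc = 6.4e9                         # TSC reference cycles
    utilization = ref_tsc / tsc         # 0.75
    cpus = (os.cpu_count() or 1) * utilization  # #num_cpus_online analogue
    print(f"~{cpus:.1f} CPUs utilized")
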
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
-        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
     },
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
     {
         "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
         "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3) * 4 / 1e9 / duration_time",
-        "MetricGroup": "IoBW;Mem;Server;SoC",
-        "MetricName": "tma_info_system_io_read_bw"
+        "MetricGroup": "IoBW;MemOffcore;Server;SoC",
+        "MetricName": "tma_info_system_io_read_bw",
+        "PublicDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]. Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU"
     },
     {
         "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
         "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3) * 4 / 1e9 / duration_time",
-        "MetricGroup": "IoBW;Mem;Server;SoC",
-        "MetricName": "tma_info_system_io_write_bw"
+        "MetricGroup": "IoBW;MemOffcore;Server;SoC",
+        "MetricName": "tma_info_system_io_write_bw",
+        "PublicDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]. Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
     {
         "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]",
         "MetricExpr": "1e9 * (UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS) / imc_0@event\\=0x0@",
-        "MetricGroup": "Mem;MemoryLat;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
         "MetricName": "tma_info_system_mem_dram_read_latency",
         "PublicDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
     },
     {
         "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
         "MetricExpr": "(1e9 * (UNC_M_PMM_RPQ_OCCUPANCY.ALL / UNC_M_PMM_RPQ_INSERTS) / imc_0@event\\=0x0@ if #has_pmem > 0 else 0)",
-        "MetricGroup": "Mem;MemoryLat;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
         "MetricName": "tma_info_system_mem_pmm_read_latency",
         "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
     },
     {
         "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
         "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
-        "MetricGroup": "Mem;MemoryBW;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
         "MetricName": "tma_info_system_pmm_read_bw"
     },
     {
         "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
         "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
-        "MetricGroup": "Mem;MemoryBW;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
         "MetricName": "tma_info_system_pmm_write_bw"
     },
     {
         "MetricGroup": "Power",
         "MetricName": "tma_info_system_turbo_utilization"
     },
+    {
+        "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+        "MetricExpr": "tma_info_system_socket_clks / 1e9 / duration_time",
+        "MetricGroup": "SoC",
+        "MetricName": "tma_info_system_uncore_frequency"
+    },
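
uncore_frequency derives the SoC clock in GHz from accumulated uncore clockticks over wall time, analogous to the core-frequency metric renamed above:

    # Clock-to-GHz pattern (invented values): ticks / 1e9 / seconds.
    socket_clks = 5.4e9     # tma_info_system_socket_clks analogue
    duration_time = 2.0     # seconds
    print(f"uncore ~ {socket_clks / 1e9 / duration_time:.2f} GHz")  # 2.70
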
     {
         "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
-        "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
-        "MetricExpr": "17 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "MetricExpr": "17 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_cache_memory_latency, tma_mem_latency",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
-        "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
+        "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
         "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
         "MetricName": "tma_lcp",
         "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
-        "MetricExpr": "59.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "59.5 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_local_dram",
-        "MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_local_mem",
+        "MetricThreshold": "tma_local_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory. Caching will improve the latency and increase performance. Sample with: MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM_PS",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_cache_memory_latency, tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
         "MetricName": "tma_microcode_sequencer",
         "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
-        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
+        "BriefDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles)",
         "MetricExpr": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH / UOPS_ISSUED.ANY",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
         "MetricName": "tma_mixing_vectors",
         "MetricThreshold": "tma_mixing_vectors > 0.05",
-        "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+        "PublicDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles). Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
         "MetricName": "tma_ms_switches",
         "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
-        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused",
         "MetricExpr": "tma_light_operations * (BR_INST_RETIRED.ALL_BRANCHES - UOPS_RETIRED.MACRO_FUSED) / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_non_fused_branches",
         "MetricThreshold": "tma_non_fused_branches > 0.1 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused. Non-conditional branches like direct JMP or CALL would count here. Can be used to examine fusible conditional jumps that were not fused.",
        "ScaleUnit": "100%"
    },
    {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
         "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
         "MetricName": "tma_nop_instructions",
-        "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
+        "MetricThreshold": "tma_nop_instructions > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
-        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
+        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_other_light_ops",
         "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+        "MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
+        "MetricGroup": "BrMispredicts;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
+        "MetricName": "tma_other_mispredicts",
+        "MetricThreshold": "tma_other_mispredicts > 0.05 & (tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+        "MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
+        "MetricGroup": "Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
+        "MetricName": "tma_other_nukes",
+        "MetricThreshold": "tma_other_nukes > 0.05 & (tma_machine_clears > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
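
Both new nodes follow the same residual pattern: take the parent fraction, subtract the share explained by a specific counter ratio, and clamp to a small positive floor so the node never goes negative (or silently vanishes) when the counters disagree. A sketch with made-up numbers:

    # Residual-with-floor pattern behind tma_other_mispredicts / tma_other_nukes.
    def residual(parent_fraction, accounted, total, floor=0.0001):
        # Share of the parent not explained by the accounted sub-events.
        return max(parent_fraction * (1 - accounted / total), floor)

    tma_branch_mispredicts = 0.12
    br_misp = 9_500     # BR_MISP_RETIRED.ALL_BRANCHES
    clears = 10_000     # INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT
    print(residual(tma_branch_mispredicts, br_misp, clears))  # 0.006
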
     {
         "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+        "MetricExpr": "(((1 - (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))))) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
         "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_pmm_bound",
         "MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
-        "MetricExpr": "((EXE_ACTIVITY.EXE_BOUND_0_PORTS + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
+        "MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
         "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
         "MetricName": "tma_ports_utilization",
         "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_NONE / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_core_core_clks",
+        "MetricExpr": "(EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_0",
         "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_GE_3 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_3) / tma_info_core_core_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
         "MetricConstraint": "NO_GROUP_EVENTS_NMI",
-        "MetricExpr": "(89.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 89.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(89.5 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 89.5 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
         "MetricName": "tma_remote_cache",
         "MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
-        "MetricExpr": "127 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "127 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_remote_dram",
-        "MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_remote_mem",
+        "MetricThreshold": "tma_remote_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory. This is caused often due to non-optimal NUMA allocations. #link to NUMA article. Sample with: MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM_PS",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
         "MetricExpr": "PARTIAL_RAT_STALLS.SCOREBOARD / tma_info_thread_clks",
-        "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
+        "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
         "MetricName": "tma_serializing_operation",
-        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: PARTIAL_RAT_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
         "MetricExpr": "40 * ROB_MISC_EVENTS.PAUSE_INST / tma_info_thread_clks",
-        "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
+        "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
         "MetricName": "tma_slow_pause",
-        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
+        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions. Sample with: MISC_RETIRED.PAUSE_INST",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
         "MetricName": "tma_sq_full",
         "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
         "ScaleUnit": "100%"
     },
     {
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "9 * BACLEARS.ANY / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: BACLEARS.ANY",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: BACLEARS.ANY",
         "ScaleUnit": "100%"
     },
     {
index bc6a9a4d27a9562a0de97285ce80fc03160ad583..904d299c95a317e880342e4c1e73be696cee73e1 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -26,7 +26,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "tma_L5_group": "Metrics for top-down breakdown at level 5",
     "tma_L6_group": "Metrics for top-down breakdown at level 6",
     "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+    "tma_assists_group": "Metrics contributing to tma_assists category",
     "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
     "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+    "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
     "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
     "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
     "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
@@ -78,9 +82,9 @@
     "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
     "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
     "tma_issue2P": "Metrics related by the issue $issue2P",
-    "tma_issueBC": "Metrics related by the issue $issueBC",
     "tma_issueBM": "Metrics related by the issue $issueBM",
     "tma_issueBW": "Metrics related by the issue $issueBW",
+    "tma_issueComp": "Metrics related by the issue $issueComp",
     "tma_issueD0": "Metrics related by the issue $issueD0",
     "tma_issueFB": "Metrics related by the issue $issueFB",
     "tma_issueFL": "Metrics related by the issue $issueFL",
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
     "tma_mite_group": "Metrics contributing to tma_mite category",
+    "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
     "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
     "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
     "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
index c6254af7a468e03d704c122105d92dbe39582fb8..ceef46046488637a58de3d4a8dc758307929e147 100644 (file)
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0x40",
         "Unit": "PCU"
     },
     {
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0x80",
         "Unit": "PCU"
     },
     {
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0xc0",
         "Unit": "PCU"
     },
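
The new UMasks select the C-state bucket (0x40 for CORES_C0, 0x80 for CORES_C3, 0xc0 for CORES_C6). Since these are occupancy events, each pclk cycle they accumulate the number of cores currently in the chosen state, so dividing by the PCU clocktick count over the same interval gives the average residency. A sketch (the counts are hypothetical, and the clocktick source is assumed to be the PCU fixed clock event):

    # Average number of cores resident in C0 over a measurement interval.
    c0_occupancy = 48_000_000_000   # UNC_P_POWER_STATE_OCCUPANCY.CORES_C0
    pcu_clockticks = 2_000_000_000  # PCU clock ticks in the same interval

    print(f"average cores in C0: {c0_occupancy / pcu_clockticks:.1f}")  # 24.0
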
     {
index bf5a511b99d11cad9ac0667319540296ab466c57..86a8f3b7fe1d6ae08f89e99903cb67bff35b3dc9 100644 (file)
         "UMask": "0x10c8968201",
         "Unit": "CHA"
     },
+    {
+        "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_CXL_EXP_LOCAL",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20c8968201",
+        "Unit": "CHA"
+    },
     {
         "BriefDescription": "TOR Inserts : DRd_Prefs issued by iA Cores targeting DDR Mem that Missed the LLC",
         "EventCode": "0x35",
         "UMask": "0x10ccd68201",
         "Unit": "CHA"
     },
+    {
+        "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA_CXL_EXP_LOCAL",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20ccd68201",
+        "Unit": "CHA"
+    },
     {
         "BriefDescription": "TOR Inserts; LLCPrefRFO misses from local IA",
         "EventCode": "0x35",
         "UMask": "0x10c8868201",
         "Unit": "CHA"
     },
+    {
+        "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO_CXL_EXP_LOCAL",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20c8868201",
+        "Unit": "CHA"
+    },
     {
         "BriefDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
         "EventCode": "0x35",
         "UMask": "0x10ccc68201",
         "Unit": "CHA"
     },
+    {
+        "BriefDescription": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_CXL_EXP_LOCAL",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20ccc68201",
+        "Unit": "CHA"
+    },
     {
         "BriefDescription": "TOR Inserts; RFO prefetch misses from local IA",
         "EventCode": "0x35",
         "UMask": "0x40",
         "Unit": "CHA"
     },
+    {
+        "BriefDescription": "TOR Inserts for INVXTOM opcodes received from a remote socket which miss the L3 and target memory in a CXL type 3 memory expander local to this socket.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.RRQ_MISS_INVXTOM_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x20e87e8240",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Inserts for RDCODE opcodes received from a remote socket which miss the L3 and target memory in a CXL type 3 memory expander local to this socket.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.RRQ_MISS_RDCODE_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x20e80e8240",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Inserts for RDCUR opcodes received from a remote socket which miss the L3 and target memory in a CXL type 3 memory expander local to this socket.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.RRQ_MISS_RDCUR_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x20e8068240",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Inserts for RDDATA opcodes received from a remote socket which miss the L3 and target memory in a CXL type 3 memory expander local to this socket.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.RRQ_MISS_RDDATA_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x20e8168240",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Inserts for RDINVOWN_OPT opcodes received from a remote socket which miss the L3 and target memory in a CXL type 3 memory expander local to this socket.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.RRQ_MISS_RDINVOWN_OPT_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x20e8268240",
+        "Unit": "CHA"
+    },
     {
         "BriefDescription": "TOR Inserts; All Snoops from Remote",
         "EventCode": "0x35",
         "UMask": "0x10c8968201",
         "Unit": "CHA"
     },
+    {
+        "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_CXL_EXP_LOCAL",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20c8968201",
+        "Unit": "CHA"
+    },
     {
         "BriefDescription": "TOR Occupancy : DRd_Prefs issued by iA Cores targeting DDR Mem that Missed the LLC",
         "EventCode": "0x36",
         "UMask": "0x10ccd68201",
         "Unit": "CHA"
     },
+    {
+        "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA_CXL_EXP_LOCAL",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20ccd68201",
+        "Unit": "CHA"
+    },
     {
         "BriefDescription": "TOR Occupancy; LLCPrefRFO misses from local IA",
         "EventCode": "0x36",
         "UMask": "0x10c8868201",
         "Unit": "CHA"
     },
+    {
+        "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO_CXL_EXP_LOCAL",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20c8868201",
+        "Unit": "CHA"
+    },
     {
         "BriefDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
         "EventCode": "0x36",
         "UMask": "0x10ccc68201",
         "Unit": "CHA"
     },
+    {
+        "BriefDescription": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_CXL_EXP_LOCAL",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20ccc68201",
+        "Unit": "CHA"
+    },
     {
         "BriefDescription": "TOR Occupancy; RFO prefetch misses from local IA",
         "EventCode": "0x36",
         "UMask": "0x40",
         "Unit": "CHA"
     },
+    {
+        "BriefDescription": "TOR Occupancy for INVXTOM opcodes received from a remote socket which miss the L3 and target memory in a CXL type 3 memory expander local to this socket.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.RRQ_MISS_INVXTOM_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x20e87e8240",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for RDCODE opcodes received from a remote socket which miss the L3 and target memory in a CXL type 3 memory expander local to this socket.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.RRQ_MISS_RDCODE_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x20e80e8240",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for RDCUR opcodes received from a remote socket which miss the L3 and target memory in a CXL type 3 memory expander local to this socket.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.RRQ_MISS_RDCUR_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x20e8068240",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for RDDATA opcodes received from a remote socket which miss the L3 and target memory in a CXL type 3 memory expander local to this socket.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.RRQ_MISS_RDDATA_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x20e8168240",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for RDINVOWN_OPT opcodes received from a remote socket which miss the L3 and target memory in a CXL type 3 memory expander local to this socket.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.RRQ_MISS_RDINVOWN_OPT_CXL_EXP_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x20e8268240",
+        "Unit": "CHA"
+    },
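
As with the other TOR event pairs, the INSERTS event counts request allocations while the OCCUPANCY event sums live entries each uncore cycle, so occupancy divided by inserts approximates the average lifetime of these CXL-expander misses in the TOR. A sketch with illustrative counts:

    # Average TOR residency of remote-socket reads hitting local CXL memory.
    inserts = 1_200_000      # UNC_CHA_TOR_INSERTS.RRQ_MISS_RDDATA_CXL_EXP_LOCAL
    occupancy = 360_000_000  # UNC_CHA_TOR_OCCUPANCY.RRQ_MISS_RDDATA_CXL_EXP_LOCAL

    avg_clks = occupancy / inserts  # latency in CHA (uncore) clocks
    print(f"average TOR residency: {avg_clks:.0f} uncore clocks")  # 300
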
     {
         "BriefDescription": "TOR Occupancy; All Snoops from Remote",
         "EventCode": "0x36",
index 7f0dc65a55d2ff9b32dcc2ff1adb9aa7286163ed..f937ba0e50e1a980d38523e82199af4ae4fb835e 100644 (file)
         "SampleAfterValue": "200003",
         "UMask": "0x4f"
     },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an instruction cache or TLB miss.",
+        "EventCode": "0x35",
+        "EventName": "MEM_BOUND_STALLS_IFETCH.ALL",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x7f"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
+        "EventCode": "0x35",
+        "EventName": "MEM_BOUND_STALLS_IFETCH.L2_HIT",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an icache or itlb miss which hit in the LLC.",
+        "EventCode": "0x35",
+        "EventName": "MEM_BOUND_STALLS_IFETCH.LLC_HIT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an icache or itlb miss which missed all the caches.",
+        "EventCode": "0x35",
+        "EventName": "MEM_BOUND_STALLS_IFETCH.LLC_MISS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x78"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an L1 demand load miss.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS_LOAD.ALL",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x7f"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS_LOAD.L2_HIT",
+        "PublicDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 cache.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which hit in the LLC.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS_LOAD.LLC_HIT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which missed all the local caches.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS_LOAD.LLC_MISS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x78"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in DRAM",
+        "EventCode": "0xd3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that hit the L1 data cache.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that miss in the L1 data cache.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that hit in the L2 cache.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that miss in the L2 cache.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that hit in the L3 cache.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1c"
+    },
+    {
+        "BriefDescription": "Counts the number of loads that hit in a write combining buffer (WCB), excluding the first load that caused the WCB to allocate.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons:  load buffer, store buffer or RSV full.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.ALL",
+        "SampleAfterValue": "20003",
+        "UMask": "0x7"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
+        "SampleAfterValue": "20003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.RSV",
+        "SampleAfterValue": "20003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
+        "SampleAfterValue": "20003",
+        "UMask": "0x1"
+    },
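
The MEM_BOUND_STALLS_LOAD umasks partition the ALL count (0x7f = 0x1 | 0x6 | 0x78), so the per-level counts can be turned directly into a breakdown of where demand-load stalls were resolved. A sketch with made-up counts:

    # Attribute load-stall cycles to the level that eventually serviced the miss.
    stalls = {
        "L2_HIT": 2_000_000,
        "LLC_HIT": 1_500_000,
        "LLC_MISS": 6_500_000,
    }
    total = sum(stalls.values())  # should track MEM_BOUND_STALLS_LOAD.ALL

    for level, cycles in stalls.items():
        print(f"{level}: {100 * cycles / total:.1f}% of load-stall cycles")
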
     {
         "BriefDescription": "Counts the number of load ops retired.",
         "Data_LA": "1",
         "SampleAfterValue": "1000003",
         "UMask": "0x5"
     },
+    {
+        "BriefDescription": "Counts the number of load uops retired that performed one or more locks",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x21"
+    },
+    {
+        "BriefDescription": "Counts the number of memory uops retired that were splits.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x43"
+    },
+    {
+        "BriefDescription": "Counts the number of retired split load uops.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x41"
+    },
+    {
+        "BriefDescription": "Counts the number of retired split store uops.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x42"
+    },
     {
         "BriefDescription": "Counts the number of  stores uops retired same as MEM_UOPS_RETIRED.ALL_STORES",
         "Data_LA": "1",
         "PEBS": "2",
         "SampleAfterValue": "1000003",
         "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to an icache miss",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ICACHE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x20"
     }
 ]
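
Once a perf binary built with these tables is installed, the new events can be requested by name. A sketch using perf stat's CSV mode from Python (the field layout of "-x," output is simplified here, and the event is assumed to exist on the running machine):

    # Count one of the new grandridge events over a 1s interval.
    import subprocess

    cmd = ["perf", "stat", "-x", ",", "-e", "MEM_BOUND_STALLS_LOAD.ALL",
           "--", "sleep", "1"]
    result = subprocess.run(cmd, capture_output=True, text=True)

    for line in result.stderr.splitlines():  # perf stat reports on stderr
        fields = line.split(",")
        if len(fields) > 2 and fields[2] == "MEM_BOUND_STALLS_LOAD.ALL":
            print("count:", fields[0])
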
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/floating-point.json b/tools/perf/pmu-events/arch/x86/grandridge/floating-point.json
new file mode 100644 (file)
index 0000000..00c9a8a
--- /dev/null
@@ -0,0 +1,68 @@
+[
+    {
+        "BriefDescription": "Counts the number of cycles when any of the floating point dividers are active.",
+        "CounterMask": "1",
+        "EventCode": "0xcd",
+        "EventName": "ARITH.FPDIV_ACTIVE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of all types of floating point operations per uop with all default weighting",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.ALL",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to FP_FLOPS_RETIRED.FP64]",
+        "Deprecated": "1",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.DP",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point operations that produce 32 bit single precision results [This event is alias to FP_FLOPS_RETIRED.SP]",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.FP32",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point operations that produce 64 bit double precision results [This event is alias to FP_FLOPS_RETIRED.DP]",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.FP64",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to FP_FLOPS_RETIRED.FP32]",
+        "Deprecated": "1",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.SP",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.FP_ASSIST",
+        "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.",
+        "SampleAfterValue": "20003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point divide uops retired (x87 and sse, including x87 sqrt).",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.FPDIV",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    }
+]
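
FP_FLOPS_RETIRED.ALL counts floating-point operations per uop with default weighting, so dividing it by wall-clock time gives an achieved-throughput figure. A sketch with assumed numbers:

    # Achieved floating-point throughput from the retired-FLOP count.
    flops_retired = 12_000_000_000  # FP_FLOPS_RETIRED.ALL over the interval
    elapsed_s = 2.0

    print(f"{flops_retired / elapsed_s / 1e9:.1f} GFLOP/s")  # 6.0 GFLOP/s
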
index be8f1c7e195c0eefbfb7784d93dd0d14f624d4bb..356d36aecc81096523d8963869f1f6dd47ce7f92 100644 (file)
@@ -1,4 +1,20 @@
 [
+    {
+        "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "EventCode": "0xe6",
+        "EventName": "BACLEARS.ANY",
+        "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend.  Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
     {
         "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.",
         "EventCode": "0x80",
index 79d8af45100c98e54ab8a74eb08b9bb121b4cf04..e0ce2decc805f20ef4ba1bb64365e903bad77d15 100644 (file)
@@ -1,4 +1,70 @@
 [
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.ANY_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xff"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.L1_BOUND_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xf4"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DL1 miss.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.L1_MISS_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x81"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.OTHER_AT_RET",
+        "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xc0"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.PGWALK_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xa0"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.ST_ADDR_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x84"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "20003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts misaligned loads that are 4K page splits.",
+        "EventCode": "0x13",
+        "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts misaligned stores that are 4K page splits.",
+        "EventCode": "0x13",
+        "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
     {
         "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
         "EventCode": "0xB7",
index 2414f6ff53b053c4feb666662df0b721683919c4..70a9da7e97dfc81b0d006ea071146c9522ae3928 100644 (file)
@@ -1,4 +1,13 @@
 [
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS]",
+        "Deprecated": "1",
+        "EventCode": "0xe4",
+        "EventName": "LBR_INSERTS.ANY",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
     {
         "BriefDescription": "Counts demand data reads that have any type of response.",
         "EventCode": "0xB7",
         "MSRValue": "0x10002",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state.",
+        "EventCode": "0x75",
+        "EventName": "SERIALIZATION.C01_MS_SCB",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
     }
 ]
index 41212957ef2180a2a8d3b55f3e668db960d94819..daa0639bb1cabea27c551fc50de9c286f14e83f8 100644 (file)
@@ -1,4 +1,12 @@
 [
+    {
+        "BriefDescription": "Counts the number of cycles when any of the dividers are active.",
+        "CounterMask": "1",
+        "EventCode": "0xcd",
+        "EventName": "ARITH.DIV_ACTIVE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x3"
+    },
     {
         "BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
         "EventCode": "0xc4",
@@ -7,6 +15,71 @@
         "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires.  All branch type instructions are accounted for.",
         "SampleAfterValue": "200003"
     },
+    {
+        "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e"
+    },
+    {
+        "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe"
+    },
+    {
+        "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xbf"
+    },
+    {
+        "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb"
+    },
+    {
+        "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.IND_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "Counts the number of near CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf9"
+    },
+    {
+        "BriefDescription": "Counts the number of near RET branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7"
+    },
     {
         "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.",
         "EventCode": "0xc5",
         "PublicDescription": "Counts the total number of mispredicted branch instructions retired.  All branch type instructions are accounted for.  Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP.    A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.",
         "SampleAfterValue": "200003"
     },
+    {
+        "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near taken branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7"
+    },
     {
         "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles",
         "EventName": "CPU_CLK_UNHALTED.CORE",
         "PEBS": "1",
         "SampleAfterValue": "2000003"
     },
+    {
+        "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+        "SampleAfterValue": "20003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to a page fault.  Counts both I-Side and D-Side (Loads/Stores) page faults.  A page fault occurs when either the page is not present, or an access violation occurs.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.PAGE_FAULT",
+        "SampleAfterValue": "20003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.SLOW",
+        "SampleAfterValue": "20003",
+        "UMask": "0x6f"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "20003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of Last Branch Record (LBR) entries. Requires LBRs to be enabled and configured in IA32_LBR_CTL. [This event is alias to LBR_INSERTS.ANY]",
+        "EventCode": "0xe4",
+        "EventName": "MISC_RETIRED.LBR_INSERTS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
     {
         "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
         "EventCode": "0x73",
         "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
         "SampleAfterValue": "1000003"
     },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to Fast Nukes such as  Memory Ordering Machine clears and MRN nukes",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to Branch Mispredict",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
     {
         "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls",
         "EventCode": "0x74",
         "EventName": "TOPDOWN_BE_BOUND.ALL",
         "SampleAfterValue": "1000003"
     },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to due to certain allocation restrictions",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stall (scheduler not being able to accept another uop).  This could be caused by RSV full or load/store buffer block.",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC and FPC RAT stalls - which can be due to the FIQ and IEC reservation station stall (integer, FP and SIMD scheduler not being able to accept another uop. )",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to mrbl stall.  A 'marble' refers to a physical register file entry, also known as the physical destination (PDST).",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.REGISTER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to iq/jeu scoreboards or ms scb",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
     {
         "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
         "EventCode": "0x71",
         "EventName": "TOPDOWN_FE_BOUND.ALL",
         "SampleAfterValue": "1000003"
     },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BAClear",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTClear",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ms",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.CISC",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stall",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.DECODE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8d"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x72"
+    },
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to TOPDOWN_FE_BOUND.ITLB_MISS]",
+        "Deprecated": "1",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ITLB",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to itlb miss [This event is alias to TOPDOWN_FE_BOUND.ITLB]",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ITLB_MISS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend that do not categorize into any other common frontend stall",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.OTHER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to predecode wrong",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
     {
         "BriefDescription": "Counts the number of consumed retirement slots.  Similar to UOPS_RETIRED.ALL",
         "EventCode": "0x72",
         "EventName": "TOPDOWN_RETIRING.ALL",
         "PEBS": "1",
         "SampleAfterValue": "1000003"
+    },
+    {
+        "BriefDescription": "Counts the number of uops issued by the front end every cycle.",
+        "EventCode": "0x0e",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PublicDescription": "Counts the number of uops issued by the front end every cycle. When 4-uops are requested and only 2-uops are delivered, the event counts 2.  Uops_issued correlates to the number of ROB entries.  If uop takes 2 ROB slots it counts as 2 uops_issued.",
+        "SampleAfterValue": "1000003"
+    },
+    {
+        "BriefDescription": "Counts the total number of uops retired.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.ALL",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Counts the number of integer divide uops retired.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.IDIV",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS).  This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.MS",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of x87 uops retired, includes those in ms flows",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.X87",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     }
 ]
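
Beyond EventCode and UMask, the only field in the pipeline events above that
changes the hardware programming is CounterMask (ARITH.DIV_ACTIVE sets it to 1,
turning a per-cycle occupancy count into a cycles-active count). In the
architectural IA32_PERFEVTSEL layout the counter mask sits in bits 24-31, so a
sketch of composing the raw config from these fields could look like the
following (the helper name is made up for illustration):

    #include <stdint.h>

    /*
     * Pack the JSON fields into the x86 IA32_PERFEVTSEL-style raw
     * config that PERF_TYPE_RAW expects: event select in bits 0-7,
     * unit mask in bits 8-15, counter mask (CMASK) in bits 24-31.
     */
    static uint64_t raw_config(uint8_t event, uint8_t umask, uint8_t cmask)
    {
            return (uint64_t)event | ((uint64_t)umask << 8) |
                   ((uint64_t)cmask << 24);
    }

    /* ARITH.DIV_ACTIVE: EventCode 0xcd, UMask 0x3, CounterMask 1 */
    uint64_t div_active = raw_config(0xcd, 0x03, 1);  /* 0x10003cd */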
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/uncore-cache.json b/tools/perf/pmu-events/arch/x86/grandridge/uncore-cache.json
new file mode 100644 (file)
index 0000000..74dfd92
--- /dev/null
@@ -0,0 +1,1795 @@
+[
+    {
+        "BriefDescription": "Clockticks for CMS units attached to CHA",
+        "EventCode": "0x01",
+        "EventName": "UNC_CHACMS_CLOCKTICKS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "PublicDescription": "UNC_CHACMS_CLOCKTICKS",
+        "Unit": "CHACMS"
+    },
+    {
+        "BriefDescription": "Number of CHA clock cycles while the event is enabled",
+        "EventCode": "0x01",
+        "EventName": "UNC_CHA_CLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "Clockticks of the uncore caching and home agent (CHA)",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Distress signal assertion for dynamic prefetch throttle (DPT).  Threshold for distress signal assertion reached in TOR or IRQ (immediate cause for triggering).",
+        "EventCode": "0x59",
+        "EventName": "UNC_CHA_DISTRESS_ASSERTED.DPT_ANY",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x3",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Distress signal assertion for dynamic prefetch throttle (DPT).  Threshold for distress signal assertion reached in IRQ (immediate cause for triggering).",
+        "EventCode": "0x59",
+        "EventName": "UNC_CHA_DISTRESS_ASSERTED.DPT_IRQ",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Distress signal assertion for dynamic prefetch throttle (DPT).  Threshold for distress signal assertion reached in TOR (immediate cause for triggering).",
+        "EventCode": "0x59",
+        "EventName": "UNC_CHA_DISTRESS_ASSERTED.DPT_TOR",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts when a normal (Non-Isochronous) full line write is issued from the CHA to the any of the memory controller channels.",
+        "EventCode": "0x5b",
+        "EventName": "UNC_CHA_IMC_WRITES_COUNT.FULL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CHA to iMC Full Line Writes Issued : ISOCH Full Line : Counts the total number of full line writes issued from the HA into the memory controller.",
+        "EventCode": "0x5b",
+        "EventName": "UNC_CHA_IMC_WRITES_COUNT.FULL_PRIORITY",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CHA to iMC Full Line Writes Issued : Partial Non-ISOCH : Counts the total number of full line writes issued from the HA into the memory controller.",
+        "EventCode": "0x5b",
+        "EventName": "UNC_CHA_IMC_WRITES_COUNT.PARTIAL",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CHA to iMC Full Line Writes Issued : ISOCH Partial : Counts the total number of full line writes issued from the HA into the memory controller.",
+        "EventCode": "0x5b",
+        "EventName": "UNC_CHA_IMC_WRITES_COUNT.PARTIAL_PRIORITY",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: CRd Requests",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.CODE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : CRd Requests",
+        "UMask": "0x1bd0ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests and Read Prefetches",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.DATA_RD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x1bc1ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests, Read Prefetches, and Snoops",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.DATA_READ_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Data Reads",
+        "UMask": "0x1fc1ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.DATA_READ_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Demand Data Reads, Core and LLC prefetches",
+        "UMask": "0x841ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests, Read Prefetches, and Snoops which miss the Cache",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.DATA_READ_MISS",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Data Read Misses",
+        "UMask": "0x1fc101",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: All Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCALLY_HOMED_ADDRESS",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Transactions homed locally",
+        "UMask": "0xbdfff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Code Read Requests and Code Read Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_CODE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : CRd Requests",
+        "UMask": "0x19d0ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests and Read Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_DATA_RD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x19c1ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Code Read Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_DMND_CODE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : CRd Requests",
+        "UMask": "0x1850ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_DMND_DATA_RD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x1841ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: RFO Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_DMND_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : RFO Requests",
+        "UMask": "0x1848ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: LLC Prefetch Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_LLC_PF",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x189dff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: All Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_PF",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x199dff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Code Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_PF_CODE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : CRd Requests",
+        "UMask": "0x1910ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_PF_DATA_RD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x1981ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: RFO Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_PF_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : RFO Requests",
+        "UMask": "0x1908ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: RFO Requests and RFO Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : RFO Requests",
+        "UMask": "0x19c8ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: All RFO and RFO Prefetches",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.RFO",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : All RFOs - Demand and Prefetches",
+        "UMask": "0x1bc8ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: RFO Requests and RFO Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.RFO_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Locally HOMed RFOs - Demand and Prefetches",
+        "UMask": "0x9c8ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Writes to Locally Homed Memory (includes writebacks from L1/L2)",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.WRITE_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Writes",
+        "UMask": "0x842ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : All Lines Victimized",
+        "UMask": "0xf",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : IA traffic : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.IA",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : IO traffic : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.IO",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.LOCAL_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Local - All Lines",
+        "UMask": "0x200f",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.LOCAL_E",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Local - Lines in E State",
+        "UMask": "0x2002",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.LOCAL_F",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Local - Lines in F State",
+        "UMask": "0x2008",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.LOCAL_M",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Local - Lines in M State",
+        "UMask": "0x2001",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.LOCAL_S",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Local - Lines in S State",
+        "UMask": "0x2004",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_E",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Lines in E state",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_M",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Lines in M state",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_S",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Lines in S State",
+        "UMask": "0x4",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts when a RFO (the Read for Ownership issued before a  write) request hit a cacheline in the S (Shared) state.",
+        "EventCode": "0x39",
+        "EventName": "UNC_CHA_MISC.RFO_HIT_S",
+        "PerPkg": "1",
+        "PublicDescription": "Cbo Misc : RFO HitS",
+        "UMask": "0x8",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "OSB Snoop Broadcast : Local InvItoE : Count of OSB snoop broadcasts. Counts by 1 per request causing OSB snoops to be broadcast. Does not count all the snoops generated by OSB.",
+        "EventCode": "0x55",
+        "EventName": "UNC_CHA_OSB.LOCAL_INVITOE",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "OSB Snoop Broadcast : Local Rd : Count of OSB snoop broadcasts. Counts by 1 per request causing OSB snoops to be broadcast. Does not count all the snoops generated by OSB.",
+        "EventCode": "0x55",
+        "EventName": "UNC_CHA_OSB.LOCAL_READ",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "OSB Snoop Broadcast : Off : Count of OSB snoop broadcasts. Counts by 1 per request causing OSB snoops to be broadcast. Does not count all the snoops generated by OSB.",
+        "EventCode": "0x55",
+        "EventName": "UNC_CHA_OSB.OFF_PWRHEURISTIC",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "OSB Snoop Broadcast : RFO HitS Snoop Broadcast : Count of OSB snoop broadcasts. Counts by 1 per request causing OSB snoops to be broadcast. Does not count all the snoops generated by OSB.",
+        "EventCode": "0x55",
+        "EventName": "UNC_CHA_OSB.RFO_HITS_SNP_BCAST",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the total number of requests coming from a unit on this socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.INVITOE",
+        "PerPkg": "1",
+        "PublicDescription": "HA Read and Write Requests : InvalItoE",
+        "UMask": "0x30",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the total number of requests coming from a unit on this socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.INVITOE_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts read requests made into this CHA. Reads include all read opcodes (including RFO: the Read for Ownership issued before a  write) .",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.READS",
+        "PerPkg": "1",
+        "PublicDescription": "HA Read and Write Requests : Reads",
+        "UMask": "0x3",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts read requests coming from a unit on this socket made into this CHA. Reads include all read opcodes (including RFO: the Read for Ownership issued before a  write).",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.READS_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts write requests made into the CHA, including streaming, evictions, HitM (Reads from another core to a Modified cacheline), etc.",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.WRITES",
+        "PerPkg": "1",
+        "PublicDescription": "HA Read and Write Requests : Writes",
+        "UMask": "0xc",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts  write requests coming from a unit on this socket made into this CHA, including streaming, evictions, HitM (Reads from another core to a Modified cacheline), etc.",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.WRITES_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR Inserts",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All",
+        "UMask": "0xc001ffff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All locally initiated requests from IA Cores",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from iA Cores",
+        "UMask": "0xc001ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CLFlush events that are initiated from the Core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_CLFLUSH",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CLFlushes issued by iA Cores",
+        "UMask": "0xc8c7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CLFlushOpt events that are initiated from the Core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_CLFLUSHOPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CLFlushOpts issued by iA Cores",
+        "UMask": "0xc8d7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRDs issued by iA Cores",
+        "UMask": "0xc80fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts; Code read prefetch from local IA that misses in the snoop filter",
+        "UMask": "0xc88fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_DRD_OPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opts issued by iA Cores",
+        "UMask": "0xc827ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt_Prefs issued by iA Cores",
+        "UMask": "0xc8a7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All locally initiated requests from IA Cores which hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from iA Cores that Hit the LLC",
+        "UMask": "0xc001fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRds issued by iA Cores that Hit the LLC",
+        "UMask": "0xc80ffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read prefetch from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRd_Prefs issued by iA Cores that hit the LLC",
+        "UMask": "0xc88ffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_DRD_OPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opts issued by iA Cores that hit the LLC",
+        "UMask": "0xc827fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt prefetch from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt_Prefs issued by iA Cores that hit the LLC",
+        "UMask": "0xc8a7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoM requests from local IA cores that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by iA Cores that Hit LLC",
+        "UMask": "0xcc47fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch code read from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefCode issued by iA Cores that hit the LLC",
+        "UMask": "0xcccffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch data read from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefData issued by iA Cores that hit the LLC",
+        "UMask": "0xccd7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch read for ownership from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores that hit the LLC",
+        "UMask": "0xccc7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Hit the LLC",
+        "UMask": "0xc807fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership prefetch from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Hit the LLC",
+        "UMask": "0xc887fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoM events that are initiated from the Core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by iA Cores",
+        "UMask": "0xcc47ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMCacheNear requests from local IA cores",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMCacheNears issued by iA Cores",
+        "UMask": "0xcd47ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch code read from local IA.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefCode issued by iA Cores",
+        "UMask": "0xcccfff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch data read from local IA.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefData issued by iA Cores",
+        "UMask": "0xccd7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores",
+        "UMask": "0xccc7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All locally initiated requests from IA Cores which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from iA Cores that Missed the LLC",
+        "UMask": "0xc001fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRds issued by iA Cores that Missed the LLC",
+        "UMask": "0xc80ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CRDs from local IA cores to locally homed memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRd issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc80efe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRd_Prefs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc88ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CRD Prefetches from local IA cores to locally homed memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRd_Prefs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc88efe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt issued by iA Cores that missed the LLC",
+        "UMask": "0xc827fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Inserts into the TOR from local IA cores which miss the LLC and snoop filter with the opcode DRd_Opt, and which target local memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc826fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt_Prefs issued by iA Cores that missed the LLC",
+        "UMask": "0xc8a7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Inserts into the TOR from local IA cores which miss the LLC and snoop filter with the opcode DRD_PREF_OPT, and target local memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt_Prefs issued by iA Cores that missed the LLC",
+        "UMask": "0xc8a6fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoM requests from local IA cores that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by iA Cores that Missed LLC",
+        "UMask": "0xcc47fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch code read from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefCode issued by iA Cores that missed the LLC",
+        "UMask": "0xcccffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch data read from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefData issued by iA Cores that missed the LLC",
+        "UMask": "0xccd7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores that missed the LLC",
+        "UMask": "0xccc7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA cores to locally homed DDR addresses that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LOCAL_WCILF_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
+        "UMask": "0xc8668601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from local IA cores to locally homed DDR addresses that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LOCAL_WCIL_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
+        "UMask": "0xc86e8601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc807fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc806fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc887fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc886fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UCRDF requests from local IA cores that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_UCRDF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : UCRdFs issued by iA Cores that Missed LLC",
+        "UMask": "0xc877de01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from a local IA core that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WCIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc86ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA core that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WCILF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLF issued by iA Cores that Missed the LLC",
+        "UMask": "0xc867fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA cores to DDR homed addresses which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WCILF_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting DDR that missed the LLC",
+        "UMask": "0xc8678601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from local IA cores to DDR homed addresses which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WCIL_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores targeting DDR that missed the LLC",
+        "UMask": "0xc86f8601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WIL requests from local IA cores that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WiLs issued by iA Cores that Missed LLC",
+        "UMask": "0xc87fde01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by iA Cores",
+        "UMask": "0xc807ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFO_Prefs issued by iA Cores",
+        "UMask": "0xc887ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "SpecItoM events that are initiated from the Core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_SPECITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : SpecItoMs issued by iA Cores",
+        "UMask": "0xcc57ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WbEFtoEs issued by iA Cores.  (Non Modified Write Backs)",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WBEFTOE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc3fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WbEFtoIs issued by iA Cores .  (Non Modified Write Backs)",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WBEFTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc37ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WbMtoEs issued by iA Cores .  (Modified Write Backs)",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WBMTOE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc2fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WbMtoI requests from local IA cores",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WBMTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WbMtoIs issued by iA Cores",
+        "UMask": "0xcc27ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WbStoIs issued by iA Cores .  (Non Modified Write Backs)",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WBSTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc67ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from a local IA core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WCIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores",
+        "UMask": "0xc86fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WCILF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLF issued by iA Cores",
+        "UMask": "0xc867ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR inserts from local IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from IO Devices",
+        "UMask": "0xc001ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CLFlush requests from IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_CLFLUSH",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CLFlushes issued by IO Devices",
+        "UMask": "0xc8c3ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR inserts from local IO devices which hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from IO Devices that hit the LLC",
+        "UMask": "0xc001fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMs from local IO devices which hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc43fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMCacheNears, indicating a partial write request, from IO Devices that hit the LLC",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that hit the LLC",
+        "UMask": "0xcd43fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "PCIRDCURs issued by IO devices which hit the LLC",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : PCIRdCurs issued by IO Devices that hit the LLC",
+        "UMask": "0xc8f3fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "RFOs from local IO devices which hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by IO Devices that hit the LLC",
+        "UMask": "0xc803fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR ItoM inserts from local IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices",
+        "UMask": "0xcc43ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMCacheNears, indicating a partial write request, from IO Devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices",
+        "UMask": "0xcd43ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR inserts from local IO devices which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from IO Devices that missed the LLC",
+        "UMask": "0xc001fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR ItoM inserts from local IO devices which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that missed the LLC",
+        "UMask": "0xcc43fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "UMask": "0xcd43fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "PCIRDCURs issued by IO devices which miss the LLC",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : PCIRdCurs issued by IO Devices that missed the LLC",
+        "UMask": "0xc8f3fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR RFO inserts from local IO devices which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by IO Devices that missed the LLC",
+        "UMask": "0xc803fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "PCIRDCURs issued by IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : PCIRdCurs issued by IO Devices",
+        "UMask": "0xc8f3ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "RFOs from local IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by IO Devices",
+        "UMask": "0xc803ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WBMtoI requests from IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_WBMTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WbMtoIs issued by IO Devices",
+        "UMask": "0xcc23ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Inserts for SF or LLC Evictions",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.LLC_OR_SF_EVICTIONS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR allocation occurred as a result of SF/LLC evictions (came from the ISMQ)",
+        "UMask": "0xc001ff02",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All locally initiated requests",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.LOC_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All from Local iA and IO",
+        "UMask": "0xc000ff05",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All from Local iA",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.LOC_IA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All from Local iA",
+        "UMask": "0xc000ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All from Local IO",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.LOC_IO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All from Local IO",
+        "UMask": "0xc000ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Occupancy for all TOR entries",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All",
+        "UMask": "0xc001ffff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All locally initiated requests from IA Cores",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from iA Cores",
+        "UMask": "0xc001ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CLFlush events that are initiated from the Core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_CLFLUSH",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CLFlushes issued by iA Cores",
+        "UMask": "0xc8c7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CLFlushOpt events that are initiated from the Core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_CLFLUSHOPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CLFlushOpts issued by iA Cores",
+        "UMask": "0xc8d7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRDs issued by iA Cores",
+        "UMask": "0xc80fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy; Code read prefetch from local IA that misses in the snoop filter",
+        "UMask": "0xc88fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Data read opt prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : DRd_Opt_Prefs issued by iA Cores",
+        "UMask": "0xc8a7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All locally initiated requests from IA Cores which hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from iA Cores that Hit the LLC",
+        "UMask": "0xc001fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRds issued by iA Cores that Hit the LLC",
+        "UMask": "0xc80ffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read prefetch from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRd_Prefs issued by iA Cores that hit the LLC",
+        "UMask": "0xc88ffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Data read opt prefetch from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : DRd_Opt_Prefs issued by iA Cores that hit the LLC",
+        "UMask": "0xc8a7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoM requests from local IA cores that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by iA Cores that Hit LLC",
+        "UMask": "0xcc47fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch code read from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefCode issued by iA Cores that hit the LLC",
+        "UMask": "0xcccffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch data read from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefData issued by iA Cores that hit the LLC",
+        "UMask": "0xccd7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch read for ownership from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefRFO issued by iA Cores that hit the LLC",
+        "UMask": "0xccc7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Hit the LLC",
+        "UMask": "0xc807fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership prefetch from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFO_Prefs issued by iA Cores that Hit the LLC",
+        "UMask": "0xc887fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoM events that are initiated from the Core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by iA Cores",
+        "UMask": "0xcc47ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMCacheNear requests from local IA cores",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMCacheNears issued by iA Cores",
+        "UMask": "0xcd47ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch code read from local IA.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefCode issued by iA Cores",
+        "UMask": "0xcccfff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch data read from local IA.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefData issued by iA Cores",
+        "UMask": "0xccd7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefRFO issued by iA Cores",
+        "UMask": "0xccc7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All locally initiated requests from IA Cores which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from iA Cores that Missed the LLC",
+        "UMask": "0xc001fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRds issued by iA Cores that Missed the LLC",
+        "UMask": "0xc80ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CRDs from local IA cores to locally homed memory",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRd issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc80efe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRd_Prefs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc88ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CRD Prefetches from local IA cores to locally homed memory",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD_PREF_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRd_Prefs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc88efe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Data read opt prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : DRd_Opt_Prefs issued by iA Cores that missed the LLC",
+        "UMask": "0xc8a7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoM requests from local IA cores that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by iA Cores that Missed LLC",
+        "UMask": "0xcc47fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch code read from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefCode issued by iA Cores that missed the LLC",
+        "UMask": "0xcccffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch data read from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefData issued by iA Cores that missed the LLC",
+        "UMask": "0xccd7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefRFO issued by iA Cores that missed the LLC",
+        "UMask": "0xccc7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA cores to locally homed DDR addresses that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LOCAL_WCILF_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
+        "UMask": "0xc8668601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from local IA cores to locally homed DDR addresses that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LOCAL_WCIL_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
+        "UMask": "0xc86e8601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc807fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc806fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFO_Prefs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc887fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFO_Prefs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc886fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for UCRDF requests from local IA cores that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_UCRDF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : UCRdFs issued by iA Cores that Missed LLC",
+        "UMask": "0xc877de01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from a local IA core that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WCIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc86ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA core that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WCILF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLF issued by iA Cores that Missed the LLC",
+        "UMask": "0xc867fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA cores to DDR homed addresses which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WCILF_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting DDR that missed the LLC",
+        "UMask": "0xc8678601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from local IA cores to DDR homed addresses which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WCIL_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores targeting DDR that missed the LLC",
+        "UMask": "0xc86f8601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WIL requests from local IA cores that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WiLs issued by iA Cores that Missed LLC",
+        "UMask": "0xc87fde01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by iA Cores",
+        "UMask": "0xc807ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFO_Prefs issued by iA Cores",
+        "UMask": "0xc887ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for SpecItoM events that are initiated from the Core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_SPECITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : SpecItoMs issued by iA Cores",
+        "UMask": "0xcc57ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WbMtoI requests from local IA cores",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_WBMTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WbMtoIs issued by iA Cores",
+        "UMask": "0xcc27ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from a local IA core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_WCIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores",
+        "UMask": "0xc86fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_WCILF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLF issued by iA Cores",
+        "UMask": "0xc867ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR inserts from local IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from IO Devices",
+        "UMask": "0xc001ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CLFlush requests from IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_CLFLUSH",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CLFlushes issued by IO Devices",
+        "UMask": "0xc8c3ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR inserts from local IO devices which hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from IO Devices that hit the LLC",
+        "UMask": "0xc001fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMs from local IO devices which hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc43fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMCacheNears, indicating a partial write request, from IO Devices that hit the LLC",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices that hit the LLC",
+        "UMask": "0xcd43fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for PCIRDCURs issued by IO devices which hit the LLC",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : PCIRdCurs issued by IO Devices that hit the LLC",
+        "UMask": "0xc8f3fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for RFOs from local IO devices which hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by IO Devices that hit the LLC",
+        "UMask": "0xc803fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR ItoM inserts from local IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by IO Devices",
+        "UMask": "0xcc43ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMCacheNears, indicating a partial write request, from IO Devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices",
+        "UMask": "0xcd43ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR inserts from local IO devices which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from IO Devices that missed the LLC",
+        "UMask": "0xc001fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR ItoM inserts from local IO devices which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by IO Devices that missed the LLC",
+        "UMask": "0xcc43fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "UMask": "0xcd43fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for PCIRDCURs issued by IO devices which miss the LLC",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : PCIRdCurs issued by IO Devices that missed the LLC",
+        "UMask": "0xc8f3fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR RFO inserts from local IO devices which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by IO Devices that missed the LLC",
+        "UMask": "0xc803fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for PCIRDCURs issued by IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : PCIRdCurs issued by IO Devices",
+        "UMask": "0xc8f3ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for RFOs from local IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by IO Devices",
+        "UMask": "0xc803ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WBMtoI requests from IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_WBMTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WbMtoIs issued by IO Devices",
+        "UMask": "0xcc23ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All locally initiated requests",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.LOC_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All from Local iA and IO",
+        "UMask": "0xc000ff05",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All from Local iA",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.LOC_IA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All from Local iA",
+        "UMask": "0xc000ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All from Local IO",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.LOC_IO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All from Local IO",
+        "UMask": "0xc000ff04",
+        "Unit": "CHA"
+    }
+]
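
A note on the extended UMask values above, inferred from the data in this
file rather than from any documented layout: for the CHA TOR insert (0x35)
and occupancy (0x36) events, the low byte of the UMask selects the requestor
(0x01 local iA cores, 0x04 local IO devices, 0x05 both, 0x02 ISMQ/evictions)
and the second byte carries the hit/miss filter (0xfd hit, 0xfe miss, 0xff
either). A minimal Python sketch that tallies the events in a local checkout
by those two fields; the path is the one introduced by this diff:

    import json
    from collections import Counter

    # Path added by this series; adjust to your checkout.
    PATH = "tools/perf/pmu-events/arch/x86/grandridge/uncore-cache.json"

    REQUESTOR = {0x01: "iA", 0x02: "ISMQ/evict", 0x04: "IO", 0x05: "iA+IO"}
    HITMISS = {0xfd: "hit", 0xfe: "miss", 0xff: "any"}

    def decode(umask):
        # Low byte = requestor, second byte = hit/miss filter -- an
        # inference from the values in this file, not a documented ABI.
        v = int(umask, 16)
        req = REQUESTOR.get(v & 0xff, hex(v & 0xff))
        flt = HITMISS.get((v >> 8) & 0xff, hex((v >> 8) & 0xff))
        return req, flt

    with open(PATH) as f:
        events = json.load(f)

    tor = [e for e in events if e.get("EventCode") in ("0x35", "0x36")]
    counts = Counter(decode(e["UMask"]) for e in tor)
    for (req, flt), n in sorted(counts.items()):
        print(f"requestor={req:12s} filter={flt:6s} events={n}")
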
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json
new file mode 100644 (file)
index 0000000..9091f8f
--- /dev/null
@@ -0,0 +1,175 @@
+[
+    {
+        "BriefDescription": "Clockticks of the mesh to memory (B2CMI)",
+        "EventCode": "0x01",
+        "EventName": "UNC_B2CMI_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of times B2CMI egress did D2C (direct to core)",
+        "EventCode": "0x16",
+        "EventName": "UNC_B2CMI_DIRECT2CORE_TAKEN",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of times D2C wasn't honoured even though the incoming request had d2c set for non cisgress txn",
+        "EventCode": "0x18",
+        "EventName": "UNC_B2CMI_DIRECT2CORE_TXN_OVERRIDE",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts any read",
+        "EventCode": "0x24",
+        "EventName": "UNC_B2CMI_IMC_READS.ALL",
+        "PerPkg": "1",
+        "UMask": "0x104",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts normal reads issue to CMI",
+        "EventCode": "0x24",
+        "EventName": "UNC_B2CMI_IMC_READS.NORMAL",
+        "PerPkg": "1",
+        "UMask": "0x101",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts reads to 1lm non persistent memory regions",
+        "EventCode": "0x24",
+        "EventName": "UNC_B2CMI_IMC_READS.TO_DDR_AS_MEM",
+        "PerPkg": "1",
+        "UMask": "0x108",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "All Writes - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.ALL",
+        "PerPkg": "1",
+        "UMask": "0x110",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Full Non-ISOCH - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.FULL",
+        "PerPkg": "1",
+        "UMask": "0x101",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Partial Non-ISOCH - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.PARTIAL",
+        "PerPkg": "1",
+        "UMask": "0x102",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "DDR - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.TO_DDR_AS_MEM",
+        "PerPkg": "1",
+        "UMask": "0x120",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Prefetch CAM Inserts : XPT - Ch 0",
+        "EventCode": "0x56",
+        "EventName": "UNC_B2CMI_PREFCAM_INSERTS.CH0_XPT",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Prefetch CAM Inserts : XPT -All Channels",
+        "EventCode": "0x56",
+        "EventName": "UNC_B2CMI_PREFCAM_INSERTS.XPT_ALLCH",
+        "PerPkg": "1",
+        "PublicDescription": "Prefetch CAM Inserts : XPT - All Channels",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Prefetch CAM Occupancy : Channel 0",
+        "EventCode": "0x54",
+        "EventName": "UNC_B2CMI_PREFCAM_OCCUPANCY.CH0",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Tracker Inserts : Channel 0",
+        "EventCode": "0x32",
+        "EventName": "UNC_B2CMI_TRACKER_INSERTS.CH0",
+        "PerPkg": "1",
+        "UMask": "0x104",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Tracker Occupancy : Channel 0",
+        "EventCode": "0x33",
+        "EventName": "UNC_B2CMI_TRACKER_OCCUPANCY.CH0",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Write Tracker Inserts : Channel 0",
+        "EventCode": "0x40",
+        "EventName": "UNC_B2CMI_WR_TRACKER_INSERTS.CH0",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Total Write Cache Occupancy : Mem",
+        "EventCode": "0x0F",
+        "EventName": "UNC_I_CACHE_TOTAL_OCCUPANCY.MEM",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "IRP Clockticks",
+        "EventCode": "0x01",
+        "EventName": "UNC_I_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Inbound read requests received by the IRP and inserted into the FAF queue",
+        "EventCode": "0x18",
+        "EventName": "UNC_I_FAF_INSERTS",
+        "PerPkg": "1",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "FAF occupancy",
+        "EventCode": "0x19",
+        "EventName": "UNC_I_FAF_OCCUPANCY",
+        "PerPkg": "1",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Misc Events - Set 1 : Lost Forward : Snoop pulled away ownership before a write was committed",
+        "EventCode": "0x1F",
+        "EventName": "UNC_I_MISC1.LOST_FWD",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Inbound write (fast path) requests to coherent memory, received by the IRP resulting in write ownership requests issued by IRP to the mesh.",
+        "EventCode": "0x11",
+        "EventName": "UNC_I_TRANSACTIONS.WR_PREF",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "IRP"
+    }
+]
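
These JSON files are not parsed at runtime: the perf build converts
everything under tools/perf/pmu-events/ into C tables via jevents.py, and
the entries then surface as event aliases in perf list and perf stat on a
matching CPU. As a rough pre-build sanity check, a small sketch (assuming a
local checkout; the required fields below are this file's own convention,
not a formal schema) can verify that every entry at least parses:

    import json

    # File added by this diff; adjust the path to your checkout.
    PATH = "tools/perf/pmu-events/arch/x86/grandridge/uncore-interconnect.json"

    with open(PATH) as f:
        events = json.load(f)

    for ev in events:
        # Every entry in this particular file carries these fields;
        # UMask is optional (e.g. the clockticks events omit it).
        for key in ("BriefDescription", "EventCode", "EventName", "PerPkg", "Unit"):
            assert key in ev, (key, ev)
        int(ev["EventCode"], 16)        # hex-encoded event code
        if "UMask" in ev:
            int(ev["UMask"], 16)        # hex-encoded umask
    print(len(events), "events parsed; units:", sorted({e["Unit"] for e in events}))

On a Grand Ridge part with these tables built in, the events should then
show up in perf list as lower-cased aliases, e.g. unc_b2cmi_clockticks.
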
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json b/tools/perf/pmu-events/arch/x86/grandridge/uncore-io.json
new file mode 100644 (file)
index 0000000..c301ef9
--- /dev/null
@@ -0,0 +1,1187 @@
+[
+    {
+        "BriefDescription": "IIO Clockticks",
+        "EventCode": "0x01",
+        "EventName": "UNC_IIO_CLOCKTICKS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff0ff",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010010",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020020",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040040",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080080",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "UMask": "0x7002004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "UMask": "0x7008004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x10",
+        "UMask": "0x7010004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x20",
+        "UMask": "0x7020004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x40",
+        "UMask": "0x7040004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x80",
+        "UMask": "0x7080004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "UMask": "0x7002001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "UMask": "0x7004001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "UMask": "0x7008001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x10",
+        "UMask": "0x7010001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x20",
+        "UMask": "0x7020001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x40",
+        "UMask": "0x7040001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x80",
+        "UMask": "0x7080001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB Hits to a 1G Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.1G_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x10",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB Hits to a 2M Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.2M_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x8",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB Hits to a 4K Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.4K_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x4",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Context cache hits",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x80",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Context cache lookups",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_LOOKUPS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x40",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB lookups first",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.FIRST_LOOKUPS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x1",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB Fills (same as IOTLB miss)",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.MISSES",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOMMU memory access (both low and high priority)",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.NUM_MEM_ACCESSES",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0xc0",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Second Level Page Walk Cache Hit to a 1G page",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.SLPWC_1G_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x4",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Second Level Page Walk Cache Hit to a 256T page",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.SLPWC_256T_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x10",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Second Level Page Walk Cache Hit to a 512G page",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.SLPWC_512G_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x8",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.ABORT",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff080",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.CONFINED_P2P",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff040",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.LOC_P2P",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff020",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.MCAST",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.MEM",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.MSGB",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "All 9 bits of Page Walk Tracker Occupancy",
+        "EventCode": "0x42",
+        "EventName": "UNC_IIO_PWT_OCCUPANCY",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080002",
+        "Unit": "IIO"
+    }
+]
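
The UNC_IIO_DATA_REQ_OF_CPU events above describe each count as a four byte data request, with PortMask selecting the part (one bit per PART0-PART7, 0x0FF for all parts) and UMask carrying the rest of the selection. As a minimal sketch of turning those counts into per-part inbound bandwidth, assuming the four-bytes-per-count reading holds and using invented counter values:

    # Hypothetical sketch: inbound DMA read bandwidth per IIO part from
    # UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PARTn ("Card reading from DRAM").
    # Assumes each count represents a four-byte data request, per the
    # BriefDescription text; the counts and interval below are made up.
    counts = {0: 1.2e9, 1: 0.0, 2: 3.4e8, 3: 0.0}  # per-part event counts
    interval_s = 1.0                               # measurement window, seconds
    for part, n in counts.items():
        print(f"part {part}: {4 * n / 1e9 / interval_s:.2f} GB/s")
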
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/uncore-memory.json b/tools/perf/pmu-events/arch/x86/grandridge/uncore-memory.json
new file mode 100644 (file)
index 0000000..a2405ed
--- /dev/null
@@ -0,0 +1,385 @@
+[
+    {
+        "BriefDescription": "DRAM Activate Count : Counts the number of DRAM Activate commands sent on this channel.  Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS.  One can calculate the number of Page Misses by subtracting the number of Page Miss precharges from the number of Activates.",
+        "EventCode": "0x02",
+        "EventName": "UNC_M_ACT_COUNT.ALL",
+        "PerPkg": "1",
+        "UMask": "0xf7",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Activate Count : Read transaction on Page Empty or Page Miss : Counts the number of DRAM Activate commands sent on this channel.  Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS.  One can calculate the number of Page Misses by subtracting the number of Page Miss precharges from the number of Activates.",
+        "EventCode": "0x02",
+        "EventName": "UNC_M_ACT_COUNT.RD",
+        "PerPkg": "1",
+        "UMask": "0xf1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Activate Count : Underfill Read transaction on Page Empty or Page Miss : Counts the number of DRAM Activate commands sent on this channel.  Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS.  One can calculate the number of Page Misses by subtracting the number of Page Miss precharges from the number of Activates.",
+        "EventCode": "0x02",
+        "EventName": "UNC_M_ACT_COUNT.UFILL",
+        "PerPkg": "1",
+        "UMask": "0xf4",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Activate Count : Write transaction on Page Empty or Page Miss : Counts the number of DRAM Activate commands sent on this channel.  Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS.  One can calculate the number of Page Misses by subtracting the number of Page Miss precharges from the number of Activates.",
+        "EventCode": "0x02",
+        "EventName": "UNC_M_ACT_COUNT.WR",
+        "PerPkg": "1",
+        "UMask": "0xf2",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0, all CAS operations",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.ALL",
+        "PerPkg": "1",
+        "UMask": "0xff",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0, all reads",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.RD",
+        "PerPkg": "1",
+        "UMask": "0xcf",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0 regular reads",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.RD_REG",
+        "PerPkg": "1",
+        "UMask": "0xc1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0 underfill reads",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.RD_UNDERFILL",
+        "PerPkg": "1",
+        "UMask": "0xc4",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0, all writes",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.WR",
+        "PerPkg": "1",
+        "UMask": "0xf0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0 regular writes",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.WR_NONPRE",
+        "PerPkg": "1",
+        "UMask": "0xd0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0 auto-precharge writes",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.WR_PRE",
+        "PerPkg": "1",
+        "UMask": "0xe0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1, all CAS operations",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.ALL",
+        "PerPkg": "1",
+        "UMask": "0xff",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1, all reads",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.RD",
+        "PerPkg": "1",
+        "UMask": "0xcf",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1 regular reads",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.RD_REG",
+        "PerPkg": "1",
+        "UMask": "0xc1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1 underfill reads",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.RD_UNDERFILL",
+        "PerPkg": "1",
+        "UMask": "0xc4",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1, all writes",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.WR",
+        "PerPkg": "1",
+        "UMask": "0xf0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1 regular writes",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.WR_NONPRE",
+        "PerPkg": "1",
+        "UMask": "0xd0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1 auto-precharge writes",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.WR_PRE",
+        "PerPkg": "1",
+        "UMask": "0xe0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Number of DRAM DCLK clock cycles while the event is enabled",
+        "EventCode": "0x01",
+        "EventName": "UNC_M_CLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "DRAM Clockticks",
+        "UMask": "0x1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Number of DRAM HCLK clock cycles while the event is enabled",
+        "EventCode": "0x01",
+        "EventName": "UNC_M_HCLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "DRAM Clockticks",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Precharge commands. : Counts the number of DRAM Precharge commands sent on this channel.",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_PRE_COUNT.ALL",
+        "PerPkg": "1",
+        "UMask": "0xff",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Precharge commands. : Precharge due to (?) : Counts the number of DRAM Precharge commands sent on this channel.",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_PRE_COUNT.PGT",
+        "PerPkg": "1",
+        "UMask": "0xf8",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Precharge commands. : Counts the number of DRAM Precharge commands sent on this channel.",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_PRE_COUNT.RD",
+        "PerPkg": "1",
+        "UMask": "0xf1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Precharge commands. : Counts the number of DRAM Precharge commands sent on this channel.",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_PRE_COUNT.UFILL",
+        "PerPkg": "1",
+        "UMask": "0xf4",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Precharge commands. : Counts the number of DRAM Precharge commands sent on this channel.",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_PRE_COUNT.WR",
+        "PerPkg": "1",
+        "UMask": "0xf2",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read buffer inserts on subchannel 0",
+        "EventCode": "0x17",
+        "EventName": "UNC_M_RDB_INSERTS.SCH0",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read buffer inserts on subchannel 1",
+        "EventCode": "0x17",
+        "EventName": "UNC_M_RDB_INSERTS.SCH1",
+        "PerPkg": "1",
+        "UMask": "0x80",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read buffer occupancy on subchannel 0",
+        "EventCode": "0x1a",
+        "EventName": "UNC_M_RDB_OCCUPANCY_SCH0",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read buffer occupancy on subchannel 1",
+        "EventCode": "0x1b",
+        "EventName": "UNC_M_RDB_OCCUPANCY_SCH1",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue Allocations : Counts the number of allocations into the Read Pending Queue.  This queue is used to schedule reads out to the memory controller and to track the requests.  Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC.  They deallocate after the CAS command has been issued to memory.  This includes both ISOCH and non-ISOCH requests.",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.PCH0",
+        "PerPkg": "1",
+        "UMask": "0x50",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue Allocations : Counts the number of allocations into the Read Pending Queue.  This queue is used to schedule reads out to the memory controller and to track the requests.  Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC.  They deallocate after the CAS command has been issued to memory.  This includes both ISOCH and non-ISOCH requests.",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.PCH1",
+        "PerPkg": "1",
+        "UMask": "0xa0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue inserts for subchannel 0, pseudochannel 0",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.SCH0_PCH0",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue inserts for subchannel 0, pseudochannel 1",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.SCH0_PCH1",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue inserts for subchannel 1, pseudochannel 0",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.SCH1_PCH0",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue inserts for subchannel 1, pseudochannel 1",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.SCH1_PCH1",
+        "PerPkg": "1",
+        "UMask": "0x80",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read pending queue occupancy for subchannel 0, pseudochannel 0",
+        "EventCode": "0x80",
+        "EventName": "UNC_M_RPQ_OCCUPANCY_SCH0_PCH0",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read pending queue occupancy for subchannel 0, pseudochannel 1",
+        "EventCode": "0x81",
+        "EventName": "UNC_M_RPQ_OCCUPANCY_SCH0_PCH1",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read pending queue occupancy for subchannel 1, pseudochannel 0",
+        "EventCode": "0x82",
+        "EventName": "UNC_M_RPQ_OCCUPANCY_SCH1_PCH0",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read pending queue occupancy for subchannel 1, pseudochannel 1",
+        "EventCode": "0x83",
+        "EventName": "UNC_M_RPQ_OCCUPANCY_SCH1_PCH1",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue Allocations",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.PCH0",
+        "PerPkg": "1",
+        "UMask": "0x50",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue Allocations",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.PCH1",
+        "PerPkg": "1",
+        "UMask": "0xa0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue inserts for subchannel 0, pseudochannel 0",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.SCH0_PCH0",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue inserts for subchannel 0, pseudochannel 1",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.SCH0_PCH1",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue inserts for subchannel 1, pseudochannel 0",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.SCH1_PCH0",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue inserts for subchannel 1, pseudochannel 1",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.SCH1_PCH1",
+        "PerPkg": "1",
+        "UMask": "0x80",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write pending queue occupancy for subchannel 0, pseudochannel 0",
+        "EventCode": "0x84",
+        "EventName": "UNC_M_WPQ_OCCUPANCY_SCH0_PCH0",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write pending queue occupancy for subchannel 0, pseudochannel 1",
+        "EventCode": "0x85",
+        "EventName": "UNC_M_WPQ_OCCUPANCY_SCH0_PCH1",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write pending queue occupancy for subchannel 1, pseudochannel 0",
+        "EventCode": "0x86",
+        "EventName": "UNC_M_WPQ_OCCUPANCY_SCH1_PCH0",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write pending queue occupancy for subchannel 1, pseudochannel 1",
+        "EventCode": "0x87",
+        "EventName": "UNC_M_WPQ_OCCUPANCY_SCH1_PCH1",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    }
+]
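
Two derived statistics fall directly out of the IMC events above: the ACT_COUNT description gives page misses as activates minus page-miss precharges, and the RPQ insert/occupancy pairs support a Little's-law estimate of read-queue residency. A minimal sketch of the latter two, assuming 64 bytes transferred per CAS (the usual cache-line size) and using invented counter values:

    # Hypothetical sketch from the IMC events above; all counts are made up.
    # Read bandwidth: 64 bytes per CAS, summed over both subchannels.
    cas_rd = 5.0e9 + 4.8e9        # UNC_M_CAS_COUNT_SCH0.RD + UNC_M_CAS_COUNT_SCH1.RD
    interval_s = 1.0              # measurement window, seconds
    print(f"read bw ~{64 * cas_rd / 1e9 / interval_s:.0f} GB/s")
    # Little's law: occupancy (summed once per cycle) over inserts gives the
    # average residency of a read request in the RPQ, in DCLK cycles.
    rpq_occupancy, rpq_inserts = 2.4e10, 1.0e9
    print(f"avg RPQ residency ~{rpq_occupancy / rpq_inserts:.0f} DCLKs")
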
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/uncore-power.json b/tools/perf/pmu-events/arch/x86/grandridge/uncore-power.json
new file mode 100644 (file)
index 0000000..e3a6616
--- /dev/null
@@ -0,0 +1,10 @@
+[
+    {
+        "BriefDescription": "PCU Clockticks",
+        "EventCode": "0x01",
+        "EventName": "UNC_P_CLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "PCU Clockticks:  The PCU runs off a fixed 1 GHz clock.  This event counts the number of pclk cycles measured while the counter was enabled.  The pclk, like the Memory Controller's dclk, counts at a constant rate making it a good measure of actual wall time.",
+        "Unit": "PCU"
+    }
+]
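
Because the PublicDescription pins the pclk to a fixed 1 GHz, UNC_P_CLOCKTICKS doubles as a wall-clock estimate, much like the memory controller's dclk it is compared to. A one-line sketch with an invented reading:

    # UNC_P_CLOCKTICKS runs at a fixed 1 GHz per the description above,
    # so count / 1e9 approximates elapsed wall time in seconds.
    pcu_clockticks = 2.5e9  # hypothetical counter value
    print(f"~{pcu_clockticks / 1e9:.2f} s elapsed")
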
index bd5f2b634c98c07cf48c0984d73a191f57674c06..371974c6d6c3ae9eeea0d2523b58d0d10bc07ffe 100644 (file)
@@ -1,18 +1,94 @@
 [
     {
-        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 1G page.",
+        "BriefDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses.",
         "EventCode": "0x08",
         "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
-        "SampleAfterValue": "1000003",
+        "SampleAfterValue": "200003",
         "UMask": "0xe"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 2M or 4M page.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages. Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 4K page.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages. Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks outstanding for Loads (demand or SW prefetch) in PMH every cycle.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts the number of page walks outstanding for Loads (demand or SW prefetch) in PMH every cycle.  A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of first level TLB misses but second level hits due to stores that did not start a page walk. Accounts for all pages sizes. Will result in a DTLB write from STLB.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
     {
         "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 1G page.",
         "EventCode": "0x49",
         "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
-        "SampleAfterValue": "1000003",
+        "SampleAfterValue": "2000003",
         "UMask": "0xe"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 2M or 4M page.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 4K page.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for stores every cycle.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for stores every cycle. A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of first level TLB misses but second level hits due to an instruction fetch that did not start a page walk. Account for all pages sizes. Will result in an ITLB write from STLB.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
     {
         "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
         "EventCode": "0x85",
         "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size.  Includes page walks that page fault.",
         "SampleAfterValue": "200003",
         "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to a 2M or 4M page.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to a 4K page.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks outstanding for iside in PMH every cycle.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts the number of page walks outstanding for iside in PMH every cycle.  A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.  Walks could be counted by edge detecting on this event, but would count restarted suspended walks.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.DTLB_MISS_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x90"
     }
 ]
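
The WALK_PENDING events above count outstanding walks in the PMH each cycle, so dividing by the matching WALK_COMPLETED count approximates average page-walk duration; the ITLB_MISSES.WALK_PENDING text itself warns that restarted walks can inflate walk counts, so treat this as a rough estimate. A minimal sketch with invented counts:

    # Rough average page-walk duration in cycles: outstanding-walk cycle
    # count (WALK_PENDING) over completed walks (WALK_COMPLETED). Only
    # approximate when walks overlap; the counts below are made up.
    walk_pending, walk_completed = 8.0e8, 4.0e6
    print(f"avg walk ~{walk_pending / walk_completed:.0f} cycles")
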
index 79d89c26367791d0f76c8e70bced30d6d394a9e9..5631018ed388037371150621d2fadc117a1a3cd0 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
+        "MetricExpr": "66 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
         "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
         "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * BR_MISP_EXEC.INDIRECT)",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
         "MetricName": "tma_info_core_coreipc"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
         "MetricExpr": "(UOPS_EXECUTED.CORE / 2 / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) if #SMT_on else UOPS_EXECUTED.CORE / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@))",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
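   The CacheMisses -> CacheHits regrouping above does not change the
   arithmetic; the *mpki metrics remain misses per thousand retired
   instructions. A sketch with hypothetical counts:

       # Sketch of the *mpki metrics (hypothetical counts).
       def mpki(miss_events, inst_retired):
           return 1e3 * miss_events / inst_retired

       # e.g. tma_info_memory_l1mpki:
       print(mpki(2.0e7, 5.0e9))  # 4.0 L1 misses per kilo instruction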
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
+    },
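   The relocated fill-bandwidth metrics all follow the same shape: each
   fill event moves one 64-byte cache line, so bandwidth is lines times
   64 bytes over wall time. A sketch with hypothetical counts:

       # Sketch of the *_cache_fill_bw metrics (hypothetical counts).
       def fill_bw_gbps(fill_events, duration_time_s):
           return 64 * fill_events / 1e9 / duration_time_s

       # e.g. tma_info_memory_l2_cache_fill_bw over a 1-second window:
       print(fill_bw_gbps(1.5e8, 1.0))  # 9.6 GB/s of L2 fills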
     {
         "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "Mem",
         "MetricName": "tma_info_memory_l3mpki"
     },
-    {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
-    },
-    {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
-    },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
-    },
-    {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
-    },
-    {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "0",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
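   The two occupancy-based metrics above divide the same accumulator two
   ways: L1D_PEND_MISS.PENDING sums the number of outstanding L1D misses
   each cycle, so dividing by completed misses yields average latency and
   dividing by miss-active cycles yields average parallelism. A sketch
   with hypothetical counts:

       # Sketch of tma_info_memory_load_miss_real_latency and
       # tma_info_memory_mlp (hypothetical counts).
       def load_miss_real_latency(pending, l1_miss, hit_lfb):
           return pending / (l1_miss + hit_lfb)

       def mlp(pending, pending_cycles):
           return pending / pending_cycles

       print(load_miss_real_latency(6.0e9, 1.8e8, 2.0e7))  # 30 core cycles
       print(mlp(6.0e9, 1.5e9))                            # 4.0 misses in flight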
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
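   The system summary metrics above are plain ratios of free-running
   counters; a sketch with hypothetical totals (the 8 below stands in for
   perf's #num_cpus_online literal):

       # Sketch of the tma_info_system_* summary metrics (hypothetical counts).
       def core_frequency_ghz(turbo_utilization, tsc, duration_time_s):
           return turbo_utilization * tsc / 1e9 / duration_time_s

       def cpu_utilization(ref_tsc, tsc):
           return ref_tsc / tsc

       util = cpu_utilization(ref_tsc=1.2e9, tsc=2.4e9)  # 0.5
       print(core_frequency_ghz(1.25, 2.4e9, 1.0))       # 3.0 GHz
       print(8 * util)  # tma_info_system_cpus_utilized on 8 CPUs: 4.0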
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
         "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
     },
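   tma_info_system_dram_bw_use likewise converts uncore request counts to
   bytes, one 64-byte line per tracked request; the expression's /1e6 and
   /1e3 factors multiply out to the usual /1e9. A sketch with hypothetical
   counts:

       # Sketch of tma_info_system_dram_bw_use (hypothetical counts).
       def dram_bw_use_gbps(trk_requests, coh_trk_requests, duration_time_s):
           return 64 * (trk_requests + coh_trk_requests) / 1e9 / duration_time_s

       print(dram_bw_use_gbps(1.0e8, 2.5e7, 1.0))  # 8.0 GB/s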
         "MetricName": "tma_info_system_kernel_utilization",
         "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
     },
-    {
-        "BriefDescription": "Average number of parallel requests to external memory",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_parallel_requests",
-        "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
-    },
-    {
-        "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_request_latency"
-    },
     {
         "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
         "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: ITLB_MISSES.WALK_COMPLETED",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_UOPS_RETIRED.L1_HIT_PS;MEM_LOAD_UOPS_RETIRED.HIT_LFB_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_SMT",
         "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.L3_MISS))) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
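   The bandwidth-bound heuristic above counts cycles in which six or more
   demand data reads are outstanding (the cmask=6 qualifier) and reports
   them as a fraction of all thread cycles. A sketch under that reading,
   with hypothetical counts:

       # Sketch of tma_mem_bandwidth (hypothetical counts).
       def tma_mem_bandwidth(cycles_ge6_outstanding, thread_clks):
           return min(thread_clks, cycles_ge6_outstanding) / thread_clks

       print(tma_mem_bandwidth(3.0e8, 1.0e9))  # 0.30 -> above the 0.2 threshold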
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_0",
         "MetricThreshold": "tma_port_0 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED_PORT.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED_PORT.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_1",
         "MetricThreshold": "tma_port_1 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_1. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_1. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_5",
         "MetricThreshold": "tma_port_5 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU). Sample with: UOPS_DISPATCHED.PORT_5. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU). Sample with: UOPS_DISPATCHED.PORT_5. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_2",
         "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).  Loop Vectorization -most compilers feature auto-Vectorization options today- reduces pressure on the execution ports as multiple elements are calculated with same uop. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6",
+        "PublicDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).  Loop Vectorization -most compilers feature auto-Vectorization options today- reduces pressure on the execution ports as multiple elements are calculated with same uop. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_store_op_utilization",
         "MetricThreshold": "tma_store_op_utilization > 0.6",
         "ScaleUnit": "100%"
-    },
-    {
-        "BriefDescription": "This metric serves as an approximation of legacy x87 usage",
-        "MetricExpr": "INST_RETIRED.X87 * tma_info_thread_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group",
-        "MetricName": "tma_x87_use",
-        "MetricThreshold": "tma_x87_use > 0.1",
-        "PublicDescription": "This metric serves as an approximation of legacy x87 usage. It accounts for instructions beyond X87 FP arithmetic operations; hence may be used as a thermometer to avoid X87 high usage and preferably upgrade to modern ISA. See Tip under Tuning Hint.",
-        "ScaleUnit": "100%"
     }
 ]
index 2fc25e22a42a1fa66fae42d5eb0889190c6042de..6ba0ea6e3fa6657f1f4df0cdada1f5f878ccc6ce 100644
         "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
         "EventCode": "0xc9",
         "EventName": "RTM_RETIRED.ABORTED",
-        "PEBS": "1",
+        "PEBS": "2",
         "SampleAfterValue": "2000003",
         "UMask": "0x4"
     },
index f6a0258e3241236b97c48a6098503f40bd12e5dd..8c808347f6da4e8382af6a780730636fadcdb914 100644
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -24,7 +24,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -94,6 +96,7 @@
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
index 5f451948c8934e2fcac34fbeeec89aae7fd45d18..21e2cb5e31783d8172a9f80c29a3002fcb655feb 100644
         "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
+        "MetricExpr": "66 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
         "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
         "MetricExpr": "ICACHE.IFDATA_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * BR_MISP_EXEC.INDIRECT)",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
         "MetricName": "tma_info_core_coreipc"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
         "MetricExpr": "(UOPS_EXECUTED.CORE / 2 / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) if #SMT_on else UOPS_EXECUTED.CORE / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@))",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "Average Parallel L2 cache miss data reads",
+        "MetricExpr": "tma_info_memory_latency_data_l2_mlp",
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_data_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
-        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
-        "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
-        "MetricName": "tma_info_memory_l3mpki"
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_l3mpki"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "MetricExpr": "tma_info_memory_load_l2_miss_latency",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "MetricExpr": "tma_info_memory_load_l2_mlp",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "BriefDescription": "Average Latency for L2 cache miss demand Loads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_miss_latency",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "BriefDescription": "Average Parallel L2 cache miss demand Loads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "0",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "tma_info_memory_tlb_page_walks_utilization",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_page_walks_utilization",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
         "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
     },
         "MetricGroup": "Power",
         "MetricName": "tma_info_system_turbo_utilization"
     },
+    {
+        "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+        "MetricExpr": "tma_info_system_socket_clks / 1e9 / duration_time",
+        "MetricGroup": "SoC",
+        "MetricName": "tma_info_system_uncore_frequency"
+    },
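   The new uncore frequency metric is the same ticks-over-time pattern at
   SoC scope; socket_clks below stands in for tma_info_system_socket_clks:

       # Sketch of tma_info_system_uncore_frequency (hypothetical counts).
       def uncore_frequency_ghz(socket_clks, duration_time_s):
           return socket_clks / 1e9 / duration_time_s

       print(uncore_frequency_ghz(2.7e9, 1.0))  # 2.7 GHz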
     {
         "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "(14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: ITLB_MISSES.WALK_COMPLETED",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_UOPS_RETIRED.L1_HIT_PS;MEM_LOAD_UOPS_RETIRED.HIT_LFB_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_SMT",
         "MetricExpr": "MEM_LOAD_UOPS_RETIRED.L3_HIT / (MEM_LOAD_UOPS_RETIRED.L3_HIT + 7 * MEM_LOAD_UOPS_RETIRED.L3_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "41 * (MEM_LOAD_UOPS_RETIRED.L3_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
         "MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_local_dram",
-        "MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_local_mem",
+        "MetricThreshold": "tma_local_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory. Caching will improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM_PS",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
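
MetricThreshold strings like the one above are nested so that a child node is only flagged when its parent nodes are also significant. A small sketch of how that condition evaluates, with made-up metric values:

    # Evaluating the nested tma_mem_bandwidth threshold above; the metric
    # values are made up for illustration.
    m = {"tma_mem_bandwidth": 0.25, "tma_dram_bound": 0.15,
         "tma_memory_bound": 0.30, "tma_backend_bound": 0.40}
    flagged = (m["tma_mem_bandwidth"] > 0.2
               and m["tma_dram_bound"] > 0.1
               and m["tma_memory_bound"] > 0.2
               and m["tma_backend_bound"] > 0.2)
    print("tma_mem_bandwidth flagged:", flagged)  # True for these values
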
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_0",
         "MetricThreshold": "tma_port_0 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED_PORT.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED_PORT.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_1",
         "MetricThreshold": "tma_port_1 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_1. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_1. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_5",
         "MetricThreshold": "tma_port_5 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU). Sample with: UOPS_DISPATCHED.PORT_5. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU). Sample with: UOPS_DISPATCHED.PORT_5. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_2",
         "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).  Loop Vectorization -most compilers feature auto-Vectorization options today- reduces pressure on the execution ports as multiple elements are calculated with same uop. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6",
+        "PublicDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).  Loop Vectorization -most compilers feature auto-Vectorization options today- reduces pressure on the execution ports as multiple elements are calculated with same uop. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "310 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
         "MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_remote_dram",
-        "MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_remote_mem",
+        "MetricThreshold": "tma_remote_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory. This is caused often due to non-optimal NUMA allocations. #link to NUMA article. Sample with: MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM_PS",
         "ScaleUnit": "100%"
     },
         "MetricThreshold": "tma_store_op_utilization > 0.6",
         "ScaleUnit": "100%"
     },
-    {
-        "BriefDescription": "This metric serves as an approximation of legacy x87 usage",
-        "MetricExpr": "INST_RETIRED.X87 * tma_info_thread_uoppi / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Compute;TopdownL4;tma_L4_group;tma_fp_arith_group",
-        "MetricName": "tma_x87_use",
-        "MetricThreshold": "tma_x87_use > 0.1",
-        "PublicDescription": "This metric serves as an approximation of legacy x87 usage. It accounts for instructions beyond X87 FP arithmetic operations; hence may be used as a thermometer to avoid X87 high usage and preferably upgrade to modern ISA. See Tip under Tuning Hint.",
-        "ScaleUnit": "100%"
-    },
     {
         "BriefDescription": "Uncore operating frequency in GHz",
         "MetricExpr": "UNC_C_CLOCKTICKS / (#num_cores / #num_packages * #num_packages) / 1e9 / duration_time",
index f6a0258e3241236b97c48a6098503f40bd12e5dd..8c808347f6da4e8382af6a780730636fadcdb914 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -24,7 +24,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -94,6 +96,7 @@
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
index daebf1050acbfc07002177d82b396e4ce85ae955..c391325ee36b0c305d45024ee2d038068be8b88a 100644 (file)
         "BriefDescription": "Number of cores in C-State; C0 and C1",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+        "Filter": "occ_sel=1",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
         "BriefDescription": "Number of cores in C-State; C3",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+        "Filter": "occ_sel=2",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
         "BriefDescription": "Number of cores in C-State; C6 and C7",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+        "Filter": "occ_sel=3",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
index b43a6c6d8b7fec12c9eddf6a32586eb33f45c202..f67cc73779f8e5b0b5ecce531fe7102bb0e95fef 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_core_clks)",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * ASSISTS.ANY / tma_info_thread_slots",
+        "MetricExpr": "34 * ASSISTS.ANY / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
     {
         "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
         "DefaultMetricgroupName": "TopdownL1",
-        "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_thread_slots",
+        "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / tma_info_thread_slots",
         "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
         "MetricName": "tma_backend_bound",
         "MetricThreshold": "tma_backend_bound > 0.2",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions.",
         "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_branch_instructions",
         "MetricThreshold": "tma_branch_instructions > 0.1 & tma_light_operations > 0.6",
         "ScaleUnit": "100%"
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(29 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 23.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(29 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 23.5 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_contested_accesses",
         "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "23.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "23.5 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_data_sharing",
         "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
         "MetricName": "tma_decoder0_alone",
-        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
+        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
         "MetricName": "tma_dtlb_load",
         "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
         "MetricName": "tma_dtlb_store",
         "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
-        "MetricExpr": "32.5 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
+        "MetricExpr": "32.5 * tma_info_system_core_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
         "MetricName": "tma_false_sharing",
         "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
         "MetricName": "tma_fb_full",
         "MetricThreshold": "tma_fb_full > 0.3",
-        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "PublicDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired). Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain and FMA double-counting.",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists",
+        "MetricExpr": "34 * ASSISTS.FP / tma_info_thread_slots",
+        "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
+        "MetricName": "tma_fp_assists",
+        "MetricThreshold": "tma_fp_assists > 0.1",
+        "PublicDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists. FP Assist may apply when working with very small floating point values (so-called Denormals).",
+        "ScaleUnit": "100%"
+    },
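
FP assists typically fire on subnormal (denormal) operands, as the description notes. Python will not expose the microcode assist itself, but a quick illustration of the subnormal range involved:

    import sys

    tiny = sys.float_info.min      # smallest normal double, ~2.23e-308
    subnormal = tiny / 2**10       # below the normal range: a denormal value
    print(subnormal, subnormal < tiny)  # still nonzero, smaller than any normal
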
     {
         "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
         "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_thread_slots)",
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
-        "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "tma_info_bottleneck_mispredictions * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES / 100",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
         "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
         "MetricName": "tma_info_bad_spec_ipmispredict",
         "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
     },
+    {
+        "BriefDescription": "Speculative to Retired ratio of all clears (covering mispredicts and nukes)",
+        "MetricExpr": "INT_MISC.CLEARS_COUNT / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "tma_info_bad_spec_spec_clears_ratio"
+    },
     {
         "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
         "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
     },
+    {
+        "BriefDescription": "Total pipeline cost of \"useful operations\" - the baseline operations not covered by Branching_Overhead nor Irregular_Overhead.",
+        "MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Ret",
+        "MetricName": "tma_info_bottleneck_base_non_br",
+        "MetricThreshold": "tma_info_bottleneck_base_non_br > 20"
+    },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
-        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+        "MetricGroup": "BigFootprint;Fed;Frontend;IcMiss;MemoryTLB",
         "MetricName": "tma_info_bottleneck_big_code",
-        "MetricThreshold": "tma_info_bottleneck_big_code > 20",
-        "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
+        "MetricThreshold": "tma_info_bottleneck_big_code > 20"
     },
     {
         "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
-        "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
-        "MetricGroup": "Ret;tma_issueBC",
+        "MetricExpr": "100 * ((BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots)",
+        "MetricGroup": "Ret",
         "MetricName": "tma_info_bottleneck_branching_overhead",
-        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
-        "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
+        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 5"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+        "MetricName": "tma_info_bottleneck_cache_memory_bandwidth",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_bandwidth > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+        "MetricName": "tma_info_bottleneck_cache_memory_latency",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_latency > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
+    },
+    {
+        "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+        "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+        "MetricGroup": "Cor;tma_issueComp",
+        "MetricName": "tma_info_bottleneck_compute_bound_est",
+        "MetricThreshold": "tma_info_bottleneck_compute_bound_est > 20",
+        "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
     },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+        "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
         "MetricGroup": "Fed;FetchBW;Frontend",
         "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
         "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
     },
     {
-        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
-        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
-        "MetricName": "tma_info_bottleneck_memory_bandwidth",
-        "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
-        "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+        "BriefDescription": "Total pipeline cost of irregular execution (e.g",
+        "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Bad;Cor;Ret;tma_issueMS",
+        "MetricName": "tma_info_bottleneck_irregular_overhead",
+        "MetricThreshold": "tma_info_bottleneck_irregular_overhead > 10",
+        "PublicDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments). Related metrics: tma_microcode_sequencer, tma_ms_switches"
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
         "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
         "MetricName": "tma_info_bottleneck_memory_data_tlbs",
         "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
-        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_synchronization"
     },
     {
-        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
-        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
-        "MetricName": "tma_info_bottleneck_memory_latency",
-        "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
-        "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+        "BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+        "MetricGroup": "Mem;Offcore;tma_issueTLB",
+        "MetricName": "tma_info_bottleneck_memory_synchronization",
+        "MetricThreshold": "tma_info_bottleneck_memory_synchronization > 10",
+        "PublicDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs"
     },
     {
         "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+        "MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bottleneck_mispredictions",
         "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
         "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
     },
+    {
+        "BriefDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class)",
+        "MetricExpr": "100 - (tma_info_bottleneck_big_code + tma_info_bottleneck_instruction_fetch_bw + tma_info_bottleneck_mispredictions + tma_info_bottleneck_cache_memory_bandwidth + tma_info_bottleneck_cache_memory_latency + tma_info_bottleneck_memory_data_tlbs + tma_info_bottleneck_memory_synchronization + tma_info_bottleneck_compute_bound_est + tma_info_bottleneck_irregular_overhead + tma_info_bottleneck_branching_overhead + tma_info_bottleneck_base_non_br)",
+        "MetricGroup": "Cor;Offcore",
+        "MetricName": "tma_info_bottleneck_other_bottlenecks",
+        "MetricThreshold": "tma_info_bottleneck_other_bottlenecks > 20",
+        "PublicDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class). Examples include data-dependencies (Core Bound when Low ILP) and other unlisted memory-related stalls."
+    },
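
The Info.Bottlenecks metrics are designed to partition total pipeline cost, which is why this new node is simply the remainder from 100. A sketch with hypothetical percentages:

    # The bottleneck classes are meant to sum to roughly 100%; "other" is the
    # remainder. All percentages are hypothetical.
    named = {"big_code": 8, "instruction_fetch_bw": 12, "mispredictions": 15,
             "cache_memory_bandwidth": 10, "cache_memory_latency": 9,
             "memory_data_tlbs": 3, "memory_synchronization": 2,
             "compute_bound_est": 11, "irregular_overhead": 4,
             "branching_overhead": 5, "base_non_br": 16}
    other = 100 - sum(named.values())
    print(f"tma_info_bottleneck_other_bottlenecks = {other}%")  # 5% here
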
     {
         "BriefDescription": "Fraction of branches that are CALL or RET",
         "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
     },
     {
         "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
-        "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricExpr": "(CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else tma_info_thread_clks)",
         "MetricGroup": "SMT",
         "MetricName": "tma_info_core_core_clks"
     },
         "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
         "MetricName": "tma_info_core_coreipc"
     },
+    {
+        "BriefDescription": "uops Executed per Cycle",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / tma_info_thread_clks",
+        "MetricGroup": "Power",
+        "MetricName": "tma_info_core_epc"
+    },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
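
The coefficients in the FLOPC expression above are FLOPs per retired instruction: scalar ops count 1, 128-bit packed double 2, the 4-FLOP and 8-FLOP umask groups 4 and 8, and 512-bit packed single 16. A sketch with hypothetical counts:

    # FLOP weighting used by tma_info_core_flopc; all counts are hypothetical.
    def flop_count(scalar, pd_128b, flops_4, flops_8, ps_512b):
        return scalar + 2 * pd_128b + 4 * flops_4 + 8 * flops_8 + 16 * ps_512b

    flops = flop_count(1e9, 2e8, 3e8, 1e8, 5e7)
    core_clks = 2e9
    print(f"FLOPc = {flops / core_clks:.2f}")  # 2.10 for these counts
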
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx512",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     },
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
         "MetricName": "tma_info_inst_mix_ipload",
         "MetricThreshold": "tma_info_inst_mix_ipload < 3"
     },
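The rewritten ipflop expression replaces the umask-encoded cpu@...@ terms with the grouped aliases FP_ARITH_INST_RETIRED.SCALAR, .4_FLOPS (the old umask 0x18: 128-bit packed single plus 256-bit packed double) and .8_FLOPS (the old umask 0x60: 256-bit packed single plus 512-bit packed double); the arithmetic is unchanged, with each event class weighted by the FLOPs one instruction of that class performs. A minimal Python sketch with hypothetical counts (the same weighted sum, divided by 1e9 * duration_time, yields tma_info_system_gflops further down):

inst_retired_any = 1_000_000_000  # INST_RETIRED.ANY
scalar = 50_000_000        # FP_ARITH_INST_RETIRED.SCALAR             -> 1 FLOP each
packed128_dp = 10_000_000  # FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE -> 2 FLOPs each
flops4 = 20_000_000        # FP_ARITH_INST_RETIRED.4_FLOPS            -> 4 FLOPs each
flops8 = 5_000_000         # FP_ARITH_INST_RETIRED.8_FLOPS            -> 8 FLOPs each
packed512_sp = 1_000_000   # FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE -> 16 FLOPs each

flops = scalar + 2 * packed128_dp + 4 * flops4 + 8 * flops8 + 16 * packed512_sp
print(f"tma_info_inst_mix_ipflop = {inst_retired_any / flops:.2f}")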
+    {
+        "BriefDescription": "Instructions per PAUSE (lower number means higher occurrence rate)",
+        "MetricExpr": "tma_info_inst_mix_instructions / MISC_RETIRED.PAUSE_INST",
+        "MetricGroup": "Flops;FpVector;InsType",
+        "MetricName": "tma_info_inst_mix_ippause"
+    },
     {
         "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_access_bw",
         "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_core_l3_cache_access_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_access_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
     },
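The renamed *_2t entries above now just reference helper metrics; the underlying fill-bandwidth formula is unchanged. Each counted event corresponds to one 64-byte cache line, so bandwidth in GB/s is 64 * events / 1e9 / seconds. A sketch with hypothetical values:

l1d_replacement = 500_000_000  # L1D.REPLACEMENT: lines filled into the L1D cache
duration_time_s = 2.0          # wall-clock seconds of the measurement

# 64 bytes per cache line, 1e9 bytes per GB.
l1d_fill_bw = 64 * l1d_replacement / 1e9 / duration_time_s
print(f"tma_info_memory_l1d_cache_fill_bw = {l1d_fill_bw:.2f} GB/s")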
     {
         "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_fb_hpki"
     },
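The fb_hpki entry above and the *mpki metrics that follow all share one normalization: event counts scaled to a rate per 1000 retired instructions, so runs of different lengths compare directly. A sketch with hypothetical counts:

fb_hit = 3_000_000        # MEM_LOAD_RETIRED.FB_HIT
l1_miss = 9_000_000       # MEM_LOAD_RETIRED.L1_MISS
inst_any = 1_000_000_000  # INST_RETIRED.ANY

print(f"tma_info_memory_fb_hpki = {1e3 * fb_hit / inst_any:.2f}")
print(f"tma_info_memory_l1mpki = {1e3 * l1_miss / inst_any:.2f}")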
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
+    },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (OFFCORE_REQUESTS.ALL_DATA_RD - OFFCORE_REQUESTS.DEMAND_DATA_RD + L2_RQSTS.ALL_DEMAND_MISS + L2_RQSTS.SWPF_MISS) / tma_info_inst_mix_instructions",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
     {
-        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
-        "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
-        "MetricName": "tma_info_memory_l3mpki"
+        "BriefDescription": "",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "tma_info_memory_l3_cache_access_bw"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_l3mpki"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L3 cache miss demand Loads",
         "MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l3_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l3_miss_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
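tma_info_memory_load_miss_real_latency is a Little's-law style estimate: L1D_PEND_MISS.PENDING sums, cycle by cycle, the number of outstanding L1D load misses, and dividing that occupancy by the number of miss requests (true misses plus fill-buffer merges) gives an average latency in core cycles. A sketch with hypothetical counters:

pending = 120_000_000  # L1D_PEND_MISS.PENDING (cycle-summed outstanding misses)
l1_miss = 2_000_000    # MEM_LOAD_RETIRED.L1_MISS
fb_hit = 1_000_000     # MEM_LOAD_RETIRED.FB_HIT

latency = pending / (l1_miss + fb_hit)
print(f"tma_info_memory_load_miss_real_latency = {latency:.1f} core cycles")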
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "BriefDescription": "\"Bus lock\" per kilo instruction",
+        "MetricExpr": "1e3 * SQ_MISC.BUS_LOCK / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_bus_lock_pki"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_uc_load_pki"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
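tma_info_memory_mlp divides the same occupancy counter by the cycles that had at least one outstanding miss, yielding the average number of misses in flight while any miss is pending. A sketch with hypothetical counters:

pending = 120_000_000        # L1D_PEND_MISS.PENDING
pending_cycles = 30_000_000  # L1D_PEND_MISS.PENDING_CYCLES (cycles with >= 1 miss outstanding)

print(f"tma_info_memory_mlp = {pending / pending_cycles:.2f} misses in flight")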
     {
         "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
         "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
+    {
+        "BriefDescription": "Instructions per a microcode Assist invocation",
+        "MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
+        "MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
+        "MetricName": "tma_info_pipeline_ipassist",
+        "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
+        "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)"
+    },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
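The new cpus_utilized metric scales the 0-to-1 utilization ratio (unhalted reference cycles over elapsed TSC) by the online-CPU count, turning a percentage into a number of busy CPUs. A sketch with hypothetical counters:

ref_tsc = 1.5e9      # CPU_CLK_UNHALTED.REF_TSC (unhalted reference cycles)
tsc = 3.0e9          # TSC ticks over the measurement interval
num_cpus_online = 8  # #num_cpus_online in the metric expression

util = ref_tsc / tsc
print(f"tma_info_system_cpu_utilization = {util:.2f}")
print(f"tma_info_system_cpus_utilized = {num_cpus_online * util:.1f} CPUs")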
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
-        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
     },
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
-        "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
-        "MetricExpr": "9 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "MetricExpr": "9 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_cache_memory_latency, tma_mem_latency",
         "ScaleUnit": "100%"
     },
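As the l3_hit_latency expression reads, every retired L3 hit is charged a fixed 9-cycle unloaded-latency estimate scaled by the measured core frequency, the (1 + FB_HIT / L1_MISS / 2) factor approximates loads that merged into fill buffers, and the total is normalized to thread clocks (ScaleUnit 100%). A sketch with hypothetical values:

core_frequency_ghz = 3.0  # tma_info_system_core_frequency
l3_hit = 4_000_000        # MEM_LOAD_RETIRED.L3_HIT
fb_hit = 1_000_000        # MEM_LOAD_RETIRED.FB_HIT
l1_miss = 8_000_000       # MEM_LOAD_RETIRED.L1_MISS
clks = 2_000_000_000      # tma_info_thread_clks

frac = 9 * core_frequency_ghz * (l3_hit * (1 + fb_hit / l1_miss / 2)) / clks
print(f"tma_l3_hit_latency = {100 * frac:.2f}% of cycles")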
     {
         "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
-        "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
+        "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
         "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
         "MetricName": "tma_lcp",
         "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_lsd",
-        "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_lsd > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit.  LSD typically does well sustaining Uop supply. However; in some rare cases; optimal uop-delivery could not be reached for small loops whose size (in terms of number of uops) does not suit well the LSD structure.",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_cache_memory_latency, tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
-        "MetricExpr": "tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
+        "MetricExpr": "UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
         "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
         "MetricName": "tma_microcode_sequencer",
         "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
-        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_thread_clks",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group",
         "MetricName": "tma_mite_4wide",
-        "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
+        "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
+        "BriefDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles)",
         "MetricExpr": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH / UOPS_ISSUED.ANY",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
         "MetricName": "tma_mixing_vectors",
         "MetricThreshold": "tma_mixing_vectors > 0.05",
-        "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+        "PublicDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles). Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
         "MetricName": "tma_ms_switches",
         "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
-        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
         "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
         "MetricName": "tma_nop_instructions",
-        "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
+        "MetricThreshold": "tma_nop_instructions > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
+        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions))",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_other_light_ops",
         "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+        "MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
+        "MetricGroup": "BrMispredicts;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
+        "MetricName": "tma_other_mispredicts",
+        "MetricThreshold": "tma_other_mispredicts > 0.05 & (tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+        "MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
+        "MetricGroup": "Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
+        "MetricName": "tma_other_nukes",
+        "MetricThreshold": "tma_other_nukes > 0.05 & (tma_machine_clears > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
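Both new L3 nodes above follow the same residual pattern: take the parent node's fraction, remove the share explained by a specific event ratio, and clamp to a 0.0001 floor so the node never reports negative. A sketch of tma_other_mispredicts with hypothetical counters:

tma_branch_mispredicts = 0.12  # parent node fraction of slots
br_misp_retired = 900_000      # BR_MISP_RETIRED.ALL_BRANCHES
clears_count = 1_100_000       # INT_MISC.CLEARS_COUNT
machine_clears = 50_000        # MACHINE_CLEARS.COUNT

other = max(tma_branch_mispredicts *
            (1 - br_misp_retired / (clears_count - machine_clears)), 0.0001)
print(f"tma_other_mispredicts = {100 * other:.2f}% of slots")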
     {
         "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
         "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
-        "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
+        "MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
         "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
         "MetricName": "tma_ports_utilization",
         "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
+        "MetricExpr": "(cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_0",
         "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Sample with: UOPS_EXECUTED.CYCLES_GE_3",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
         "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
-        "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
+        "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
         "MetricName": "tma_serializing_operation",
-        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
         "MetricExpr": "140 * MISC_RETIRED.PAUSE_INST / tma_info_thread_clks",
-        "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
+        "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
         "MetricName": "tma_slow_pause",
-        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
+        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions. Sample with: MISC_RETIRED.PAUSE_INST",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
         "MetricName": "tma_sq_full",
         "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
         "ScaleUnit": "100%"
     },
     {
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "10 * BACLEARS.ANY / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: BACLEARS.ANY",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: BACLEARS.ANY",
         "ScaleUnit": "100%"
     },
     {
index e8d2ec1c029bf75c8f473e463712a89dc14d33d5..f847632205490dd914e562dfe9778f35ac6eac58 100644 (file)
         "BriefDescription": "Number of times an RTM execution aborted.",
         "EventCode": "0xc9",
         "EventName": "RTM_RETIRED.ABORTED",
+        "PEBS": "1",
         "PublicDescription": "Counts the number of times RTM abort was triggered.",
         "SampleAfterValue": "100003",
         "UMask": "0x4"
index a151ba9cccb07c557d450f97b46f47bf62503416..5452a1448ded3bc350ed1a265cb100924cd00d14 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -25,7 +25,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "tma_L5_group": "Metrics for top-down breakdown at level 5",
     "tma_L6_group": "Metrics for top-down breakdown at level 6",
     "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+    "tma_assists_group": "Metrics contributing to tma_assists category",
     "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
     "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+    "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
     "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
     "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
     "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
@@ -77,9 +81,9 @@
     "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
     "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
     "tma_issue2P": "Metrics related by the issue $issue2P",
-    "tma_issueBC": "Metrics related by the issue $issueBC",
     "tma_issueBM": "Metrics related by the issue $issueBM",
     "tma_issueBW": "Metrics related by the issue $issueBW",
+    "tma_issueComp": "Metrics related by the issue $issueComp",
     "tma_issueD0": "Metrics related by the issue $issueD0",
     "tma_issueFB": "Metrics related by the issue $issueFB",
     "tma_issueFL": "Metrics related by the issue $issueFL",
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
     "tma_mite_group": "Metrics contributing to tma_mite category",
+    "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
     "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
     "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
     "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
index cfb590632918f3d61adc6b445a886f80ee18b26a..4fdc87339555af0a15ba87e0db31681de1aa964d 100644 (file)
@@ -19,7 +19,7 @@
         "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
         "EventCode": "0x28",
         "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
-        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture).  This includes high current AVX 512-bit instructions.",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture).  This includes high current AVX 512-bit instructions.",
         "SampleAfterValue": "200003",
         "UMask": "0x20"
     },
index 375b78044f144da68d0e56f9addc2d557c1a01ff..c7313fd4fdf4a5c0a95126904426b1df9b39105c 100644 (file)
         "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
         "EventCode": "0x5e",
         "EventName": "RS_EVENTS.EMPTY_CYCLES",
-        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into stravation periods (e.g. branch mispredictions or i-cache misses)",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
         "SampleAfterValue": "1000003",
         "UMask": "0x1"
     },
         "SampleAfterValue": "10000003",
         "UMask": "0x2"
     },
-    {
-        "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
-        "EventCode": "0xa4",
-        "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
-        "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the speculative path as well as the out-of-order engine recovery past a branch misprediction.",
-        "SampleAfterValue": "10000003",
-        "UMask": "0x8"
-    },
     {
         "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
         "EventName": "TOPDOWN.SLOTS",
index 71d78a7841ea826073622118fb6e3fa44b1b89bc..c015b8277dc76dcef739cf8bef075fc8d4e441f0 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_core_clks)",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
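
For readers skimming these MetricExpr strings: tma_alu_op_utilization is just the four ALU ports' dispatch counts over the available ALU issue slots (four ports per core). A minimal Python sketch of the arithmetic, with illustrative counter values (nothing here is read from perf itself, and the function name is ours):

    # Sketch of tma_alu_op_utilization from raw counts; the four port
    # counters and core_clks are assumed inputs, e.g. from `perf stat`.
    def alu_op_utilization(port_0, port_1, port_5, port_6, core_clks):
        """Fraction of ALU-port issue slots used (4 ALU ports per core)."""
        return (port_0 + port_1 + port_5 + port_6) / (4 * core_clks)

    # Flagged as a bottleneck only above the (now tighter) 0.4 threshold:
    if alu_op_utilization(3.1e9, 2.9e9, 2.2e9, 2.5e9, 5.0e9) > 0.4:
        print("tma_alu_op_utilization exceeds threshold")
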
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * ASSISTS.ANY / tma_info_thread_slots",
+        "MetricExpr": "34 * ASSISTS.ANY / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
     {
         "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
         "DefaultMetricgroupName": "TopdownL1",
-        "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_thread_slots",
+        "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / tma_info_thread_slots",
         "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
         "MetricName": "tma_backend_bound",
         "MetricThreshold": "tma_backend_bound > 0.2",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions.",
         "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_branch_instructions",
         "MetricThreshold": "tma_branch_instructions > 0.1 & tma_light_operations > 0.6",
         "ScaleUnit": "100%"
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(44 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 43.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(44 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 43.5 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_contested_accesses",
         "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "43.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "43.5 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_data_sharing",
         "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
         "MetricName": "tma_decoder0_alone",
-        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
+        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
         "MetricName": "tma_dtlb_load",
         "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
         "MetricName": "tma_dtlb_store",
         "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
-        "MetricExpr": "48 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
+        "MetricExpr": "48 * tma_info_system_core_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
         "MetricName": "tma_false_sharing",
         "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
         "MetricName": "tma_fb_full",
         "MetricThreshold": "tma_fb_full > 0.3",
-        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "PublicDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired). Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain and FMA double-counting.",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists",
+        "MetricExpr": "34 * ASSISTS.FP / tma_info_thread_slots",
+        "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
+        "MetricName": "tma_fp_assists",
+        "MetricThreshold": "tma_fp_assists > 0.1",
+        "PublicDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists. FP Assist may apply when working with very small floating point values (so-called Denormals).",
+        "ScaleUnit": "100%"
+    },
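
The fixed factor of 34 in the new tma_fp_assists expression is the average penalty, in pipeline slots, that the metric assumes per FP assist. A sketch of the arithmetic with made-up counter values (variable names are ours, not the event files'):

    # tma_fp_assists = 34 * ASSISTS.FP / slots; 34 is the assumed average
    # slot cost per assist. Values below are illustrative only.
    assists_fp = 1.2e6    # ASSISTS.FP count
    slots = 4.8e10        # tma_info_thread_slots
    tma_fp_assists = 34 * assists_fp / slots
    # Denormal-heavy code can trip the > 0.1 (10% of slots) threshold.
    print(f"tma_fp_assists = {100 * tma_fp_assists:.3f}%")
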
     {
         "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
         "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_thread_slots)",
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
-        "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "tma_info_bottleneck_mispredictions * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES / 100",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
         "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
     },
     {
         "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
-        "MetricExpr": "tma_info_core_ipmispredict",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
         "MetricGroup": "Bad;BadSpec;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmispredict",
         "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
     },
+    {
+        "BriefDescription": "Speculative to Retired ratio of all clears (covering mispredicts and nukes)",
+        "MetricExpr": "INT_MISC.CLEARS_COUNT / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "tma_info_bad_spec_spec_clears_ratio"
+    },
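
The new speculative-to-retired clears ratio compares clears observed in flight against their retired causes; values well above 1 suggest many clears raised on wrong paths that never retire. A sketch with illustrative counts:

    # Speculative vs. retired clears (all values illustrative).
    clears_count = 9.0e6          # INT_MISC.CLEARS_COUNT
    mispredicts_retired = 5.5e6   # BR_MISP_RETIRED.ALL_BRANCHES
    machine_clears = 0.4e6        # MACHINE_CLEARS.COUNT
    ratio = clears_count / (mispredicts_retired + machine_clears)
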
+    {
+        "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+        "MetricExpr": "(100 * (1 - max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots - (CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES) * (topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots)) / (((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots - (CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES) * (topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots)) * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) if max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots - (CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES) * (topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots)) < (((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots - (CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES) * (topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / slots)) * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * 
EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+        "MetricGroup": "Cor;SMT;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_core_bound_likely",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck.",
+        "MetricExpr": "100 * (100 * ((5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / slots * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 10 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(3 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) + max(0, topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / slots - (5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / slots) * ((IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2) / ((IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2 + (IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2)))",
+        "MetricGroup": "DSBmiss;Fed;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_dsb_misses",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck.",
+        "MetricExpr": "100 * (100 * ((5 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - INT_MISC.UOP_DROPPING) / slots * (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD) / (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 10 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(3 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD)))",
+        "MetricGroup": "Fed;FetchLat;IcMiss;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_ic_misses",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
         "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
     },
+    {
+        "BriefDescription": "Total pipeline cost of \"useful operations\" - the baseline operations not covered by Branching_Overhead nor Irregular_Overhead.",
+        "MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Ret",
+        "MetricName": "tma_info_bottleneck_base_non_br",
+        "MetricThreshold": "tma_info_bottleneck_base_non_br > 20"
+    },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
-        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+        "MetricGroup": "BigFootprint;Fed;Frontend;IcMiss;MemoryTLB",
         "MetricName": "tma_info_bottleneck_big_code",
-        "MetricThreshold": "tma_info_bottleneck_big_code > 20",
-        "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
+        "MetricThreshold": "tma_info_bottleneck_big_code > 20"
     },
     {
         "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
-        "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
-        "MetricGroup": "Ret;tma_issueBC",
+        "MetricExpr": "100 * ((BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots)",
+        "MetricGroup": "Ret",
         "MetricName": "tma_info_bottleneck_branching_overhead",
-        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
-        "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
+        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 5"
+    },
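
The reworked branching-overhead expression charges one slot per retired branch plus one extra per near call, and its reporting threshold drops from 10 to 5. A sketch of the new form with illustrative counts:

    # New simplified form: every retired branch costs one slot, calls one
    # more (all values illustrative, read from nothing).
    all_branches = 6.0e9    # BR_INST_RETIRED.ALL_BRANCHES
    near_calls = 0.5e9      # BR_INST_RETIRED.NEAR_CALL
    slots = 4.8e10          # tma_info_thread_slots
    branching_overhead = 100 * (all_branches + near_calls) / slots
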
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+        "MetricName": "tma_info_bottleneck_cache_memory_bandwidth",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_bandwidth > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+        "MetricName": "tma_info_bottleneck_cache_memory_latency",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_latency > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
+    },
+    {
+        "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+        "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+        "MetricGroup": "Cor;tma_issueComp",
+        "MetricName": "tma_info_bottleneck_compute_bound_est",
+        "MetricThreshold": "tma_info_bottleneck_compute_bound_est > 20",
+        "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
     },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+        "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
         "MetricGroup": "Fed;FetchBW;Frontend",
         "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
         "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
     },
     {
-        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
-        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
-        "MetricName": "tma_info_bottleneck_memory_bandwidth",
-        "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
-        "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+        "BriefDescription": "Total pipeline cost of irregular execution (e.g",
+        "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Bad;Cor;Ret;tma_issueMS",
+        "MetricName": "tma_info_bottleneck_irregular_overhead",
+        "MetricThreshold": "tma_info_bottleneck_irregular_overhead > 10",
+        "PublicDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments). Related metrics: tma_microcode_sequencer, tma_ms_switches"
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
         "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
         "MetricName": "tma_info_bottleneck_memory_data_tlbs",
         "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
-        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_synchronization"
     },
     {
-        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
-        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
-        "MetricName": "tma_info_bottleneck_memory_latency",
-        "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
-        "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+        "BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+        "MetricGroup": "Mem;Offcore;tma_issueTLB",
+        "MetricName": "tma_info_bottleneck_memory_synchronization",
+        "MetricThreshold": "tma_info_bottleneck_memory_synchronization > 10",
+        "PublicDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs"
     },
     {
         "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+        "MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bottleneck_mispredictions",
         "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
         "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
     },
+    {
+        "BriefDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class)",
+        "MetricExpr": "100 - (tma_info_bottleneck_big_code + tma_info_bottleneck_instruction_fetch_bw + tma_info_bottleneck_mispredictions + tma_info_bottleneck_cache_memory_bandwidth + tma_info_bottleneck_cache_memory_latency + tma_info_bottleneck_memory_data_tlbs + tma_info_bottleneck_memory_synchronization + tma_info_bottleneck_compute_bound_est + tma_info_bottleneck_irregular_overhead + tma_info_bottleneck_branching_overhead + tma_info_bottleneck_base_non_br)",
+        "MetricGroup": "Cor;Offcore",
+        "MetricName": "tma_info_bottleneck_other_bottlenecks",
+        "MetricThreshold": "tma_info_bottleneck_other_bottlenecks > 20",
+        "PublicDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class). Examples include data-dependencies (Core Bound when Low ILP) and other unlisted memory-related stalls."
+    },
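
tma_info_bottleneck_other_bottlenecks is simply the residual once every named bottleneck class has been attributed. A sketch, assuming the eleven listed metrics have already been evaluated (percentages below are invented):

    # Residual bottleneck cost: 100% minus everything already attributed.
    named = {
        "big_code": 4.0, "instruction_fetch_bw": 9.0, "mispredictions": 12.0,
        "cache_memory_bandwidth": 15.0, "cache_memory_latency": 18.0,
        "memory_data_tlbs": 2.0, "memory_synchronization": 3.0,
        "compute_bound_est": 10.0, "irregular_overhead": 2.0,
        "branching_overhead": 4.0, "base_non_br": 15.0,
    }  # illustrative percentages for the eleven listed bottlenecks
    other_bottlenecks = 100 - sum(named.values())
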
     {
         "BriefDescription": "Fraction of branches that are CALL or RET",
         "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
     },
     {
         "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
-        "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricExpr": "(CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else tma_info_thread_clks)",
         "MetricGroup": "SMT",
         "MetricName": "tma_info_core_core_clks"
     },
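
The #SMT_on conditional added here keeps core clocks correct on non-SMT parts, where the per-thread clock already equals the core clock. A sketch of the selection logic (function name is ours):

    # Core clocks: CPU_CLK_UNHALTED.DISTRIBUTED only when SMT is enabled;
    # with SMT off, thread clocks and core clocks are the same count.
    def core_clks(smt_on: bool, distributed_clks: float, thread_clks: float) -> float:
        return distributed_clks if smt_on else thread_clks

    print(core_clks(True, 5.2e9, 5.0e9))   # SMT on: distributed count
    print(core_clks(False, 0.0, 5.0e9))    # SMT off: thread clocks suffice
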
         "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
         "MetricName": "tma_info_core_coreipc"
     },
+    {
+        "BriefDescription": "uops Executed per Cycle",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / tma_info_thread_clks",
+        "MetricGroup": "Power",
+        "MetricName": "tma_info_core_epc"
+    },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
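
The FLOPc expression weights each retired FP-arith class by the FLOPs it represents: scalar = 1, 128-bit packed double = 2, the 4-FLOP and 8-FLOP umbrella events = 4 and 8, 512-bit packed single = 16. A worked sketch (counts are illustrative):

    # FLOPs per cycle: weight each retired FP-arith class by its FLOP
    # count, then divide by core clocks.
    scalar = 2.0e9          # FP_ARITH_INST_RETIRED.SCALAR (1 FLOP each)
    packed128_dp = 1.0e8    # .128B_PACKED_DOUBLE          (2 FLOPs each)
    flops4 = 5.0e8          # .4_FLOPS umbrella            (4 FLOPs each)
    flops8 = 3.0e8          # .8_FLOPS umbrella            (8 FLOPs each)
    packed512_sp = 1.0e8    # .512B_PACKED_SINGLE          (16 FLOPs each)
    core_clks = 5.0e9
    flopc = (scalar + 2 * packed128_dp + 4 * flops4 + 8 * flops8
             + 16 * packed512_sp) / core_clks
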
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
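
In the new ILP expression, cpu@UOPS_EXECUTED.THREAD\,cmask\=1@ is perf's escaped syntax for the same event with a counter-mask of 1, i.e. it counts only cycles where at least one uop executed, so the ratio is uops per active execution cycle. A sketch with illustrative numbers:

    # ILP per logical processor: total uops executed divided by the
    # cycles in which execution was actually happening (the cmask=1 term).
    uops_executed = 2.0e10        # UOPS_EXECUTED.THREAD
    active_exec_cycles = 6.0e9    # UOPS_EXECUTED.THREAD with cmask=1
    ilp = uops_executed / active_exec_cycles
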
-    {
-        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Bad;BadSpec;BrMispredicts;TopdownL1;tma_L1_group",
-        "MetricName": "tma_info_core_ipmispredict",
-        "MetricgroupNoGroup": "TopdownL1"
-    },
     {
         "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
         "MetricExpr": "IDQ.DSB_UOPS / UOPS_ISSUED.ANY",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx512",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     },
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
         "MetricName": "tma_info_inst_mix_ipload",
         "MetricThreshold": "tma_info_inst_mix_ipload < 3"
     },
+    {
+        "BriefDescription": "Instructions per PAUSE (lower number means higher occurrence rate)",
+        "MetricExpr": "tma_info_inst_mix_instructions / MISC_RETIRED.PAUSE_INST",
+        "MetricGroup": "Flops;FpVector;InsType",
+        "MetricName": "tma_info_inst_mix_ippause"
+    },
     {
         "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
         "MetricThreshold": "tma_info_inst_mix_iptb < 11",
         "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
     },
+    {
+        "BriefDescription": "\"Bus lock\" per kilo instruction",
+        "MetricExpr": "tma_info_memory_mix_bus_lock_pki",
+        "MetricGroup": "Mem;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_bus_lock_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_code_stlb_mpki",
+        "MetricGroup": "Fed;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_code_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
     },
     {
         "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_access_bw",
         "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_core_l3_cache_access_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_access_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "Average Parallel L2 cache miss data reads",
+        "MetricExpr": "tma_info_memory_latency_data_l2_mlp",
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_data_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_fb_hpki"
     },
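
The *hpki and *mpki metrics in this block all share one shape: events per thousand retired instructions. A minimal helper, with illustrative counts (the function name is ours):

    # Events per kilo-instruction, the common shape of FB_HPKI, L1MPKI,
    # L2MPKI, L3MPKI and friends.
    def per_kilo_inst(event_count, inst_retired):
        return 1e3 * event_count / inst_retired

    # e.g. MEM_LOAD_RETIRED.FB_HIT over INST_RETIRED.ANY:
    fb_hpki = per_kilo_inst(4.0e8, 1.0e11)
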
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
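
These fill-bandwidth helpers all compute 64 bytes per cache line times lines filled, scaled to GB/s (the * 1e3 / 1e3 in the generated _2t variants cancels out). Sketched for the L1D case with illustrative values:

    # L1D fill bandwidth in GB/s: 64-byte lines replaced per second.
    l1d_replacement = 3.0e9   # L1D.REPLACEMENT count (illustrative)
    duration_time = 1.0       # measurement interval in seconds
    l1d_fill_bw = 64 * l1d_replacement / 1e9 / duration_time
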
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
+        "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
+        "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
+        "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
+        "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_evictions_silent_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (OFFCORE_REQUESTS.ALL_DATA_RD - OFFCORE_REQUESTS.DEMAND_DATA_RD + L2_RQSTS.ALL_DEMAND_MISS + L2_RQSTS.SWPF_MISS) / tma_info_inst_mix_instructions",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
     {
-        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
-        "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
-        "MetricName": "tma_info_memory_l3mpki"
+        "BriefDescription": "",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "tma_info_memory_l3_cache_access_bw"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l3_cache_access_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_l3mpki"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "MetricExpr": "tma_info_memory_load_l2_miss_latency",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L3 cache miss demand Loads",
-        "MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "MetricExpr": "tma_info_memory_load_l3_miss_latency",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l3_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l3_miss_latency"
     },
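
The tma_info_memory_oro_* metrics are renamed to tma_info_memory_latency_* here, and the renamed entries now reference sibling metrics by name rather than repeating the event formula. A minimal sketch of that name resolution; only the two table entries are taken from the diff, the resolver itself is illustrative (perf's real expression parser lives in tools/perf/util/expr.c and does far more):

        # Minimal sketch of resolving a MetricExpr that names another metric.
        metrics = {
            "tma_info_memory_load_l2_miss_latency":
                "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
            "tma_info_memory_latency_load_l2_miss_latency":
                "tma_info_memory_load_l2_miss_latency",
        }

        def resolve(name):
            expr = metrics[name]
            return resolve(expr) if expr in metrics else expr

        print(resolve("tma_info_memory_latency_load_l2_miss_latency"))
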
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "BriefDescription": "Average Latency for L2 cache miss demand Loads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_miss_latency",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "BriefDescription": "Average Parallel L2 cache miss demand Loads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@",
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Average Latency for L3 cache miss demand Loads",
+        "MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l3_miss_latency",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
+    },
+    {
+        "BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_load_stlb_mpki",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "\"Bus lock\" per kilo instruction",
+        "MetricExpr": "1e3 * SQ_MISC.BUS_LOCK / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_bus_lock_pki"
+    },
+    {
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "tma_info_memory_uc_load_pki",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_uc_load_pki"
+    },
+    {
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+    },
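
The MLP expression above is an occupancy-over-active-cycles ratio: total outstanding L1D miss cycles divided by cycles with at least one miss outstanding gives the average number of misses in flight. A toy illustration with an invented per-cycle trace:

        # Toy per-cycle trace of outstanding L1D misses (invented numbers).
        outstanding = [0, 1, 3, 4, 4, 2, 0, 0, 1, 0]
        pending = sum(outstanding)                        # L1D_PEND_MISS.PENDING
        pending_cycles = sum(c > 0 for c in outstanding)  # ...PENDING_CYCLES
        print(pending / pending_cycles)                   # -> 2.5 misses in flight
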
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (2 * (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD))",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_page_walks_utilization",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_store_stlb_mpki",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_store_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
     },
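
The entries added above keep pre-rename metric names alive as aliases, parked in TopdownL1/tma_L1_group with MetricgroupNoGroup so they do not clutter that group's listing. A sketch of how a consumer might honour that field when enumerating a group; the semantics are assumed here, not taken from perf's implementation:

        # Sketch: list a group's metrics while honouring "MetricgroupNoGroup"
        # (assumed semantics: member of the group, but hidden when listing it).
        def metrics_in_group(metrics, group):
            for m in metrics:
                if group not in m.get("MetricGroup", "").split(";"):
                    continue
                if group in m.get("MetricgroupNoGroup", "").split(";"):
                    continue  # compatibility alias: keep out of the listing
                yield m["MetricName"]
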
     {
         "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
         "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_uc_load_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
+    {
+        "BriefDescription": "Instructions per a microcode Assist invocation",
+        "MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
+        "MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
+        "MetricName": "tma_info_pipeline_ipassist",
+        "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
+        "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)"
+    },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
-        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
     },
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
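
The reworked gflops expression uses the aggregate FP_ARITH_INST_RETIRED umasks (SCALAR, 4_FLOPS, 8_FLOPS) rather than spelling out umask combinations; each event is weighted by the FLOPs one retired instruction of that class performs. A worked example with hypothetical counts over a one-second sample:

        # Worked example of the gflops weighting; all counts are hypothetical.
        counts = {
            "FP_ARITH_INST_RETIRED.SCALAR": 1e9,              # 1 FLOP each
            "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE": 2e8,  # 2 FLOPs each
            "FP_ARITH_INST_RETIRED.4_FLOPS": 1e8,             # 4 FLOPs each
            "FP_ARITH_INST_RETIRED.8_FLOPS": 5e7,             # 8 FLOPs each
            "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE": 1e7,  # 16 FLOPs each
        }
        weights = [1, 2, 4, 8, 16]
        flops = sum(w * c for w, c in zip(weights, counts.values()))
        print(flops / 1e9 / 1.0)  # -> 2.36 GFLOPS over this 1 s sample
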
     {
         "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
-        "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_HIT_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_HIT_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e9 / duration_time",
-        "MetricGroup": "IoBW;Mem;Server;SoC",
-        "MetricName": "tma_info_system_io_read_bw"
+        "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1e9 / duration_time",
+        "MetricGroup": "IoBW;MemOffcore;Server;SoC",
+        "MetricName": "tma_info_system_io_read_bw",
+        "PublicDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]. Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU"
     },
     {
         "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
-        "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1e9 / duration_time",
-        "MetricGroup": "IoBW;Mem;Server;SoC",
-        "MetricName": "tma_info_system_io_write_bw"
+        "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_HIT_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_HIT_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e9 / duration_time",
+        "MetricGroup": "IoBW;MemOffcore;Server;SoC",
+        "MetricName": "tma_info_system_io_write_bw",
+        "PublicDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]. Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
     {
         "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]",
         "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_DDR) / cha_0@event\\=0x0@",
-        "MetricGroup": "Mem;MemoryLat;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
         "MetricName": "tma_info_system_mem_dram_read_latency",
         "PublicDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
     },
     {
         "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
         "MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / cha_0@event\\=0x0@ if #has_pmem > 0 else 0)",
-        "MetricGroup": "Mem;MemoryLat;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
         "MetricName": "tma_info_system_mem_pmm_read_latency",
         "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
     },
     {
         "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
         "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
-        "MetricGroup": "Mem;MemoryBW;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
         "MetricName": "tma_info_system_pmm_read_bw"
     },
     {
         "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
         "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
-        "MetricGroup": "Mem;MemoryBW;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
         "MetricName": "tma_info_system_pmm_write_bw"
     },
     {
         "MetricGroup": "Power",
         "MetricName": "tma_info_system_turbo_utilization"
     },
+    {
+        "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+        "MetricExpr": "tma_info_system_socket_clks / 1e9 / duration_time",
+        "MetricGroup": "SoC",
+        "MetricName": "tma_info_system_uncore_frequency"
+    },
     {
         "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
-        "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
-        "MetricExpr": "19 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "MetricExpr": "19 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_cache_memory_latency, tma_mem_latency",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
-        "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
+        "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
         "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
         "MetricName": "tma_lcp",
         "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
-        "MetricExpr": "43.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "43.5 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_local_dram",
-        "MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_local_mem",
+        "MetricThreshold": "tma_local_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory. Caching will improve the latency and increase performance. Sample with: MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM_PS",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_cache_memory_latency, tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
-        "MetricExpr": "tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
+        "MetricExpr": "UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
         "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
         "MetricName": "tma_microcode_sequencer",
         "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
-        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_thread_clks",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group",
         "MetricName": "tma_mite_4wide",
-        "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
+        "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
+        "BriefDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles)",
         "MetricExpr": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH / UOPS_ISSUED.ANY",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
         "MetricName": "tma_mixing_vectors",
         "MetricThreshold": "tma_mixing_vectors > 0.05",
-        "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+        "PublicDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles). Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
         "MetricName": "tma_ms_switches",
         "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
-        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
         "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
         "MetricName": "tma_nop_instructions",
-        "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
+        "MetricThreshold": "tma_nop_instructions > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
+        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions))",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_other_light_ops",
         "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+        "MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
+        "MetricGroup": "BrMispredicts;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
+        "MetricName": "tma_other_mispredicts",
+        "MetricThreshold": "tma_other_mispredicts > 0.05 & (tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+        "MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
+        "MetricGroup": "Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
+        "MetricName": "tma_other_nukes",
+        "MetricThreshold": "tma_other_nukes > 0.05 & (tma_machine_clears > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
     {
         "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
-        "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+        "MetricExpr": "(((1 - (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))))) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
         "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_pmm_bound",
         "MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "ScaleUnit": "100%"
     },
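
The tma_pmm_bound expression above reduces to a latency-weighted split: retired-load counts, each corrected by the same (1 + FB_HIT / L1_MISS) factor, are weighted by rough idle latencies (19 and 10 units on the DRAM side, 25 and 33 on the 3D X-Point side), and the PMM share of the L3-miss stall cycles is one minus the DRAM fraction. A condensed sketch with made-up counts:

        # Condensed sketch of the pmm_bound split; every count is made up.
        k = 1.2  # the common (1 + FB_HIT / L1_MISS) factor; it cancels in the ratio
        remote_dram, local_dram, remote_fwd, remote_hitm = 1e6, 4e6, 2e5, 1e5
        local_pmm, remote_pmm = 3e6, 5e5

        dram_w = 19 * remote_dram * k + 10 * (local_dram + remote_fwd + remote_hitm) * k
        pmm_w = 25 * local_pmm * k + 33 * remote_pmm * k
        pmm_share = 1 - dram_w / (dram_w + pmm_w)  # fraction of stalls charged to PMM
        print(round(pmm_share, 3))  # -> 0.596
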
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
-        "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
+        "MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
         "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
         "MetricName": "tma_ports_utilization",
         "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
+        "MetricExpr": "(cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_0",
         "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Sample with: UOPS_EXECUTED.CYCLES_GE_3",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
-        "MetricExpr": "(97 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 97 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(97 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 97 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
         "MetricName": "tma_remote_cache",
         "MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
-        "MetricExpr": "108 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "108 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_remote_dram",
-        "MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_remote_mem",
+        "MetricThreshold": "tma_remote_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory. This is caused often due to non-optimal NUMA allocations. #link to NUMA article. Sample with: MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM_PS",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
         "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
-        "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
+        "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
         "MetricName": "tma_serializing_operation",
-        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
         "MetricExpr": "37 * MISC_RETIRED.PAUSE_INST / tma_info_thread_clks",
-        "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
+        "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
         "MetricName": "tma_slow_pause",
-        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
+        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions. Sample with: MISC_RETIRED.PAUSE_INST",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
         "MetricName": "tma_sq_full",
         "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
         "ScaleUnit": "100%"
     },
     {
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "10 * BACLEARS.ANY / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: BACLEARS.ANY",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: BACLEARS.ANY",
         "ScaleUnit": "100%"
     },
     {
index bc6a9a4d27a9562a0de97285ce80fc03160ad583..904d299c95a317e880342e4c1e73be696cee73e1 100644
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -26,7 +26,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "tma_L5_group": "Metrics for top-down breakdown at level 5",
     "tma_L6_group": "Metrics for top-down breakdown at level 6",
     "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+    "tma_assists_group": "Metrics contributing to tma_assists category",
     "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
     "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+    "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
     "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
     "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
     "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
@@ -78,9 +82,9 @@
     "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
     "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
     "tma_issue2P": "Metrics related by the issue $issue2P",
-    "tma_issueBC": "Metrics related by the issue $issueBC",
     "tma_issueBM": "Metrics related by the issue $issueBM",
     "tma_issueBW": "Metrics related by the issue $issueBW",
+    "tma_issueComp": "Metrics related by the issue $issueComp",
     "tma_issueD0": "Metrics related by the issue $issueD0",
     "tma_issueFB": "Metrics related by the issue $issueFB",
     "tma_issueFL": "Metrics related by the issue $issueFL",
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
     "tma_mite_group": "Metrics contributing to tma_mite category",
+    "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
     "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
     "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
     "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
index ee4dac6fc79710ae5781d2e0f3d788b4d31a5cc9..920cab6ffe373200b5bd8ac4a816fc5d1d2a7f9b 100644 (file)
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
         "PerPkg": "1",
         "PublicDescription": "Number of cores in C-State : C0 and C1 : This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0x40",
         "Unit": "PCU"
     },
     {
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
         "PerPkg": "1",
         "PublicDescription": "Number of cores in C-State : C3 : This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0x80",
         "Unit": "PCU"
     },
     {
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
         "PerPkg": "1",
         "PublicDescription": "Number of cores in C-State : C6 and C7 : This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0xc0",
         "Unit": "PCU"
     },
     {
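The three PCU events above gain explicit UMask values. Per their PublicDescription, an occupancy event adds the number of cores currently in the chosen C-state on every PCU cycle, so dividing the count by PCU cycles over the same interval gives the average residency. A sketch under that assumption, with hypothetical counts (the normalizing clockticks event is left generic here):

    # average cores resident in C6/C7 = occupancy count / PCU cycles
    cores_c6_occupancy = 3_600_000_000  # UNC_P_POWER_STATE_OCCUPANCY.CORES_C6, hypothetical
    pcu_cycles = 1_000_000_000          # PCU clockticks over the interval, hypothetical
    print(cores_c6_occupancy / pcu_cycles)  # ~3.6 cores in C6/C7 on average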
index 33fe555252b21f9cd6ecb22750a0c112e791ad4e..5f3f0b5aebadc734707bb5dbef53c47d8bc7e955 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5) / (3 * tma_info_core_core_clks)",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
+        "MetricExpr": "66 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
         "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
         "MetricName": "tma_fp_scalar",
         "MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
         "MetricName": "tma_fp_vector",
         "MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
+        "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE) / UOPS_EXECUTED.THREAD",
+        "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+        "MetricName": "tma_fp_vector_128b",
+        "MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+        "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
+        "MetricExpr": "(SIMD_FP_256.PACKED_DOUBLE + SIMD_FP_256.PACKED_SINGLE) / UOPS_EXECUTED.THREAD",
+        "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+        "MetricName": "tma_fp_vector_256b",
+        "MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+        "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
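The two new level-5 metrics split tma_fp_vector by vector width; both are plain ratios over executed uops. A sketch of the arithmetic with hypothetical counts (the event names are the ones in the MetricExprs above):

    # width breakdown as a fraction of all executed uops
    sse_scalar_double = 40_000_000   # FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE, hypothetical
    sse_packed_double = 90_000_000   # FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE, hypothetical
    simd256_double = 30_000_000      # SIMD_FP_256.PACKED_DOUBLE, hypothetical
    simd256_single = 10_000_000      # SIMD_FP_256.PACKED_SINGLE, hypothetical
    uops_executed = 2_000_000_000    # UOPS_EXECUTED.THREAD, hypothetical
    tma_fp_vector_128b = (sse_scalar_double + sse_packed_double) / uops_executed  # 0.065
    tma_fp_vector_256b = (simd256_double + simd256_single) / uops_executed        # 0.02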
     {
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
         "MetricExpr": "ICACHE.IFETCH_STALL / tma_info_thread_clks - tma_itlb_misses",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * BR_MISP_EXEC.INDIRECT)",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
         "MetricName": "tma_info_core_flopc"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
     },
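The bandwidth helpers are being split: the per-core names gain a _2t suffix (apparently denoting both SMT threads of a core) and now reference new suffix-less base metrics such as tma_info_memory_l1d_cache_fill_bw, whose expression is unchanged: lines filled times 64 bytes per line, scaled to GB/s. A sketch with hypothetical counts:

    # fill bandwidth = cache lines filled * 64 B, scaled to GB/s
    l1d_replacement = 500_000_000  # L1D.REPLACEMENT, hypothetical
    seconds = 1.0                  # duration_time, hypothetical
    gb_per_s = 64 * l1d_replacement / 1e9 / seconds  # 32.0 GB/s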
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
+    },
     {
         "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "Mem",
         "MetricName": "tma_info_memory_l3mpki"
     },
-    {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
-    },
-    {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
-    },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
-    },
-    {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
-    },
-    {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "0",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
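The two metrics above are unchanged in substance, only reordered after the latency-group renames. Their arithmetic, sketched with hypothetical counts:

    # average L1-miss latency and memory-level parallelism
    pending = 8_000_000_000         # L1D_PEND_MISS.PENDING, hypothetical
    pending_cycles = 2_000_000_000  # L1D_PEND_MISS.PENDING_CYCLES, hypothetical
    l1_miss = 400_000_000           # MEM_LOAD_UOPS_RETIRED.L1_MISS, hypothetical
    hit_lfb = 100_000_000           # MEM_LOAD_UOPS_RETIRED.HIT_LFB, hypothetical
    load_miss_real_latency = pending / (l1_miss + hit_lfb)  # 16 core cycles
    mlp = pending / pending_cycles                          # 4 misses in flight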
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
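The new tma_info_system_cpus_utilized metric turns the utilization ratio into an absolute CPU count by multiplying with the #num_cpus_online literal. A sketch with hypothetical counts:

    # average number of utilized CPUs
    ref_tsc = 6.0e9         # CPU_CLK_UNHALTED.REF_TSC summed over CPUs, hypothetical
    tsc = 24.0e9            # TSC over the interval, hypothetical
    num_cpus_online = 8     # value of the #num_cpus_online literal, hypothetical
    cpu_utilization = ref_tsc / tsc           # 0.25
    print(num_cpus_online * cpu_utilization)  # ~2 CPUs busy on average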
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
         "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
     },
         "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
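The GFLOPs expression weights each FP event by the FLOPs a single such uop performs (1 for scalar, 2/4/8 for the packed widths); the description drops the AMX mention, presumably because these parts predate AMX. Worked with hypothetical counts:

    # GFLOP/s = sum(count * FLOPs-per-uop) / 1e9 / seconds
    scalar = 1e9   # SSE_SCALAR_SINGLE + SSE_SCALAR_DOUBLE, 1 FLOP each, hypothetical
    packed2 = 2e8  # SSE_PACKED_DOUBLE, 2 FLOPs each, hypothetical
    packed4 = 1e8  # SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE, 4 FLOPs, hypothetical
    packed8 = 5e7  # SIMD_FP_256.PACKED_SINGLE, 8 FLOPs each, hypothetical
    seconds = 1.0  # duration_time, hypothetical
    gflops = (scalar + 2 * packed2 + 4 * packed4 + 8 * packed8) / 1e9 / seconds  # 2.2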
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
         "MetricName": "tma_info_system_kernel_utilization",
         "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
     },
-    {
-        "BriefDescription": "Average number of parallel requests to external memory",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_parallel_requests",
-        "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
-    },
-    {
-        "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_request_latency"
-    },
     {
         "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
         "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: ITLB_MISSES.WALK_COMPLETED",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_UOPS_RETIRED.L1_HIT_PS;MEM_LOAD_UOPS_RETIRED.HIT_LFB_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_SMT",
         "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "29 * (MEM_LOAD_UOPS_RETIRED.LLC_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_RETIRED.LLC_MISS))) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_0",
         "MetricThreshold": "tma_port_0 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED_PORT.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED_PORT.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_1",
         "MetricThreshold": "tma_port_1 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_1. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_1. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_5",
         "MetricThreshold": "tma_port_5 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU). Sample with: UOPS_DISPATCHED.PORT_5. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU). Sample with: UOPS_DISPATCHED.PORT_5. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_2",
         "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).  Loop Vectorization -most compilers feature auto-Vectorization options today- reduces pressure on the execution ports as multiple elements are calculated with same uop. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6",
+        "PublicDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).  Loop Vectorization -most compilers feature auto-Vectorization options today- reduces pressure on the execution ports as multiple elements are calculated with same uop. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%"
     },
     {
index f6a0258e3241236b97c48a6098503f40bd12e5dd..8c808347f6da4e8382af6a780730636fadcdb914 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -24,7 +24,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -94,6 +96,7 @@
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
index f5e46a768fdd820957854f573b0ae64661d7a83c..e6f5b05a71b50f1fc2e8a93f03860e95b8c9f20d 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5) / (3 * tma_info_core_core_clks)",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
+        "MetricExpr": "66 * OTHER_ASSISTS.ANY_WB_ASSIST / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
         "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
         "MetricName": "tma_fp_scalar",
         "MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
         "MetricName": "tma_fp_vector",
         "MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
+        "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE) / UOPS_EXECUTED.THREAD",
+        "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+        "MetricName": "tma_fp_vector_128b",
+        "MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+        "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
+        "MetricExpr": "(SIMD_FP_256.PACKED_DOUBLE + SIMD_FP_256.PACKED_SINGLE) / UOPS_EXECUTED.THREAD",
+        "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+        "MetricName": "tma_fp_vector_256b",
+        "MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+        "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses.",
         "MetricExpr": "ICACHE.IFETCH_STALL / tma_info_thread_clks - tma_itlb_misses",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * BR_MISP_EXEC.INDIRECT)",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
         "MetricName": "tma_info_core_flopc"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
+    },
     {
         "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_UOPS_RETIRED.LLC_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "Mem",
         "MetricName": "tma_info_memory_l3mpki"
     },
-    {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
-    },
-    {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
-    },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
-    },
-    {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
-    },
-    {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "0",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_UOPS_RETIRED.L1_MISS + MEM_LOAD_UOPS_RETIRED.HIT_LFB)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricConstraint": "NO_GROUP_EVENTS",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
     {
         "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
         "MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
         "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_mem_bandwidth, tma_sq_full"
     },
         "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
         "MetricGroup": "Power",
         "MetricName": "tma_info_system_turbo_utilization"
     },
+    {
+        "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+        "MetricExpr": "tma_info_system_socket_clks / 1e9 / duration_time",
+        "MetricGroup": "SoC",
+        "MetricName": "tma_info_system_uncore_frequency"
+    },
     {
         "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: ITLB_MISSES.WALK_COMPLETED",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) - CYCLE_ACTIVITY.STALLS_L1D_PENDING) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_UOPS_RETIRED.L1_HIT_PS;MEM_LOAD_UOPS_RETIRED.HIT_LFB_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L1D_PENDING - CYCLE_ACTIVITY.STALLS_L2_PENDING) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_SMT",
         "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "41 * (MEM_LOAD_UOPS_RETIRED.LLC_HIT * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS. Related metrics: tma_mem_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "200 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
         "MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_local_dram",
-        "MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_local_mem",
+        "MetricThreshold": "tma_local_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory. Caching will improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM_PS",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_0",
         "MetricThreshold": "tma_port_0 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED_PORT.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch). Sample with: UOPS_DISPATCHED_PORT.PORT_0. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_1",
         "MetricThreshold": "tma_port_1 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_1. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_1. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_5",
         "MetricThreshold": "tma_port_5 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU). Sample with: UOPS_DISPATCHED.PORT_5. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 5 ([SNB+] Branches and ALU; [HSW+] ALU). Sample with: UOPS_DISPATCHED.PORT_5. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_2",
         "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).  Loop Vectorization -most compilers feature auto-Vectorization options today- reduces pressure on the execution ports as multiple elements are calculated with same uop. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6",
+        "PublicDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise).  Loop Vectorization -most compilers feature auto-Vectorization options today- reduces pressure on the execution ports as multiple elements are calculated with same uop. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_port_6",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "310 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
         "MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_remote_dram",
-        "MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_remote_mem",
+        "MetricThreshold": "tma_remote_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory. This is caused often due to non-optimal NUMA allocations. #link to NUMA article. Sample with: MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM_PS",
         "ScaleUnit": "100%"
     },
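The cache fill-bandwidth refactor above splits each metric into a base expression (e.g. tma_info_memory_l1d_cache_fill_bw) that the per-core "_2t" variants then reference instead of repeating. The arithmetic itself is line-fill count times cache-line size over wall time; a minimal sketch with made-up counter values (64 is the cache-line size in bytes, as in the expressions above):

```python
# Minimal sketch of the fill-bandwidth expressions above, e.g.
# 64 * L1D.REPLACEMENT / 1e9 / duration_time. Counter values are
# hypothetical; on real hardware they come from perf counters read
# over the measured interval.
CACHE_LINE_BYTES = 64

def fill_bw_gbps(lines_filled: float, duration_s: float) -> float:
    # GB/s from a line-fill count (L1D.REPLACEMENT, L2_LINES_IN.ALL,
    # or LONGEST_LAT_CACHE.MISS) and elapsed wall-clock time.
    return CACHE_LINE_BYTES * lines_filled / 1e9 / duration_s

print(fill_bw_gbps(150e6, 1.0))  # 9.6 (GB/s)
```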
index f6a0258e3241236b97c48a6098503f40bd12e5dd..8c808347f6da4e8382af6a780730636fadcdb914 100644
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -24,7 +24,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -94,6 +96,7 @@
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
index 5df1ebfb89ea327eacbb025e84cc4b3dc4cc6679..ad6c531a9e381e6039e22a807899adc3efe61347 100644
         "BriefDescription": "Number of cores in C-State; C0 and C1",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+        "Filter": "occ_sel=1",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
         "BriefDescription": "Number of cores in C-State; C3",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+        "Filter": "occ_sel=2",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
         "BriefDescription": "Number of cores in C-State; C6 and C7",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+        "Filter": "occ_sel=3",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
index 35b1a3aa728d66c94f1049252a3b020d79e0d0bd..fc8c3f785be1b7a64ad0b9f5d2c94af783e4ace3 100644
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_lcp",
         "ScaleUnit": "100%"
         "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
         "MetricName": "tma_fp_scalar",
         "MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
         "MetricName": "tma_fp_vector",
         "MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
+        "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE) / UOPS_DISPATCHED.THREAD",
+        "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+        "MetricName": "tma_fp_vector_128b",
+        "MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+        "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
+        "MetricExpr": "(SIMD_FP_256.PACKED_DOUBLE + SIMD_FP_256.PACKED_SINGLE) / UOPS_DISPATCHED.THREAD",
+        "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+        "MetricName": "tma_fp_vector_256b",
+        "MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+        "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_info_core_flopc"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
         "MetricExpr": "UOPS_DISPATCHED.THREAD / (cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
         "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_mem_bandwidth"
     },
         "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
         "MetricGroup": "Power",
         "MetricName": "tma_info_system_turbo_utilization"
     },
+    {
+        "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+        "MetricExpr": "tma_info_system_socket_clks / 1e9 / duration_time",
+        "MetricGroup": "SoC",
+        "MetricName": "tma_info_system_uncore_frequency"
+    },
     {
         "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: ITLB_MISSES.WALK_COMPLETED",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_SMT",
         "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS",
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_info_system_dram_bw_use",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_info_system_dram_bw_use",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: ",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: ",
         "ScaleUnit": "100%"
     },
     {
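tma_info_system_dram_bw_use above scales CAS counts to bytes: each UNC_M_CAS_COUNT.RD/WR command transfers one 64-byte line. A small illustration with invented counts:

```python
# Illustration of tma_info_system_dram_bw_use:
# 64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time.
# Each CAS moves one 64-byte cache line; counts are hypothetical.
cas_rd, cas_wr = 2.0e9, 0.5e9
duration_time = 1.0  # seconds
print(64 * (cas_rd + cas_wr) / 1e9 / duration_time)  # 160.0 GB/s
```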
index bebb85945d627ea3a94d94fc0909fda0c23baf88..a2c27794c0d8c454e84282b15ba52417df25ff6e 100644
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -23,7 +23,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -88,6 +90,7 @@
     "tma_issueTLB": "Metrics related by the issue $issueTLB",
     "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
index b3ee5d7410156376332869fba7fc75cf09df11f9..6f98fc1728e68d957872b6b75de7667684059dac 100644
         "BriefDescription": "Number of cores in C0",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+        "Filter": "occ_sel=1",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in C0.  It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
         "BriefDescription": "Number of cores in C0",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+        "Filter": "occ_sel=2",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in C0.  It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
         "BriefDescription": "Number of cores in C0",
         "EventCode": "0x80",
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+        "Filter": "occ_sel=3",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in C0.  It can be used by itself to get the average number of cores in C0, with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
         "Unit": "PCU"
index 4d1deed4437ab24fb1fb060819cc40deb9fb7daa..5297d25f4e0333c02d0891ec59afadddf03e3fd3 100644
@@ -1,38 +1,38 @@
 Family-model,Version,Filename,EventType
-GenuineIntel-6-(97|9A|B7|BA|BF),v1.23,alderlake,core
-GenuineIntel-6-BE,v1.23,alderlaken,core
+GenuineIntel-6-(97|9A|B7|BA|BF),v1.24,alderlake,core
+GenuineIntel-6-BE,v1.24,alderlaken,core
 GenuineIntel-6-(1C|26|27|35|36),v5,bonnell,core
-GenuineIntel-6-(3D|47),v28,broadwell,core
+GenuineIntel-6-(3D|47),v29,broadwell,core
 GenuineIntel-6-56,v11,broadwellde,core
 GenuineIntel-6-4F,v22,broadwellx,core
 GenuineIntel-6-55-[56789ABCDEF],v1.20,cascadelakex,core
 GenuineIntel-6-9[6C],v1.04,elkhartlake,core
-GenuineIntel-6-CF,v1.02,emeraldrapids,core
+GenuineIntel-6-CF,v1.03,emeraldrapids,core
 GenuineIntel-6-5[CF],v13,goldmont,core
 GenuineIntel-6-7A,v1.01,goldmontplus,core
-GenuineIntel-6-B6,v1.00,grandridge,core
+GenuineIntel-6-B6,v1.01,grandridge,core
 GenuineIntel-6-A[DE],v1.01,graniterapids,core
-GenuineIntel-6-(3C|45|46),v33,haswell,core
+GenuineIntel-6-(3C|45|46),v35,haswell,core
 GenuineIntel-6-3F,v28,haswellx,core
-GenuineIntel-6-7[DE],v1.19,icelake,core
+GenuineIntel-6-7[DE],v1.21,icelake,core
 GenuineIntel-6-6[AC],v1.23,icelakex,core
 GenuineIntel-6-3A,v24,ivybridge,core
 GenuineIntel-6-3E,v24,ivytown,core
 GenuineIntel-6-2D,v24,jaketown,core
 GenuineIntel-6-(57|85),v16,knightslanding,core
 GenuineIntel-6-BD,v1.00,lunarlake,core
-GenuineIntel-6-A[AC],v1.06,meteorlake,core
+GenuineIntel-6-A[AC],v1.07,meteorlake,core
 GenuineIntel-6-1[AEF],v4,nehalemep,core
 GenuineIntel-6-2E,v4,nehalemex,core
-GenuineIntel-6-A7,v1.01,rocketlake,core
+GenuineIntel-6-A7,v1.02,rocketlake,core
 GenuineIntel-6-2A,v19,sandybridge,core
 GenuineIntel-6-8F,v1.17,sapphirerapids,core
-GenuineIntel-6-AF,v1.00,sierraforest,core
+GenuineIntel-6-AF,v1.01,sierraforest,core
 GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
-GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core
+GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v58,skylake,core
 GenuineIntel-6-55-[01234],v1.32,skylakex,core
 GenuineIntel-6-86,v1.21,snowridgex,core
-GenuineIntel-6-8[CD],v1.13,tigerlake,core
+GenuineIntel-6-8[CD],v1.15,tigerlake,core
 GenuineIntel-6-2C,v5,westmereep-dp,core
 GenuineIntel-6-25,v4,westmereep-sp,core
 GenuineIntel-6-2F,v4,westmereex,core
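The Family-model column above is matched as a regular expression against the CPUID identifier string, which is how one row covers several models (e.g. the alderlake alternation). A toy lookup in Python; the real resolution lives in perf's C code, this only shows the matching idea, with rows abbreviated from the table above:

```python
import re

# Toy version of the mapfile.csv lookup: the first row whose
# Family-model pattern fully matches the CPUID string wins.
rows = [
    ("GenuineIntel-6-(97|9A|B7|BA|BF)", "v1.24", "alderlake"),
    ("GenuineIntel-6-(3D|47)", "v29", "broadwell"),
    ("GenuineIntel-6-8[CD]", "v1.15", "tigerlake"),
]

def lookup(cpuid: str):
    for pattern, version, table in rows:
        if re.fullmatch(pattern, cpuid):
            return table, version
    return None

print(lookup("GenuineIntel-6-8D"))  # ('tigerlake', 'v1.15')
```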
index 5fef87502d4b6230116f7905d1aae1cc96d5770c..47861a6dd8e9a40d46830d67c628a76c8985a3bc 100644
         "EventCode": "0x35",
         "EventName": "MEM_BOUND_STALLS_IFETCH.ALL",
         "SampleAfterValue": "1000003",
-        "UMask": "0x6f",
+        "UMask": "0x7f",
         "Unit": "cpu_atom"
     },
     {
         "EventCode": "0x35",
         "EventName": "MEM_BOUND_STALLS_IFETCH.LLC_MISS",
         "SampleAfterValue": "1000003",
-        "UMask": "0x68",
+        "UMask": "0x78",
         "Unit": "cpu_atom"
     },
     {
         "EventCode": "0x34",
         "EventName": "MEM_BOUND_STALLS_LOAD.ALL",
         "SampleAfterValue": "1000003",
-        "UMask": "0x6f",
+        "UMask": "0x7f",
         "Unit": "cpu_atom"
     },
     {
         "EventCode": "0x34",
         "EventName": "MEM_BOUND_STALLS_LOAD.LLC_MISS",
         "SampleAfterValue": "1000003",
-        "UMask": "0x68",
+        "UMask": "0x78",
         "Unit": "cpu_atom"
     },
     {
index f66506ee37ef6528fc7103640e6951985b55b4c0..30e604d2120f982e4347bca2deb83f00a20175e9 100644 (file)
@@ -1,4 +1,13 @@
 [
+    {
+        "BriefDescription": "Counts the number of cycles when any of the floating point dividers are active.",
+        "CounterMask": "1",
+        "EventCode": "0xcd",
+        "EventName": "ARITH.FPDIV_ACTIVE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "This event counts the cycles the floating point divider is busy.",
         "CounterMask": "1",
@@ -26,7 +35,7 @@
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_0",
         "SampleAfterValue": "2000003",
@@ -34,7 +43,7 @@
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_1",
         "SampleAfterValue": "2000003",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2]",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_5",
         "SampleAfterValue": "2000003",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V0",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5]",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.V2",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "EventCode": "0xc7",
         "UMask": "0xfc",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of all types of floating point operations per uop with all default weighting",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.ALL",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x3",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to FP_FLOPS_RETIRED.FP64]",
+        "Deprecated": "1",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.DP",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point operations that produce 32 bit single precision results [This event is alias to FP_FLOPS_RETIRED.SP]",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.FP32",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point operations that produce 64 bit double precision results [This event is alias to FP_FLOPS_RETIRED.DP]",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.FP64",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to FP_FLOPS_RETIRED.FP32]",
+        "Deprecated": "1",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.SP",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
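
As the umasks above show, FP_FLOPS_RETIRED.ALL (umask 0x3) appears to be the union of FP64 (0x1) and FP32 (0x2), so the two sub-events should sum to the ALL count. A small sketch of that cross-check (the counts are made up):

    # Umask values copied from the event definitions above.
    UMASK = {"FP64": 0x1, "FP32": 0x2, "ALL": 0x3}
    assert UMASK["FP64"] | UMASK["FP32"] == UMASK["ALL"]

    # With both sub-events counted over the same interval, their sum is
    # expected to track FP_FLOPS_RETIRED.ALL.
    counts = {"FP64": 1_500_000, "FP32": 4_200_000}
    print(counts["FP64"] + counts["FP32"])  # -> 5700000
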
     {
         "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
         "EventCode": "0xc3",
index d55e792c0c43f3feabb9941d63201d80fc8e63af..7effc1f271e77bbba3b6bf9de0f7f83ee05d4f11 100644 (file)
@@ -7,6 +7,16 @@
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS]",
+        "Deprecated": "1",
+        "EventCode": "0xe4",
+        "EventName": "LBR_INSERTS.ANY",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Counts demand data reads that have any type of response.",
         "EventCode": "0x2A,0x2B",
index deaa7aba93f736188d7a4fc0fd1475659931d04b..24bbfcebd2bed295cebfc531f622e86ce05b34b8 100644 (file)
@@ -1,4 +1,13 @@
 [
+    {
+        "BriefDescription": "Counts the number of cycles when any of the dividers are active.",
+        "CounterMask": "1",
+        "EventCode": "0xcd",
+        "EventName": "ARITH.DIV_ACTIVE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x3",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
         "CounterMask": "1",
         "SampleAfterValue": "400009",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Conditional branch instructions retired.",
         "EventCode": "0xc4",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Taken conditional branch instructions retired.",
         "EventCode": "0xc4",
         "UMask": "0x40",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Indirect near branch instructions retired (excluding returns)",
         "EventCode": "0xc4",
         "UMask": "0x80",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.IND_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Counts the number of near CALL branch instructions retired.",
         "EventCode": "0xc4",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of near RET branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Return instructions retired.",
         "EventCode": "0xc4",
         "BriefDescription": "INST_RETIRED.MACRO_FUSED",
         "EventCode": "0xc0",
         "EventName": "INST_RETIRED.MACRO_FUSED",
+        "PEBS": "1",
         "SampleAfterValue": "2000003",
         "UMask": "0x10",
         "Unit": "cpu_core"
         "BriefDescription": "Retired NOP instructions.",
         "EventCode": "0xc0",
         "EventName": "INST_RETIRED.NOP",
+        "PEBS": "1",
         "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions",
         "SampleAfterValue": "2000003",
         "UMask": "0x2",
         "BriefDescription": "Iterations of Repeat string retired instructions.",
         "EventCode": "0xc0",
         "EventName": "INST_RETIRED.REP_ITERATION",
+        "PEBS": "1",
         "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
         "SampleAfterValue": "2000003",
         "UMask": "0x8",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of Last Branch Record (LBR) entries. Requires LBRs to be enabled and configured in IA32_LBR_CTL. [This event is alias to LBR_INSERTS.ANY]",
+        "EventCode": "0xe4",
+        "EventName": "MISC_RETIRED.LBR_INSERTS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
         "EventCode": "0xa2",
index 056c2a885a32486253e935225b234fd92a6204c6..55798e64c58a302aa7cc0a40b03548c74118ef6e 100644 (file)
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 4K page.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages. Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Page walks completed due to a demand data load to a 4K page.",
         "EventCode": "0x12",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 2M or 4M page.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
         "EventCode": "0x13",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 4K page.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Page walks completed due to a demand data store to a 4K page.",
         "EventCode": "0x13",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to a 4K page.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
         "EventCode": "0x11",
index e8d2ec1c029bf75c8f473e463712a89dc14d33d5..f847632205490dd914e562dfe9778f35ac6eac58 100644 (file)
         "BriefDescription": "Number of times an RTM execution aborted.",
         "EventCode": "0xc9",
         "EventName": "RTM_RETIRED.ABORTED",
+        "PEBS": "1",
         "PublicDescription": "Counts the number of times RTM abort was triggered.",
         "SampleAfterValue": "100003",
         "UMask": "0x4"
index a151ba9cccb07c557d450f97b46f47bf62503416..5452a1448ded3bc350ed1a265cb100924cd00d14 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -25,7 +25,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "tma_L5_group": "Metrics for top-down breakdown at level 5",
     "tma_L6_group": "Metrics for top-down breakdown at level 6",
     "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+    "tma_assists_group": "Metrics contributing to tma_assists category",
     "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
     "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+    "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
     "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
     "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
     "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
@@ -77,9 +81,9 @@
     "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
     "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
     "tma_issue2P": "Metrics related by the issue $issue2P",
-    "tma_issueBC": "Metrics related by the issue $issueBC",
     "tma_issueBM": "Metrics related by the issue $issueBM",
     "tma_issueBW": "Metrics related by the issue $issueBW",
+    "tma_issueComp": "Metrics related by the issue $issueComp",
     "tma_issueD0": "Metrics related by the issue $issueD0",
     "tma_issueFB": "Metrics related by the issue $issueFB",
     "tma_issueFL": "Metrics related by the issue $issueFL",
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
     "tma_mite_group": "Metrics contributing to tma_mite category",
+    "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
     "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
     "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
     "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
index cfb590632918f3d61adc6b445a886f80ee18b26a..4fdc87339555af0a15ba87e0db31681de1aa964d 100644 (file)
@@ -19,7 +19,7 @@
         "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
         "EventCode": "0x28",
         "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
-        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture).  This includes high current AVX 512-bit instructions.",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture).  This includes high current AVX 512-bit instructions.",
         "SampleAfterValue": "200003",
         "UMask": "0x20"
     },
index 375b78044f144da68d0e56f9addc2d557c1a01ff..c7313fd4fdf4a5c0a95126904426b1df9b39105c 100644 (file)
         "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
         "EventCode": "0x5e",
         "EventName": "RS_EVENTS.EMPTY_CYCLES",
-        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into stravation periods (e.g. branch mispredictions or i-cache misses)",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
         "SampleAfterValue": "1000003",
         "UMask": "0x1"
     },
         "SampleAfterValue": "10000003",
         "UMask": "0x2"
     },
-    {
-        "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
-        "EventCode": "0xa4",
-        "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
-        "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the speculative path as well as the out-of-order engine recovery past a branch misprediction.",
-        "SampleAfterValue": "10000003",
-        "UMask": "0x8"
-    },
     {
         "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
         "EventName": "TOPDOWN.SLOTS",
index 27433fc15ede77b2de29677fdef4cdd4ce2b7a77..1dad462e58b16ea824276150f65b06821fffa75a 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_core_clks)",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * ASSISTS.ANY / tma_info_thread_slots",
+        "MetricExpr": "34 * ASSISTS.ANY / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
     {
         "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
         "DefaultMetricgroupName": "TopdownL1",
-        "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_thread_slots",
+        "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / tma_info_thread_slots",
         "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
         "MetricName": "tma_backend_bound",
         "MetricThreshold": "tma_backend_bound > 0.2",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions.",
         "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_branch_instructions",
         "MetricThreshold": "tma_branch_instructions > 0.1 & tma_light_operations > 0.6",
         "ScaleUnit": "100%"
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(29 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 23.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(29 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 23.5 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_contested_accesses",
         "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "23.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "23.5 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_data_sharing",
         "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
         "MetricName": "tma_decoder0_alone",
-        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
+        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
         "MetricName": "tma_dtlb_load",
         "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
         "MetricName": "tma_dtlb_store",
         "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
-        "MetricExpr": "32.5 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
+        "MetricExpr": "32.5 * tma_info_system_core_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
         "MetricName": "tma_false_sharing",
         "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
         "MetricName": "tma_fb_full",
         "MetricThreshold": "tma_fb_full > 0.3",
-        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "PublicDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired). Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain and FMA double-counting.",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists",
+        "MetricExpr": "34 * ASSISTS.FP / tma_info_thread_slots",
+        "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
+        "MetricName": "tma_fp_assists",
+        "MetricThreshold": "tma_fp_assists > 0.1",
+        "PublicDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists. FP Assist may apply when working with very small floating point values (so-called Denormals).",
+        "ScaleUnit": "100%"
+    },
     {
         "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
         "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_thread_slots)",
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
-        "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "tma_info_bottleneck_mispredictions * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES / 100",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
         "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
         "MetricName": "tma_info_bad_spec_ipmispredict",
         "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
     },
+    {
+        "BriefDescription": "Speculative to Retired ratio of all clears (covering mispredicts and nukes)",
+        "MetricExpr": "INT_MISC.CLEARS_COUNT / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "tma_info_bad_spec_spec_clears_ratio"
+    },
     {
         "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
         "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
     },
+    {
+        "BriefDescription": "Total pipeline cost of \"useful operations\" - the baseline operations not covered by Branching_Overhead nor Irregular_Overhead.",
+        "MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Ret",
+        "MetricName": "tma_info_bottleneck_base_non_br",
+        "MetricThreshold": "tma_info_bottleneck_base_non_br > 20"
+    },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
-        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+        "MetricGroup": "BigFootprint;Fed;Frontend;IcMiss;MemoryTLB",
         "MetricName": "tma_info_bottleneck_big_code",
-        "MetricThreshold": "tma_info_bottleneck_big_code > 20",
-        "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
+        "MetricThreshold": "tma_info_bottleneck_big_code > 20"
     },
     {
         "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
-        "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
-        "MetricGroup": "Ret;tma_issueBC",
+        "MetricExpr": "100 * ((BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots)",
+        "MetricGroup": "Ret",
         "MetricName": "tma_info_bottleneck_branching_overhead",
-        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
-        "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
+        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 5"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+        "MetricName": "tma_info_bottleneck_cache_memory_bandwidth",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_bandwidth > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+        "MetricName": "tma_info_bottleneck_cache_memory_latency",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_latency > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
+    },
+    {
+        "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+        "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+        "MetricGroup": "Cor;tma_issueComp",
+        "MetricName": "tma_info_bottleneck_compute_bound_est",
+        "MetricThreshold": "tma_info_bottleneck_compute_bound_est > 20",
+        "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
     },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+        "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
         "MetricGroup": "Fed;FetchBW;Frontend",
         "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
         "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
     },
     {
-        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
-        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
-        "MetricName": "tma_info_bottleneck_memory_bandwidth",
-        "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
-        "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+        "BriefDescription": "Total pipeline cost of irregular execution (e.g",
+        "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Bad;Cor;Ret;tma_issueMS",
+        "MetricName": "tma_info_bottleneck_irregular_overhead",
+        "MetricThreshold": "tma_info_bottleneck_irregular_overhead > 10",
+        "PublicDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments). Related metrics: tma_microcode_sequencer, tma_ms_switches"
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
         "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
         "MetricName": "tma_info_bottleneck_memory_data_tlbs",
         "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
-        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_synchronization"
     },
     {
-        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
-        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
-        "MetricName": "tma_info_bottleneck_memory_latency",
-        "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
-        "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+        "BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+        "MetricGroup": "Mem;Offcore;tma_issueTLB",
+        "MetricName": "tma_info_bottleneck_memory_synchronization",
+        "MetricThreshold": "tma_info_bottleneck_memory_synchronization > 10",
+        "PublicDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs"
     },
     {
         "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+        "MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bottleneck_mispredictions",
         "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
         "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
     },
+    {
+        "BriefDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class)",
+        "MetricExpr": "100 - (tma_info_bottleneck_big_code + tma_info_bottleneck_instruction_fetch_bw + tma_info_bottleneck_mispredictions + tma_info_bottleneck_cache_memory_bandwidth + tma_info_bottleneck_cache_memory_latency + tma_info_bottleneck_memory_data_tlbs + tma_info_bottleneck_memory_synchronization + tma_info_bottleneck_compute_bound_est + tma_info_bottleneck_irregular_overhead + tma_info_bottleneck_branching_overhead + tma_info_bottleneck_base_non_br)",
+        "MetricGroup": "Cor;Offcore",
+        "MetricName": "tma_info_bottleneck_other_bottlenecks",
+        "MetricThreshold": "tma_info_bottleneck_other_bottlenecks > 20",
+        "PublicDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class). Examples include data-dependencies (Core Bound when Low ILP) and other unlisted memory-related stalls."
+    },
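
In other words, the expression above attributes to "other bottlenecks" whatever share of the 100% pipeline cost the eleven named Info.Bottlenecks metrics do not claim. A sketch with illustrative percentages:

    # Hypothetical per-bottleneck costs (percent of pipeline slots) for the
    # eleven named metrics in the expression above.
    named = {
        "big_code": 8.0, "instruction_fetch_bw": 12.0, "mispredictions": 15.0,
        "cache_memory_bandwidth": 10.0, "cache_memory_latency": 9.0,
        "memory_data_tlbs": 3.0, "memory_synchronization": 2.0,
        "compute_bound_est": 11.0, "irregular_overhead": 4.0,
        "branching_overhead": 5.0, "base_non_br": 14.0,
    }
    print(100 - sum(named.values()))  # -> 7.0 left for unlisted stalls
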
     {
         "BriefDescription": "Fraction of branches that are CALL or RET",
         "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
     },
     {
         "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
-        "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricExpr": "(CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else tma_info_thread_clks)",
         "MetricGroup": "SMT",
         "MetricName": "tma_info_core_core_clks"
     },
         "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
         "MetricName": "tma_info_core_coreipc"
     },
+    {
+        "BriefDescription": "uops Executed per Cycle",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / tma_info_thread_clks",
+        "MetricGroup": "Power",
+        "MetricName": "tma_info_core_epc"
+    },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
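
The simplified FLOPC expression is a weighted FLOP count: each retired FP instruction contributes the number of FLOPs it performs, with the combined 4_FLOPS and 8_FLOPS umasks replacing the hand-built umask expressions of the old form. A sketch with hypothetical counter readings:

    counters = {  # hypothetical readings over one interval
        "FP_ARITH_INST_RETIRED.SCALAR": 1_000,
        "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE": 500,
        "FP_ARITH_INST_RETIRED.4_FLOPS": 400,   # 128b PS + 256b PD
        "FP_ARITH_INST_RETIRED.8_FLOPS": 300,   # 256b PS + 512b PD
        "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE": 100,
        "core_clks": 10_000,                    # tma_info_core_core_clks
    }
    flops = (counters["FP_ARITH_INST_RETIRED.SCALAR"]
             + 2 * counters["FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE"]
             + 4 * counters["FP_ARITH_INST_RETIRED.4_FLOPS"]
             + 8 * counters["FP_ARITH_INST_RETIRED.8_FLOPS"]
             + 16 * counters["FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE"])
    print(flops / counters["core_clks"])  # 0.76 FLOPs per core clock
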
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
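
The per-thread ILP form divides executed uops by the cycles in which at least one uop executed; the cmask=1 qualifier turns the uop-count event into that cycle count. A sketch, assuming both readings cover the same interval:

    def ilp(uops_executed, cycles_with_execution):
        # cycles_with_execution stands in for
        # cpu@UOPS_EXECUTED.THREAD,cmask=1@ (cycles where >= 1 uop executed).
        return uops_executed / cycles_with_execution

    print(ilp(uops_executed=40_000, cycles_with_execution=16_000))  # 2.5
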
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx512",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     },
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
         "MetricName": "tma_info_inst_mix_ipload",
         "MetricThreshold": "tma_info_inst_mix_ipload < 3"
     },
+    {
+        "BriefDescription": "Instructions per PAUSE (lower number means higher occurrence rate)",
+        "MetricExpr": "tma_info_inst_mix_instructions / MISC_RETIRED.PAUSE_INST",
+        "MetricGroup": "Flops;FpVector;InsType",
+        "MetricName": "tma_info_inst_mix_ippause"
+    },
     {
         "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_access_bw",
         "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_core_l3_cache_access_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_access_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
     },
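
All four fill/access bandwidth metrics share one shape: an event that moves a 64-byte cache line, scaled to GB/s over wall-clock time; the renamed per-core "_2t" variants now simply reference these base expressions. A sketch of the base computation with made-up numbers:

    def cache_bw_gbps(lines_moved, duration_time_s):
        # lines_moved stands in for L1D.REPLACEMENT, L2_LINES_IN.ALL,
        # OFFCORE_REQUESTS.ALL_REQUESTS or LONGEST_LAT_CACHE.MISS; each
        # event represents one 64-byte cache line.
        return 64 * lines_moved / 1e9 / duration_time_s

    print(cache_bw_gbps(lines_moved=500_000_000, duration_time_s=1.0))  # 32.0 GB/s
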
     {
         "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_fb_hpki"
     },
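
The *hpki/*mpki family that follows is one recurring pattern: events per thousand retired instructions. A one-liner sketch:

    def per_kilo_instructions(event_count, inst_retired):
        # e.g. 1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY
        return 1e3 * event_count / inst_retired

    print(per_kilo_instructions(event_count=12_345, inst_retired=10_000_000))  # 1.2345
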
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
+    },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_all"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
     {
-        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
-        "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
-        "MetricName": "tma_info_memory_l3mpki"
+        "BriefDescription": "",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "tma_info_memory_l3_cache_access_bw"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_l3mpki"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L3 cache miss demand Loads",
         "MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l3_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l3_miss_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "BriefDescription": "\"Bus lock\" per kilo instruction",
+        "MetricExpr": "1e3 * SQ_MISC.BUS_LOCK / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_bus_lock_pki"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_uc_load_pki"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
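
Both load_miss_real_latency and mlp are occupancy-based: L1D_PEND_MISS.PENDING accumulates the number of outstanding L1D misses each cycle, so dividing by completed miss events yields average latency, while dividing by cycles with any miss pending yields average parallelism. A sketch with hypothetical readings:

    pending = 2_400_000        # L1D_PEND_MISS.PENDING (summed occupancy)
    pending_cycles = 800_000   # L1D_PEND_MISS.PENDING_CYCLES
    l1_miss = 50_000           # MEM_LOAD_RETIRED.L1_MISS
    fb_hit = 30_000            # MEM_LOAD_RETIRED.FB_HIT

    real_latency = pending / (l1_miss + fb_hit)  # 30.0 cycles per miss
    mlp = pending / pending_cycles               # 3.0 misses in flight
    print(real_latency, mlp)
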
     {
         "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
         "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
+    {
+        "BriefDescription": "Instructions per a microcode Assist invocation",
+        "MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
+        "MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
+        "MetricName": "tma_info_pipeline_ipassist",
+        "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
+        "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)"
+    },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
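
The new cpus_utilized metric scales the fractional utilization (unhalted reference cycles over the TSC) by the online CPU count. A sketch, assuming num_cpus_online is supplied by the tool:

    def cpus_utilized(ref_tsc_cycles, tsc_cycles, num_cpus_online):
        # CPU_CLK_UNHALTED.REF_TSC / TSC is the fraction of wall time this
        # CPU was unhalted; scaling by the online count estimates how many
        # CPUs' worth of work ran on average.
        return num_cpus_online * ref_tsc_cycles / tsc_cycles

    print(cpus_utilized(ref_tsc_cycles=1.5e9, tsc_cycles=3e9, num_cpus_online=8))  # 4.0
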
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
-        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
     },
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
         "MetricName": "tma_info_system_mem_read_latency",
         "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
     },
-    {
-        "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
-        "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_request_latency"
-    },
     {
         "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
         "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_core_clks",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
-        "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
-        "MetricExpr": "9 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "MetricExpr": "9 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_cache_memory_latency, tma_mem_latency",
         "ScaleUnit": "100%"
     },
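
The l3_hit_latency expression charges a fixed per-hit cost (the constant 9, scaled by the renamed tma_info_system_core_frequency), inflates the hit count by half the fill-buffer-hit ratio since FB hits ride on an already-outstanding miss, and normalizes by thread clocks. A sketch of the arithmetic with hypothetical readings:

    l3_hit = 100_000       # MEM_LOAD_RETIRED.L3_HIT
    fb_hit = 20_000        # MEM_LOAD_RETIRED.FB_HIT
    l1_miss = 200_000      # MEM_LOAD_RETIRED.L1_MISS
    core_frequency = 1.2   # tma_info_system_core_frequency
    clks = 50_000_000      # tma_info_thread_clks

    weighted_hits = l3_hit * (1 + fb_hit / l1_miss / 2)
    l3_hit_latency = 9 * core_frequency * weighted_hits / clks
    print(f"{l3_hit_latency:.4%} of cycles")  # 2.2680% of cycles
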
     {
         "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
-        "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
+        "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
         "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
         "MetricName": "tma_lcp",
         "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_lsd",
-        "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_lsd > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit.  LSD typically does well sustaining Uop supply. However; in some rare cases; optimal uop-delivery could not be reached for small loops whose size (in terms of number of uops) does not suit well the LSD structure.",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_cache_memory_latency, tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
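
tma_mem_bandwidth and tma_mem_latency split DRAM-bound cycles by occupancy: cycles with four or more data reads in flight (the cmask=4 qualifier) count as bandwidth-limited, and the remaining cycles with any read in flight count as latency-limited. A sketch:

    clks = 1_000_000      # thread clocks (CPU_CLK_UNHALTED.THREAD)
    ge_4_reads = 150_000  # OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD,cmask=4
    any_reads = 400_000   # OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD

    mem_bandwidth = min(clks, ge_4_reads) / clks
    mem_latency = min(clks, any_reads) / clks - mem_bandwidth
    print(f"{mem_bandwidth:.2f} {mem_latency:.2f}")  # 0.15 0.25
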
     {
     },
     {
         "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
-        "MetricExpr": "tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
+        "MetricExpr": "UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
         "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
         "MetricName": "tma_microcode_sequencer",
         "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
-        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_ms_switches",
         "ScaleUnit": "100%"
     },
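
The microcode-sequencer rewrite reads UOPS_RETIRED.SLOTS directly instead of deriving it from tma_retiring * slots; the expression scales MS-delivered uops by the fraction of issued uops that retire, then normalizes by pipeline slots. A sketch with hypothetical counts:

    uops_retired_slots = 3_000_000  # UOPS_RETIRED.SLOTS
    uops_issued = 3_500_000         # UOPS_ISSUED.ANY
    ms_uops = 700_000               # IDQ.MS_UOPS
    slots = 10_000_000              # tma_info_thread_slots

    ms_fraction = uops_retired_slots / uops_issued * ms_uops / slots
    print(f"{ms_fraction:.3f}")  # 0.060
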
     {
         "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_thread_clks",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group",
         "MetricName": "tma_mite_4wide",
-        "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
+        "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
+        "BriefDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles)",
         "MetricExpr": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH / UOPS_ISSUED.ANY",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
         "MetricName": "tma_mixing_vectors",
         "MetricThreshold": "tma_mixing_vectors > 0.05",
-        "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+        "PublicDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles). Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
         "MetricName": "tma_ms_switches",
         "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
-        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
         "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
         "MetricName": "tma_nop_instructions",
-        "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
+        "MetricThreshold": "tma_nop_instructions > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
+        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions))",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_other_light_ops",
         "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+        "MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
+        "MetricGroup": "BrMispredicts;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
+        "MetricName": "tma_other_mispredicts",
+        "MetricThreshold": "tma_other_mispredicts > 0.05 & (tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+        "MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
+        "MetricGroup": "Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
+        "MetricName": "tma_other_nukes",
+        "MetricThreshold": "tma_other_nukes > 0.05 & (tma_machine_clears > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
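
Both new "other" nodes follow the same residual pattern: take the parent node, subtract the share explained by a specific cause, and clamp at a small epsilon so the node never vanishes from the tree. A sketch:

    def residual(parent, specific_share, epsilon=0.0001):
        # e.g. tma_other_nukes = max(tma_machine_clears *
        #   (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), eps)
        return max(parent * (1 - specific_share), epsilon)

    print(f"{residual(parent=0.12, specific_share=0.75):.4f}")  # 0.0300
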
     {
         "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
         "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
-        "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
+        "MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
         "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
         "MetricName": "tma_ports_utilization",
         "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
+        "MetricExpr": "(cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_0",
         "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Sample with: UOPS_EXECUTED.CYCLES_GE_3",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
         "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
-        "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
+        "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
         "MetricName": "tma_serializing_operation",
-        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
         "MetricExpr": "140 * MISC_RETIRED.PAUSE_INST / tma_info_thread_clks",
-        "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
+        "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
         "MetricName": "tma_slow_pause",
-        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
+        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions. Sample with: MISC_RETIRED.PAUSE_INST",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
         "MetricName": "tma_sq_full",
         "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
         "ScaleUnit": "100%"
     },
     {
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "10 * BACLEARS.ANY / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: BACLEARS.ANY",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: BACLEARS.ANY",
         "ScaleUnit": "100%"
     },
     {
index bebb85945d627ea3a94d94fc0909fda0c23baf88..a2c27794c0d8c454e84282b15ba52417df25ff6e 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "DSB": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -23,7 +23,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -88,6 +90,7 @@
     "tma_issueTLB": "Metrics related by the issue $issueTLB",
     "tma_l1_bound_group": "Metrics contributing to tma_l1_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
index 8898b6fd0dea3432d02805398297db51ae8283ab..ce836ebda5422c679be6ce5d8a2d83cc2f973816 100644 (file)
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_frontend_dsb_coverage, tma_lcp",
         "ScaleUnit": "100%"
         "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
         "MetricName": "tma_fp_scalar",
         "MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired. May overcount due to FMA double counting. Related metrics: tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
         "MetricName": "tma_fp_vector",
         "MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
+        "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE) / UOPS_DISPATCHED.THREAD",
+        "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+        "MetricName": "tma_fp_vector_128b",
+        "MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+        "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
+        "MetricExpr": "(SIMD_FP_256.PACKED_DOUBLE + SIMD_FP_256.PACKED_SINGLE) / UOPS_DISPATCHED.THREAD",
+        "MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
+        "MetricName": "tma_fp_vector_256b",
+        "MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
+        "PublicDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors. May overcount due to FMA double counting. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_512b, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
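The two new width-specific nodes split tma_fp_vector using the same denominator, so the per-width fractions sum within the aggregate. A rough Python illustration with invented counts, event names taken from the MetricExprs above:

    # Hedged sketch: invented counts, events as named in the MetricExprs.
    sse_scalar_double = 2_000_000         # FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE
    sse_packed_double = 5_000_000         # FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE
    simd256_packed_double = 3_000_000     # SIMD_FP_256.PACKED_DOUBLE
    simd256_packed_single = 1_000_000     # SIMD_FP_256.PACKED_SINGLE
    uops_dispatched_thread = 400_000_000  # UOPS_DISPATCHED.THREAD

    fp_vector_128b = (sse_scalar_double + sse_packed_double) / uops_dispatched_thread
    fp_vector_256b = (simd256_packed_double + simd256_packed_single) / uops_dispatched_thread
    print(f"128b: {100 * fp_vector_128b:.2f}%  256b: {100 * fp_vector_256b:.2f}%")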
     {
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_info_core_flopc"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
         "MetricExpr": "UOPS_DISPATCHED.THREAD / (cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2 if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
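The new cpus_utilized entry is just utilization scaled by the online CPU count. For example (assumed values, not from the patch):

    # Hedged sketch: assumed values.
    num_cpus_online = 16      # #num_cpus_online
    cpu_utilization = 0.25    # CPU_CLK_UNHALTED.REF_TSC / TSC

    print(num_cpus_online * cpu_utilization)  # 4.0 CPUs utilized on average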
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
         "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_mem_bandwidth"
     },
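The dram_bw_use expression is cacheline accounting: each tracked uncore request moves 64 bytes, and the 1e6/1e3 divisors convert bytes over duration_time into GB/s. A back-of-the-envelope check in Python, with invented counts:

    # Hedged sketch: invented counts.
    trk_requests = 1.5e9       # UNC_ARB_TRK_REQUESTS.ALL
    coh_trk_requests = 0.5e9   # UNC_ARB_COH_TRK_REQUESTS.ALL
    duration_time = 1.0        # seconds

    # MetricExpr: 64 * (sum of requests) / 1e6 / duration_time / 1e3
    gb_per_s = 64 * (trk_requests + coh_trk_requests) / 1e6 / duration_time / 1e3
    print(f"tma_info_system_dram_bw_use = {gb_per_s:.0f} GB/s")  # 128 GB/s here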
         "MetricExpr": "(FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE + 2 * FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4 * (FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE) + 8 * SIMD_FP_256.PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
         "MetricName": "tma_info_system_kernel_utilization",
         "MetricThreshold": "tma_info_system_kernel_utilization > 0.05"
     },
-    {
-        "BriefDescription": "Average number of parallel requests to external memory",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_parallel_requests",
-        "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
-    },
-    {
-        "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_request_latency"
-    },
     {
         "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
         "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "(12 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: ITLB_MISSES.WALK_COMPLETED",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_SMT",
         "MetricExpr": "MEM_LOAD_UOPS_RETIRED.LLC_HIT / (MEM_LOAD_UOPS_RETIRED.LLC_HIT + 7 * MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS) * CYCLE_ACTIVITY.STALLS_L2_PENDING / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_UOPS_RETIRED.L3_HIT_PS",
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=6@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_info_system_dram_bw_use",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_info_system_dram_bw_use",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: ",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: ",
         "ScaleUnit": "100%"
     },
     {
index e6f7934320bfbf8c8857e49e0c0750e4b1f1b0b2..81e5ca1c3078d9583190a10ce4b376f64b2129f7 100644 (file)
@@ -2,10 +2,11 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "C0Wait": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -27,7 +28,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -68,6 +71,7 @@
     "tma_assists_group": "Metrics contributing to tma_assists category",
     "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
     "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+    "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
     "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
     "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
     "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
@@ -81,9 +85,9 @@
     "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
     "tma_int_operations_group": "Metrics contributing to tma_int_operations category",
     "tma_issue2P": "Metrics related by the issue $issue2P",
-    "tma_issueBC": "Metrics related by the issue $issueBC",
     "tma_issueBM": "Metrics related by the issue $issueBM",
     "tma_issueBW": "Metrics related by the issue $issueBW",
+    "tma_issueComp": "Metrics related by the issue $issueComp",
     "tma_issueD0": "Metrics related by the issue $issueD0",
     "tma_issueFB": "Metrics related by the issue $issueFB",
     "tma_issueFL": "Metrics related by the issue $issueFL",
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_bandwidth_group": "Metrics contributing to tma_mem_bandwidth category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
     "tma_mite_group": "Metrics contributing to tma_mite category",
+    "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
     "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
     "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
     "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
index 56e54babcc26f16aee88abae0716675e3ab97c83..6f0e6360e989e1a7f6100e86037498eff45163ec 100644 (file)
         "MetricName": "io_bandwidth_write",
         "ScaleUnit": "1MB/s"
     },
+    {
+        "BriefDescription": "Percentage of inbound full cacheline writes initiated by end device controllers that miss the L3 cache.",
+        "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM",
+        "MetricName": "io_percent_of_inbound_full_writes_that_miss_l3",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Percentage of inbound partial cacheline writes initiated by end device controllers that miss the L3 cache.",
+        "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)",
+        "MetricName": "io_percent_of_inbound_partial_writes_that_miss_l3",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Percentage of inbound reads initiated by end device controllers that miss the L3 cache.",
+        "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
+        "MetricName": "io_percent_of_inbound_reads_that_miss_l3",
+        "ScaleUnit": "100%"
+    },
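All three new io_percent metrics share one shape: L3-missing TOR inserts over all TOR inserts for the matching opcodes. A sketch of the partial-write variant, with invented counts:

    # Hedged sketch: invented counts for the two partial-write opcode pairs.
    miss_itomcachenear = 40_000   # UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR
    miss_rfo = 10_000             # UNC_CHA_TOR_INSERTS.IO_MISS_RFO
    itomcachenear = 400_000       # UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR
    rfo = 100_000                 # UNC_CHA_TOR_INSERTS.IO_RFO

    miss_ratio = (miss_itomcachenear + miss_rfo) / (itomcachenear + rfo)
    print(f"partial writes missing L3: {100 * miss_ratio:.1f}%")  # 10.0% here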
     {
         "BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions",
         "MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
         "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5_11 + UOPS_DISPATCHED.PORT_6) / (5 * tma_info_core_core_clks)",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the Advanced Matrix Extensions (AMX) execution engine was busy with tile (arithmetic) operations",
+        "BriefDescription": "This metric estimates fraction of cycles where the Advanced Matrix eXtensions (AMX) execution engine was busy with tile (arithmetic) operations",
         "MetricExpr": "EXE.AMX_BUSY / tma_info_core_core_clks",
-        "MetricGroup": "Compute;HPC;Server;TopdownL5;tma_L5_group;tma_ports_utilized_0_group",
+        "MetricGroup": "Compute;HPC;Server;TopdownL3;tma_L3_group;tma_core_bound_group",
         "MetricName": "tma_amx_busy",
-        "MetricThreshold": "tma_amx_busy > 0.5 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+        "MetricThreshold": "tma_amx_busy > 0.5 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * cpu@ASSISTS.ANY\\,umask\\=0x1B@ / tma_info_thread_slots",
+        "MetricExpr": "78 * ASSISTS.ANY / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers. Branch Resteers estimates the Frontend delay in fetching operations from corrected path; following all sorts of miss-predicted branches. For example; branchy code with lots of miss-predictions might get categorized under Branch Resteers. Note the value of this node may overlap with its siblings. Sample with: BR_MISP_RETIRED.ALL_BRANCHES",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due staying in C0.1 power-performance optimized state (Faster wakeup time; Smaller power savings).",
+        "MetricExpr": "CPU_CLK_UNHALTED.C01 / tma_info_thread_clks",
+        "MetricGroup": "C0Wait;TopdownL4;tma_L4_group;tma_serializing_operation_group",
+        "MetricName": "tma_c01_wait",
+        "MetricThreshold": "tma_c01_wait > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due staying in C0.2 power-performance optimized state (Slower wakeup time; Larger power savings).",
+        "MetricExpr": "CPU_CLK_UNHALTED.C02 / tma_info_thread_clks",
+        "MetricGroup": "C0Wait;TopdownL4;tma_L4_group;tma_serializing_operation_group",
+        "MetricName": "tma_c02_wait",
+        "MetricThreshold": "tma_c02_wait > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "ScaleUnit": "100%"
+    },
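Both C0-wait nodes ratio a C0.x residency counter against thread clocks; they differ only in which power-performance optimized state is counted. Sketch, invented counts:

    # Hedged sketch: invented counts.
    c01_cycles = 5_000_000      # CPU_CLK_UNHALTED.C01
    c02_cycles = 20_000_000     # CPU_CLK_UNHALTED.C02
    thread_clks = 250_000_000   # tma_info_thread_clks

    print(f"tma_c01_wait = {100 * c01_cycles / thread_clks:.1f}%")  # 2.0%
    print(f"tma_c02_wait = {100 * c02_cycles / thread_clks:.1f}%")  # 8.0%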
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
         "MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
-        "MetricExpr": "(76 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(76 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_contested_accesses",
         "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
-        "MetricExpr": "75.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "75.5 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_data_sharing",
         "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
         "MetricName": "tma_decoder0_alone",
-        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35))",
+        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
         "MetricName": "tma_dtlb_load",
         "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
         "MetricName": "tma_dtlb_store",
         "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
-        "MetricExpr": "80 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
+        "MetricExpr": "80 * tma_info_system_core_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
         "MetricName": "tma_false_sharing",
         "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
         "MetricName": "tma_fb_full",
         "MetricThreshold": "tma_fb_full > 0.3",
-        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
         "MetricGroup": "Default;FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2;Default",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring instructions that that are decoder into two or up to ([SNB+] four; [ADL+] five) uops. This highly-correlates with the number of uops in such instructions. Related metrics: tma_decoder0_alone",
         "ScaleUnit": "100%"
     },
-    {
-        "BriefDescription": "This metric approximates arithmetic floating-point (FP) matrix uops fraction the CPU has retired (aggregated across all supported FP datatypes in AMX engine)",
-        "MetricExpr": "cpu@AMX_OPS_RETIRED.BF16\\,cmask\\=1@ / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Compute;Flops;HPC;Pipeline;Server;TopdownL4;tma_L4_group;tma_fp_arith_group",
-        "MetricName": "tma_fp_amx",
-        "MetricThreshold": "tma_fp_amx > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric approximates arithmetic floating-point (FP) matrix uops fraction the CPU has retired (aggregated across all supported FP datatypes in AMX engine). Refer to AMX_Busy and GFLOPs metrics for actual AMX utilization and FP performance, resp.",
-        "ScaleUnit": "100%"
-    },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector + tma_fp_amx",
+        "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
         "MetricThreshold": "tma_fp_arith > 0.2 & tma_light_operations > 0.6",
     },
     {
         "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
-        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR) / (tma_retiring * tma_info_thread_slots)",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + FP_ARITH_INST_RETIRED2.SCALAR) / (tma_retiring * tma_info_thread_slots)",
         "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
         "MetricName": "tma_fp_scalar",
         "MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
     },
     {
         "BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
-        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@ + FP_ARITH_INST_RETIRED2.VECTOR) / (tma_retiring * tma_info_thread_slots)",
+        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ + FP_ARITH_INST_RETIRED2.VECTOR) / (tma_retiring * tma_info_thread_slots)",
         "MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
         "MetricName": "tma_fp_vector",
         "MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions",
         "MetricExpr": "tma_light_operations * INST_RETIRED.MACRO_FUSED / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fused_instructions",
         "MetricThreshold": "tma_fused_instructions > 0.1 & tma_light_operations > 0.6",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. The instruction pairs of CMP+JCC or DEC+JCC are commonly used examples.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. CMP+JCC or DEC+JCC are common examples of legacy fusions. {([MTL] Note new MOV+OP and Load+OP fusions appear under Other_Light_Ops in MTL!)}",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2;Default",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. Sample with: UOPS_RETIRED.HEAVY",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .). Sample with: UOPS_RETIRED.HEAVY",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
         "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
     },
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "tma_info_bottleneck_mispredictions * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES / 100",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
         "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
         "MetricName": "tma_info_bad_spec_ipmispredict",
         "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
     },
+    {
+        "BriefDescription": "Speculative to Retired ratio of all clears (covering mispredicts and nukes)",
+        "MetricExpr": "INT_MISC.CLEARS_COUNT / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "tma_info_bad_spec_spec_clears_ratio"
+    },
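The new spec_clears_ratio divides all speculative clears by the retired ones (mispredicted branches plus machine clears); values well above 1 indicate many clears happened down wrong paths. Sketch, invented counts:

    # Hedged sketch: invented counts.
    clears_count = 3_000_000      # INT_MISC.CLEARS_COUNT (speculative)
    br_misp_retired = 1_000_000   # BR_MISP_RETIRED.ALL_BRANCHES
    machine_clears = 50_000       # MACHINE_CLEARS.COUNT

    ratio = clears_count / (br_misp_retired + machine_clears)
    print(f"spec_clears_ratio = {ratio:.2f}")  # ~2.86 here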
+    {
+        "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+        "MetricExpr": "(100 * (1 - max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)) / (((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + cpu@RS.EMPTY\\,umask\\=0x1@) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / CPU_CLK_UNHALTED.THREAD) if max(0, topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)) < (((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + cpu@RS.EMPTY\\,umask\\=0x1@) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / CPU_CLK_UNHALTED.THREAD) else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0) + 0 * slots",
+        "MetricGroup": "Cor;SMT;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_core_bound_likely",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck.",
+        "MetricExpr": "100 * (100 * ((topdown\\-fetch\\-lat / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / slots) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + INT_MISC.UNKNOWN_BRANCH_CYCLES / CPU_CLK_UNHALTED.THREAD) + min(3 * cpu@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY) / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) + max(0, topdown\\-fe\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / slots - (topdown\\-fetch\\-lat / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / slots)) * ((IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2) / ((IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2 + (IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD) / 2)))",
+        "MetricGroup": "DSBmiss;Fed;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_dsb_misses",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck.",
+        "MetricExpr": "100 * (100 * ((topdown\\-fetch\\-lat / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) - INT_MISC.UOP_DROPPING / slots) * (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD) / (ICACHE_DATA.STALLS / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + INT_MISC.UNKNOWN_BRANCH_CYCLES / CPU_CLK_UNHALTED.THREAD) + min(3 * cpu@UOPS_RETIRED.MS\\,cmask\\=0x1\\,edge\\=0x1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY) / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD)))",
+        "MetricGroup": "Fed;FetchLat;IcMiss;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_ic_misses",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
         "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
         "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
         "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
     },
+    {
+        "BriefDescription": "Total pipeline cost of \"useful operations\" - the baseline operations not covered by Branching_Overhead nor Irregular_Overhead.",
+        "MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Ret",
+        "MetricName": "tma_info_bottleneck_base_non_br",
+        "MetricThreshold": "tma_info_bottleneck_base_non_br > 20"
+    },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
         "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
-        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+        "MetricGroup": "BigFootprint;Fed;Frontend;IcMiss;MemoryTLB",
         "MetricName": "tma_info_bottleneck_big_code",
-        "MetricThreshold": "tma_info_bottleneck_big_code > 20",
-        "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
+        "MetricThreshold": "tma_info_bottleneck_big_code > 20"
     },
     {
         "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
-        "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
-        "MetricGroup": "Ret;tma_issueBC",
+        "MetricExpr": "100 * ((BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots)",
+        "MetricGroup": "Ret",
         "MetricName": "tma_info_bottleneck_branching_overhead",
-        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
-        "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
+        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 5"
+    },
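Note branching_overhead was simplified here from the weighted taken-branch/call form to a flat per-slot count of retired branches plus an extra unit per near call, and its threshold tightened from 10 to 5. Worked example, invented counts:

    # Hedged sketch: invented counts.
    all_branches = 40_000_000   # BR_INST_RETIRED.ALL_BRANCHES
    near_calls = 4_000_000      # BR_INST_RETIRED.NEAR_CALL
    slots = 1_000_000_000       # tma_info_thread_slots

    overhead = 100 * (all_branches + near_calls) / slots
    print(f"branching_overhead = {overhead:.1f}")  # 4.4, below the new threshold of 5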
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+        "MetricName": "tma_info_bottleneck_cache_memory_bandwidth",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_bandwidth > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+        "MetricName": "tma_info_bottleneck_cache_memory_latency",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_latency > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
+    },
+    {
+        "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+        "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+        "MetricGroup": "Cor;tma_issueComp",
+        "MetricName": "tma_info_bottleneck_compute_bound_est",
+        "MetricThreshold": "tma_info_bottleneck_compute_bound_est > 20",
+        "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
     },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
-        "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+        "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))) - tma_info_bottleneck_big_code",
         "MetricGroup": "Fed;FetchBW;Frontend",
         "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
         "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
     },
     {
-        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
-        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
-        "MetricName": "tma_info_bottleneck_memory_bandwidth",
-        "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
-        "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+        "BriefDescription": "Total pipeline cost of irregular execution (e.g",
+        "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu@RS.EMPTY\\,umask\\=1@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Bad;Cor;Ret;tma_issueMS",
+        "MetricName": "tma_info_bottleneck_irregular_overhead",
+        "MetricThreshold": "tma_info_bottleneck_irregular_overhead > 10",
+        "PublicDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments). Related metrics: tma_microcode_sequencer, tma_ms_switches"
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
-        "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
         "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
         "MetricName": "tma_info_bottleneck_memory_data_tlbs",
         "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
-        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_synchronization"
     },
     {
-        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))",
-        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
-        "MetricName": "tma_info_bottleneck_memory_latency",
-        "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
-        "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+        "BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+        "MetricGroup": "Mem;Offcore;tma_issueTLB",
+        "MetricName": "tma_info_bottleneck_memory_synchronization",
+        "MetricThreshold": "tma_info_bottleneck_memory_synchronization > 10",
+        "PublicDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs"
     },
     {
         "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
-        "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+        "MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bottleneck_mispredictions",
         "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
         "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
     },
+    {
+        "BriefDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class)",
+        "MetricExpr": "100 - (tma_info_bottleneck_big_code + tma_info_bottleneck_instruction_fetch_bw + tma_info_bottleneck_mispredictions + tma_info_bottleneck_cache_memory_bandwidth + tma_info_bottleneck_cache_memory_latency + tma_info_bottleneck_memory_data_tlbs + tma_info_bottleneck_memory_synchronization + tma_info_bottleneck_compute_bound_est + tma_info_bottleneck_irregular_overhead + tma_info_bottleneck_branching_overhead + tma_info_bottleneck_base_non_br)",
+        "MetricGroup": "Cor;Offcore",
+        "MetricName": "tma_info_bottleneck_other_bottlenecks",
+        "MetricThreshold": "tma_info_bottleneck_other_bottlenecks > 20",
+        "PublicDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class). Examples include data-dependencies (Core Bound when Low ILP) and other unlisted memory-related stalls."
+    },
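The tma_info_bottleneck_other_bottlenecks expression above is the residual of the Info.Bottlenecks class: 100 minus the sum of the eleven named bottleneck metrics, so the class as a whole partitions total pipeline cost. A minimal Python sketch of that computation, using made-up metric values (the short keys abbreviate the tma_info_bottleneck_* metrics above):

    # Hypothetical, pre-computed Info.Bottlenecks values (percent of pipeline cost).
    named_bottlenecks = {
        "big_code": 5.0, "instruction_fetch_bw": 8.0, "mispredictions": 12.0,
        "cache_memory_bandwidth": 20.0, "cache_memory_latency": 15.0,
        "memory_data_tlbs": 3.0, "memory_synchronization": 2.0,
        "compute_bound_est": 10.0, "irregular_overhead": 4.0,
        "branching_overhead": 6.0, "base_non_br": 10.0,
    }
    # The residual covers everything the named classes do not.
    other_bottlenecks = 100 - sum(named_bottlenecks.values())
    assert abs(sum(named_bottlenecks.values()) + other_bottlenecks - 100) < 1e-9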
     {
         "BriefDescription": "Fraction of branches that are CALL or RET",
         "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
     },
     {
         "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
-        "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricExpr": "(CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else tma_info_thread_clks)",
         "MetricGroup": "SMT",
         "MetricName": "tma_info_core_core_clks"
     },
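The #SMT_on conditional added to tma_info_core_core_clks selects CPU_CLK_UNHALTED.DISTRIBUTED only when SMT is enabled; with SMT off, the thread's own unhalted clocks already equal the core clocks. A small sketch of that selection logic, with hypothetical counter values:

    def core_clks(smt_on: bool, distributed_clks: int, thread_clks: int) -> int:
        # With SMT on, CPU_CLK_UNHALTED.DISTRIBUTED apportions core cycles
        # across the active logical processors; otherwise the thread's own
        # unhalted cycles (tma_info_thread_clks) are the core cycles.
        return distributed_clks if smt_on else thread_clks

    print(core_clks(True, 1_000_000, 600_000))   # -> 1000000
    print(core_clks(False, 0, 600_000))          # -> 600000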
         "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
         "MetricName": "tma_info_core_coreipc"
     },
+    {
+        "BriefDescription": "uops Executed per Cycle",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / tma_info_thread_clks",
+        "MetricGroup": "Power",
+        "MetricName": "tma_info_core_epc"
+    },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR_HALF + 2 * (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF) + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * (FP_ARITH_INST_RETIRED2.128B_PACKED_HALF + cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@) + 16 * (FP_ARITH_INST_RETIRED2.256B_PACKED_HALF + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) + 32 * FP_ARITH_INST_RETIRED2.512B_PACKED_HALF + 4 * AMX_OPS_RETIRED.BF16",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
         "MetricGroup": "IcMiss",
         "MetricName": "tma_info_frontend_l2mpki_code_all"
     },
+    {
+        "BriefDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection",
+        "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / cpu@INT_MISC.UNKNOWN_BRANCH_CYCLES\\,cmask\\=1\\,edge@",
+        "MetricGroup": "Fed",
+        "MetricName": "tma_info_frontend_unknown_branch_cost",
+        "PublicDescription": "Average number of cycles the front-end was delayed due to an Unknown Branch detection. See Unknown_Branches node."
+    },
     {
         "BriefDescription": "Branch instructions per taken branch.",
         "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR + (cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@ + FP_ARITH_INST_RETIRED2.VECTOR))",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + FP_ARITH_INST_RETIRED2.SCALAR + (cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ + FP_ARITH_INST_RETIRED2.VECTOR))",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
-    },
-    {
-        "BriefDescription": "Instructions per FP Arithmetic AMX operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / AMX_OPS_RETIRED.BF16",
-        "MetricGroup": "Flops;FpVector;InsType;Server",
-        "MetricName": "tma_info_inst_mix_iparith_amx_f16",
-        "MetricThreshold": "tma_info_inst_mix_iparith_amx_f16 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AMX operation (lower number means higher occurrence rate). Operations factored per matrices' sizes of the AMX instructions."
-    },
-    {
-        "BriefDescription": "Instructions per Integer Arithmetic AMX operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / AMX_OPS_RETIRED.INT8",
-        "MetricGroup": "InsType;IntVector;Server",
-        "MetricName": "tma_info_inst_mix_iparith_amx_int8",
-        "MetricThreshold": "tma_info_inst_mix_iparith_amx_int8 < 10",
-        "PublicDescription": "Instructions per Integer Arithmetic AMX operation (lower number means higher occurrence rate). Operations factored per matrices' sizes of the AMX instructions."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx512",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
+    },
+    {
+        "BriefDescription": "Instructions per FP Arithmetic Scalar Half-Precision instruction (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED2.SCALAR",
+        "MetricGroup": "Flops;FpScalar;InsType;Server",
+        "MetricName": "tma_info_inst_mix_iparith_scalar_hp",
+        "MetricThreshold": "tma_info_inst_mix_iparith_scalar_hp < 10",
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Half-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     },
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / tma_info_core_flopc",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
         "MetricName": "tma_info_inst_mix_ipload",
         "MetricThreshold": "tma_info_inst_mix_ipload < 3"
     },
+    {
+        "BriefDescription": "Instructions per PAUSE (lower number means higher occurrence rate)",
+        "MetricExpr": "tma_info_inst_mix_instructions / CPU_CLK_UNHALTED.PAUSE_INST",
+        "MetricGroup": "Flops;FpVector;InsType",
+        "MetricName": "tma_info_inst_mix_ippause"
+    },
     {
         "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
         "MetricThreshold": "tma_info_inst_mix_iptb < 13",
         "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
     },
+    {
+        "BriefDescription": "\"Bus lock\" per kilo instruction",
+        "MetricExpr": "tma_info_memory_mix_bus_lock_pki",
+        "MetricGroup": "Mem;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_bus_lock_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_code_stlb_mpki",
+        "MetricGroup": "Fed;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_code_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
     },
     {
         "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_access_bw",
         "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_core_l3_cache_access_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_access_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "Average Parallel L2 cache miss data reads",
+        "MetricExpr": "tma_info_memory_latency_data_l2_mlp",
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_data_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_fb_hpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
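The cache fill-bandwidth expressions in this stretch all share one shape: the event counts 64-byte cache lines, so GB/sec is 64 * lines / 1e9 / elapsed seconds. A hedged Python sketch with made-up counts:

    CACHE_LINE_BYTES = 64

    def fill_bw_gbps(lines_filled: int, seconds: float) -> float:
        # e.g. lines_filled = L2_LINES_IN.ALL for the L2 fill-bandwidth metric;
        # the counts and timing below are hypothetical.
        return CACHE_LINE_BYTES * lines_filled / 1e9 / seconds

    # Hypothetical sample: 500M L2 line fills over 2 seconds -> 16.0 GB/s.
    print(fill_bw_gbps(500_000_000, 2.0))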
+    {
+        "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
+        "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
+        "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
+        "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
+        "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_evictions_silent_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
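The *_pki metrics here normalize an event count to occurrences per thousand retired instructions, i.e. 1e3 * event / INST_RETIRED.ANY. A one-liner sketch with hypothetical counts:

    def per_kilo_instructions(event_count: int, instructions_retired: int) -> float:
        # e.g. event_count = L2_LINES_OUT.SILENT for the silent-eviction PKI metric.
        return 1e3 * event_count / instructions_retired

    # Hypothetical: 2M silent evictions over 1B instructions -> 2.0 PKI.
    print(per_kilo_instructions(2_000_000, 1_000_000_000))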
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_all"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "tma_info_memory_l3_cache_access_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l3_cache_access_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "Mem",
         "MetricName": "tma_info_memory_l3mpki"
     },
+    {
+        "BriefDescription": "Average Parallel L2 cache miss data reads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "MetricGroup": "Memory_BW;Offcore",
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
+    },
+    {
+        "BriefDescription": "Average Latency for L2 cache miss demand Loads",
+        "MetricExpr": "tma_info_memory_load_l2_miss_latency",
+        "MetricGroup": "Memory_Lat;Offcore",
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
+    },
+    {
+        "BriefDescription": "Average Parallel L2 cache miss demand Loads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
+        "MetricGroup": "Memory_BW;Offcore",
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
+    },
+    {
+        "BriefDescription": "Average Latency for L3 cache miss demand Loads",
+        "MetricExpr": "tma_info_memory_load_l3_miss_latency",
+        "MetricGroup": "Memory_Lat;Offcore",
+        "MetricName": "tma_info_memory_latency_load_l3_miss_latency"
+    },
+    {
+        "BriefDescription": "Average Latency for L2 cache miss demand Loads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
+        "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_miss_latency",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Average Parallel L2 cache miss demand Loads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=0x1@",
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Average Latency for L3 cache miss demand Loads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l3_miss_latency",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
         "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY",
         "MetricGroup": "Mem;MemoryBound;MemoryLat",
         "MetricName": "tma_info_memory_load_miss_real_latency"
     },
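The latency and MLP metrics in this stretch are occupancy ratios over the same outstanding-miss counters: dividing summed per-cycle occupancy by completions gives average latency in cycles, while dividing by cycles with at least one miss outstanding gives average parallelism. A sketch under assumed counter readings:

    # Hypothetical counter readings.
    pending_occupancy = 4_000_000   # e.g. L1D_PEND_MISS.PENDING (summed over cycles)
    completed_misses = 50_000       # e.g. MEM_LOAD_COMPLETED.L1_MISS_ANY
    cycles_with_miss = 800_000      # e.g. L1D_PEND_MISS.PENDING_CYCLES

    avg_latency_cycles = pending_occupancy / completed_misses   # -> 80 cycles
    avg_mlp = pending_occupancy / cycles_with_miss              # -> 5 misses in flight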
+    {
+        "BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_load_stlb_mpki",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "\"Bus lock\" per kilo instruction",
+        "MetricExpr": "1e3 * SQ_MISC.BUS_LOCK / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_bus_lock_pki"
+    },
+    {
+        "BriefDescription": "Off-core accesses per kilo instruction for modified write requests",
+        "MetricExpr": "1e3 * OCR.MODIFIED_WRITE.ANY_RESPONSE / tma_info_inst_mix_instructions",
+        "MetricGroup": "Offcore",
+        "MetricName": "tma_info_memory_mix_offcore_mwrite_any_pki"
+    },
+    {
+        "BriefDescription": "Off-core accesses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
+        "MetricExpr": "1e3 * OCR.READS_TO_CORE.ANY_RESPONSE / tma_info_inst_mix_instructions",
+        "MetricGroup": "CacheHits;Offcore",
+        "MetricName": "tma_info_memory_mix_offcore_read_any_pki"
+    },
+    {
+        "BriefDescription": "L3 cache misses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
+        "MetricExpr": "1e3 * OCR.READS_TO_CORE.L3_MISS / tma_info_inst_mix_instructions",
+        "MetricGroup": "Offcore",
+        "MetricName": "tma_info_memory_mix_offcore_read_l3m_pki"
+    },
+    {
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "tma_info_memory_uc_load_pki",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_uc_load_pki"
+    },
     {
         "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
         "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
         "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
     {
-        "BriefDescription": "Average Parallel L2 cache miss data reads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
-        "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "BriefDescription": "Off-core accesses per kilo instruction for modified write requests",
+        "MetricExpr": "1e3 * OCR.MODIFIED_WRITE.ANY_RESPONSE / INST_RETIRED.ANY",
+        "MetricGroup": "Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_offcore_mwrite_any_pki",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average Latency for L2 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
-        "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "BriefDescription": "Off-core accesses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
+        "MetricExpr": "1e3 * OCR.READS_TO_CORE.ANY_RESPONSE / INST_RETIRED.ANY",
+        "MetricGroup": "CacheHits;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_offcore_read_any_pki",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average Parallel L2 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
-        "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
+        "BriefDescription": "L3 cache misses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
+        "MetricExpr": "1e3 * OCR.READS_TO_CORE.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_offcore_read_l3m_pki",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average Latency for L3 cache miss demand Loads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
-        "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l3_miss_latency"
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "(ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING) / (4 * (CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else CPU_CLK_UNHALTED.THREAD))",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_page_walks_utilization",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "BriefDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket",
+        "MetricExpr": "64 * OCR.READS_TO_CORE.DRAM / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "HPC;Mem;MemoryBW;SoC;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_r2c_dram_bw",
+        "MetricgroupNoGroup": "TopdownL1",
+        "PublicDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket. See R2C_Offcore_BW."
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "BriefDescription": "Average L3-cache miss BW for Reads-to-Core (R2C)",
+        "MetricExpr": "64 * OCR.READS_TO_CORE.L3_MISS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "HPC;Mem;MemoryBW;SoC;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_r2c_l3m_bw",
+        "MetricgroupNoGroup": "TopdownL1",
+        "PublicDescription": "Average L3-cache miss BW for Reads-to-Core (R2C). This covering going to DRAM or other memory off-chip memory tears. See R2C_Offcore_BW."
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Average Off-core access BW for Reads-to-Core (R2C)",
+        "MetricExpr": "64 * OCR.READS_TO_CORE.ANY_RESPONSE / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "HPC;Mem;MemoryBW;SoC;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_r2c_offcore_bw",
+        "MetricgroupNoGroup": "TopdownL1",
+        "PublicDescription": "Average Off-core access BW for Reads-to-Core (R2C). R2C account for demand or prefetch load/RFO/code access that fill data into the Core caches."
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket",
+        "MetricExpr": "64 * OCR.READS_TO_CORE.DRAM / 1e9 / duration_time",
+        "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+        "MetricName": "tma_info_memory_soc_r2c_dram_bw",
+        "PublicDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket. See R2C_Offcore_BW."
+    },
+    {
+        "BriefDescription": "Average L3-cache miss BW for Reads-to-Core (R2C)",
+        "MetricExpr": "64 * OCR.READS_TO_CORE.L3_MISS / 1e9 / duration_time",
+        "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+        "MetricName": "tma_info_memory_soc_r2c_l3m_bw",
+        "PublicDescription": "Average L3-cache miss BW for Reads-to-Core (R2C). This covering going to DRAM or other memory off-chip memory tears. See R2C_Offcore_BW."
+    },
+    {
+        "BriefDescription": "Average Off-core access BW for Reads-to-Core (R2C)",
+        "MetricExpr": "64 * OCR.READS_TO_CORE.ANY_RESPONSE / 1e9 / duration_time",
+        "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+        "MetricName": "tma_info_memory_soc_r2c_offcore_bw",
+        "PublicDescription": "Average Off-core access BW for Reads-to-Core (R2C). R2C account for demand or prefetch load/RFO/code access that fill data into the Core caches."
+    },
+    {
+        "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_store_stlb_mpki",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_store_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
         "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_uc_load_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
     {
         "BriefDescription": "Instructions per a microcode Assist invocation",
-        "MetricExpr": "INST_RETIRED.ANY / cpu@ASSISTS.ANY\\,umask\\=0x1B@",
-        "MetricGroup": "Pipeline;Ret;Retire",
+        "MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
+        "MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
         "MetricName": "tma_info_pipeline_ipassist",
         "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
         "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)"
     {
         "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions",
         "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
-        "MetricGroup": "Pipeline;Ret",
+        "MetricGroup": "MicroSeq;Pipeline;Ret",
         "MetricName": "tma_info_pipeline_strings_cycles",
         "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Fraction of cycles the processor is waiting yet unhalted; covering legacy PAUSE instruction, as well as C0.1 / C0.2 power-performance optimized states",
+        "MetricExpr": "CPU_CLK_UNHALTED.C0_WAIT / tma_info_thread_clks",
+        "MetricGroup": "C0Wait",
+        "MetricName": "tma_info_system_c0_wait",
+        "MetricThreshold": "tma_info_system_c0_wait > 0.05"
+    },
+    {
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
-        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
     },
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "tma_info_core_flopc / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
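The new tma_info_system_gflops expression weights each FP-arithmetic event by the floating-point operations it represents: scalar counts 1, 128-bit packed double 2, the 4_FLOPS and 8_FLOPS umask groupings 4 and 8, and 512-bit packed single 16. A sketch of the weighting with hypothetical event counts over an assumed one-second window:

    # Hypothetical event counts over a 1-second window.
    counts = {
        "FP_ARITH_INST_RETIRED.SCALAR": 1_000_000,
        "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE": 500_000,
        "FP_ARITH_INST_RETIRED.4_FLOPS": 250_000,
        "FP_ARITH_INST_RETIRED.8_FLOPS": 125_000,
        "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE": 10_000,
    }
    weights = {"SCALAR": 1, "128B_PACKED_DOUBLE": 2, "4_FLOPS": 4,
               "8_FLOPS": 8, "512B_PACKED_SINGLE": 16}
    flops = sum(counts[f"FP_ARITH_INST_RETIRED.{k}"] * w for k, w in weights.items())
    gflops = flops / 1e9 / 1.0  # duration_time assumed to be 1 second
    print(gflops)               # -> 0.00416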
     {
-        "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+        "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
         "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR * 64 / 1e9 / duration_time",
-        "MetricGroup": "IoBW;Mem;Server;SoC",
-        "MetricName": "tma_info_system_io_write_bw"
+        "MetricGroup": "IoBW;MemOffcore;Server;SoC",
+        "MetricName": "tma_info_system_io_read_bw",
+        "PublicDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]. Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU"
+    },
+    {
+        "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
+        "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR) * 64 / 1e9 / duration_time",
+        "MetricGroup": "IoBW;MemOffcore;Server;SoC",
+        "MetricName": "tma_info_system_io_write_bw",
+        "PublicDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]. Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
     {
         "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]",
         "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_DDR / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_DDR) / uncore_cha_0@event\\=0x1@",
-        "MetricGroup": "Mem;MemoryLat;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
         "MetricName": "tma_info_system_mem_dram_read_latency",
         "PublicDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
     },
     {
         "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
         "MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / uncore_cha_0@event\\=0x1@ if #has_pmem > 0 else 0)",
-        "MetricGroup": "Mem;MemoryLat;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
         "MetricName": "tma_info_system_mem_pmm_read_latency",
         "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
     },
     {
         "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
         "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
-        "MetricGroup": "Mem;MemoryBW;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
         "MetricName": "tma_info_system_pmm_read_bw"
     },
     {
         "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
         "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
-        "MetricGroup": "Mem;MemoryBW;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
         "MetricName": "tma_info_system_pmm_write_bw"
     },
     {
         "MetricGroup": "SoC",
         "MetricName": "tma_info_system_socket_clks"
     },
-    {
-        "BriefDescription": "Tera Integer (matrix) Operations Per Second",
-        "MetricExpr": "8 * AMX_OPS_RETIRED.INT8 / 1e12 / duration_time",
-        "MetricGroup": "Cor;HPC;IntVector;Server",
-        "MetricName": "tma_info_system_tiops"
-    },
     {
         "BriefDescription": "Average Frequency Utilization relative nominal frequency",
         "MetricExpr": "tma_info_thread_clks / CPU_CLK_UNHALTED.REF_TSC",
         "MetricGroup": "Power",
         "MetricName": "tma_info_system_turbo_utilization"
     },
+    {
+        "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+        "MetricExpr": "tma_info_system_socket_clks / 1e9 / duration_time",
+        "MetricGroup": "SoC",
+        "MetricName": "tma_info_system_uncore_frequency"
+    },
     {
         "BriefDescription": "Cross-socket Ultra Path Interconnect (UPI) data transmit bandwidth for data only [MB / sec]",
         "MetricExpr": "UNC_UPI_TxL_FLITS.ALL_DATA * 64 / 9 / 1e6",
         "MetricName": "tma_info_thread_uptb",
         "MetricThreshold": "tma_info_thread_uptb < 9"
     },
-    {
-        "BriefDescription": "This metric approximates arithmetic Integer (Int) matrix uops fraction the CPU has retired (aggregated across all supported Int datatypes in AMX engine)",
-        "MetricExpr": "cpu@AMX_OPS_RETIRED.INT8\\,cmask\\=1@ / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Compute;HPC;IntVector;Pipeline;Server;TopdownL4;tma_L4_group;tma_int_operations_group",
-        "MetricName": "tma_int_amx",
-        "MetricThreshold": "tma_int_amx > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric approximates arithmetic Integer (Int) matrix uops fraction the CPU has retired (aggregated across all supported Int datatypes in AMX engine). Refer to AMX_Busy and TIOPs metrics for actual AMX utilization and Int performance, resp.",
-        "ScaleUnit": "100%"
-    },
     {
         "BriefDescription": "This metric represents overall Integer (Int) select operations fraction the CPU has executed (retired)",
-        "MetricExpr": "tma_int_vector_128b + tma_int_vector_256b + tma_shuffles + tma_int_amx",
+        "MetricExpr": "tma_int_vector_128b + tma_int_vector_256b",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_int_operations",
         "MetricThreshold": "tma_int_operations > 0.1 & tma_light_operations > 0.6",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired",
+        "BriefDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD/MUL or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired",
         "MetricExpr": "(INT_VEC_RETIRED.ADD_256 + INT_VEC_RETIRED.MUL_256 + INT_VEC_RETIRED.VNNI_256) / (tma_retiring * tma_info_thread_slots)",
         "MetricGroup": "Compute;IntVector;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group;tma_issue2P",
         "MetricName": "tma_int_vector_256b",
         "MetricThreshold": "tma_int_vector_256b > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
-        "PublicDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents 256-bit vector Integer ADD/SUB/SAD/MUL or VNNI (Vector Neural Network Instructions) uops fraction the CPU has retired. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_port_0, tma_port_1, tma_port_5, tma_port_6, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
         "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((EXE_ACTIVITY.BOUND_ON_LOADS - MEMORY_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
-        "MetricExpr": "33 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "MetricExpr": "33 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_cache_memory_latency, tma_mem_latency",
         "ScaleUnit": "100%"
     },
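
The updated tma_l3_hit_latency expression charges each retired L3 hit an idle-latency constant (33) scaled by the measured core frequency, with fill-buffer hits given half weight. A minimal Python sketch of how the expression evaluates, using hypothetical counter values:

    # Sketch: evaluate tma_l3_hit_latency from raw counts (values hypothetical).
    l3_hit  = 4.0e6   # MEM_LOAD_RETIRED.L3_HIT
    fb_hit  = 2.0e6   # MEM_LOAD_RETIRED.FB_HIT
    l1_miss = 8.0e6   # MEM_LOAD_RETIRED.L1_MISS
    clks    = 1.0e9   # tma_info_thread_clks
    core_freq = 1.0   # tma_info_system_core_frequency (measured, normalized)

    frac = 33 * core_freq * (l3_hit * (1 + fb_hit / l1_miss / 2)) / clks
    print(f"tma_l3_hit_latency = {frac:.2%}")  # ScaleUnit is 100%
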
     {
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2;Default",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
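
The tma_light_operations description refers to the UopPI metric, which in this metric set works out to retired uops per retired instruction, derivable from the retiring fraction and slot count. A hedged sketch with hypothetical values:

    # Sketch: UopPI (uops-per-instruction) as referenced above.
    tma_retiring = 0.70   # fraction of slots retiring
    slots        = 6.0e9  # tma_info_thread_slots
    inst_retired = 4.2e9  # INST_RETIRED.ANY
    uoppi = tma_retiring * slots / inst_retired
    print(f"UopPI = {uoppi:.2f}")  # ~1 or less expected for optimized code
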
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
-        "MetricExpr": "71 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "71 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_local_dram",
-        "MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_local_mem",
+        "MetricThreshold": "tma_local_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory. Caching will improve the latency and increase performance. Sample with: MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM_PS",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
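
In the tma_mem_bandwidth expression, the cmask=4 modifier turns OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD (an occupancy count) into cycles with at least four demand data reads in flight, a heuristic for bandwidth saturation; min() caps it at unhalted thread cycles. A sketch with hypothetical counts:

    # Sketch: cycles with >= 4 outstanding demand data reads, capped at
    # total thread clocks (values hypothetical).
    thread_clks = 1.0e9   # CPU_CLK_UNHALTED.THREAD
    rd_ge4      = 3.2e8   # OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD, cmask=4
    print(f"tma_mem_bandwidth = {min(thread_clks, rd_ge4) / thread_clks:.1%}")

In practice perf resolves the whole dependency tree when invoked with, e.g., "perf stat -M tma_mem_bandwidth".
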
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_cache_memory_latency, tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
         "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks",
-        "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
+        "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
         "MetricName": "tma_memory_fence",
-        "MetricThreshold": "tma_memory_fence > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
+        "MetricThreshold": "tma_memory_fence > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
         "MetricName": "tma_microcode_sequencer",
         "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
-        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: UOPS_RETIRED.MS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: UOPS_RETIRED.MS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 6 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
+        "BriefDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles)",
         "MetricExpr": "160 * ASSISTS.SSE_AVX_MIX / tma_info_thread_clks",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
         "MetricName": "tma_mixing_vectors",
         "MetricThreshold": "tma_mixing_vectors > 0.05",
-        "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+        "PublicDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles). Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS)",
-        "MetricExpr": "3 * cpu@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY) / tma_info_thread_clks",
+        "MetricExpr": "3 * cpu@UOPS_RETIRED.MS\\,cmask\\=1\\,edge@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY) / tma_info_thread_clks",
         "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
         "MetricName": "tma_ms_switches",
         "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
-        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: FRONTEND_RETIRED.MS_FLOWS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: FRONTEND_RETIRED.MS_FLOWS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
         "ScaleUnit": "100%"
     },
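
The new tma_ms_switches expression counts MS entries via an edge-detected cmask (each 0-to-nonzero transition of UOPS_RETIRED.MS is one switch), charges an estimated 3-cycle penalty per switch, and rescales by retired slots per issued uop. A sketch, values hypothetical:

    # Sketch: tma_ms_switches from raw counts (values hypothetical).
    ms_switches = 2.0e6   # UOPS_RETIRED.MS with cmask=1,edge (switch count)
    ret_slots   = 4.0e9   # UOPS_RETIRED.SLOTS
    issued      = 4.4e9   # UOPS_ISSUED.ANY
    clks        = 1.0e9
    frac = 3 * ms_switches / (ret_slots / issued) / clks
    print(f"tma_ms_switches = {frac:.2%}")
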
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused",
         "MetricExpr": "tma_light_operations * (BR_INST_RETIRED.ALL_BRANCHES - INST_RETIRED.MACRO_FUSED) / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_non_fused_branches",
         "MetricThreshold": "tma_non_fused_branches > 0.1 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused. Non-conditional branches like direct JMP or CALL would count here. Can be used to examine fusible conditional jumps that were not fused.",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
         "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
         "MetricName": "tma_nop_instructions",
-        "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
+        "MetricThreshold": "tma_nop_instructions > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
-        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
+        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_other_light_ops",
         "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+        "MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
+        "MetricGroup": "BrMispredicts;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
+        "MetricName": "tma_other_mispredicts",
+        "MetricThreshold": "tma_other_mispredicts > 0.05 & (tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
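
The new tma_other_mispredicts node attributes whatever part of the mispredict cost is not explained by retired x86 branch mispredicts; the max(..., 0.0001) floor keeps the node non-negative and visible. A sketch with hypothetical inputs (tma_other_nukes, the next entry, applies the same pattern to machine clears not caused by memory ordering):

    # Sketch: tma_other_mispredicts with its 0.01% floor (values hypothetical).
    tma_branch_mispredicts = 0.12
    br_misp = 9.0e6    # BR_MISP_RETIRED.ALL_BRANCHES
    clears  = 1.1e7    # INT_MISC.CLEARS_COUNT
    mclears = 5.0e5    # MACHINE_CLEARS.COUNT
    frac = max(tma_branch_mispredicts * (1 - br_misp / (clears - mclears)), 1e-4)
    print(f"tma_other_mispredicts = {frac:.2%}")
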
+    {
+        "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+        "MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
+        "MetricGroup": "Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
+        "MetricName": "tma_other_nukes",
+        "MetricThreshold": "tma_other_nukes > 0.05 & (tma_machine_clears > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
     {
         "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Page Faults",
         "MetricExpr": "99 * ASSISTS.PAGE_FAULT / tma_info_thread_slots",
     },
     {
         "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
-        "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+        "MetricExpr": "(((1 - (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))))) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
         "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_pmm_bound",
         "MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_int_vector_128b, tma_int_vector_256b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
-        "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_thread_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_thread_clks)",
+        "MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@)) / tma_info_thread_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * cpu@EXE_ACTIVITY.2_PORTS_UTIL\\,umask\\=0xc@) / tma_info_thread_clks)",
         "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
         "MetricName": "tma_ports_utilization",
         "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / tma_info_thread_clks",
+        "MetricExpr": "(cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + cpu@RS.EMPTY\\,umask\\=1@) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_0",
         "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Sample with: UOPS_EXECUTED.CYCLES_GE_3",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
-        "MetricExpr": "(135.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 135.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(135.5 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 135.5 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
         "MetricName": "tma_remote_cache",
         "MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
-        "MetricExpr": "149 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "149 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_remote_dram",
-        "MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_remote_mem",
+        "MetricThreshold": "tma_remote_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory. This is caused often due to non-optimal NUMA allocations. #link to NUMA article. Sample with: MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM_PS",
         "ScaleUnit": "100%"
     },
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
-        "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
-        "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
+        "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks + tma_c02_wait",
+        "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
         "MetricName": "tma_serializing_operation",
-        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.",
-        "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group",
-        "MetricName": "tma_shuffles",
-        "MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)",
+        "BriefDescription": "This metric represents fraction of slots where the CPU was retiring Shuffle operations of 256-bit vector size (FP or Integer)",
+        "MetricExpr": "tma_light_operations * INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)",
+        "MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
+        "MetricName": "tma_shuffles_256b",
+        "MetricThreshold": "tma_shuffles_256b > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring Shuffle operations of 256-bit vector size (FP or Integer). Shuffles may incur slow cross \"vector lane\" data transfers.",
         "ScaleUnit": "100%"
     },
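
The renamed tma_shuffles_256b node now scales the shuffle-uop fraction by tma_light_operations so it nests under tma_other_light_ops rather than the integer-operations subtree. A terse sketch, values hypothetical:

    # Sketch: tma_shuffles_256b under the new parentage (values hypothetical).
    tma_light_operations, tma_retiring = 0.65, 0.70
    slots    = 6.0e9   # tma_info_thread_slots
    shuffles = 1.5e8   # INT_VEC_RETIRED.SHUFFLES
    print(f"{tma_light_operations * shuffles / (tma_retiring * slots):.2%}")
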
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
         "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
-        "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
+        "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
         "MetricName": "tma_slow_pause",
-        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
+        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions. Sample with: CPU_CLK_UNHALTED.PAUSE_INST",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
         "MetricName": "tma_sq_full",
         "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
         "ScaleUnit": "100%"
     },
     {
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: FRONTEND_RETIRED.UNKNOWN_BRANCH",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: FRONTEND_RETIRED.UNKNOWN_BRANCH",
         "ScaleUnit": "100%"
     },
     {
index 7f0dc65a55d2ff9b32dcc2ff1adb9aa7286163ed..f937ba0e50e1a980d38523e82199af4ae4fb835e 100644 (file)
         "SampleAfterValue": "200003",
         "UMask": "0x4f"
     },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an instruction cache or TLB miss.",
+        "EventCode": "0x35",
+        "EventName": "MEM_BOUND_STALLS_IFETCH.ALL",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x7f"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
+        "EventCode": "0x35",
+        "EventName": "MEM_BOUND_STALLS_IFETCH.L2_HIT",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an icache or itlb miss which hit in the LLC.",
+        "EventCode": "0x35",
+        "EventName": "MEM_BOUND_STALLS_IFETCH.LLC_HIT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an icache or itlb miss which missed all the caches.",
+        "EventCode": "0x35",
+        "EventName": "MEM_BOUND_STALLS_IFETCH.LLC_MISS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x78"
+    },
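
The MEM_BOUND_STALLS_IFETCH umasks are designed to compose: the per-level masks OR together into .ALL. A quick check of the encodings above (these are the values from this file, not new definitions):

    # Umask composition check for MEM_BOUND_STALLS_IFETCH (event 0x35).
    L2_HIT, LLC_HIT, LLC_MISS = 0x1, 0x6, 0x78
    assert L2_HIT | LLC_HIT | LLC_MISS == 0x7f  # equals the .ALL umask

The MEM_BOUND_STALLS_LOAD umasks below (event 0x34) follow the same layout.
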
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to an L1 demand load miss.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS_LOAD.ALL",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x7f"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS_LOAD.L2_HIT",
+        "PublicDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 cache.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which hit in the LLC.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS_LOAD.LLC_HIT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a demand load miss which missed all the local caches.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS_LOAD.LLC_MISS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x78"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in DRAM",
+        "EventCode": "0xd3",
+        "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that hit the L1 data cache.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that miss in the L1 data cache.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that hit in the L2 cache.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that miss in the L2 cache.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of load ops retired that hit in the L3 cache.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1c"
+    },
+    {
+        "BriefDescription": "Counts the number of loads that hit in a write combining buffer (WCB), excluding the first load that caused the WCB to allocate.",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons:  load buffer, store buffer or RSV full.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.ALL",
+        "SampleAfterValue": "20003",
+        "UMask": "0x7"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
+        "SampleAfterValue": "20003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.RSV",
+        "SampleAfterValue": "20003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
+        "SampleAfterValue": "20003",
+        "UMask": "0x1"
+    },
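
The MEM_SCHEDULER_BLOCK sub-events likewise OR into .ALL (0x1 | 0x2 | 0x4 == 0x7), and the natural derived ratio is blocked cycles over core cycles. A sketch with hypothetical counts:

    # Sketch: fraction of cycles uop issue was blocked on a full load
    # buffer, store buffer, or RSV (values hypothetical).
    blocked = 4.0e7   # MEM_SCHEDULER_BLOCK.ALL
    cycles  = 2.0e9   # CPU_CLK_UNHALTED.CORE
    print(f"mem-scheduler blocked: {blocked / cycles:.1%}")
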
     {
         "BriefDescription": "Counts the number of load ops retired.",
         "Data_LA": "1",
         "SampleAfterValue": "1000003",
         "UMask": "0x5"
     },
+    {
+        "BriefDescription": "Counts the number of load uops retired that performed one or more locks",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x21"
+    },
+    {
+        "BriefDescription": "Counts the number of memory uops retired that were splits.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x43"
+    },
+    {
+        "BriefDescription": "Counts the number of retired split load uops.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x41"
+    },
+    {
+        "BriefDescription": "Counts the number of retired split store uops.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x42"
+    },
     {
         "BriefDescription": "Counts the number of  stores uops retired same as MEM_UOPS_RETIRED.ALL_STORES",
         "Data_LA": "1",
         "PEBS": "2",
         "SampleAfterValue": "1000003",
         "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to an icache miss",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ICACHE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x20"
     }
 ]
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/floating-point.json b/tools/perf/pmu-events/arch/x86/sierraforest/floating-point.json
new file mode 100644 (file)
index 0000000..00c9a8a
--- /dev/null
@@ -0,0 +1,68 @@
+[
+    {
+        "BriefDescription": "Counts the number of cycles when any of the floating point dividers are active.",
+        "CounterMask": "1",
+        "EventCode": "0xcd",
+        "EventName": "ARITH.FPDIV_ACTIVE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of all types of floating point operations per uop with all default weighting",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.ALL",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to FP_FLOPS_RETIRED.FP64]",
+        "Deprecated": "1",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.DP",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point operations that produce 32 bit single precision results [This event is alias to FP_FLOPS_RETIRED.SP]",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.FP32",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point operations that produce 64 bit double precision results [This event is alias to FP_FLOPS_RETIRED.DP]",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.FP64",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to FP_FLOPS_RETIRED.FP32]",
+        "Deprecated": "1",
+        "EventCode": "0xc8",
+        "EventName": "FP_FLOPS_RETIRED.SP",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.FP_ASSIST",
+        "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.",
+        "SampleAfterValue": "20003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point divide uops retired (x87 and sse, including x87 sqrt).",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.FPDIV",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    }
+]
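
The new floating-point file splits FLOP counting by result width: FP_FLOPS_RETIRED.ALL (umask 0x3) is the sum of .FP32 (0x2) and .FP64 (0x1), with .SP/.DP kept only as deprecated aliases. A sketch of deriving a FLOP rate from these counts, values hypothetical:

    # Sketch: FLOP rate from the FP_FLOPS_RETIRED events (values hypothetical).
    fp32, fp64 = 6.0e9, 2.0e9   # FP_FLOPS_RETIRED.FP32 / .FP64
    seconds = 4.0               # measurement interval
    print(f"{(fp32 + fp64) / seconds / 1e9:.1f} GFLOP/s")
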
index be8f1c7e195c0eefbfb7784d93dd0d14f624d4bb..356d36aecc81096523d8963869f1f6dd47ce7f92 100644 (file)
@@ -1,4 +1,20 @@
 [
+    {
+        "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "EventCode": "0xe6",
+        "EventName": "BACLEARS.ANY",
+        "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend.  Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
     {
         "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.",
         "EventCode": "0x80",
index 79d8af45100c98e54ab8a74eb08b9bb121b4cf04..e0ce2decc805f20ef4ba1bb64365e903bad77d15 100644 (file)
@@ -1,4 +1,70 @@
 [
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.ANY_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xff"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.L1_BOUND_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xf4"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DL1 miss.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.L1_MISS_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x81"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.OTHER_AT_RET",
+        "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xc0"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.PGWALK_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xa0"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.ST_ADDR_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x84"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "20003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts misaligned loads that are 4K page splits.",
+        "EventCode": "0x13",
+        "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts misaligned stores that are 4K page splits.",
+        "EventCode": "0x13",
+        "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
     {
         "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
         "EventCode": "0xB7",
index 2414f6ff53b053c4feb666662df0b721683919c4..70a9da7e97dfc81b0d006ea071146c9522ae3928 100644 (file)
@@ -1,4 +1,13 @@
 [
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS]",
+        "Deprecated": "1",
+        "EventCode": "0xe4",
+        "EventName": "LBR_INSERTS.ANY",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
     {
         "BriefDescription": "Counts demand data reads that have any type of response.",
         "EventCode": "0xB7",
         "MSRValue": "0x10002",
         "SampleAfterValue": "100003",
         "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state.",
+        "EventCode": "0x75",
+        "EventName": "SERIALIZATION.C01_MS_SCB",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
     }
 ]
index 41212957ef2180a2a8d3b55f3e668db960d94819..ba9843110f070dc279662e26ffa0300146935fe8 100644 (file)
@@ -1,4 +1,12 @@
 [
+    {
+        "BriefDescription": "Counts the number of cycles when any of the dividers are active.",
+        "CounterMask": "1",
+        "EventCode": "0xcd",
+        "EventName": "ARITH.DIV_ACTIVE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x3"
+    },
     {
         "BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
         "EventCode": "0xc4",
@@ -7,6 +15,71 @@
         "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires.  All branch type instructions are accounted for.",
         "SampleAfterValue": "200003"
     },
+    {
+        "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e"
+    },
+    {
+        "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe"
+    },
+    {
+        "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xbf"
+    },
+    {
+        "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb"
+    },
+    {
+        "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.IND_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "Counts the number of near CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf9"
+    },
+    {
+        "BriefDescription": "Counts the number of near RET branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7"
+    },
     {
         "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.",
         "EventCode": "0xc5",
         "PublicDescription": "Counts the total number of mispredicted branch instructions retired.  All branch type instructions are accounted for.  Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP.    A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.",
         "SampleAfterValue": "200003"
     },
+    {
+        "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near taken branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7"
+    },
     {
         "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles",
         "EventName": "CPU_CLK_UNHALTED.CORE",
         "PEBS": "1",
         "SampleAfterValue": "2000003"
     },
+    {
+        "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+        "SampleAfterValue": "20003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to a page fault.  Counts both I-Side and D-Side (Loads/Stores) page faults.  A page fault occurs when either the page is not present, or an access violation occurs.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.PAGE_FAULT",
+        "SampleAfterValue": "20003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.SLOW",
+        "SampleAfterValue": "20003",
+        "UMask": "0x6f"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "20003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of Last Branch Record (LBR) entries. Requires LBRs to be enabled and configured in IA32_LBR_CTL. [This event is alias to LBR_INSERTS.ANY]",
+        "EventCode": "0xe4",
+        "EventName": "MISC_RETIRED.LBR_INSERTS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
     {
         "BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
         "EventCode": "0x73",
         "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window, including relevant microcode flows, and while uops are not yet available in the instruction queue (IQ) or until an FE_BOUND event occurs besides OTHER and CISC. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
         "SampleAfterValue": "1000003"
     },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to Fast Nukes such as  Memory Ordering Machine clears and MRN nukes",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to Branch Mispredict",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
     {
         "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls",
         "EventCode": "0x74",
         "EventName": "TOPDOWN_BE_BOUND.ALL",
         "SampleAfterValue": "1000003"
     },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to due to certain allocation restrictions",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stall (scheduler not being able to accept another uop).  This could be caused by RSV full or load/store buffer block.",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC and FPC RAT stalls - which can be due to the FIQ and IEC reservation station stall (integer, FP and SIMD scheduler not being able to accept another uop. )",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to mrbl stall.  A 'marble' refers to a physical register file entry, also known as the physical destination (PDST).",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.REGISTER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to ROB full",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to iq/jeu scoreboards or ms scb",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
     {
         "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
         "EventCode": "0x71",
         "EventName": "TOPDOWN_FE_BOUND.ALL",
         "SampleAfterValue": "1000003"
     },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BAClear",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTClear",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ms",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.CISC",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stall",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.DECODE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8d"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x72"
+    },
+    {
+        "BriefDescription": "This event is deprecated. [This event is alias to TOPDOWN_FE_BOUND.ITLB_MISS]",
+        "Deprecated": "1",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ITLB",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to itlb miss [This event is alias to TOPDOWN_FE_BOUND.ITLB]",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ITLB_MISS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend that do not categorize into any other common frontend stall",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.OTHER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to predecode wrong",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
     {
         "BriefDescription": "Counts the number of consumed retirement slots.  Similar to UOPS_RETIRED.ALL",
         "EventCode": "0x72",
         "EventName": "TOPDOWN_RETIRING.ALL",
         "PEBS": "1",
         "SampleAfterValue": "1000003"
+    },
+    {
+        "BriefDescription": "Counts the number of uops issued by the front end every cycle.",
+        "EventCode": "0x0e",
+        "EventName": "UOPS_ISSUED.ANY",
+        "PublicDescription": "Counts the number of uops issued by the front end every cycle. When 4-uops are requested and only 2-uops are delivered, the event counts 2.  Uops_issued correlates to the number of ROB entries.  If uop takes 2 ROB slots it counts as 2 uops_issued.",
+        "SampleAfterValue": "1000003"
+    },
+    {
+        "BriefDescription": "Counts the total number of uops retired.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.ALL",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Counts the number of integer divide uops retired.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.IDIV",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS).  This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.MS",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of x87 uops retired, includes those in ms flows",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.X87",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
     }
 ]
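
The TOPDOWN_* events above partition issue slots into the four level-1
topdown categories (frontend bound, backend bound, bad speculation,
retiring), so their .ALL flavors can be opened as one perf event group
and each category reported as a share of the total. A minimal sketch in
the same raw-encoding style, using EventCodes 0x71 through 0x74 with
UMask 0 (the EventName line for the 0x73 event falls outside the diff
context shown here, so treating UMask 0 as its .ALL flavor is an
assumption):

    /* Read the four level-1 topdown slot counters as one group and
     * print each category's share of total issue slots.
     */
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <string.h>
    #include <stdint.h>
    #include <stdio.h>

    static int open_slots(uint64_t config, int group_fd)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;
            attr.config = config;               /* EventCode, UMask 0 */
            attr.read_format = PERF_FORMAT_GROUP;
            attr.disabled = (group_fd == -1);   /* only the group leader */
            return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
    }

    int main(void)
    {
            static const char *name[] = {
                    "fe_bound", "retiring", "bad_spec", "be_bound"
            };
            struct { uint64_t nr; uint64_t val[4]; } data;
            uint64_t total = 0;
            int lead, i;

            lead = open_slots(0x71, -1);        /* TOPDOWN_FE_BOUND.ALL */
            if (lead < 0)
                    return 1;
            for (i = 0; i < 3; i++)             /* 0x72, 0x73, 0x74 */
                    if (open_slots(0x72 + i, lead) < 0)
                            return 1;

            ioctl(lead, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
            /* ... the workload being measured runs here ... */
            ioctl(lead, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

            if (read(lead, &data, sizeof(data)) != sizeof(data))
                    return 1;
            for (i = 0; i < 4; i++)
                    total += data.val[i];
            for (i = 0; i < 4; i++)
                    printf("%-8s %5.1f%% of issue slots\n", name[i],
                           total ? 100.0 * data.val[i] / total : 0.0);
            return 0;
    }

This is roughly the computation perf's topdown metrics perform from the
same four events, ignoring multiplexing and scaling.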
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json
new file mode 100644
index 0000000..a3aafbb
--- /dev/null
@@ -0,0 +1,2853 @@
+[
+    {
+        "BriefDescription": "Clockticks for CMS units attached to CHA",
+        "EventCode": "0x01",
+        "EventName": "UNC_CHACMS_CLOCKTICKS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "PublicDescription": "UNC_CHACMS_CLOCKTICKS",
+        "Unit": "CHACMS"
+    },
+    {
+        "BriefDescription": "Number of CHA clock cycles while the event is enabled",
+        "EventCode": "0x01",
+        "EventName": "UNC_CHA_CLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "Clockticks of the uncore caching and home agent (CHA)",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts transactions that looked into the multi-socket cacheline Directory state, and therefore did not send a snoop because the Directory indicated it was not needed.",
+        "EventCode": "0x53",
+        "EventName": "UNC_CHA_DIR_LOOKUP.NO_SNP",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts  transactions that looked into the multi-socket cacheline Directory state, and sent one or more snoops, because the Directory indicated it was needed.",
+        "EventCode": "0x53",
+        "EventName": "UNC_CHA_DIR_LOOKUP.SNP",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts only multi-socket cacheline Directory state updates memory writes issued from the HA pipe. This does not include memory write requests which are for I (Invalid) or E (Exclusive) cachelines.",
+        "EventCode": "0x54",
+        "EventName": "UNC_CHA_DIR_UPDATE.HA",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts only multi-socket cacheline Directory state updates due to memory writes issued from the TOR pipe which are the result of remote transaction hitting the SF/LLC and returning data Core2Core. This does not include memory write requests which are for I (Invalid) or E (Exclusive) cachelines.",
+        "EventCode": "0x54",
+        "EventName": "UNC_CHA_DIR_UPDATE.TOR",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Distress signal assertion for dynamic prefetch throttle (DPT).  Threshold for distress signal assertion reached in TOR or IRQ (immediate cause for triggering).",
+        "EventCode": "0x59",
+        "EventName": "UNC_CHA_DISTRESS_ASSERTED.DPT_ANY",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x3",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Distress signal assertion for dynamic prefetch throttle (DPT).  Threshold for distress signal assertion reached in IRQ (immediate cause for triggering).",
+        "EventCode": "0x59",
+        "EventName": "UNC_CHA_DISTRESS_ASSERTED.DPT_IRQ",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Distress signal assertion for dynamic prefetch throttle (DPT).  Threshold for distress signal assertion reached in TOR (immediate cause for triggering).",
+        "EventCode": "0x59",
+        "EventName": "UNC_CHA_DISTRESS_ASSERTED.DPT_TOR",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts when a normal (Non-Isochronous) full line write is issued from the CHA to the any of the memory controller channels.",
+        "EventCode": "0x5b",
+        "EventName": "UNC_CHA_IMC_WRITES_COUNT.FULL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CHA to iMC Full Line Writes Issued : ISOCH Full Line : Counts the total number of full line writes issued from the HA into the memory controller.",
+        "EventCode": "0x5b",
+        "EventName": "UNC_CHA_IMC_WRITES_COUNT.FULL_PRIORITY",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CHA to iMC Full Line Writes Issued : Partial Non-ISOCH : Counts the total number of full line writes issued from the HA into the memory controller.",
+        "EventCode": "0x5b",
+        "EventName": "UNC_CHA_IMC_WRITES_COUNT.PARTIAL",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CHA to iMC Full Line Writes Issued : ISOCH Partial : Counts the total number of full line writes issued from the HA into the memory controller.",
+        "EventCode": "0x5b",
+        "EventName": "UNC_CHA_IMC_WRITES_COUNT.PARTIAL_PRIORITY",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: All Requests to Remotely Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.ALL_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : All transactions from Remote Agents",
+        "UMask": "0x17e0ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: CRd Requests",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.CODE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : CRd Requests",
+        "UMask": "0x1bd0ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests and Read Prefetches",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.DATA_RD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x1bc1ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests, Read Prefetches, and Snoops",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.DATA_READ_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Data Reads",
+        "UMask": "0x1fc1ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.DATA_READ_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Demand Data Reads, Core and LLC prefetches",
+        "UMask": "0x841ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests, Read Prefetches, and Snoops which miss the Cache",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.DATA_READ_MISS",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Data Read Misses",
+        "UMask": "0x1fc101",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: All Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCALLY_HOMED_ADDRESS",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Transactions homed locally",
+        "UMask": "0xbdfff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Code Read Requests and Code Read Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_CODE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : CRd Requests",
+        "UMask": "0x19d0ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests and Read Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_DATA_RD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x19c1ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Code Read Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_DMND_CODE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : CRd Requests",
+        "UMask": "0x1850ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_DMND_DATA_RD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x1841ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: RFO Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_DMND_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : RFO Requests",
+        "UMask": "0x1848ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: LLC Prefetch Requests to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_LLC_PF",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x189dff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: All Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_PF",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x199dff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Code Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_PF_CODE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : CRd Requests",
+        "UMask": "0x1910ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Read Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_PF_DATA_RD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x1981ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: RFO Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_PF_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : RFO Requests",
+        "UMask": "0x1908ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: RFO Requests and RFO Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.LOCAL_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : RFO Requests",
+        "UMask": "0x19c8ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: All Requests to Remotely Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.REMOTELY_HOMED_ADDRESS",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Transactions homed remotely : Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing. : Transaction whose address resides in a remote MC",
+        "UMask": "0x15dfff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Code Read/Prefetch Requests from a Remote Socket",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.REMOTE_CODE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : CRd Requests",
+        "UMask": "0x1a10ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Data Read/Prefetch Requests from a Remote Socket",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.REMOTE_DATA_RD",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed - this includes code, data, prefetches and hints coming from L2.  This has numerous filters available.  Note the non-standard filtering equation.  This event will count requests that lookup the cache multiple times with multiple increments.  One must ALWAYS set umask bit 0 and select a state or states to match.  Otherwise, the event will count nothing.   CHAFilter0[24:21,17] bits correspond to [FMESI] state. Read transactions",
+        "UMask": "0x1a01ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: RFO Requests/Prefetches from a Remote Socket",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.REMOTE_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : RFO Requests",
+        "UMask": "0x1a08ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Snoop Requests from a Remote Socket",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.REMOTE_SNP",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of times the LLC was accessed",
+        "UMask": "0x1c19ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: All RFO and RFO Prefetches",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.RFO",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : All RFOs - Demand and Prefetches",
+        "UMask": "0x1bc8ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: RFO Requests and RFO Prefetches to Locally Homed Memory",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.RFO_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Locally HOMed RFOs - Demand and Prefetches",
+        "UMask": "0x9c8ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Writes to Locally Homed Memory (includes writebacks from L1/L2)",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.WRITE_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Writes",
+        "UMask": "0x842ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Cache Lookups: Writes to Remotely Homed Memory (includes writebacks from L1/L2)",
+        "EventCode": "0x34",
+        "EventName": "UNC_CHA_LLC_LOOKUP.WRITE_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "Cache Lookups : Remote Writes",
+        "UMask": "0x17c2ff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : All Lines Victimized",
+        "UMask": "0xf",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : IA traffic : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.IA",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : IO traffic : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.IO",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.LOCAL_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Local - All Lines",
+        "UMask": "0x200f",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.LOCAL_E",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Local - Lines in E State",
+        "UMask": "0x2002",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.LOCAL_F",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Local - Lines in F State",
+        "UMask": "0x2008",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.LOCAL_M",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Local - Lines in M State",
+        "UMask": "0x2001",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.LOCAL_S",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Local - Lines in S State",
+        "UMask": "0x2004",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.REMOTE_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Remote - All Lines",
+        "UMask": "0x800f",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.REMOTE_E",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Remote - Lines in E State",
+        "UMask": "0x8002",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.REMOTE_M",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Remote - Lines in M State",
+        "UMask": "0x8001",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Lines Victimized : Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.REMOTE_S",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Remote - Lines in S State",
+        "UMask": "0x8004",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_E",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Lines in E state",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_M",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Lines in M state",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the number of lines that were victimized on a fill.  This can be filtered by the state that the line was in.",
+        "EventCode": "0x37",
+        "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_S",
+        "PerPkg": "1",
+        "PublicDescription": "Lines Victimized : Lines in S State",
+        "UMask": "0x4",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts when a RFO (the Read for Ownership issued before a  write) request hit a cacheline in the S (Shared) state.",
+        "EventCode": "0x39",
+        "EventName": "UNC_CHA_MISC.RFO_HIT_S",
+        "PerPkg": "1",
+        "PublicDescription": "Cbo Misc : RFO HitS",
+        "UMask": "0x8",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "OSB Snoop Broadcast : Local InvItoE : Count of OSB snoop broadcasts. Counts by 1 per request causing OSB snoops to be broadcast. Does not count all the snoops generated by OSB.",
+        "EventCode": "0x55",
+        "EventName": "UNC_CHA_OSB.LOCAL_INVITOE",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "OSB Snoop Broadcast : Local Rd : Count of OSB snoop broadcasts. Counts by 1 per request causing OSB snoops to be broadcast. Does not count all the snoops generated by OSB.",
+        "EventCode": "0x55",
+        "EventName": "UNC_CHA_OSB.LOCAL_READ",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "OSB Snoop Broadcast : Off : Count of OSB snoop broadcasts. Counts by 1 per request causing OSB snoops to be broadcast. Does not count all the snoops generated by OSB.",
+        "EventCode": "0x55",
+        "EventName": "UNC_CHA_OSB.OFF_PWRHEURISTIC",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "OSB Snoop Broadcast : Remote Rd : Count of OSB snoop broadcasts. Counts by 1 per request causing OSB snoops to be broadcast. Does not count all the snoops generated by OSB.",
+        "EventCode": "0x55",
+        "EventName": "UNC_CHA_OSB.REMOTE_READ",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "OSB Snoop Broadcast : RFO HitS Snoop Broadcast : Count of OSB snoop broadcasts. Counts by 1 per request causing OSB snoops to be broadcast. Does not count all the snoops generated by OSB.",
+        "EventCode": "0x55",
+        "EventName": "UNC_CHA_OSB.RFO_HITS_SNP_BCAST",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.ALLOC_EXCLUSIVE",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.ALLOC_EXCLUSIVE",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.ALLOC_SHARED",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.ALLOC_SHARED",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.DEALLOC_EVCTCLN",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.DEALLOC_EVCTCLN",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.DIRBACKED_ONLY",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.DIRBACKED_ONLY",
+        "PerPkg": "1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.HIT_EXCLUSIVE",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.HIT_EXCLUSIVE",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.HIT_SHARED",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.HIT_SHARED",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.INCLUSIVE_ONLY",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.INCLUSIVE_ONLY",
+        "PerPkg": "1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.MISS",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.MISS",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.UPDATE_EXCLUSIVE",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.UPDATE_EXCLUSIVE",
+        "PerPkg": "1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.UPDATE_SHARED",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.UPDATE_SHARED",
+        "PerPkg": "1",
+        "UMask": "0x80",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.VICTIM_EXCLUSIVE",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.VICTIM_EXCLUSIVE",
+        "PerPkg": "1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UNC_CHA_REMOTE_SF.VICTIM_SHARED",
+        "EventCode": "0x69",
+        "EventName": "UNC_CHA_REMOTE_SF.VICTIM_SHARED",
+        "PerPkg": "1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the total number of requests coming from a unit on this socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.INVITOE",
+        "PerPkg": "1",
+        "PublicDescription": "HA Read and Write Requests : InvalItoE",
+        "UMask": "0x30",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the total number of requests coming from a unit on this socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.INVITOE_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the total number of requests coming from a remote socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.INVITOE_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts read requests made into this CHA. Reads include all read opcodes (including RFO: the Read for Ownership issued before a  write) .",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.READS",
+        "PerPkg": "1",
+        "PublicDescription": "HA Read and Write Requests : Reads",
+        "UMask": "0x3",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts read requests coming from a unit on this socket made into this CHA. Reads include all read opcodes (including RFO: the Read for Ownership issued before a  write).",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.READS_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts read requests coming from a remote socket made into the CHA. Reads include all read opcodes (including RFO: the Read for Ownership issued before a  write).",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.READS_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts write requests made into the CHA, including streaming, evictions, HitM (Reads from another core to a Modified cacheline), etc.",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.WRITES",
+        "PerPkg": "1",
+        "PublicDescription": "HA Read and Write Requests : Writes",
+        "UMask": "0xc",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts  write requests coming from a unit on this socket made into this CHA, including streaming, evictions, HitM (Reads from another core to a Modified cacheline), etc.",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.WRITES_LOCAL",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Counts the total number of read requests made into the Home Agent. Reads include all read opcodes (including RFO).  Writes include all writes (streaming, evictions, HitM, etc).",
+        "EventCode": "0x50",
+        "EventName": "UNC_CHA_REQUESTS.WRITES_REMOTE",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR Inserts",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All",
+        "UMask": "0xc001ffff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CLFlush transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_HIT_CLFLUSH",
+        "PerPkg": "1",
+        "UMask": "0x78c8c7fd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "FsRdCur transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_HIT_FSRDCUR",
+        "PerPkg": "1",
+        "UMask": "0x78c8effd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "FsRdCurPtl transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_HIT_FSRDCURPTL",
+        "PerPkg": "1",
+        "UMask": "0x78c9effd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoM transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_HIT_ITOM",
+        "PerPkg": "1",
+        "UMask": "0x78cc47fd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMWr transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_HIT_ITOMWR",
+        "PerPkg": "1",
+        "UMask": "0x78cc4ffd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "MemPushWr transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_HIT_MEMPUSHWR",
+        "PerPkg": "1",
+        "UMask": "0x78cc6ffd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCiL transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_HIT_WCIL",
+        "PerPkg": "1",
+        "UMask": "0x78c86ffd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WcilF transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_HIT_WCILF",
+        "PerPkg": "1",
+        "UMask": "0x78c867fd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WiL transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_HIT_WIL",
+        "PerPkg": "1",
+        "UMask": "0x78c87ffd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CLFlush transactions from a CXL device which miss the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_MISS_CLFLUSH",
+        "PerPkg": "1",
+        "UMask": "0x78c8c7fe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "FsRdCur transactions from a CXL device which miss the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_MISS_FSRDCUR",
+        "PerPkg": "1",
+        "UMask": "0x78c8effe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "FsRdCurPtl transactions from a CXL device which miss the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_MISS_FSRDCURPTL",
+        "PerPkg": "1",
+        "UMask": "0x78c9effe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoM transactions from a CXL device which miss the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_MISS_ITOM",
+        "PerPkg": "1",
+        "UMask": "0x78cc47fe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMWr transactions from a CXL device which miss the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_MISS_ITOMWR",
+        "PerPkg": "1",
+        "UMask": "0x78cc4ffe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "MemPushWr transactions from a CXL device which miss the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_MISS_MEMPUSHWR",
+        "PerPkg": "1",
+        "UMask": "0x78cc6ffe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCiL transactions from a CXL device which miss the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_MISS_WCIL",
+        "PerPkg": "1",
+        "UMask": "0x78c86ffe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WcilF transactions from a CXL device which miss the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_MISS_WCILF",
+        "PerPkg": "1",
+        "UMask": "0x78c867fe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WiL transactions from a CXL device which miss the L3.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.CXL_MISS_WIL",
+        "PerPkg": "1",
+        "UMask": "0x78c87ffe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All locally initiated requests from IA Cores",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from iA Cores",
+        "UMask": "0xc001ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CLFlush events that are initiated from the Core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_CLFLUSH",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CLFlushes issued by iA Cores",
+        "UMask": "0xc8c7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CLFlushOpt events that are initiated from the Core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_CLFLUSHOPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CLFlushOpts issued by iA Cores",
+        "UMask": "0xc8d7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRDs issued by iA Cores",
+        "UMask": "0xc80fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts; Code read prefetch from local IA that misses in the snoop filter",
+        "UMask": "0xc88fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_DRD_OPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opts issued by iA Cores",
+        "UMask": "0xc827ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt_Prefs issued by iA Cores",
+        "UMask": "0xc8a7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All locally initiated requests from IA Cores which hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from iA Cores that Hit the LLC",
+        "UMask": "0xc001fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRds issued by iA Cores that Hit the LLC",
+        "UMask": "0xc80ffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read prefetch from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRd_Prefs issued by iA Cores that hit the LLC",
+        "UMask": "0xc88ffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All requests issued from IA cores to CXL accelerator memory regions that hit the LLC.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c0018101",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_DRD_OPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opts issued by iA Cores that hit the LLC",
+        "UMask": "0xc827fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt prefetch from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt_Prefs issued by iA Cores that hit the LLC",
+        "UMask": "0xc8a7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoM requests from local IA cores that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by iA Cores that Hit LLC",
+        "UMask": "0xcc47fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch code read from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefCode issued by iA Cores that hit the LLC",
+        "UMask": "0xcccffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch data read from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefData issued by iA Cores that hit the LLC",
+        "UMask": "0xccd7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch read for ownership from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores that hit the LLC",
+        "UMask": "0xccc7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Hit the LLC",
+        "UMask": "0xc807fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership prefetch from local IA that hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_HIT_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Hit the LLC",
+        "UMask": "0xc887fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoM events that are initiated from the Core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by iA Cores",
+        "UMask": "0xcc47ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMCacheNear requests from local IA cores",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMCacheNears issued by iA Cores",
+        "UMask": "0xcd47ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch code read from local IA.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefCode issued by iA Cores",
+        "UMask": "0xcccfff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch data read from local IA.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefData issued by iA Cores",
+        "UMask": "0xccd7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores",
+        "UMask": "0xccc7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All locally initiated requests from IA Cores which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from iA Cores that Missed the LLC",
+        "UMask": "0xc001fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRds issued by iA Cores that Missed the LLC",
+        "UMask": "0xc80ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CRDs from local IA cores to locally homed memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRd issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc80efe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Code read prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRd_Prefs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc88ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CRD Prefetches from local IA cores to locally homed memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRd_Prefs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc88efe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CRD Prefetches from local IA cores to remotely homed memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRd_Prefs issued by iA Cores that Missed the LLC - HOMed remotely",
+        "UMask": "0xc88f7e01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CRDs from local IA cores to remotely homed memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CRD_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CRd issued by iA Cores that Missed the LLC - HOMed remotely",
+        "UMask": "0xc80f7e01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All requests issued from IA cores to CXL accelerator memory regions that miss the LLC.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c0018201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "DRds and equivalent opcodes issued from an IA core which miss the L3 and target memory in a CXL type 2 memory expander card.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_CXL_ACC",
+        "PerPkg": "1",
+        "PublicDescription": "DRds issued from an IA core which miss the L3 and target memory in a CXL type 2 memory expander card.",
+        "UMask": "0x10c8178201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt issued by iA Cores that missed the LLC",
+        "UMask": "0xc827fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Inserts into the TOR from local IA cores which miss the LLC and snoop filter with the opcode DRd_Opt, and which target local memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc826fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Data read opt prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt_Prefs issued by iA Cores that missed the LLC",
+        "UMask": "0xc8a7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Inserts into the TOR from local IA cores which miss the LLC and snoop filter with the opcode DRD_PREF_OPT, and target local memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt_Prefs issued by iA Cores that missed the LLC",
+        "UMask": "0xc8a6fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Inserts into the TOR from local IA cores which miss the LLC and snoop filter with the opcode DRD_PREF_OPT, and target remote memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_PREF_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt_Prefs issued by iA Cores that missed the LLC",
+        "UMask": "0xc8a77e01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Inserts into the TOR from local IA cores which miss the LLC and snoop filter with the opcode DRd_Opt, and target remote memory",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_OPT_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : DRd_Opt issued by iA Cores that missed the LLC",
+        "UMask": "0xc8277e01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "L2 data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c8978201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoM requests from local IA cores that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by iA Cores that Missed LLC",
+        "UMask": "0xcc47fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch code read from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefCode issued by iA Cores that missed the LLC",
+        "UMask": "0xcccffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch data read from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefData issued by iA Cores that missed the LLC",
+        "UMask": "0xccd7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "LLC data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10ccd78201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Last level cache prefetch read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : LLCPrefRFO issued by iA Cores that missed the LLC",
+        "UMask": "0xccc7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "L2 RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFRFO_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c8878201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA cores to locally homed DDR addresses that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LOCAL_WCILF_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
+        "UMask": "0xc8668601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA cores to locally homed PMM addresses which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LOCAL_WCILF_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting PMM that missed the LLC - HOMed locally",
+        "UMask": "0xc8668a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from local IA cores to locally homed DDR addresses that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LOCAL_WCIL_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
+        "UMask": "0xc86e8601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from local IA cores to locally homed PMM addresses which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_LOCAL_WCIL_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores targeting PMM that missed the LLC - HOMed locally",
+        "UMask": "0xc86e8a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA cores to remotely homed DDR addresses that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_REMOTE_WCILF_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed remotely",
+        "UMask": "0xc8670601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA cores to remotely homed PMM addresses which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_REMOTE_WCILF_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting PMM that missed the LLC - HOMed remotely",
+        "UMask": "0xc8670a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from local IA cores to remotely homed DDR addresses that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_REMOTE_WCIL_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores targeting DDR that missed the LLC - HOMed remotely",
+        "UMask": "0xc86f0601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from local IA cores to remotely homed PMM addresses which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_REMOTE_WCIL_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores targeting PMM that missed the LLC - HOMed remotely",
+        "UMask": "0xc86f0a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc807fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "RFOs issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c8078201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc806fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc887fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "LLC RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10ccc78201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc886fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_PREF_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFO_Prefs issued by iA Cores that Missed the LLC - HOMed remotely",
+        "UMask": "0xc8877e01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_RFO_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by iA Cores that Missed the LLC - HOMed remotely",
+        "UMask": "0xc8077e01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "UCRDF requests from local IA cores that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_UCRDF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : UCRdFs issued by iA Cores that Missed LLC",
+        "UMask": "0xc877de01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from a local IA core that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WCIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc86ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA core that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WCILF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLF issued by iA Cores that Missed the LLC",
+        "UMask": "0xc867fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA cores to DDR homed addresses which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WCILF_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting DDR that missed the LLC",
+        "UMask": "0xc8678601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA cores to PMM homed addresses which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WCILF_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLFs issued by iA Cores targeting PMM that missed the LLC",
+        "UMask": "0xc8678a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from local IA cores to DDR homed addresses which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WCIL_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores targeting DDR that missed the LLC",
+        "UMask": "0xc86f8601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from a local IA core to PMM homed addresses that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WCIL_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores targeting PMM that missed the LLC",
+        "UMask": "0xc86f8a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WIL requests from local IA cores that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_WIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WiLs issued by iA Cores that Missed LLC",
+        "UMask": "0xc87fde01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by iA Cores",
+        "UMask": "0xc807ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFO_Prefs issued by iA Cores",
+        "UMask": "0xc887ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "SpecItoM events that are initiated from the Core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_SPECITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : SpecItoMs issued by iA Cores",
+        "UMask": "0xcc57ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WbEFtoEs issued by iA Cores.  (Non Modified Write Backs)",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WBEFTOE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc3fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WbEFtoIs issued by iA Cores .  (Non Modified Write Backs)",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WBEFTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc37ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WbMtoEs issued by iA Cores .  (Modified Write Backs)",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WBMTOE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc2fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WbMtoI requests from local IA cores",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WBMTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WbMtoIs issued by iA Cores",
+        "UMask": "0xcc27ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WbStoIs issued by iA Cores .  (Non Modified Write Backs)",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WBSTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc67ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCIL requests from a local IA core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WCIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLs issued by iA Cores",
+        "UMask": "0xc86fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WCILF requests from local IA core",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IA_WCILF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WCiLF issued by iA Cores",
+        "UMask": "0xc867ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR inserts from local IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from IO Devices",
+        "UMask": "0xc001ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "CLFlush requests from IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_CLFLUSH",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : CLFlushes issued by IO Devices",
+        "UMask": "0xc8c3ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR inserts from local IO devices which hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from IO Devices that hit the LLC",
+        "UMask": "0xc001fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMs from local IO devices which hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc43fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMCacheNears, indicating a partial write request, from IO Devices that hit the LLC",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that hit the LLC",
+        "UMask": "0xcd43fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "PCIRDCURs issued by IO devices which hit the LLC",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : PCIRdCurs issued by IO Devices that hit the LLC",
+        "UMask": "0xc8f3fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "RFOs from local IO devices which hit the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_HIT_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by IO Devices that hit the LLC",
+        "UMask": "0xc803fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR ItoM inserts from local IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices",
+        "UMask": "0xcc43ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMCacheNears, indicating a partial write request, from IO Devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices",
+        "UMask": "0xcd43ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR inserts from local IO devices which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All requests from IO Devices that missed the LLC",
+        "UMask": "0xc001fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR ItoM inserts from local IO devices which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that missed the LLC",
+        "UMask": "0xcc43fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "UMask": "0xcd43fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMCacheNear transactions from an IO device on the local socket that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "UMask": "0xcd42fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoMCacheNear transactions from an IO device on a remote socket that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "UMask": "0xcd437e04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoM transactions from an IO device on the local socket that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that missed the LLC",
+        "UMask": "0xcc42fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "ItoM transactions from an IO device on a remote socket that miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : ItoMs issued by IO Devices that missed the LLC",
+        "UMask": "0xcc437e04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "PCIRDCURs issued by IO devices which miss the LLC",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : PCIRdCurs issued by IO Devices that missed the LLC",
+        "UMask": "0xc8f3fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All TOR RFO inserts from local IO devices which miss the cache",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_MISS_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by IO Devices that missed the LLC",
+        "UMask": "0xc803fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "PCIRDCURs issued by IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : PCIRdCurs issued by IO Devices",
+        "UMask": "0xc8f3ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "RFOs from local IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : RFOs issued by IO Devices",
+        "UMask": "0xc803ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "WBMtoI requests from IO devices",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.IO_WBMTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : WbMtoIs issued by IO Devices",
+        "UMask": "0xcc23ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Inserts for SF or LLC Evictions",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.LLC_OR_SF_EVICTIONS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR allocation occurred as a result of SF/LLC evictions (came from the ISMQ)",
+        "UMask": "0xc001ff02",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All locally initiated requests",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.LOC_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All from Local iA and IO",
+        "UMask": "0xc000ff05",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All from Local iA",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.LOC_IA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All from Local iA",
+        "UMask": "0xc000ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All from Local IO",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.LOC_IO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All from Local IO",
+        "UMask": "0xc000ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All remote requests (e.g. snoops, writebacks) that came from remote sockets",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.REM_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All Remote Requests",
+        "UMask": "0xc001ffc8",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "All snoops to this LLC that came from remote sockets",
+        "EventCode": "0x35",
+        "EventName": "UNC_CHA_TOR_INSERTS.REM_SNPS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Inserts : All Snoops from Remote",
+        "UMask": "0xc001ff08",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "Occupancy for all TOR entries",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.ALL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All",
+        "UMask": "0xc001ffff",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CLFlush transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_HIT_CLFLUSH",
+        "PerPkg": "1",
+        "UMask": "0x78c8c7fd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for FsRdCur transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_HIT_FSRDCUR",
+        "PerPkg": "1",
+        "UMask": "0x78c8effd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for FsRdCurPtl transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_HIT_FSRDCURPTL",
+        "PerPkg": "1",
+        "UMask": "0x78c9effd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoM transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_HIT_ITOM",
+        "PerPkg": "1",
+        "UMask": "0x78cc47fd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMWr transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_HIT_ITOMWR",
+        "PerPkg": "1",
+        "UMask": "0x78cc4ffd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for MemPushWr transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_HIT_MEMPUSHWR",
+        "PerPkg": "1",
+        "UMask": "0x78cc6ffd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCiL transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_HIT_WCIL",
+        "PerPkg": "1",
+        "UMask": "0x78c86ffd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WcilF transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_HIT_WCILF",
+        "PerPkg": "1",
+        "UMask": "0x78c867fd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WiL transactions from a CXL device which hit in the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_HIT_WIL",
+        "PerPkg": "1",
+        "UMask": "0x78c87ffd20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CLFlush transactions from a CXL device which miss the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_MISS_CLFLUSH",
+        "PerPkg": "1",
+        "UMask": "0x78c8c7fe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for FsRdCur transactions from a CXL device which miss the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_MISS_FSRDCUR",
+        "PerPkg": "1",
+        "UMask": "0x78c8effe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for FsRdCurPtl transactions from a CXL device which miss the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_MISS_FSRDCURPTL",
+        "PerPkg": "1",
+        "UMask": "0x78c9effe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoM transactions from a CXL device which miss the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_MISS_ITOM",
+        "PerPkg": "1",
+        "UMask": "0x78cc47fe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMWr transactions from a CXL device which miss the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_MISS_ITOMWR",
+        "PerPkg": "1",
+        "UMask": "0x78cc4ffe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for MemPushWr transactions from a CXL device which miss the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_MISS_MEMPUSHWR",
+        "PerPkg": "1",
+        "UMask": "0x78cc6ffe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCiL transactions from a CXL device which miss the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_MISS_WCIL",
+        "PerPkg": "1",
+        "UMask": "0x78c86ffe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WcilF transactions from a CXL device which miss the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_MISS_WCILF",
+        "PerPkg": "1",
+        "UMask": "0x78c867fe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WiL transactions from a CXL device which miss the L3.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.CXL_MISS_WIL",
+        "PerPkg": "1",
+        "UMask": "0x78c87ffe20",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All locally initiated requests from IA Cores",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from iA Cores",
+        "UMask": "0xc001ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CLFlush events that are initiated from the Core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_CLFLUSH",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CLFlushes issued by iA Cores",
+        "UMask": "0xc8c7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CLFlushOpt events that are initiated from the Core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_CLFLUSHOPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CLFlushOpts issued by iA Cores",
+        "UMask": "0xc8d7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRDs issued by iA Cores",
+        "UMask": "0xc80fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy; Code read prefetch from local IA that misses in the snoop filter",
+        "UMask": "0xc88fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Data read opt from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_DRD_OPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : DRd_Opts issued by iA Cores",
+        "UMask": "0xc827ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Data read opt prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : DRd_Opt_Prefs issued by iA Cores",
+        "UMask": "0xc8a7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All locally initiated requests from IA Cores which hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from iA Cores that Hit the LLC",
+        "UMask": "0xc001fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRds issued by iA Cores that Hit the LLC",
+        "UMask": "0xc80ffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read prefetch from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRd_Prefs issued by iA Cores that hit the LLC",
+        "UMask": "0xc88ffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All requests issued from IA cores to CXL accelerator memory regions that hit the LLC.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c0018101",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Data read opt from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD_OPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : DRd_Opts issued by iA Cores that hit the LLC",
+        "UMask": "0xc827fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Data read opt prefetch from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : DRd_Opt_Prefs issued by iA Cores that hit the LLC",
+        "UMask": "0xc8a7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoM requests from local IA cores that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by iA Cores that Hit LLC",
+        "UMask": "0xcc47fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch code read from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefCode issued by iA Cores that hit the LLC",
+        "UMask": "0xcccffd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch data read from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefData issued by iA Cores that hit the LLC",
+        "UMask": "0xccd7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch read for ownership from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefRFO issued by iA Cores that hit the LLC",
+        "UMask": "0xccc7fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Hit the LLC",
+        "UMask": "0xc807fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership prefetch from local IA that hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_HIT_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFO_Prefs issued by iA Cores that Hit the LLC",
+        "UMask": "0xc887fd01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoM events that are initiated from the Core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by iA Cores",
+        "UMask": "0xcc47ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMCacheNear requests from local IA cores",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMCacheNears issued by iA Cores",
+        "UMask": "0xcd47ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch code read from local IA.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefCode issued by iA Cores",
+        "UMask": "0xcccfff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch data read from local IA.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefData issued by iA Cores",
+        "UMask": "0xccd7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefRFO issued by iA Cores",
+        "UMask": "0xccc7ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All locally initiated requests from IA Cores which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from iA Cores that Missed the LLC",
+        "UMask": "0xc001fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRds issued by iA Cores that Missed the LLC",
+        "UMask": "0xc80ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CRDs from local IA cores to locally homed memory",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRd issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc80efe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Code read prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRd_Prefs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc88ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CRD Prefetches from local IA cores to locally homed memory",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD_PREF_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRd_Prefs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc88efe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CRD Prefetches from local IA cores to remotely homed memory",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD_PREF_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRd_Prefs issued by iA Cores that Missed the LLC - HOMed remotely",
+        "UMask": "0xc88f7e01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CRDs from local IA cores to remotely homed memory",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CRD_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CRd issued by iA Cores that Missed the LLC - HOMed remotely",
+        "UMask": "0xc80f7e01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All requests issued from IA cores to CXL accelerator memory regions that miss the LLC.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c0018201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for DRds and equivalent opcodes issued from an IA core which miss the L3 and target memory in a CXL type 2 memory expander card.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c8178201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Data read opt from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : DRd_Opt issued by iA Cores that missed the LLC",
+        "UMask": "0xc827fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Data read opt prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_OPT_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : DRd_Opt_Prefs issued by iA Cores that missed the LLC",
+        "UMask": "0xc8a7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for L2 data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PREF_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c8978201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoM requests from local IA cores that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by iA Cores that Missed LLC",
+        "UMask": "0xcc47fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch code read from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFCODE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefCode issued by iA Cores that missed the LLC",
+        "UMask": "0xcccffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch data read from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefData issued by iA Cores that missed the LLC",
+        "UMask": "0xccd7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for LLC data prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFDATA_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10ccd78201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Last level cache prefetch read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : LLCPrefRFO issued by iA Cores that missed the LLC",
+        "UMask": "0xccc7fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for L2 RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LLCPREFRFO_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c8878201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA cores to locally homed DDR addresses that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LOCAL_WCILF_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
+        "UMask": "0xc8668601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA cores to locally homed PMM addresses which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LOCAL_WCILF_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting PMM that missed the LLC - HOMed locally",
+        "UMask": "0xc8668a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from local IA cores to locally homed DDR addresses that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LOCAL_WCIL_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores targeting DDR that missed the LLC - HOMed locally",
+        "UMask": "0xc86e8601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from local IA cores to locally homed PMM addresses which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_LOCAL_WCIL_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores targeting PMM that missed the LLC - HOMed locally",
+        "UMask": "0xc86e8a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA cores to remotely homed DDR addresses that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_REMOTE_WCILF_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting DDR that missed the LLC - HOMed remotely",
+        "UMask": "0xc8670601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA cores to remotely homed PMM addresses which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_REMOTE_WCILF_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting PMM that missed the LLC - HOMed remotely",
+        "UMask": "0xc8670a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from local IA cores to remotely homed DDR addresses that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_REMOTE_WCIL_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores targeting DDR that missed the LLC - HOMed remotely",
+        "UMask": "0xc86f0601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from local IA cores to remotely homed PMM addresses which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_REMOTE_WCIL_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores targeting PMM that missed the LLC - HOMed remotely",
+        "UMask": "0xc86f0a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc807fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for RFOs issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10c8078201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc806fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFO_Prefs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc887fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for LLC RFO prefetches issued from an IA core which miss the L3 and target memory in a CXL type 2 accelerator.",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_CXL_ACC",
+        "PerPkg": "1",
+        "UMask": "0x10ccc78201",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFO_Prefs issued by iA Cores that Missed the LLC - HOMed locally",
+        "UMask": "0xc886fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_PREF_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFO_Prefs issued by iA Cores that Missed the LLC - HOMed remotely",
+        "UMask": "0xc8877e01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_RFO_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by iA Cores that Missed the LLC - HOMed remotely",
+        "UMask": "0xc8077e01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for UCRDF requests from local IA cores that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_UCRDF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : UCRdFs issued by iA Cores that Missed LLC",
+        "UMask": "0xc877de01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from a local IA core that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WCIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores that Missed the LLC",
+        "UMask": "0xc86ffe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA core that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WCILF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLF issued by iA Cores that Missed the LLC",
+        "UMask": "0xc867fe01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA cores to DDR homed addresses which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WCILF_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting DDR that missed the LLC",
+        "UMask": "0xc8678601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA cores to PMM homed addresses which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WCILF_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLFs issued by iA Cores targeting PMM that missed the LLC",
+        "UMask": "0xc8678a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from local IA cores to DDR homed addresses which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WCIL_DDR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores targeting DDR that missed the LLC",
+        "UMask": "0xc86f8601",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from a local IA core to PMM homed addresses that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WCIL_PMM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores targeting PMM that missed the LLC",
+        "UMask": "0xc86f8a01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WIL requests from local IA cores that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_WIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WiLs issued by iA Cores that Missed LLC",
+        "UMask": "0xc87fde01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by iA Cores",
+        "UMask": "0xc807ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for Read for ownership prefetch from local IA that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_RFO_PREF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFO_Prefs issued by iA Cores",
+        "UMask": "0xc887ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for SpecItoM events that are initiated from the Core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_SPECITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : SpecItoMs issued by iA Cores",
+        "UMask": "0xcc57ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WbMtoI requests from local IA cores",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_WBMTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WbMtoIs issued by iA Cores",
+        "UMask": "0xcc27ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCIL requests from a local IA core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_WCIL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLs issued by iA Cores",
+        "UMask": "0xc86fff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WCILF requests from local IA core",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_WCILF",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WCiLF issued by iA Cores",
+        "UMask": "0xc867ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR inserts from local IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from IO Devices",
+        "UMask": "0xc001ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for CLFlush requests from IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_CLFLUSH",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : CLFlushes issued by IO Devices",
+        "UMask": "0xc8c3ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR inserts from local IO devices which hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from IO Devices that hit the LLC",
+        "UMask": "0xc001fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMs from local IO devices which hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by IO Devices that Hit the LLC",
+        "UMask": "0xcc43fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMCacheNears, indicating a partial write request, from IO Devices that hit the LLC",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices that hit the LLC",
+        "UMask": "0xcd43fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for PCIRDCURs issued by IO devices which hit the LLC",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : PCIRdCurs issued by IO Devices that hit the LLC",
+        "UMask": "0xc8f3fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for RFOs from local IO devices which hit the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_HIT_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by IO Devices that hit the LLC",
+        "UMask": "0xc803fd04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR ItoM inserts from local IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by IO Devices",
+        "UMask": "0xcc43ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMCacheNears, indicating a partial write request, from IO Devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices",
+        "UMask": "0xcd43ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR inserts from local IO devices which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All requests from IO Devices that missed the LLC",
+        "UMask": "0xc001fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR ItoM inserts from local IO devices which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOM",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by IO Devices that missed the LLC",
+        "UMask": "0xcc43fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOMCACHENEAR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "UMask": "0xcd43fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMCacheNear transactions from an IO device on the local socket that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOMCACHENEAR_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "UMask": "0xcd42fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoMCacheNear transactions from an IO device on a remote socket that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOMCACHENEAR_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMCacheNears, indicating a partial write request, from IO Devices that missed the LLC",
+        "UMask": "0xcd437e04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoM transactions from an IO device on the local socket that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOM_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by IO Devices that missed the LLC",
+        "UMask": "0xcc42fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for ItoM transactions from an IO device on a remote socket that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_ITOM_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : ItoMs issued by IO Devices that missed the LLC",
+        "UMask": "0xcc437e04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for PCIRDCURs issued by IO devices which miss the LLC",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : PCIRdCurs issued by IO Devices that missed the LLC",
+        "UMask": "0xc8f3fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for PCIRDCUR transactions from an IO device on the local socket that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR_LOCAL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : PCIRdCurs issued by IO Devices that missed the LLC",
+        "UMask": "0xc8f2fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for PCIRDCUR transactions from an IO device on a remote socket that miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_PCIRDCUR_REMOTE",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : PCIRdCurs issued by IO Devices that missed the LLC",
+        "UMask": "0xc8f37e04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All TOR RFO inserts from local IO devices which miss the cache",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_MISS_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by IO Devices that missed the LLC",
+        "UMask": "0xc803fe04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for PCIRDCURs issued by IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_PCIRDCUR",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : PCIRdCurs issued by IO Devices",
+        "UMask": "0xc8f3ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for RFOs from local IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_RFO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : RFOs issued by IO Devices",
+        "UMask": "0xc803ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for WBMtoI requests from IO devices",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.IO_WBMTOI",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : WbMtoIs issued by IO Devices",
+        "UMask": "0xcc23ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All locally initiated requests",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.LOC_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All from Local iA and IO",
+        "UMask": "0xc000ff05",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All from Local iA",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.LOC_IA",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All from Local iA",
+        "UMask": "0xc000ff01",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All from Local IO",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.LOC_IO",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All from Local IO",
+        "UMask": "0xc000ff04",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All remote requests (e.g. snoops, writebacks) that came from remote sockets",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.REM_ALL",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All Remote Requests",
+        "UMask": "0xc001ffc8",
+        "Unit": "CHA"
+    },
+    {
+        "BriefDescription": "TOR Occupancy for All snoops to this LLC that came from remote sockets",
+        "EventCode": "0x36",
+        "EventName": "UNC_CHA_TOR_OCCUPANCY.REM_SNPS",
+        "PerPkg": "1",
+        "PublicDescription": "TOR Occupancy : All Snoops from Remote",
+        "UMask": "0xc001ff08",
+        "Unit": "CHA"
+    }
+]
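The TOR (Table of Requests) occupancy events above pair with the matching
UNC_CHA_TOR_INSERTS.* events defined earlier in this file: occupancy
accumulates the number of in-flight TOR entries each CHA clock, so occupancy
divided by clockticks gives average queue depth, and by Little's Law
occupancy divided by inserts gives the average lifetime of an entry in CHA
clocks. A minimal sketch in Python, assuming perf was built with these
pmu-events so the aliases resolve, that uncore counting runs system-wide
(typically as root), and that UNC_CHA_TOR_INSERTS.IA_MISS_DRD and
UNC_CHA_CLOCKTICKS exist as named here (both are assumptions about parts of
the file not shown in this hunk):

    import subprocess

    # Event aliases assumed to come from the JSON above / earlier in this
    # file; swap in any OCCUPANCY/INSERTS pair of interest.
    EVENTS = [
        "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD",  # in-flight DRd misses, summed per clock
        "UNC_CHA_TOR_INSERTS.IA_MISS_DRD",    # DRd misses allocated into the TOR
        "UNC_CHA_CLOCKTICKS",                 # CHA clock, the time base
    ]

    def read_counts(workload):
        """Run workload under 'perf stat -a' and return {event: count}."""
        res = subprocess.run(
            ["perf", "stat", "-a", "-x", ",", "-e", ",".join(EVENTS),
             "--", *workload],
            capture_output=True, text=True)  # perf stat reports on stderr
        counts = {}
        for line in res.stderr.splitlines():
            fields = line.split(",")
            # CSV layout: value,unit,event,... ; skips "<not counted>" etc.
            if len(fields) >= 3 and fields[0].replace(".", "", 1).isdigit():
                counts[fields[2].lower()] = float(fields[0])  # perf may lowercase aliases
        return counts

    c = read_counts(["sleep", "1"])
    occ, ins, clk = (c[e.lower()] for e in EVENTS)
    print(f"avg DRd-miss entries in TOR : {occ / clk:.2f}")
    print(f"avg DRd-miss latency        : {occ / ins:.1f} CHA clocks")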
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cxl.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-cxl.json
new file mode 100644 (file)
index 0000000..dc676c7
--- /dev/null
@@ -0,0 +1,10 @@
+[
+    {
+        "BriefDescription": "B2CXL Clockticks",
+        "EventCode": "0x01",
+        "EventName": "UNC_B2CXL_CLOCKTICKS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "Unit": "B2CXL"
+    }
+]
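All of these pmu-events files share the same flat schema: a JSON array of
records keyed by BriefDescription, EventCode, EventName, PerPkg and Unit,
plus optional UMask, PortMask and PublicDescription, which perf's jevents
step compiles into event aliases at build time. A minimal sketch of reading
one directly, assuming a kernel tree checkout so the relative path below
resolves (the path is just one of the files added by this patch):

    import collections
    import json

    # Any of the sierraforest files added here parses the same way.
    PATH = "tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json"

    with open(PATH) as f:
        events = json.load(f)

    # Count events per uncore unit (CHA, IRP, UPI, ...) and spot records
    # that carry no UMask, i.e. the EventCode alone selects the event.
    per_unit = collections.Counter(e["Unit"] for e in events)
    no_umask = [e["EventName"] for e in events if "UMask" not in e]

    for unit, n in per_unit.most_common():
        print(f"{unit:8s} {n:4d} events")
    print("events selected by EventCode alone:", len(no_umask))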
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json
new file mode 100644 (file)
index 0000000..6932b2f
--- /dev/null
@@ -0,0 +1,1228 @@
+[
+    {
+        "BriefDescription": "Clockticks of the mesh to memory (B2CMI)",
+        "EventCode": "0x01",
+        "EventName": "UNC_B2CMI_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of time D2C was not honoured by egress due to directory state constraints",
+        "EventCode": "0x17",
+        "EventName": "UNC_B2CMI_DIRECT2CORE_NOT_TAKEN_DIRSTATE",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of times B2CMI egress did D2C (direct to core)",
+        "EventCode": "0x16",
+        "EventName": "UNC_B2CMI_DIRECT2CORE_TAKEN",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of times D2C wasn't honoured even though the incoming request had d2c set for non cisgress txn",
+        "EventCode": "0x18",
+        "EventName": "UNC_B2CMI_DIRECT2CORE_TXN_OVERRIDE",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of d2k wasn't done due to credit constraints",
+        "EventCode": "0x1B",
+        "EventName": "UNC_B2CMI_DIRECT2UPI_NOT_TAKEN_CREDITS",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Direct to UPI Transactions - Ignored due to lack of credits : All : Counts the number of d2k wasn't done due to credit constraints",
+        "EventCode": "0x1B",
+        "EventName": "UNC_B2CMI_DIRECT2UPI_NOT_TAKEN_CREDITS.EGRESS",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of time D2K was not honoured by egress due to directory state constraints",
+        "EventCode": "0x1A",
+        "EventName": "UNC_B2CMI_DIRECT2UPI_NOT_TAKEN_DIRSTATE",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Cycles when Direct2UPI was Disabled : Egress Ignored D2U : Counts the number of time D2K was not honoured by egress due to directory state constraints",
+        "EventCode": "0x1A",
+        "EventName": "UNC_B2CMI_DIRECT2UPI_NOT_TAKEN_DIRSTATE.EGRESS",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of times egress did D2K (Direct to KTI)",
+        "EventCode": "0x19",
+        "EventName": "UNC_B2CMI_DIRECT2UPI_TAKEN",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of times D2K wasn't honoured even though the incoming request had d2k set for non cisgress txn",
+        "EventCode": "0x1C",
+        "EventName": "UNC_B2CMI_DIRECT2UPI_TXN_OVERRIDE",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Hit Clean",
+        "EventCode": "0x1D",
+        "EventName": "UNC_B2CMI_DIRECTORY_HIT.CLEAN",
+        "PerPkg": "1",
+        "UMask": "0x38",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Hit : On NonDirty Line in A State",
+        "EventCode": "0x1D",
+        "EventName": "UNC_B2CMI_DIRECTORY_HIT.CLEAN_A",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Hit : On NonDirty Line in I State",
+        "EventCode": "0x1D",
+        "EventName": "UNC_B2CMI_DIRECTORY_HIT.CLEAN_I",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Hit : On NonDirty Line in S State",
+        "EventCode": "0x1D",
+        "EventName": "UNC_B2CMI_DIRECTORY_HIT.CLEAN_S",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Hit Dirty (modified)",
+        "EventCode": "0x1D",
+        "EventName": "UNC_B2CMI_DIRECTORY_HIT.DIRTY",
+        "PerPkg": "1",
+        "UMask": "0x7",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Hit : On Dirty Line in A State",
+        "EventCode": "0x1D",
+        "EventName": "UNC_B2CMI_DIRECTORY_HIT.DIRTY_A",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Hit : On Dirty Line in I State",
+        "EventCode": "0x1D",
+        "EventName": "UNC_B2CMI_DIRECTORY_HIT.DIRTY_I",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Hit : On Dirty Line in S State",
+        "EventCode": "0x1D",
+        "EventName": "UNC_B2CMI_DIRECTORY_HIT.DIRTY_S",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of 1lm or 2lm hit read data returns to egress with any directory to non persistent memory",
+        "EventCode": "0x20",
+        "EventName": "UNC_B2CMI_DIRECTORY_LOOKUP.ANY",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of 1lm or 2lm hit read data returns to egress with directory A to non persistent memory",
+        "EventCode": "0x20",
+        "EventName": "UNC_B2CMI_DIRECTORY_LOOKUP.STATE_A",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of 1lm or 2lm hit read data returns to egress with directory I to non persistent memory",
+        "EventCode": "0x20",
+        "EventName": "UNC_B2CMI_DIRECTORY_LOOKUP.STATE_I",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the number of 1lm or 2lm hit read data returns to egress with directory S to non persistent memory",
+        "EventCode": "0x20",
+        "EventName": "UNC_B2CMI_DIRECTORY_LOOKUP.STATE_S",
+        "PerPkg": "1",
+        "PublicDescription": "Counts the number of 1lm or 2lm hit read  data returns to egress with directory S to non persistent memory",
+        "UMask": "0x4",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Miss Clean",
+        "EventCode": "0x1E",
+        "EventName": "UNC_B2CMI_DIRECTORY_MISS.CLEAN",
+        "PerPkg": "1",
+        "UMask": "0x38",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Miss : On NonDirty Line in A State",
+        "EventCode": "0x1E",
+        "EventName": "UNC_B2CMI_DIRECTORY_MISS.CLEAN_A",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Miss : On NonDirty Line in I State",
+        "EventCode": "0x1E",
+        "EventName": "UNC_B2CMI_DIRECTORY_MISS.CLEAN_I",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Miss : On NonDirty Line in S State",
+        "EventCode": "0x1E",
+        "EventName": "UNC_B2CMI_DIRECTORY_MISS.CLEAN_S",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Miss Dirty (modified)",
+        "EventCode": "0x1E",
+        "EventName": "UNC_B2CMI_DIRECTORY_MISS.DIRTY",
+        "PerPkg": "1",
+        "UMask": "0x7",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Miss : On Dirty Line in A State",
+        "EventCode": "0x1E",
+        "EventName": "UNC_B2CMI_DIRECTORY_MISS.DIRTY_A",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Miss : On Dirty Line in I State",
+        "EventCode": "0x1E",
+        "EventName": "UNC_B2CMI_DIRECTORY_MISS.DIRTY_I",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory Miss : On Dirty Line in S State",
+        "EventCode": "0x1E",
+        "EventName": "UNC_B2CMI_DIRECTORY_MISS.DIRTY_S",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Any A2I Transition",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.A2I",
+        "PerPkg": "1",
+        "UMask": "0x320",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Any A2S Transition",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.A2S",
+        "PerPkg": "1",
+        "UMask": "0x340",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts cisgress directory updates",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.ANY",
+        "PerPkg": "1",
+        "UMask": "0x301",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts any 1lm or 2lm hit data return that would result in directory update to non persistent memory (DRAM)",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.HIT_ANY",
+        "PerPkg": "1",
+        "UMask": "0x101",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory update in near memory to the A state",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.HIT_X2A",
+        "PerPkg": "1",
+        "UMask": "0x114",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory update in near memory to the I state",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.HIT_X2I",
+        "PerPkg": "1",
+        "UMask": "0x128",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory update in near memory to the S state",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.HIT_X2S",
+        "PerPkg": "1",
+        "UMask": "0x142",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Any I2A Transition",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.I2A",
+        "PerPkg": "1",
+        "UMask": "0x304",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Any I2S Transition",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.I2S",
+        "PerPkg": "1",
+        "UMask": "0x302",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory update in far memory to the A state",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.MISS_X2A",
+        "PerPkg": "1",
+        "UMask": "0x214",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory update in far memory to the I state",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.MISS_X2I",
+        "PerPkg": "1",
+        "UMask": "0x228",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory update in far memory to the S state",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.MISS_X2S",
+        "PerPkg": "1",
+        "UMask": "0x242",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Any S2A Transition",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.S2A",
+        "PerPkg": "1",
+        "UMask": "0x310",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Any S2I Transition",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.S2I",
+        "PerPkg": "1",
+        "UMask": "0x308",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory update to the A state",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.X2A",
+        "PerPkg": "1",
+        "UMask": "0x314",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory update to the I state",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.X2I",
+        "PerPkg": "1",
+        "UMask": "0x328",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Directory update to the S state",
+        "EventCode": "0x21",
+        "EventName": "UNC_B2CMI_DIRECTORY_UPDATE.X2S",
+        "PerPkg": "1",
+        "UMask": "0x342",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts any read",
+        "EventCode": "0x24",
+        "EventName": "UNC_B2CMI_IMC_READS.ALL",
+        "PerPkg": "1",
+        "UMask": "0x104",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts normal reads issue to CMI",
+        "EventCode": "0x24",
+        "EventName": "UNC_B2CMI_IMC_READS.NORMAL",
+        "PerPkg": "1",
+        "UMask": "0x101",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Count reads to NM region",
+        "EventCode": "0x24",
+        "EventName": "UNC_B2CMI_IMC_READS.TO_DDR_AS_CACHE",
+        "PerPkg": "1",
+        "UMask": "0x110",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts reads to 1lm non persistent memory regions",
+        "EventCode": "0x24",
+        "EventName": "UNC_B2CMI_IMC_READS.TO_DDR_AS_MEM",
+        "PerPkg": "1",
+        "UMask": "0x108",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "All Writes - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.ALL",
+        "PerPkg": "1",
+        "UMask": "0x110",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Full Non-ISOCH - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.FULL",
+        "PerPkg": "1",
+        "UMask": "0x101",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Non-Inclusive - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.NI",
+        "PerPkg": "1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Non-Inclusive Miss - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.NI_MISS",
+        "PerPkg": "1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Partial Non-ISOCH - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.PARTIAL",
+        "PerPkg": "1",
+        "UMask": "0x102",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "DDR, acting as Cache - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.TO_DDR_AS_CACHE",
+        "PerPkg": "1",
+        "UMask": "0x140",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "DDR - All Channels",
+        "EventCode": "0x25",
+        "EventName": "UNC_B2CMI_IMC_WRITES.TO_DDR_AS_MEM",
+        "PerPkg": "1",
+        "UMask": "0x120",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Prefetch CAM Inserts : UPI - Ch 0",
+        "EventCode": "0x56",
+        "EventName": "UNC_B2CMI_PREFCAM_INSERTS.CH0_UPI",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Prefetch CAM Inserts : XPT - Ch 0",
+        "EventCode": "0x56",
+        "EventName": "UNC_B2CMI_PREFCAM_INSERTS.CH0_XPT",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Prefetch CAM Inserts : UPI - All Channels",
+        "EventCode": "0x56",
+        "EventName": "UNC_B2CMI_PREFCAM_INSERTS.UPI_ALLCH",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Prefetch CAM Inserts : XPT -All Channels",
+        "EventCode": "0x56",
+        "EventName": "UNC_B2CMI_PREFCAM_INSERTS.XPT_ALLCH",
+        "PerPkg": "1",
+        "PublicDescription": "Prefetch CAM Inserts : XPT - All Channels",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Prefetch CAM Occupancy : Channel 0",
+        "EventCode": "0x54",
+        "EventName": "UNC_B2CMI_PREFCAM_OCCUPANCY.CH0",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm reads and WRNI which were a hit",
+        "EventCode": "0x1F",
+        "EventName": "UNC_B2CMI_TAG_HIT.ALL",
+        "PerPkg": "1",
+        "UMask": "0xf",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm reads which were a hit clean",
+        "EventCode": "0x1F",
+        "EventName": "UNC_B2CMI_TAG_HIT.RD_CLEAN",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm reads which were a hit dirty",
+        "EventCode": "0x1F",
+        "EventName": "UNC_B2CMI_TAG_HIT.RD_DIRTY",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm WRNI which were a hit clean",
+        "EventCode": "0x1F",
+        "EventName": "UNC_B2CMI_TAG_HIT.WR_CLEAN",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm WRNI which were a hit dirty",
+        "EventCode": "0x1F",
+        "EventName": "UNC_B2CMI_TAG_HIT.WR_DIRTY",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm second way read miss for a WrNI",
+        "EventCode": "0x4B",
+        "EventName": "UNC_B2CMI_TAG_MISS.CLEAN",
+        "PerPkg": "1",
+        "UMask": "0x5",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm second way read miss for a WrNI",
+        "EventCode": "0x4B",
+        "EventName": "UNC_B2CMI_TAG_MISS.DIRTY",
+        "PerPkg": "1",
+        "UMask": "0xa",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm second way read miss for a Rd",
+        "EventCode": "0x4B",
+        "EventName": "UNC_B2CMI_TAG_MISS.RD_2WAY",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm reads which were a miss and the cache line is unmodified",
+        "EventCode": "0x4B",
+        "EventName": "UNC_B2CMI_TAG_MISS.RD_CLEAN",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm reads which were a miss and the cache line is modified",
+        "EventCode": "0x4B",
+        "EventName": "UNC_B2CMI_TAG_MISS.RD_DIRTY",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm second way read miss for a WrNI",
+        "EventCode": "0x4B",
+        "EventName": "UNC_B2CMI_TAG_MISS.WR_2WAY",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm WRNI which were a miss and the cache line is unmodified",
+        "EventCode": "0x4B",
+        "EventName": "UNC_B2CMI_TAG_MISS.WR_CLEAN",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Counts the 2lm WRNI which were a miss and the cache line is modified",
+        "EventCode": "0x4B",
+        "EventName": "UNC_B2CMI_TAG_MISS.WR_DIRTY",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Tracker Inserts : Channel 0",
+        "EventCode": "0x32",
+        "EventName": "UNC_B2CMI_TRACKER_INSERTS.CH0",
+        "PerPkg": "1",
+        "UMask": "0x104",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Tracker Occupancy : Channel 0",
+        "EventCode": "0x33",
+        "EventName": "UNC_B2CMI_TRACKER_OCCUPANCY.CH0",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "Write Tracker Inserts : Channel 0",
+        "EventCode": "0x40",
+        "EventName": "UNC_B2CMI_WR_TRACKER_INSERTS.CH0",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2CMI"
+    },
+    {
+        "BriefDescription": "UNC_B2HOT_CLOCKTICKS",
+        "EventCode": "0x01",
+        "EventName": "UNC_B2HOT_CLOCKTICKS",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "B2HOT"
+    },
+    {
+        "BriefDescription": "Number of uclks in domain",
+        "EventCode": "0x01",
+        "EventName": "UNC_B2UPI_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "B2UPI"
+    },
+    {
+        "BriefDescription": "Total Write Cache Occupancy : Mem",
+        "EventCode": "0x0F",
+        "EventName": "UNC_I_CACHE_TOTAL_OCCUPANCY.MEM",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "IRP Clockticks",
+        "EventCode": "0x01",
+        "EventName": "UNC_I_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Inbound read requests received by the IRP and inserted into the FAF queue",
+        "EventCode": "0x18",
+        "EventName": "UNC_I_FAF_INSERTS",
+        "PerPkg": "1",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "FAF occupancy",
+        "EventCode": "0x19",
+        "EventName": "UNC_I_FAF_OCCUPANCY",
+        "PerPkg": "1",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Misc Events - Set 1 : Lost Forward : Snoop pulled away ownership before a write was committed",
+        "EventCode": "0x1F",
+        "EventName": "UNC_I_MISC1.LOST_FWD",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "Inbound write (fast path) requests to coherent memory, received by the IRP resulting in write ownership requests issued by IRP to the mesh.",
+        "EventCode": "0x11",
+        "EventName": "UNC_I_TRANSACTIONS.WR_PREF",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "IRP"
+    },
+    {
+        "BriefDescription": "MDF Clockticks",
+        "EventCode": "0x01",
+        "EventName": "UNC_MDF_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "MDF"
+    },
+    {
+        "BriefDescription": "Number of UPI LL clock cycles while the event is enabled",
+        "EventCode": "0x01",
+        "EventName": "UNC_UPI_CLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "Number of kfclks",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Cycles in L1 : Number of UPI qfclk cycles spent in L1 power mode.  L1 is a mode that totally shuts down a UPI link.  Use edge detect to count the number of instances when the UPI link entered L1.  Link power states are per link and per direction, so for example the Tx direction could be in one state while Rx was in another. Because L1 totally shuts down the link, it takes a good amount of time to exit this mode.",
+        "EventCode": "0x21",
+        "EventName": "UNC_UPI_L1_POWER_CYCLES",
+        "PerPkg": "1",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCB",
+        "PerPkg": "1",
+        "UMask": "0xe",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Non-Coherent Bypass, Match Opcode",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCB_OPC",
+        "PerPkg": "1",
+        "UMask": "0x10e",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCS",
+        "PerPkg": "1",
+        "UMask": "0xf",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Non-Coherent Standard, Match Opcode",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.NCS_OPC",
+        "PerPkg": "1",
+        "UMask": "0x10f",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Request",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.REQ",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Request, Match Opcode",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.REQ_OPC",
+        "PerPkg": "1",
+        "UMask": "0x108",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Response - Conflict",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSPCNFLT",
+        "PerPkg": "1",
+        "UMask": "0x1aa",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Response - Invalid",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSPI",
+        "PerPkg": "1",
+        "UMask": "0x12a",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Response - Data",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSP_DATA",
+        "PerPkg": "1",
+        "UMask": "0xc",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Response - Data, Match Opcode",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSP_DATA_OPC",
+        "PerPkg": "1",
+        "UMask": "0x10c",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Response - No Data",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSP_NODATA",
+        "PerPkg": "1",
+        "UMask": "0xa",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Response - No Data, Match Opcode",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.RSP_NODATA_OPC",
+        "PerPkg": "1",
+        "UMask": "0x10a",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Snoop",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.SNP",
+        "PerPkg": "1",
+        "UMask": "0x9",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Snoop, Match Opcode",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.SNP_OPC",
+        "PerPkg": "1",
+        "UMask": "0x109",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Writeback",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.WB",
+        "PerPkg": "1",
+        "UMask": "0xd",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Receive path of a UPI Port : Writeback, Match Opcode",
+        "EventCode": "0x05",
+        "EventName": "UNC_UPI_RxL_BASIC_HDR_MATCH.WB_OPC",
+        "PerPkg": "1",
+        "UMask": "0x10d",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : All Data : Shows legal flit time (hides impact of L0p and L0c).",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.ALL_DATA",
+        "PerPkg": "1",
+        "UMask": "0xf",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Null FLITs received from any slot",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.ALL_NULL",
+        "PerPkg": "1",
+        "PublicDescription": "Valid Flits Received : Null FLITs received from any slot",
+        "UMask": "0x27",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : Data : Shows legal flit time (hides impact of L0p and L0c). : Count Data Flits (which consume all slots), but how much to count is based on Slot0-2 mask, so count can be 0-3 depending on which slots are enabled for counting..",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.DATA",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : Idle : Shows legal flit time (hides impact of L0p and L0c).",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.IDLE",
+        "PerPkg": "1",
+        "UMask": "0x47",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : LLCRD Not Empty : Shows legal flit time (hides impact of L0p and L0c). : Enables counting of LLCRD (with non-zero payload). This only applies to slot 2 since LLCRD is only allowed in slot 2",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.LLCRD",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : LLCTRL : Shows legal flit time (hides impact of L0p and L0c). : Equivalent to an idle packet.  Enables counting of slot 0 LLCTRL messages.",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.LLCTRL",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : All Non Data : Shows legal flit time (hides impact of L0p and L0c).",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.NON_DATA",
+        "PerPkg": "1",
+        "UMask": "0x97",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : Slot NULL or LLCRD Empty : Shows legal flit time (hides impact of L0p and L0c). : LLCRD with all zeros is treated as NULL. Slot 1 is not treated as NULL if slot 0 is a dual slot. This can apply to slot 0,1, or 2.",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.NULL",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : Protocol Header : Shows legal flit time (hides impact of L0p and L0c). : Enables count of protocol headers in slot 0,1,2 (depending on slot uMask bits)",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.PROTHDR",
+        "PerPkg": "1",
+        "UMask": "0x80",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : Slot 0 : Shows legal flit time (hides impact of L0p and L0c). : Count Slot 0 - Other mask bits determine types of headers to count.",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.SLOT0",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : Slot 1 : Shows legal flit time (hides impact of L0p and L0c). : Count Slot 1 - Other mask bits determine types of headers to count.",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.SLOT1",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Received : Slot 2 : Shows legal flit time (hides impact of L0p and L0c). : Count Slot 2 - Other mask bits determine types of headers to count.",
+        "EventCode": "0x03",
+        "EventName": "UNC_UPI_RxL_FLITS.SLOT2",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "RxQ Flit Buffer Allocations : Slot 0 : Number of allocations into the UPI Rx Flit Buffer.  Generally, when data is transmitted across UPI, it will bypass the RxQ and pass directly to the ring interface.  If things back up getting transmitted onto the ring, however, it may need to allocate into this buffer, thus increasing the latency.  This event can be used in conjunction with the Flit Buffer Occupancy event in order to calculate the average flit buffer lifetime.",
+        "EventCode": "0x30",
+        "EventName": "UNC_UPI_RxL_INSERTS.SLOT0",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "RxQ Flit Buffer Allocations : Slot 1 : Number of allocations into the UPI Rx Flit Buffer.  Generally, when data is transmitted across UPI, it will bypass the RxQ and pass directly to the ring interface.  If things back up getting transmitted onto the ring, however, it may need to allocate into this buffer, thus increasing the latency.  This event can be used in conjunction with the Flit Buffer Occupancy event in order to calculate the average flit buffer lifetime.",
+        "EventCode": "0x30",
+        "EventName": "UNC_UPI_RxL_INSERTS.SLOT1",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "RxQ Flit Buffer Allocations : Slot 2 : Number of allocations into the UPI Rx Flit Buffer.  Generally, when data is transmitted across UPI, it will bypass the RxQ and pass directly to the ring interface.  If things back up getting transmitted onto the ring, however, it may need to allocate into this buffer, thus increasing the latency.  This event can be used in conjunction with the Flit Buffer Occupancy event in order to calculate the average flit buffer lifetime.",
+        "EventCode": "0x30",
+        "EventName": "UNC_UPI_RxL_INSERTS.SLOT2",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "RxQ Occupancy - All Packets : Slot 0",
+        "EventCode": "0x32",
+        "EventName": "UNC_UPI_RxL_OCCUPANCY.SLOT0",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "RxQ Occupancy - All Packets : Slot 1",
+        "EventCode": "0x32",
+        "EventName": "UNC_UPI_RxL_OCCUPANCY.SLOT1",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "RxQ Occupancy - All Packets : Slot 2",
+        "EventCode": "0x32",
+        "EventName": "UNC_UPI_RxL_OCCUPANCY.SLOT2",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCB",
+        "PerPkg": "1",
+        "UMask": "0xe",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Bypass, Match Opcode",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCB_OPC",
+        "PerPkg": "1",
+        "UMask": "0x10e",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCS",
+        "PerPkg": "1",
+        "UMask": "0xf",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Non-Coherent Standard, Match Opcode",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.NCS_OPC",
+        "PerPkg": "1",
+        "UMask": "0x10f",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Request",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.REQ",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Request, Match Opcode",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.REQ_OPC",
+        "PerPkg": "1",
+        "UMask": "0x108",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Response - Conflict",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSPCNFLT",
+        "PerPkg": "1",
+        "UMask": "0x1aa",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Response - Invalid",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSPI",
+        "PerPkg": "1",
+        "UMask": "0x12a",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Response - Data",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSP_DATA",
+        "PerPkg": "1",
+        "UMask": "0xc",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Response - Data, Match Opcode",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSP_DATA_OPC",
+        "PerPkg": "1",
+        "UMask": "0x10c",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Response - No Data",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSP_NODATA",
+        "PerPkg": "1",
+        "UMask": "0xa",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Response - No Data, Match Opcode",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.RSP_NODATA_OPC",
+        "PerPkg": "1",
+        "UMask": "0x10a",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Snoop",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.SNP",
+        "PerPkg": "1",
+        "UMask": "0x9",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Snoop, Match Opcode",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.SNP_OPC",
+        "PerPkg": "1",
+        "UMask": "0x109",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Writeback",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.WB",
+        "PerPkg": "1",
+        "UMask": "0xd",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Matches on Transmit path of a UPI Port : Writeback, Match Opcode",
+        "EventCode": "0x04",
+        "EventName": "UNC_UPI_TxL_BASIC_HDR_MATCH.WB_OPC",
+        "PerPkg": "1",
+        "UMask": "0x10d",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : All Data : Counts number of data flits across this UPI link.",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.ALL_DATA",
+        "PerPkg": "1",
+        "UMask": "0xf",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "All Null Flits",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL",
+        "PerPkg": "1",
+        "PublicDescription": "Valid Flits Sent : Idle",
+        "UMask": "0x27",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : Data : Shows legal flit time (hides impact of L0p and L0c). : Count Data Flits (which consume all slots), but how much to count is based on Slot0-2 mask, so count can be 0-3 depending on which slots are enabled for counting..",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.DATA",
+        "PerPkg": "1",
+        "UMask": "0x8",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : Idle : Shows legal flit time (hides impact of L0p and L0c).",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.IDLE",
+        "PerPkg": "1",
+        "UMask": "0x47",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : LLCRD Not Empty : Shows legal flit time (hides impact of L0p and L0c). : Enables counting of LLCRD (with non-zero payload). This only applies to slot 2 since LLCRD is only allowed in slot 2",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.LLCRD",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : LLCTRL : Shows legal flit time (hides impact of L0p and L0c). : Equivalent to an idle packet.  Enables counting of slot 0 LLCTRL messages.",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.LLCTRL",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : All Non Data : Shows legal flit time (hides impact of L0p and L0c).",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.NON_DATA",
+        "PerPkg": "1",
+        "PublicDescription": "Valid Flits Sent : Null FLITs transmitted to any slot",
+        "UMask": "0x97",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : Slot NULL or LLCRD Empty : Shows legal flit time (hides impact of L0p and L0c). : LLCRD with all zeros is treated as NULL. Slot 1 is not treated as NULL if slot 0 is a dual slot. This can apply to slot 0,1, or 2.",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.NULL",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : Protocol Header : Shows legal flit time (hides impact of L0p and L0c). : Enables count of protocol headers in slot 0,1,2 (depending on slot uMask bits)",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.PROTHDR",
+        "PerPkg": "1",
+        "UMask": "0x80",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : Slot 0 : Shows legal flit time (hides impact of L0p and L0c). : Count Slot 0 - Other mask bits determine types of headers to count.",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.SLOT0",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : Slot 1 : Shows legal flit time (hides impact of L0p and L0c). : Count Slot 1 - Other mask bits determine types of headers to count.",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.SLOT1",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Valid Flits Sent : Slot 2 : Shows legal flit time (hides impact of L0p and L0c). : Count Slot 2 - Other mask bits determine types of headers to count.",
+        "EventCode": "0x02",
+        "EventName": "UNC_UPI_TxL_FLITS.SLOT2",
+        "PerPkg": "1",
+        "UMask": "0x4",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Tx Flit Buffer Allocations : Number of allocations into the UPI Tx Flit Buffer.  Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link.  However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link.  This event can be used in conjunction with the Flit Buffer Occupancy event in order to calculate the average flit buffer lifetime.",
+        "EventCode": "0x40",
+        "EventName": "UNC_UPI_TxL_INSERTS",
+        "PerPkg": "1",
+        "Unit": "UPI"
+    },
+    {
+        "BriefDescription": "Tx Flit Buffer Occupancy : Accumulates the number of flits in the TxQ.  Generally, when data is transmitted across UPI, it will bypass the TxQ and pass directly to the link.  However, the TxQ will be used with L0p and when LLR occurs, increasing latency to transfer out to the link. This can be used with the cycles not empty event to track average occupancy, or the allocations event to track average lifetime in the TxQ.",
+        "EventCode": "0x42",
+        "EventName": "UNC_UPI_TxL_OCCUPANCY",
+        "PerPkg": "1",
+        "Unit": "UPI"
+    }
+]
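
Usage note: these pmu-events JSON files are plain data that perf's jevents
build step compiles into C event tables. The sketch below shows how one of
the entries above maps onto a perf event specification; it assumes the
in-tree path shown in this diff, and it assumes the UPI PMU is exposed to
userspace as "uncore_upi" (a naming assumption about the sysfs
event_source device, not something this file states):

    import json

    # Load the UPI event list added above (path as laid out in this diff).
    path = "tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json"
    with open(path) as f:
        events = json.load(f)

    # UNC_UPI_TxL_FLITS.ALL_DATA counts data flits sent across the link;
    # bandwidth metrics typically scale this count by a bytes-per-counted-flit
    # factor, but that scaling lives in the metric definitions, not here.
    evt = next(e for e in events
               if e["EventName"] == "UNC_UPI_TxL_FLITS.ALL_DATA")

    # Build an event spec from the raw EventCode/UMask fields.
    spec = "uncore_upi/event={},umask={}/".format(evt["EventCode"], evt["UMask"])
    print(spec)  # prints: uncore_upi/event=0x02,umask=0xf/

The same lookup works for the uncore-io.json file that follows; its IIO
entries additionally carry FCMask and PortMask fields that select the flow
class and PCIe port being counted.
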
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json
new file mode 100644
index 0000000..9495cb0
--- /dev/null
@@ -0,0 +1,1634 @@
+[
+    {
+        "BriefDescription": "IIO Clockticks",
+        "EventCode": "0x01",
+        "EventName": "UNC_IIO_CLOCKTICKS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PCIE Completion Buffer Inserts.  Counts once per 64 byte read issued from this PCIE device.",
+        "EventCode": "0xC2",
+        "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff0ff",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010010",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020020",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040040",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Count of allocations in the completion buffer",
+        "EventCode": "0xD5",
+        "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080080",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core reporting completion of Card read from Core DRAM",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Another card (different IIO stack) reading from this card.",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested by the CPU : Another card (different IIO stack) writing to this card.",
+        "EventCode": "0xC0",
+        "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Counts once for every 4 bytes read from this card to memory.  This event does include reads to IO.",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "UMask": "0x7002004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "UMask": "0x7008004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x10",
+        "UMask": "0x7010004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x20",
+        "UMask": "0x7020004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x40",
+        "UMask": "0x7040004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card reading from DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x80",
+        "UMask": "0x7080004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Counts once for every 4 bytes written from this card to memory.  This event does include writes to IO.",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x02",
+        "UMask": "0x7002001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x04",
+        "UMask": "0x7004001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x08",
+        "UMask": "0x7008001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x10",
+        "UMask": "0x7010001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x20",
+        "UMask": "0x7020001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x40",
+        "UMask": "0x7040001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Four byte data request of the CPU : Card writing to DRAM",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x80",
+        "UMask": "0x7080001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Counts once for every 4 bytes written from this card to a peer device's IO space.",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Data requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x83",
+        "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB Hits to a 1G Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.1G_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x10",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB Hits to a 2M Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.2M_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x8",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB Hits to a 4K Page",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.4K_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x4",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB lookups all",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.ALL_LOOKUPS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x2",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Context cache hits",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x80",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Context cache lookups",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.CTXT_CACHE_LOOKUPS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x40",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB lookups first",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.FIRST_LOOKUPS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x1",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB Fills (same as IOTLB miss)",
+        "EventCode": "0x40",
+        "EventName": "UNC_IIO_IOMMU0.MISSES",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOMMU memory access (both low and high priority)",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.NUM_MEM_ACCESSES",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0xc0",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOMMU high priority memory access",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.NUM_MEM_ACCESSES_HIGH",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x80",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOMMU low priority memory access",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.NUM_MEM_ACCESSES_LOW",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x40",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Second Level Page Walk Cache Hit to a 1G page",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.SLPWC_1G_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x4",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Second Level Page Walk Cache Hit to a 256T page",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.SLPWC_256T_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x10",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Second Level Page Walk Cache Hit to a 2M page",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.SLPWC_2M_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x2",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Second Level Page Walk Cache Hit to a 512G page",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.SLPWC_512G_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x8",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Second Level Page Walk Cache fill",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.SLPWC_CACHE_FILLS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Second Level Page Walk Cache lookup",
+        "EventCode": "0x41",
+        "EventName": "UNC_IIO_IOMMU1.SLPWC_CACHE_LOOKUPS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x1",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Cycles PWT full",
+        "EventCode": "0x43",
+        "EventName": "UNC_IIO_IOMMU3.CYC_PWT_FULL",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x2",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Interrupt Entry cache hit",
+        "EventCode": "0x43",
+        "EventName": "UNC_IIO_IOMMU3.INT_CACHE_HITS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x80",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Interrupt Entry cache lookup",
+        "EventCode": "0x43",
+        "EventName": "UNC_IIO_IOMMU3.INT_CACHE_LOOKUPS",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x40",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Context Cache invalidation events",
+        "EventCode": "0x43",
+        "EventName": "UNC_IIO_IOMMU3.NUM_INVAL_CTXT_CACHE",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x8",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Interrupt Entry Cache invalidation events",
+        "EventCode": "0x43",
+        "EventName": "UNC_IIO_IOMMU3.NUM_INVAL_INT_CACHE",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x20",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "IOTLB invalidation events",
+        "EventCode": "0x43",
+        "EventName": "UNC_IIO_IOMMU3.NUM_INVAL_IOTLB",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x4",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "PASID Cache invalidation events",
+        "EventCode": "0x43",
+        "EventName": "UNC_IIO_IOMMU3.NUM_INVAL_PASID_CACHE",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "UMask": "0x10",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Occupancy of outbound request queue : To device : Counts number of outbound requests/completions IIO is currently processing",
+        "EventCode": "0xc5",
+        "EventName": "UNC_IIO_NUM_OUSTANDING_REQ_FROM_CPU.TO_IO",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Passing data to be written",
+        "EventCode": "0x88",
+        "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_OF_CPU.DATA",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x700f020",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Issuing final read or write of line",
+        "EventCode": "0x88",
+        "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_OF_CPU.FINAL_RD_WR",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x700f008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Processing response from IOMMU",
+        "EventCode": "0x88",
+        "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_OF_CPU.IOMMU_HIT",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x700f002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Issuing to IOMMU",
+        "EventCode": "0x88",
+        "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_OF_CPU.IOMMU_REQ",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x700f001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Request Ownership",
+        "EventCode": "0x88",
+        "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_OF_CPU.REQ_OWN",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x700f004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Writing line",
+        "EventCode": "0x88",
+        "EventName": "UNC_IIO_NUM_OUTSTANDING_REQ_OF_CPU.WR",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x700f010",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.ABORT",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff080",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.CONFINED_P2P",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff040",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.LOC_P2P",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff020",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.MCAST",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.MEM",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.MSGB",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.REM_P2P",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff010",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "-",
+        "EventCode": "0x8e",
+        "EventName": "UNC_IIO_NUM_REQ_OF_CPU_BY_TGT.UBOX",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "All 9 bits of Page Walk Tracker Occupancy",
+        "EventCode": "0x42",
+        "EventName": "UNC_IIO_PWT_OCCUPANCY",
+        "PerPkg": "1",
+        "PortMask": "0x000",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core reading from Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Core writing to Cards MMIO space",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Another card (different IIO stack) reading from this card.",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested by the CPU : Another card (different IIO stack) writing to this card.",
+        "EventCode": "0xC1",
+        "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.ALL_PARTS",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x0FF",
+        "UMask": "0x70ff002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080004",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to DRAM",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080001",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card reading from another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080008",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART0",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x001",
+        "UMask": "0x7001002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART1",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x002",
+        "UMask": "0x7002002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART2",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x004",
+        "UMask": "0x7004002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART3",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x008",
+        "UMask": "0x7008002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART4",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x010",
+        "UMask": "0x7010002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART5",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x020",
+        "UMask": "0x7020002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART6",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x040",
+        "UMask": "0x7040002",
+        "Unit": "IIO"
+    },
+    {
+        "BriefDescription": "Number Transactions requested of the CPU : Card writing to another Card (same or different stack)",
+        "EventCode": "0x84",
+        "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART7",
+        "FCMask": "0x07",
+        "PerPkg": "1",
+        "PortMask": "0x080",
+        "UMask": "0x7080002",
+        "Unit": "IIO"
+    }
+]
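
A side note on the composite UMask values in the UNC_IIO_TXN_REQ_* entries above: they follow a visible pattern in which the FCMask sits in the top nibble, the PortMask in the middle bits, and a transaction-type mask in the low byte. A minimal Python sketch of that decoding, inferred purely from the entries in this file (an observation about the data, not a documented encoding):

    # Decode the composite UMask used by the UNC_IIO_TXN_REQ_* entries.
    # Bit positions are inferred from the JSON above, not from a spec.
    def decode_iio_umask(umask: int) -> dict:
        return {
            "fc_mask": (umask >> 24) & 0x7,     # matches the "FCMask" field
            "port_mask": (umask >> 12) & 0xFF,  # matches the "PortMask" field
            "txn_mask": umask & 0xFF,           # read/write/peer selector
        }

    # UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART3 above has UMask 0x7008008:
    assert decode_iio_umask(0x7008008) == {"fc_mask": 0x7, "port_mask": 0x08, "txn_mask": 0x08}

Under that reading, MEM_WRITE uses txn_mask 0x1, PEER_WRITE 0x2, MEM_READ 0x4 and PEER_READ 0x8, consistently across every PART* entry above.
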
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-memory.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-memory.json
new file mode 100644 (file)
index 0000000..a2405ed
--- /dev/null
@@ -0,0 +1,385 @@
+[
+    {
+        "BriefDescription": "DRAM Activate Count : Counts the number of DRAM Activate commands sent on this channel.  Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS.  One can calculate the number of Page Misses by subtracting the number of Page Miss precharges from the number of Activates.",
+        "EventCode": "0x02",
+        "EventName": "UNC_M_ACT_COUNT.ALL",
+        "PerPkg": "1",
+        "UMask": "0xf7",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Activate Count : Read transaction on Page Empty or Page Miss : Counts the number of DRAM Activate commands sent on this channel.  Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS.  One can calculate the number of Page Misses by subtracting the number of Page Miss precharges from the number of Activates.",
+        "EventCode": "0x02",
+        "EventName": "UNC_M_ACT_COUNT.RD",
+        "PerPkg": "1",
+        "UMask": "0xf1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Activate Count : Underfill Read transaction on Page Empty or Page Miss : Counts the number of DRAM Activate commands sent on this channel.  Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS.  One can calculate the number of Page Misses by subtracting the number of Page Miss precharges from the number of Activates.",
+        "EventCode": "0x02",
+        "EventName": "UNC_M_ACT_COUNT.UFILL",
+        "PerPkg": "1",
+        "UMask": "0xf4",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Activate Count : Write transaction on Page Empty or Page Miss : Counts the number of DRAM Activate commands sent on this channel.  Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS.  One can calculate the number of Page Misses by subtracting the number of Page Miss precharges from the number of Activates.",
+        "EventCode": "0x02",
+        "EventName": "UNC_M_ACT_COUNT.WR",
+        "PerPkg": "1",
+        "UMask": "0xf2",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0, all CAS operations",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.ALL",
+        "PerPkg": "1",
+        "UMask": "0xff",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0, all reads",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.RD",
+        "PerPkg": "1",
+        "UMask": "0xcf",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0 regular reads",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.RD_REG",
+        "PerPkg": "1",
+        "UMask": "0xc1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0 underfill reads",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.RD_UNDERFILL",
+        "PerPkg": "1",
+        "UMask": "0xc4",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0, all writes",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.WR",
+        "PerPkg": "1",
+        "UMask": "0xf0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0 regular writes",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.WR_NONPRE",
+        "PerPkg": "1",
+        "UMask": "0xd0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 0 auto-precharge writes",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_CAS_COUNT_SCH0.WR_PRE",
+        "PerPkg": "1",
+        "UMask": "0xe0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1, all CAS operations",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.ALL",
+        "PerPkg": "1",
+        "UMask": "0xff",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1, all reads",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.RD",
+        "PerPkg": "1",
+        "UMask": "0xcf",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1 regular reads",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.RD_REG",
+        "PerPkg": "1",
+        "UMask": "0xc1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1 underfill reads",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.RD_UNDERFILL",
+        "PerPkg": "1",
+        "UMask": "0xc4",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1, all writes",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.WR",
+        "PerPkg": "1",
+        "UMask": "0xf0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1 regular writes",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.WR_NONPRE",
+        "PerPkg": "1",
+        "UMask": "0xd0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "CAS count for SubChannel 1 auto-precharge writes",
+        "EventCode": "0x06",
+        "EventName": "UNC_M_CAS_COUNT_SCH1.WR_PRE",
+        "PerPkg": "1",
+        "UMask": "0xe0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Number of DRAM DCLK clock cycles while the event is enabled",
+        "EventCode": "0x01",
+        "EventName": "UNC_M_CLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "DRAM Clockticks",
+        "UMask": "0x1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Number of DRAM HCLK clock cycles while the event is enabled",
+        "EventCode": "0x01",
+        "EventName": "UNC_M_HCLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "DRAM Clockticks",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Precharge commands. : Counts the number of DRAM Precharge commands sent on this channel.",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_PRE_COUNT.ALL",
+        "PerPkg": "1",
+        "UMask": "0xff",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Precharge commands. : Precharge due to (?) : Counts the number of DRAM Precharge commands sent on this channel.",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_PRE_COUNT.PGT",
+        "PerPkg": "1",
+        "UMask": "0xf8",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Precharge commands. : Counts the number of DRAM Precharge commands sent on this channel.",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_PRE_COUNT.RD",
+        "PerPkg": "1",
+        "UMask": "0xf1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Precharge commands. : Counts the number of DRAM Precharge commands sent on this channel.",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_PRE_COUNT.UFILL",
+        "PerPkg": "1",
+        "UMask": "0xf4",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "DRAM Precharge commands. : Counts the number of DRAM Precharge commands sent on this channel.",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_PRE_COUNT.WR",
+        "PerPkg": "1",
+        "UMask": "0xf2",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read buffer inserts on subchannel 0",
+        "EventCode": "0x17",
+        "EventName": "UNC_M_RDB_INSERTS.SCH0",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read buffer inserts on subchannel 1",
+        "EventCode": "0x17",
+        "EventName": "UNC_M_RDB_INSERTS.SCH1",
+        "PerPkg": "1",
+        "UMask": "0x80",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read buffer occupancy on subchannel 0",
+        "EventCode": "0x1a",
+        "EventName": "UNC_M_RDB_OCCUPANCY_SCH0",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read buffer occupancy on subchannel 1",
+        "EventCode": "0x1b",
+        "EventName": "UNC_M_RDB_OCCUPANCY_SCH1",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue Allocations : Counts the number of allocations into the Read Pending Queue.  This queue is used to schedule reads out to the memory controller and to track the requests.  Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC.  They deallocate after the CAS command has been issued to memory.  This includes both ISOCH and non-ISOCH requests.",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.PCH0",
+        "PerPkg": "1",
+        "UMask": "0x50",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue Allocations : Counts the number of allocations into the Read Pending Queue.  This queue is used to schedule reads out to the memory controller and to track the requests.  Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the HA to the iMC.  They deallocate after the CAS command has been issued to memory.  This includes both ISOCH and non-ISOCH requests.",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.PCH1",
+        "PerPkg": "1",
+        "UMask": "0xa0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue inserts for subchannel 0, pseudochannel 0",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.SCH0_PCH0",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue inserts for subchannel 0, pseudochannel 1",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.SCH0_PCH1",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue inserts for subchannel 1, pseudochannel 0",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.SCH1_PCH0",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read Pending Queue inserts for subchannel 1, pseudochannel 1",
+        "EventCode": "0x10",
+        "EventName": "UNC_M_RPQ_INSERTS.SCH1_PCH1",
+        "PerPkg": "1",
+        "UMask": "0x80",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read pending queue occupancy for subchannel 0, pseudochannel 0",
+        "EventCode": "0x80",
+        "EventName": "UNC_M_RPQ_OCCUPANCY_SCH0_PCH0",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read pending queue occupancy for subchannel 0, pseudochannel 1",
+        "EventCode": "0x81",
+        "EventName": "UNC_M_RPQ_OCCUPANCY_SCH0_PCH1",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read pending queue occupancy for subchannel 1, pseudochannel 0",
+        "EventCode": "0x82",
+        "EventName": "UNC_M_RPQ_OCCUPANCY_SCH1_PCH0",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Read pending queue occupancy for subchannel 1, pseudochannel 1",
+        "EventCode": "0x83",
+        "EventName": "UNC_M_RPQ_OCCUPANCY_SCH1_PCH1",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue Allocations",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.PCH0",
+        "PerPkg": "1",
+        "UMask": "0x50",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue Allocations",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.PCH1",
+        "PerPkg": "1",
+        "UMask": "0xa0",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue inserts for subchannel 0, pseudochannel 0",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.SCH0_PCH0",
+        "PerPkg": "1",
+        "UMask": "0x10",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue inserts for subchannel 0, pseudochannel 1",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.SCH0_PCH1",
+        "PerPkg": "1",
+        "UMask": "0x20",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue inserts for subchannel 1, pseudochannel 0",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.SCH1_PCH0",
+        "PerPkg": "1",
+        "UMask": "0x40",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write Pending Queue inserts for subchannel 1, pseudochannel 1",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_WPQ_INSERTS.SCH1_PCH1",
+        "PerPkg": "1",
+        "UMask": "0x80",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write pending queue occupancy for subchannel 0, pseudochannel 0",
+        "EventCode": "0x84",
+        "EventName": "UNC_M_WPQ_OCCUPANCY_SCH0_PCH0",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write pending queue occupancy for subchannel 0, pseudochannel 1",
+        "EventCode": "0x85",
+        "EventName": "UNC_M_WPQ_OCCUPANCY_SCH0_PCH1",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write pending queue occupancy for subchannel 1, pseudochannel 0",
+        "EventCode": "0x86",
+        "EventName": "UNC_M_WPQ_OCCUPANCY_SCH1_PCH0",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    },
+    {
+        "BriefDescription": "Write pending queue occupancy for subchannel 1, pseudochannel 1",
+        "EventCode": "0x87",
+        "EventName": "UNC_M_WPQ_OCCUPANCY_SCH1_PCH1",
+        "PerPkg": "1",
+        "Unit": "IMC"
+    }
+]
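
The UNC_M_CAS_COUNT_SCH* events above translate naturally into memory bandwidth. A rough sketch, assuming (this is not stated in the file) that each CAS transfers one 64-byte cache line, which is the usual convention for Intel IMC CAS-count events:

    # Rough bandwidth estimate from the UNC_M_CAS_COUNT_SCH* events above.
    # Assumption: one CAS == one 64-byte cache line transfer.
    CACHELINE_BYTES = 64

    def dram_bandwidth_gbs(cas_rd: float, cas_wr: float, seconds: float) -> dict:
        """cas_rd/cas_wr: sums of UNC_M_CAS_COUNT_SCH0/1 .RD and .WR."""
        return {
            "read_GBps": cas_rd * CACHELINE_BYTES / seconds / 1e9,
            "write_GBps": cas_wr * CACHELINE_BYTES / seconds / 1e9,
        }

    # Hypothetical counts over a 1-second window:
    print(dram_bandwidth_gbs(1.5e9, 0.5e9, 1.0))  # ~96 GB/s read, ~32 GB/s write
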
diff --git a/tools/perf/pmu-events/arch/x86/sierraforest/uncore-power.json b/tools/perf/pmu-events/arch/x86/sierraforest/uncore-power.json
new file mode 100644 (file)
index 0000000..e3a6616
--- /dev/null
@@ -0,0 +1,10 @@
+[
+    {
+        "BriefDescription": "PCU Clockticks",
+        "EventCode": "0x01",
+        "EventName": "UNC_P_CLOCKTICKS",
+        "PerPkg": "1",
+        "PublicDescription": "PCU Clockticks:  The PCU runs off a fixed 1 GHz clock.  This event counts the number of pclk cycles measured while the counter was enabled.  The pclk, like the Memory Controller's dclk, counts at a constant rate making it a good measure of actual wall time.",
+        "Unit": "PCU"
+    }
+]
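
Since the PublicDescription above says the pclk is a fixed 1 GHz clock, UNC_P_CLOCKTICKS doubles as a wall-time measure; a one-line sketch:

    # Per the PublicDescription above, pclk runs at a fixed 1 GHz, so
    # UNC_P_CLOCKTICKS divided by 1e9 approximates enabled wall time.
    PCLK_HZ = 1_000_000_000

    def pcu_wall_seconds(unc_p_clockticks: int) -> float:
        return unc_p_clockticks / PCLK_HZ

    print(pcu_wall_seconds(2_500_000_000))  # 2.5 s with the counter enabled
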
index bd5f2b634c98c07cf48c0984d73a191f57674c06..371974c6d6c3ae9eeea0d2523b58d0d10bc07ffe 100644 (file)
@@ -1,18 +1,94 @@
 [
     {
-        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 1G page.",
+        "BriefDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses.",
         "EventCode": "0x08",
         "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
-        "SampleAfterValue": "1000003",
+        "SampleAfterValue": "200003",
         "UMask": "0xe"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 2M or 4M page.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages. Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to a 4K page.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages. Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks outstanding for Loads (demand or SW prefetch) in PMH every cycle.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts the number of page walks outstanding for Loads (demand or SW prefetch) in PMH every cycle.  A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of first level TLB misses but second level hits due to stores that did not start a page walk. Accounts for all pages sizes. Will result in a DTLB write from STLB.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
     {
         "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 1G page.",
         "EventCode": "0x49",
         "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
-        "SampleAfterValue": "1000003",
+        "SampleAfterValue": "2000003",
         "UMask": "0xe"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 2M or 4M page.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 4K page.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for stores every cycle.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts the number of page walks outstanding in the page miss handler (PMH) for stores every cycle. A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of first level TLB misses but second level hits due to an instruction fetch that did not start a page walk. Account for all pages sizes. Will result in an ITLB write from STLB.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.STLB_HIT",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20"
+    },
     {
         "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
         "EventCode": "0x85",
         "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size.  Includes page walks that page fault.",
         "SampleAfterValue": "200003",
         "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to a 2M or 4M page.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+        "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to a 4K page.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+        "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 4K pages.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks outstanding for iside in PMH every cycle.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_PENDING",
+        "PublicDescription": "Counts the number of page walks outstanding for iside in PMH every cycle.  A PMH page walk is outstanding from page walk start till PMH becomes idle again (ready to serve next walk). Includes EPT-walk intervals.  Walks could be counted by edge detecting on this event, but would count restarted suspended walks.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.DTLB_MISS_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x90"
     }
 ]
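
The *_MISSES.WALK_PENDING events added above are per-cycle occupancy counters (walks outstanding in the PMH each cycle), so pairing them with the matching WALK_COMPLETED counts gives an average walk duration via Little's law. A sketch with made-up counter values, assuming both counts come from the same measurement window:

    # pending/completed ~= average page-walk duration in cycles
    # (Little's law over one measurement window).
    def avg_walk_cycles(walk_pending: float, walk_completed: float) -> float:
        return walk_pending / walk_completed if walk_completed else 0.0

    # Hypothetical DTLB_LOAD_MISSES.WALK_PENDING / .WALK_COMPLETED values:
    print(avg_walk_cycles(4.2e6, 1.4e5))  # ~30 cycles per demand-load walk
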
index 588ad6059a136e3243975f0230e243979413a5ca..f047862f97351c39cbd1a637996d13a51631b24b 100644 (file)
         "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
         "EventCode": "0xC9",
         "EventName": "RTM_RETIRED.ABORTED",
-        "PEBS": "1",
+        "PEBS": "2",
         "PublicDescription": "Number of times RTM abort was triggered.",
         "SampleAfterValue": "2000003",
         "UMask": "0x4"
index a151ba9cccb07c557d450f97b46f47bf62503416..5452a1448ded3bc350ed1a265cb100924cd00d14 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -25,7 +25,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "tma_L5_group": "Metrics for top-down breakdown at level 5",
     "tma_L6_group": "Metrics for top-down breakdown at level 6",
     "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+    "tma_assists_group": "Metrics contributing to tma_assists category",
     "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
     "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+    "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
     "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
     "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
     "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
@@ -77,9 +81,9 @@
     "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
     "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
     "tma_issue2P": "Metrics related by the issue $issue2P",
-    "tma_issueBC": "Metrics related by the issue $issueBC",
     "tma_issueBM": "Metrics related by the issue $issueBM",
     "tma_issueBW": "Metrics related by the issue $issueBW",
+    "tma_issueComp": "Metrics related by the issue $issueComp",
     "tma_issueD0": "Metrics related by the issue $issueD0",
     "tma_issueFB": "Metrics related by the issue $issueFB",
     "tma_issueFL": "Metrics related by the issue $issueFL",
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
     "tma_mite_group": "Metrics contributing to tma_mite category",
+    "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
     "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
     "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
     "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
index cd3e737bf4a126e858418e9eb6bb7506b158bf8a..fe202d1e368a9aaba0ad77726e4f5075e3b09531 100644 (file)
         "Errata": "SKL091, SKL044",
         "EventCode": "0xC0",
         "EventName": "INST_RETIRED.NOP",
-        "PEBS": "1",
+        "PEBS": "2",
         "SampleAfterValue": "2000003",
         "UMask": "0x2"
     },
index faa615c578933d609f0604b3424c61c5d77cc8cb..3af71b84bb9d4d9913d46465a077b95d841cd40f 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_thread_slots",
+        "MetricExpr": "34 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(18.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 16.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(18.5 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM + 16.5 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_contested_accesses",
         "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "16.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "16.5 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_data_sharing",
         "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
         "MetricName": "tma_decoder0_alone",
-        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35))",
+        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
         "ScaleUnit": "100%"
     },
     },
     {
         "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
-        "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
+        "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
         "MetricName": "tma_dtlb_load",
         "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
         "MetricName": "tma_dtlb_store",
         "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "22 * tma_info_system_average_frequency * OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
+        "MetricExpr": "22 * tma_info_system_core_frequency * OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
         "MetricName": "tma_false_sharing",
         "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
         "MetricName": "tma_fb_full",
         "MetricThreshold": "tma_fb_full > 0.3",
-        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "PublicDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired). Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain and FMA double-counting.",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists",
+        "MetricExpr": "34 * FP_ASSIST.ANY / tma_info_thread_slots",
+        "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
+        "MetricName": "tma_fp_assists",
+        "MetricThreshold": "tma_fp_assists > 0.1",
+        "PublicDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists. FP Assist may apply when working with very small floating point values (so-called Denormals).",
+        "ScaleUnit": "100%"
+    },
     {
         "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
         "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / UOPS_RETIRED.RETIRE_SLOTS",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions",
         "MetricExpr": "tma_light_operations * UOPS_RETIRED.MACRO_FUSED / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fused_instructions",
         "MetricThreshold": "tma_fused_instructions > 0.1 & tma_light_operations > 0.6",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. The instruction pairs of CMP+JCC or DEC+JCC are commonly used examples.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. CMP+JCC or DEC+JCC are common examples of legacy fusions. {([MTL] Note new MOV+OP and Load+OP fusions appear under Other_Light_Ops in MTL!)}",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
         "MetricExpr": "(ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
     },
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "tma_info_bottleneck_mispredictions * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES / 100",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
         "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
     },
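
The reworked cost expression above reads naturally once you note that tma_info_bottleneck_mispredictions is expressed in percent: /100 converts it to a fraction of slots, scaling by total slots gives wasted slots, and dividing by retired mispredicts gives the per-misprediction cost. A sketch with made-up counter values:

    # Wasted slots per retired misprediction, per the MetricExpr above.
    # All values below are hypothetical, for illustration only.
    def misprediction_cost(bottleneck_mispredicts_pct: float,
                           thread_slots: float,
                           br_misp_retired: float) -> float:
        return bottleneck_mispredicts_pct * thread_slots / br_misp_retired / 100

    print(misprediction_cost(12.0, 4e9, 3e6))  # ~160 wasted slots per mispredict
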
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * BR_MISP_EXEC.INDIRECT)",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
         "MetricName": "tma_info_bad_spec_ipmispredict",
         "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
     },
+    {
+        "BriefDescription": "Speculative to Retired ratio of all clears (covering mispredicts and nukes)",
+        "MetricExpr": "INT_MISC.CLEARS_COUNT / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "tma_info_bad_spec_spec_clears_ratio"
+    },
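The added Spec_Clears_Ratio compares clears counted at the speculative stage against their retired causes; a value well above 1 suggests flushed work that never retired. A toy computation under assumed counts:

    clears_count = 130_000         # INT_MISC.CLEARS_COUNT (speculative clears)
    retired_mispredicts = 100_000  # BR_MISP_RETIRED.ALL_BRANCHES
    machine_clears = 5_000         # MACHINE_CLEARS.COUNT
    spec_clears_ratio = clears_count / (retired_mispredicts + machine_clears)  # ~1.24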
     {
         "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
         "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
     },
+    {
+        "BriefDescription": "Total pipeline cost of \"useful operations\" - the baseline operations not covered by Branching_Overhead nor Irregular_Overhead.",
+        "MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Ret",
+        "MetricName": "tma_info_bottleneck_base_non_br",
+        "MetricThreshold": "tma_info_bottleneck_base_non_br > 20"
+    },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
-        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+        "MetricGroup": "BigFootprint;Fed;Frontend;IcMiss;MemoryTLB",
         "MetricName": "tma_info_bottleneck_big_code",
-        "MetricThreshold": "tma_info_bottleneck_big_code > 20",
-        "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
+        "MetricThreshold": "tma_info_bottleneck_big_code > 20"
     },
     {
         "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
-        "MetricExpr": "100 * ((BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
-        "MetricGroup": "Ret;tma_issueBC",
+        "MetricExpr": "100 * ((BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots)",
+        "MetricGroup": "Ret",
         "MetricName": "tma_info_bottleneck_branching_overhead",
-        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
-        "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
+        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 5"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+        "MetricName": "tma_info_bottleneck_cache_memory_bandwidth",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_bandwidth > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+    },
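The Cache_Memory_Bandwidth expression is long but regular: each term walks one path down the TMA tree, multiplying a parent node by the sibling-normalized share of one child, then by the bandwidth-limited fraction inside it. A condensed sketch of that pattern with placeholder node values (only the DRAM path of the full expression is shown):

    def share(node, siblings):
        """Sibling-normalized weight of one TMA child node."""
        return node / sum(siblings)

    memory_bound = 0.30
    kids = {"dram": 0.12, "l1": 0.05, "l2": 0.03, "l3": 0.06, "store": 0.04}
    mem_bw, mem_lat = 0.08, 0.04   # tma_mem_bandwidth / tma_mem_latency
    dram_bw_term = (100 * memory_bound * share(kids["dram"], kids.values())
                    * mem_bw / (mem_bw + mem_lat))   # = 8.0 percentage points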
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+        "MetricName": "tma_info_bottleneck_cache_memory_latency",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_latency > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
+    },
+    {
+        "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+        "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+        "MetricGroup": "Cor;tma_issueComp",
+        "MetricName": "tma_info_bottleneck_compute_bound_est",
+        "MetricThreshold": "tma_info_bottleneck_compute_bound_est > 20",
+        "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
     },
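Bottleneck-class metrics like Compute_Bound_Est only become actionable once they cross their MetricThreshold. A sketch of gating on the threshold (the comparison is spelled out by hand here; perf has its own threshold parser):

    compute_bound_est = 23.4    # assumed metric value, in percent
    if compute_bound_est > 20:  # "tma_info_bottleneck_compute_bound_est > 20"
        print("compute-bound estimation over threshold; drill into Core Bound L3 nodes")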
     {
         "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+        "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
         "MetricGroup": "Fed;FetchBW;Frontend",
         "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
         "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
     },
     {
-        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
-        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
-        "MetricName": "tma_info_bottleneck_memory_bandwidth",
-        "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
-        "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+        "BriefDescription": "Total pipeline cost of irregular execution (e.g",
+        "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Bad;Cor;Ret;tma_issueMS",
+        "MetricName": "tma_info_bottleneck_irregular_overhead",
+        "MetricThreshold": "tma_info_bottleneck_irregular_overhead > 10",
+        "PublicDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments). Related metrics: tma_microcode_sequencer, tma_ms_switches"
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
         "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
         "MetricName": "tma_info_bottleneck_memory_data_tlbs",
         "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
-        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_synchronization"
     },
     {
-        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
-        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
-        "MetricName": "tma_info_bottleneck_memory_latency",
-        "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
-        "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+        "BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+        "MetricGroup": "Mem;Offcore;tma_issueTLB",
+        "MetricName": "tma_info_bottleneck_memory_synchronization",
+        "MetricThreshold": "tma_info_bottleneck_memory_synchronization > 10",
+        "PublicDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs"
     },
     {
         "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+        "MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bottleneck_mispredictions",
         "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
         "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
     },
+    {
+        "BriefDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class)",
+        "MetricExpr": "100 - (tma_info_bottleneck_big_code + tma_info_bottleneck_instruction_fetch_bw + tma_info_bottleneck_mispredictions + tma_info_bottleneck_cache_memory_bandwidth + tma_info_bottleneck_cache_memory_latency + tma_info_bottleneck_memory_data_tlbs + tma_info_bottleneck_memory_synchronization + tma_info_bottleneck_compute_bound_est + tma_info_bottleneck_irregular_overhead + tma_info_bottleneck_branching_overhead + tma_info_bottleneck_base_non_br)",
+        "MetricGroup": "Cor;Offcore",
+        "MetricName": "tma_info_bottleneck_other_bottlenecks",
+        "MetricThreshold": "tma_info_bottleneck_other_bottlenecks > 20",
+        "PublicDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class). Examples include data-dependencies (Core Bound when Low ILP) and other unlisted memory-related stalls."
+    },
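Other_Bottlenecks is simply the residue after all listed bottleneck classes are subtracted from 100%. A sketch with assumed per-class costs:

    listed = {
        "big_code": 8.0, "instruction_fetch_bw": 6.0, "mispredictions": 12.0,
        "cache_memory_bandwidth": 15.0, "cache_memory_latency": 10.0,
        "memory_data_tlbs": 3.0, "memory_synchronization": 2.0,
        "compute_bound_est": 9.0, "irregular_overhead": 4.0,
        "branching_overhead": 5.0, "base_non_br": 18.0,
    }
    other_bottlenecks = 100 - sum(listed.values())   # 8.0: unattributed pipeline cost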
     {
         "BriefDescription": "Fraction of branches that are CALL or RET",
         "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
     {
         "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.COND - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
         "MetricGroup": "Bad;Branches",
         "MetricName": "tma_info_branches_jump"
     },
         "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
         "MetricName": "tma_info_core_coreipc"
     },
+    {
+        "BriefDescription": "uops Executed per Cycle",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / tma_info_thread_clks",
+        "MetricGroup": "Power",
+        "MetricName": "tma_info_core_epc"
+    },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
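The FLOPc rewrite folds SCALAR_SINGLE + SCALAR_DOUBLE into FP_ARITH_INST_RETIRED.SCALAR and the two 4-FLOP encodings (128-bit packed single, 256-bit packed double, per the deleted line) into FP_ARITH_INST_RETIRED.4_FLOPS; the weights remain FLOPs per retired instruction. A sketch with assumed counts:

    scalar = 1_000_000          # FP_ARITH_INST_RETIRED.SCALAR: 1 FLOP each
    packed128_dp = 200_000      # 2 doubles per 128-bit op
    packed_4flops = 300_000     # FP_ARITH_INST_RETIRED.4_FLOPS: 4 FLOPs each
    packed256_sp = 150_000      # 8 singles per 256-bit op
    core_clks = 10_000_000

    flops = scalar + 2 * packed128_dp + 4 * packed_4flops + 8 * packed256_sp
    flopc = flops / core_clks   # 0.38 FLOPs per core clock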
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_access_bw",
         "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_core_l3_cache_access_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_access_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
     },
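All four of the renamed _2t bandwidth metrics above now alias shared base metrics, and each is the same computation: cache lines moved (64 bytes per line) scaled to GB/s. A sketch for the L2 case, with an assumed measurement interval:

    l2_lines_in = 1_200_000     # L2_LINES_IN.ALL over the interval
    duration_time = 0.5         # seconds
    l2_fill_gbps = 64 * l2_lines_in / 1e9 / duration_time   # ~0.15 GB/s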
     {
         "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_fb_hpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
+    },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_all"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
     {
-        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
-        "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
-        "MetricName": "tma_info_memory_l3mpki"
+        "BriefDescription": "",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "tma_info_memory_l3_cache_access_bw"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_l3mpki"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
-    },
-    {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_uc_load_pki"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
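Load_Miss_Real_Latency and MLP both derive from L1D_PEND_MISS.PENDING, which accumulates the number of in-flight L1 misses each cycle; dividing by different denominators yields parallelism or latency. A sketch with invented counts:

    pending = 900_000                 # L1D_PEND_MISS.PENDING (in-flight misses summed per cycle)
    pending_cycles = 300_000          # L1D_PEND_MISS.PENDING_CYCLES (cycles with any miss in flight)
    l1_miss, fb_hit = 40_000, 20_000  # MEM_LOAD_RETIRED.L1_MISS / .FB_HIT

    mlp = pending / pending_cycles               # ~3 misses in flight on average
    real_latency = pending / (l1_miss + fb_hit)  # ~15 core cycles per miss-handling load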
     {
         "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
         "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
+    {
+        "BriefDescription": "Instructions per a microcode Assist invocation",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ASSIST.ANY + OTHER_ASSISTS.ANY)",
+        "MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
+        "MetricName": "tma_info_pipeline_ipassist",
+        "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
+        "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)"
+    },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
         "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
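The added CPUs_Utilized metric scales the unhalted-time fraction by the number of online CPUs to estimate how many CPUs were busy on average. A sketch under assumed counts:

    ref_tsc = 3.0e9         # CPU_CLK_UNHALTED.REF_TSC summed over the run
    tsc = 12.0e9            # TSC over the same interval
    num_cpus_online = 8     # the #num_cpus_online literal in the expression

    cpu_utilization = ref_tsc / tsc                    # 0.25: fraction of time unhalted
    cpus_utilized = num_cpus_online * cpu_utilization  # ~2 CPUs busy on average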
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / duration_time / 1e3",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
-        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
     },
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
         "MetricName": "tma_info_system_mem_parallel_reads",
         "PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
     },
-    {
-        "BriefDescription": "Average number of parallel requests to external memory",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_parallel_requests",
-        "PublicDescription": "Average number of parallel requests to external memory. Accounts for all requests"
-    },
     {
         "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
         "MetricExpr": "1e9 * (UNC_ARB_TRK_OCCUPANCY.DATA_READ / UNC_ARB_TRK_REQUESTS.DATA_READ) / (tma_info_system_socket_clks / duration_time)",
         "MetricName": "tma_info_system_mem_read_latency",
         "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
     },
-    {
-        "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_request_latency"
-    },
     {
         "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
         "MetricExpr": "(1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / (CPU_CLK_UNHALTED.REF_XCLK_ANY / 2) if #SMT_on else 0)",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
-        "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
-        "MetricExpr": "6.5 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "MetricExpr": "6.5 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_cache_memory_latency, tma_mem_latency",
         "ScaleUnit": "100%"
     },
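One way to read the L3_Hit_Latency expression: the 6.5 constant appears to be a nominal per-hit latency that, multiplied by the core frequency in GHz, becomes a cycle cost, while the (1 + FB_HIT / L1_MISS / 2) factor credits loads that merged into an ongoing miss. That reading is an interpretation of the formula, not stated in the file. A sketch:

    core_ghz = 3.2              # tma_info_system_core_frequency
    l3_hit = 50_000             # MEM_LOAD_RETIRED.L3_HIT
    fb_hit, l1_miss = 20_000, 40_000
    thread_clks = 10_000_000

    per_hit_cycles = 6.5 * core_ghz                       # nominal latency scaled to cycles
    effective_hits = l3_hit * (1 + fb_hit / l1_miss / 2)  # merge-adjusted hit count
    l3_hit_latency = per_hit_cycles * effective_hits / thread_clks  # fraction of cycles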
     {
         "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
-        "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
+        "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
         "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
         "MetricName": "tma_lcp",
         "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
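The cmask\=4 qualifier in the Mem_Bandwidth expression counts only cycles with at least four offcore data reads outstanding - the heuristic for a bandwidth-saturated request queue - and the min() clamps that count to the thread's cycles. A sketch with assumed values:

    deep_queue_cycles = 2_500_000   # cycles with >= 4 outstanding data reads (cmask=4)
    thread_clks = 10_000_000        # CPU_CLK_UNHALTED.THREAD

    mem_bandwidth = min(thread_clks, deep_queue_cycles) / thread_clks  # 0.25, above the 0.2 threshold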
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_cache_memory_latency, tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
         "MetricName": "tma_microcode_sequencer",
         "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
-        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
+        "BriefDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles)",
         "MetricExpr": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH / UOPS_ISSUED.ANY",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
         "MetricName": "tma_mixing_vectors",
         "MetricThreshold": "tma_mixing_vectors > 0.05",
-        "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+        "PublicDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles). Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
         "MetricName": "tma_ms_switches",
         "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
-        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused",
         "MetricExpr": "tma_light_operations * (BR_INST_RETIRED.ALL_BRANCHES - UOPS_RETIRED.MACRO_FUSED) / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_non_fused_branches",
         "MetricThreshold": "tma_non_fused_branches > 0.1 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused. Non-conditional branches like direct JMP or CALL would count here. Can be used to examine fusible conditional jumps that were not fused.",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
         "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
         "MetricName": "tma_nop_instructions",
-        "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
+        "MetricThreshold": "tma_nop_instructions > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
-        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
+        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_other_light_ops",
         "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting",
         "ScaleUnit": "100%"
     },
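tma_other_light_ops is a clamped residual: whatever fraction of tma_light_operations its explicit siblings do not account for. Note tma_nop_instructions drops out of the subtraction above because it is re-parented under this node (TopdownL4). A minimal sketch with invented slot fractions:

    # Residual of the light-operations budget; all fractions are invented.
    tma_light_operations = 0.62
    siblings = {
        "tma_fp_arith":           0.20,
        "tma_memory_operations":  0.25,
        "tma_fused_instructions": 0.05,
        "tma_non_fused_branches": 0.04,
        # tma_nop_instructions is no longer subtracted: it moved underneath
        # tma_other_light_ops in this update.
    }
    tma_other_light_ops = max(0, tma_light_operations - sum(siblings.values()))
    print(f"tma_other_light_ops = {tma_other_light_ops:.2f}")  # 0.08 here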
+    {
+        "BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+        "MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
+        "MetricGroup": "BrMispredicts;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
+        "MetricName": "tma_other_mispredicts",
+        "MetricThreshold": "tma_other_mispredicts > 0.05 & (tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+        "MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
+        "MetricGroup": "Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
+        "MetricName": "tma_other_nukes",
+        "MetricThreshold": "tma_other_nukes > 0.05 & (tma_machine_clears > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
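Both new L3 nodes split their parents by a ratio of retired events and clamp the result at 0.0001 so the node never vanishes from the tree. A standalone sketch (all inputs invented for illustration):

    # How the two new "other" nodes carve up their parents.
    tma_branch_mispredicts = 0.12  # assumed parent values
    tma_machine_clears = 0.06
    ev = {
        "BR_MISP_RETIRED.ALL_BRANCHES":   9.0e6,
        "INT_MISC.CLEARS_COUNT":          1.1e7,
        "MACHINE_CLEARS.COUNT":           5.0e5,
        "MACHINE_CLEARS.MEMORY_ORDERING": 3.0e5,
    }
    tma_other_mispredicts = max(
        tma_branch_mispredicts
        * (1 - ev["BR_MISP_RETIRED.ALL_BRANCHES"]
               / (ev["INT_MISC.CLEARS_COUNT"] - ev["MACHINE_CLEARS.COUNT"])),
        0.0001)  # the floor keeps the node visible even when the ratio is ~1
    tma_other_nukes = max(
        tma_machine_clears
        * (1 - ev["MACHINE_CLEARS.MEMORY_ORDERING"] / ev["MACHINE_CLEARS.COUNT"]),
        0.0001)
    print(tma_other_mispredicts, tma_other_nukes)  # ~0.017 and 0.024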
     {
         "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
         "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
-        "MetricExpr": "((EXE_ACTIVITY.EXE_BOUND_0_PORTS + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
+        "MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
         "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
         "MetricName": "tma_ports_utilization",
         "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_NONE / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_core_core_clks",
+        "MetricExpr": "(EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_0",
         "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_GE_3 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_3) / tma_info_core_core_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%"
     },
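The reworked tma_ports_utilized_0 above replaces the SMT-conditional UOPS_EXECUTED form with a product of two per-thread stall ratios. A minimal sketch with invented counts:

    # New tma_ports_utilized_0 estimate; inputs are invented.
    ev = {
        "EXE_ACTIVITY.EXE_BOUND_0_PORTS": 8.0e7,
        "RS_EVENTS.EMPTY_CYCLES":         2.0e7,
        "CYCLE_ACTIVITY.STALLS_TOTAL":    3.0e8,
        "CYCLE_ACTIVITY.STALLS_MEM_ANY":  2.0e8,
    }
    thread_clks = 1.0e9   # stands in for tma_info_thread_clks
    tma_core_bound = 0.2  # assumed parent-node value

    tma_ports_utilized_0 = (
        (ev["EXE_ACTIVITY.EXE_BOUND_0_PORTS"]
         + tma_core_bound * ev["RS_EVENTS.EMPTY_CYCLES"])
        / thread_clks
        * (ev["CYCLE_ACTIVITY.STALLS_TOTAL"] - ev["CYCLE_ACTIVITY.STALLS_MEM_ANY"])
        / thread_clks)
    print(f"tma_ports_utilized_0 = {tma_ports_utilized_0:.4f}")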
     {
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
         "MetricExpr": "PARTIAL_RAT_STALLS.SCOREBOARD / tma_info_thread_clks",
-        "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
+        "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
         "MetricName": "tma_serializing_operation",
-        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: PARTIAL_RAT_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
         "MetricName": "tma_sq_full",
         "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
         "ScaleUnit": "100%"
     },
     {
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "9 * BACLEARS.ANY / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: BACLEARS.ANY",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: BACLEARS.ANY",
         "ScaleUnit": "100%"
     },
     {
index f59405877ae8bc9845a820b2bd2ac11efb39cc8f..73feadaf767406bd5ee5fd826d89883a9f0c2d70 100644
         "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
         "EventCode": "0x85",
         "EventName": "ITLB_MISSES.WALK_PENDING",
-        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+        "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake microarchitecture.",
         "SampleAfterValue": "100003",
         "UMask": "0x10"
     },
index bc6a9a4d27a9562a0de97285ce80fc03160ad583..904d299c95a317e880342e4c1e73be696cee73e1 100644
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -26,7 +26,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "tma_L5_group": "Metrics for top-down breakdown at level 5",
     "tma_L6_group": "Metrics for top-down breakdown at level 6",
     "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+    "tma_assists_group": "Metrics contributing to tma_assists category",
     "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
     "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+    "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
     "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
     "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
     "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
@@ -78,9 +82,9 @@
     "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
     "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
     "tma_issue2P": "Metrics related by the issue $issue2P",
-    "tma_issueBC": "Metrics related by the issue $issueBC",
     "tma_issueBM": "Metrics related by the issue $issueBM",
     "tma_issueBW": "Metrics related by the issue $issueBW",
+    "tma_issueComp": "Metrics related by the issue $issueComp",
     "tma_issueD0": "Metrics related by the issue $issueD0",
     "tma_issueFB": "Metrics related by the issue $issueFB",
     "tma_issueFL": "Metrics related by the issue $issueFL",
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
     "tma_mite_group": "Metrics contributing to tma_mite category",
+    "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
     "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
     "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
     "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
index ec3aa5ef00a3c79bdf8cae408a271d49754204be..025e836a1c80dcd6dca23808f252878e1852a3b2 100644
         "MetricName": "llc_miss_remote_memory_bandwidth_read",
         "ScaleUnit": "1MB/s"
     },
+    {
+        "BriefDescription": "Bandwidth (MB/sec) of write requests that miss the last level cache (LLC) and go to remote memory.",
+        "MetricExpr": "UNC_CHA_REQUESTS.WRITES_REMOTE * 64 / 1e6 / duration_time",
+        "MetricName": "llc_miss_remote_memory_bandwidth_write",
+        "ScaleUnit": "1MB/s"
+    },
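The new write-bandwidth metric follows the same shape as its read sibling: LLC-missing requests times the 64-byte cache-line size, scaled to MB/s. Worked through with invented inputs:

    # Cache-line requests to MB/s; inputs are invented.
    writes_remote = 2.5e8   # UNC_CHA_REQUESTS.WRITES_REMOTE over the run
    duration = 10.0         # duration_time, in seconds
    mb_per_s = writes_remote * 64 / 1e6 / duration
    print(f"llc_miss_remote_memory_bandwidth_write = {mb_per_s:.0f} MB/s")  # 1600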
     {
         "BriefDescription": "The ratio of number of completed memory load instructions to the total number completed instructions",
         "MetricExpr": "MEM_INST_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
         "MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_thread_slots",
+        "MetricExpr": "34 * (FP_ASSIST.ANY + OTHER_ASSISTS.ANY) / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(44 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 44 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(44 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 44 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_contested_accesses",
         "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "44 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "44 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_data_sharing",
         "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
         "MetricName": "tma_decoder0_alone",
-        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35))",
+        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
         "ScaleUnit": "100%"
     },
     },
     {
         "BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
-        "MetricExpr": "(IDQ.ALL_DSB_CYCLES_ANY_UOPS - IDQ.ALL_DSB_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
+        "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
         "MetricName": "tma_dtlb_load",
         "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
         "MetricName": "tma_dtlb_store",
         "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(110 * tma_info_system_average_frequency * (OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM + OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM) + 47.5 * tma_info_system_average_frequency * (OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE)) / tma_info_thread_clks",
+        "MetricExpr": "(110 * tma_info_system_core_frequency * (OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM + OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM) + 47.5 * tma_info_system_core_frequency * (OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE + OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE)) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
         "MetricName": "tma_false_sharing",
         "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
         "MetricName": "tma_fb_full",
         "MetricThreshold": "tma_fb_full > 0.3",
-        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "tma_frontend_bound - tma_fetch_latency",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "PublicDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired). Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain and FMA double-counting.",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists",
+        "MetricExpr": "34 * FP_ASSIST.ANY / tma_info_thread_slots",
+        "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
+        "MetricName": "tma_fp_assists",
+        "MetricThreshold": "tma_fp_assists > 0.1",
+        "PublicDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists. FP Assist may apply when working with very small floating point values (so-called Denormals).",
+        "ScaleUnit": "100%"
+    },
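Both tma_assists (updated earlier in this hunk) and the new tma_fp_assists leaf now cost each assist at an estimated 34 slots, replacing the earlier flat 100 for tma_assists. A sketch with invented counts:

    # Assist cost model: ~34 slots per assist (was 100); inputs invented.
    fp_assists = 1.2e5      # FP_ASSIST.ANY
    other_assists = 3.0e4   # OTHER_ASSISTS.ANY
    slots = 4.0e9           # tma_info_thread_slots
    tma_assists = 34 * (fp_assists + other_assists) / slots
    tma_fp_assists = 34 * fp_assists / slots  # new TopdownL5 leaf
    print(f"{tma_assists:.5f} {tma_fp_assists:.5f}")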
     {
         "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
         "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / UOPS_RETIRED.RETIRE_SLOTS",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions",
         "MetricExpr": "tma_light_operations * UOPS_RETIRED.MACRO_FUSED / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fused_instructions",
         "MetricThreshold": "tma_fused_instructions > 0.1 & tma_light_operations > 0.6",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. The instruction pairs of CMP+JCC or DEC+JCC are commonly used examples.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring fused instructions -- where one uop can represent multiple contiguous instructions. CMP+JCC or DEC+JCC are common examples of legacy fusions. {([MTL] Note new MOV+OP and Load+OP fusions appear under Other_Light_Ops in MTL!)}",
         "ScaleUnit": "100%"
     },
     {
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
         "MetricExpr": "(ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@) / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
     },
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "tma_info_bottleneck_mispredictions * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES / 100",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
         "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
     },
     {
         "BriefDescription": "Instructions per retired mispredicts for indirect CALL or JMP branches (lower number means higher occurrence rate).",
-        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * cpu@BR_MISP_EXEC.ALL_BRANCHES\\,umask\\=0xE4@)",
+        "MetricExpr": "tma_info_inst_mix_instructions / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * BR_MISP_EXEC.INDIRECT)",
         "MetricGroup": "Bad;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmisp_indirect",
         "MetricThreshold": "tma_info_bad_spec_ipmisp_indirect < 1e3"
     },
     {
         "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)",
-        "MetricExpr": "tma_info_core_ipmispredict",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
         "MetricGroup": "Bad;BadSpec;BrMispredicts",
         "MetricName": "tma_info_bad_spec_ipmispredict",
         "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
     },
+    {
+        "BriefDescription": "Speculative to Retired ratio of all clears (covering mispredicts and nukes)",
+        "MetricExpr": "INT_MISC.CLEARS_COUNT / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "tma_info_bad_spec_spec_clears_ratio"
+    },
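The new spec_clears_ratio compares speculative clears (INT_MISC.CLEARS_COUNT) against retired mispredicts plus nukes; values well above 1 suggest clears that never reach retirement. A sketch with invented counts:

    # Speculative vs. retired clears; inputs are invented.
    ev = {
        "INT_MISC.CLEARS_COUNT":        1.2e7,  # all clears, speculative
        "BR_MISP_RETIRED.ALL_BRANCHES": 8.0e6,  # retired mispredicts
        "MACHINE_CLEARS.COUNT":         4.0e5,  # retired nukes
    }
    ratio = ev["INT_MISC.CLEARS_COUNT"] / (
        ev["BR_MISP_RETIRED.ALL_BRANCHES"] + ev["MACHINE_CLEARS.COUNT"])
    print(f"tma_info_bad_spec_spec_clears_ratio = {ratio:.2f}")  # ~1.43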
+    {
+        "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
+        "MetricExpr": "(100 * (1 - tma_core_bound / (((EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) if tma_core_bound < (((EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / CPU_CLK_UNHALTED.THREAD * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / CPU_CLK_UNHALTED.THREAD * CPU_CLK_UNHALTED.THREAD + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / CPU_CLK_UNHALTED.THREAD if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / CPU_CLK_UNHALTED.THREAD) else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)",
+        "MetricGroup": "Cor;SMT;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_core_bound_likely",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck.",
+        "MetricExpr": "100 * (100 * (tma_fetch_latency * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(2 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) + tma_fetch_bandwidth * tma_mite / (tma_mite + tma_dsb)))",
+        "MetricGroup": "DSBmiss;Fed;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_dsb_misses",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck.",
+        "MetricExpr": "100 * (100 * (tma_fetch_latency * ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD) / ((ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=0x1\\,edge\\=0x1@) / CPU_CLK_UNHALTED.THREAD + ICACHE_TAG.STALLS / CPU_CLK_UNHALTED.THREAD + (INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD + 9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) + min(2 * IDQ.MS_SWITCHES / CPU_CLK_UNHALTED.THREAD, 1) + DECODE.LCP / CPU_CLK_UNHALTED.THREAD + DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD)))",
+        "MetricGroup": "Fed;FetchLat;IcMiss;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_botlnk_ic_misses",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
         "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
     },
+    {
+        "BriefDescription": "Total pipeline cost of \"useful operations\" - the baseline operations not covered by Branching_Overhead nor Irregular_Overhead.",
+        "MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Ret",
+        "MetricName": "tma_info_bottleneck_base_non_br",
+        "MetricThreshold": "tma_info_bottleneck_base_non_br > 20"
+    },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
-        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+        "MetricGroup": "BigFootprint;Fed;Frontend;IcMiss;MemoryTLB",
         "MetricName": "tma_info_bottleneck_big_code",
-        "MetricThreshold": "tma_info_bottleneck_big_code > 20",
-        "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
+        "MetricThreshold": "tma_info_bottleneck_big_code > 20"
     },
     {
         "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
-        "MetricExpr": "100 * ((BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
-        "MetricGroup": "Ret;tma_issueBC",
+        "MetricExpr": "100 * ((BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots)",
+        "MetricGroup": "Ret",
         "MetricName": "tma_info_bottleneck_branching_overhead",
-        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
-        "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
+        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 5"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+        "MetricName": "tma_info_bottleneck_cache_memory_bandwidth",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_bandwidth > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+        "MetricName": "tma_info_bottleneck_cache_memory_latency",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_latency > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
+    },
+    {
+        "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+        "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+        "MetricGroup": "Cor;tma_issueComp",
+        "MetricName": "tma_info_bottleneck_compute_bound_est",
+        "MetricThreshold": "tma_info_bottleneck_compute_bound_est > 20",
+        "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
     },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+        "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
         "MetricGroup": "Fed;FetchBW;Frontend",
         "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
         "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
     },
     {
-        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
-        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
-        "MetricName": "tma_info_bottleneck_memory_bandwidth",
-        "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
-        "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+        "BriefDescription": "Total pipeline cost of irregular execution (e.g",
+        "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Bad;Cor;Ret;tma_issueMS",
+        "MetricName": "tma_info_bottleneck_irregular_overhead",
+        "MetricThreshold": "tma_info_bottleneck_irregular_overhead > 10",
+        "PublicDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments). Related metrics: tma_microcode_sequencer, tma_ms_switches"
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
         "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
         "MetricName": "tma_info_bottleneck_memory_data_tlbs",
         "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
-        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_synchronization"
     },
     {
-        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
-        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
-        "MetricName": "tma_info_bottleneck_memory_latency",
-        "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
-        "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+        "BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+        "MetricGroup": "Mem;Offcore;tma_issueTLB",
+        "MetricName": "tma_info_bottleneck_memory_synchronization",
+        "MetricThreshold": "tma_info_bottleneck_memory_synchronization > 10",
+        "PublicDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs"
     },
     {
         "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+        "MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bottleneck_mispredictions",
         "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
         "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
     },
+    {
+        "BriefDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class)",
+        "MetricExpr": "100 - (tma_info_bottleneck_big_code + tma_info_bottleneck_instruction_fetch_bw + tma_info_bottleneck_mispredictions + tma_info_bottleneck_cache_memory_bandwidth + tma_info_bottleneck_cache_memory_latency + tma_info_bottleneck_memory_data_tlbs + tma_info_bottleneck_memory_synchronization + tma_info_bottleneck_compute_bound_est + tma_info_bottleneck_irregular_overhead + tma_info_bottleneck_branching_overhead + tma_info_bottleneck_base_non_br)",
+        "MetricGroup": "Cor;Offcore",
+        "MetricName": "tma_info_bottleneck_other_bottlenecks",
+        "MetricThreshold": "tma_info_bottleneck_other_bottlenecks > 20",
+        "PublicDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class). Examples include data-dependencies (Core Bound when Low ILP) and other unlisted memory-related stalls."
+    },
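The new tma_info_bottleneck_other_bottlenecks entry is simply the residual of the named bottleneck classes. As a rough illustration of the arithmetic (not how perf evaluates the expression internally; perf has its own metric-expression parser, and the percentages below are made up):

    # Hypothetical bottleneck percentages for illustration only.
    listed = {
        "big_code": 4.0, "instruction_fetch_bw": 6.5, "mispredictions": 12.0,
        "cache_memory_bandwidth": 9.0, "cache_memory_latency": 15.0,
        "memory_data_tlbs": 2.0, "memory_synchronization": 3.0,
        "compute_bound_est": 18.0, "irregular_overhead": 1.5,
        "branching_overhead": 5.0, "base_non_br": 20.0,
    }
    other_bottlenecks = 100 - sum(listed.values())
    print(other_bottlenecks)  # -> 4.0 (percent of pipeline cost unaccounted for)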
     {
         "BriefDescription": "Fraction of branches that are CALL or RET",
         "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
     {
         "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - (BR_INST_RETIRED.COND - BR_INST_RETIRED.NOT_TAKEN) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
         "MetricGroup": "Bad;Branches",
         "MetricName": "tma_info_branches_jump"
     },
         "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
         "MetricName": "tma_info_core_coreipc"
     },
+    {
+        "BriefDescription": "uops Executed per Cycle",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / tma_info_thread_clks",
+        "MetricGroup": "Power",
+        "MetricName": "tma_info_core_epc"
+    },
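The added uops-executed-per-cycle metric is a plain ratio of two counters; a minimal sketch assuming raw counts collected over one measurement window (the event values below are hypothetical):

    # Hypothetical raw counts for one hardware thread.
    uops_executed_thread = 3_600_000_000   # UOPS_EXECUTED.THREAD
    unhalted_clks        = 2_000_000_000   # tma_info_thread_clks (CPU_CLK_UNHALTED.THREAD)
    epc = uops_executed_thread / unhalted_clks
    print(f"tma_info_core_epc = {epc:.2f}")  # 1.80 uops executed per cycle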
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
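The reworked ILP expression divides executed uops by cycles in which at least one uop executed; the cmask=1 qualifier turns the uop counter into a counter of such cycles. A toy illustration with assumed counts:

    # Hypothetical: 3.0e9 uops executed, 1.2e9 cycles had >=1 uop executing.
    uops_executed   = 3_000_000_000   # UOPS_EXECUTED.THREAD
    exec_cycles_ge1 = 1_200_000_000   # UOPS_EXECUTED.THREAD with cmask=1
    ilp = uops_executed / exec_cycles_ge1
    print(f"tma_info_core_ilp = {ilp:.1f}")  # 2.5 uops per executing cycle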
-    {
-        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
-        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
-        "MetricGroup": "Bad;BadSpec;BrMispredicts;TopdownL1;tma_L1_group",
-        "MetricName": "tma_info_core_ipmispredict",
-        "MetricgroupNoGroup": "TopdownL1"
-    },
     {
         "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
         "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx512",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
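The reworded descriptions above hinge on one detail: the FP_ARITH_INST_RETIRED events count an FMA twice (it retires both a multiply and an add operation), so the instructions-per-arith ratios can legitimately drop below 1. A worked toy example, with hypothetical counts:

    # 1000 retired instructions, 600 of them 256-bit FMAs.
    inst_retired = 1000
    fma_insts    = 600
    # Each FMA is counted twice by the FP_ARITH_INST_RETIRED events.
    fp_arith_256b = 2 * fma_insts          # -> 1200
    iparith_avx256 = inst_retired / fp_arith_256b
    print(iparith_avx256)                  # 0.833..., i.e. a value < 1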
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
         "MetricThreshold": "tma_info_inst_mix_iptb < 9",
         "PublicDescription": "Instruction per taken branch. Related metrics: tma_dsb_switches, tma_fetch_bandwidth, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_lcp"
     },
+    {
+        "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_code_stlb_mpki",
+        "MetricGroup": "Fed;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_code_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
     },
     {
         "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_access_bw",
         "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_core_l3_cache_access_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_access_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
+    },
+    {
+        "BriefDescription": "Average Parallel L2 cache miss data reads",
+        "MetricExpr": "tma_info_memory_latency_data_l2_mlp",
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_data_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_fb_hpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
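All the *pki metrics in this family share the same shape: an event count scaled to misses (or hits) per thousand retired instructions. For example, under hypothetical counts:

    # Hypothetical counts: L1 load misses vs. retired instructions.
    l1_miss      = 5_000_000     # MEM_LOAD_RETIRED.L1_MISS
    inst_retired = 400_000_000   # INST_RETIRED.ANY
    l1mpki = 1e3 * l1_miss / inst_retired
    print(f"tma_info_memory_l1mpki = {l1mpki:.1f}")  # 12.5 misses per kilo instruction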
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
+        "MetricExpr": "1e3 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
+        "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_evictions_nonsilent_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
+        "MetricExpr": "1e3 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
+        "MetricGroup": "L2Evicts;Mem;Server;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l2_evictions_silent_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_all"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "tma_info_memory_l3_cache_access_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l3_cache_access_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
+    },
+    {
+        "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / (duration_time * 1e3 / 1e3)",
+        "MetricGroup": "Mem;MemoryBW;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw_2t",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
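The cache fill-bandwidth expressions all follow from 64-byte cache lines: lines filled, times 64 bytes, over elapsed time. A sketch with made-up numbers:

    # 64 bytes per cache line; the counts and wall time are hypothetical.
    longest_lat_cache_miss = 500_000_000   # LONGEST_LAT_CACHE.MISS (L3 fills)
    seconds = 2.0                          # duration_time
    l3_fill_bw = 64 * longest_lat_cache_miss / 1e9 / seconds
    print(f"~{l3_fill_bw:.1f} GB/s of L3 fill bandwidth")  # ~16.0 GB/s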
     {
         "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "Mem",
         "MetricName": "tma_info_memory_l3mpki"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
+        "BriefDescription": "Average Parallel L2 cache miss data reads",
+        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "MetricGroup": "Memory_BW;Offcore",
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+        "BriefDescription": "Average Latency for L2 cache miss demand Loads",
+        "MetricExpr": "tma_info_memory_load_l2_miss_latency",
+        "MetricGroup": "Memory_Lat;Offcore",
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
-        "BriefDescription": "Average Parallel L2 cache miss data reads",
-        "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "BriefDescription": "Average Parallel L2 cache miss demand Loads",
+        "MetricExpr": "tma_info_memory_load_l2_mlp",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
-        "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricGroup": "Memory_Lat;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_miss_latency",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
-        "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
+        "MetricGroup": "Memory_BW;Offcore;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_l2_mlp",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
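The average L1-miss latency above is an occupancy argument: L1D_PEND_MISS.PENDING accumulates the number of outstanding misses each cycle, so dividing by completed misses (true misses plus fill-buffer merges) yields average cycles per missing load. With hypothetical counts:

    # Hypothetical counts over a window.
    pending = 800_000_000   # L1D_PEND_MISS.PENDING (in-flight misses summed per cycle)
    l1_miss = 8_000_000     # MEM_LOAD_RETIRED.L1_MISS
    fb_hit  = 2_000_000     # MEM_LOAD_RETIRED.FB_HIT
    avg_latency = pending / (l1_miss + fb_hit)
    print(f"{avg_latency:.0f} core cycles per L1-missing load")  # 80 cycles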
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "BriefDescription": "STLB (2nd level TLB) data load speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_load_stlb_mpki",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_load_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "tma_info_memory_uc_load_pki",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_uc_load_pki"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+    },
+    {
+        "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+        "MetricExpr": "tma_info_memory_tlb_page_walks_utilization",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_page_walks_utilization",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "STLB (2nd level TLB) data store speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
+        "MetricExpr": "tma_info_memory_tlb_store_stlb_mpki",
+        "MetricGroup": "Mem;MemoryTLB;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_store_stlb_mpki",
+        "MetricgroupNoGroup": "TopdownL1"
     },
     {
         "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
         "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
+        "MetricGroup": "Mem;TopdownL1;tma_L1_group",
+        "MetricName": "tma_info_memory_uc_load_pki",
+        "MetricgroupNoGroup": "TopdownL1"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
+    {
+        "BriefDescription": "Instructions per a microcode Assist invocation",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ASSIST.ANY + OTHER_ASSISTS.ANY)",
+        "MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
+        "MetricName": "tma_info_pipeline_ipassist",
+        "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
+        "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)"
+    },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
         "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / cpu@UOPS_RETIRED.RETIRE_SLOTS\\,cmask\\=1@",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
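The new cpus-utilized metric just rescales CPU utilization by the number of online logical CPUs; for example, under assumed values:

    # Hypothetical system: 32 logical CPUs online, 25% average utilization.
    num_cpus_online = 32
    cpu_utilization = 0.25   # CPU_CLK_UNHALTED.REF_TSC / TSC
    cpus_utilized = num_cpus_online * cpu_utilization
    print(cpus_utilized)     # 8.0 CPUs' worth of work on average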
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / duration_time",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
-        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
     },
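The DRAM bandwidth metric works the same way at the uncore level: each CAS read or write moves one 64-byte line. A sketch with hypothetical memory-controller counts:

    # Hypothetical integrated-memory-controller counts over 1 second.
    cas_rd, cas_wr = 1_200_000_000, 400_000_000  # UNC_M_CAS_COUNT.RD / .WR
    seconds = 1.0                                # duration_time
    dram_bw = 64 * (cas_rd + cas_wr) / 1e9 / seconds
    print(f"~{dram_bw:.1f} GB/s external memory traffic")  # ~102.4 GB/s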
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * (FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE) + 8 * (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
     {
         "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
         "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3) * 4 / 1e9 / duration_time",
-        "MetricGroup": "IoBW;Mem;Server;SoC",
-        "MetricName": "tma_info_system_io_read_bw"
+        "MetricGroup": "IoBW;MemOffcore;Server;SoC",
+        "MetricName": "tma_info_system_io_read_bw",
+        "PublicDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]. Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the CPU"
     },
     {
         "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
         "MetricExpr": "(UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3) * 4 / 1e9 / duration_time",
-        "MetricGroup": "IoBW;Mem;Server;SoC",
-        "MetricName": "tma_info_system_io_write_bw"
+        "MetricGroup": "IoBW;MemOffcore;Server;SoC",
+        "MetricName": "tma_info_system_io_write_bw",
+        "PublicDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]. Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the CPU"
     },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
     {
         "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]",
         "MetricExpr": "1e9 * (UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS) / imc_0@event\\=0x0@",
-        "MetricGroup": "Mem;MemoryLat;Server;SoC",
+        "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
         "MetricName": "tma_info_system_mem_dram_read_latency",
         "PublicDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
     },
         "MetricGroup": "Power",
         "MetricName": "tma_info_system_turbo_utilization"
     },
+    {
+        "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+        "MetricExpr": "tma_info_system_socket_clks / 1e9 / duration_time",
+        "MetricGroup": "SoC",
+        "MetricName": "tma_info_system_uncore_frequency"
+    },
     {
         "BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
         "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
-        "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
-        "MetricExpr": "17 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "MetricExpr": "17 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_cache_memory_latency, tma_mem_latency",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
-        "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
+        "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
         "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
         "MetricName": "tma_lcp",
         "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
-        "MetricExpr": "59.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "59.5 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_local_dram",
-        "MetricThreshold": "tma_local_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_local_mem",
+        "MetricThreshold": "tma_local_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory. Caching will improve the latency and increase performance. Sample with: MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM_PS",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_cache_memory_latency, tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
         "MetricName": "tma_microcode_sequencer",
         "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
-        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 4 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
+        "BriefDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles)",
         "MetricExpr": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH / UOPS_ISSUED.ANY",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
         "MetricName": "tma_mixing_vectors",
         "MetricThreshold": "tma_mixing_vectors > 0.05",
-        "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+        "PublicDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles). Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
         "MetricName": "tma_ms_switches",
         "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
-        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused",
         "MetricExpr": "tma_light_operations * (BR_INST_RETIRED.ALL_BRANCHES - UOPS_RETIRED.MACRO_FUSED) / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_non_fused_branches",
         "MetricThreshold": "tma_non_fused_branches > 0.1 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions that were not fused. Non-conditional branches like direct JMP or CALL would count here. Can be used to examine fusible conditional jumps that were not fused.",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
         "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / UOPS_RETIRED.RETIRE_SLOTS",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
         "MetricName": "tma_nop_instructions",
-        "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
+        "MetricThreshold": "tma_nop_instructions > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
-        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))",
+        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_other_light_ops",
         "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+        "MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
+        "MetricGroup": "BrMispredicts;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
+        "MetricName": "tma_other_mispredicts",
+        "MetricThreshold": "tma_other_mispredicts > 0.05 & (tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+        "MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
+        "MetricGroup": "Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
+        "MetricName": "tma_other_nukes",
+        "MetricThreshold": "tma_other_nukes > 0.05 & (tma_machine_clears > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
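tma_other_nukes splits machine clears the same way tma_other_mispredicts splits mispredictions: scale the parent node by the fraction of clears that are not memory-ordering related, clamped to a small positive floor. For instance, with assumed inputs:

    # Hypothetical: machine clears cost 6% of slots, 75% were memory ordering.
    tma_machine_clears = 0.06
    mem_ordering_frac  = 0.75  # MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT
    tma_other_nukes = max(tma_machine_clears * (1 - mem_ordering_frac), 0.0001)
    print(tma_other_nukes)     # 0.015, i.e. 1.5% of slots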
     {
         "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
         "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_0 / tma_info_core_core_clks",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED_PORT.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED_PORT.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
-        "MetricExpr": "((EXE_ACTIVITY.EXE_BOUND_0_PORTS + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
+        "MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
         "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
         "MetricName": "tma_ports_utilization",
         "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_NONE / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_core_core_clks",
+        "MetricExpr": "(EXE_ACTIVITY.EXE_BOUND_0_PORTS + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_0",
         "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "MetricExpr": "(UOPS_EXECUTED.CORE_CYCLES_GE_3 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_3) / tma_info_core_core_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
         "MetricConstraint": "NO_GROUP_EVENTS_NMI",
-        "MetricExpr": "(89.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 89.5 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(89.5 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM + 89.5 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
         "MetricName": "tma_remote_cache",
         "MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
-        "MetricExpr": "127 * tma_info_system_average_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "127 * tma_info_system_core_frequency * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
-        "MetricName": "tma_remote_dram",
-        "MetricThreshold": "tma_remote_dram > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
+        "MetricName": "tma_remote_mem",
+        "MetricThreshold": "tma_remote_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
         "PublicDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory. This is caused often due to non-optimal NUMA allocations. #link to NUMA article. Sample with: MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM_PS",
         "ScaleUnit": "100%"
     },
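
The tma_remote_mem expression above is a latency cost model: roughly 127 ns per remote-DRAM load, converted to core cycles via the core frequency and inflated by a fill-buffer correction. A sketch under that reading, with invented inputs:

    # Hedged sketch of the tma_remote_mem cost model; all inputs invented.
    core_frequency_ghz = 2.5        # tma_info_system_core_frequency
    remote_dram_loads = 2.0e6       # MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM
    fb_hit, l1_miss = 3.0e6, 6.0e6  # MEM_LOAD_RETIRED.FB_HIT / .L1_MISS
    thread_clks = 5.0e9             # tma_info_thread_clks

    cycles_per_load = 127 * core_frequency_ghz   # ns * GHz -> cycles
    correction = 1 + fb_hit / l1_miss / 2        # fill-buffer-hit factor
    tma_remote_mem = cycles_per_load * remote_dram_loads * correction / thread_clks
    print(tma_remote_mem)   # ~0.16, above the 0.1 threshold
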
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
         "MetricExpr": "PARTIAL_RAT_STALLS.SCOREBOARD / tma_info_thread_clks",
-        "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
+        "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
         "MetricName": "tma_serializing_operation",
-        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: PARTIAL_RAT_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
         "MetricName": "tma_sq_full",
         "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "9 * BACLEARS.ANY / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: BACLEARS.ANY",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: BACLEARS.ANY",
         "ScaleUnit": "100%"
     },
     {
index c6254af7a468e03d704c122105d92dbe39582fb8..ceef46046488637a58de3d4a8dc758307929e147 100644 (file)
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0x40",
         "Unit": "PCU"
     },
     {
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0x80",
         "Unit": "PCU"
     },
     {
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
         "PerPkg": "1",
         "PublicDescription": "This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0xc0",
         "Unit": "PCU"
     },
     {
index a61ffca2dfea21444345f66a27b8ebb1fa17c866..dcf268467db961f608d241c379a5368b75658b60 100644 (file)
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
         "PerPkg": "1",
         "PublicDescription": "Number of cores in C-State : C0 and C1 : This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0x40",
         "Unit": "PCU"
     },
     {
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
         "PerPkg": "1",
         "PublicDescription": "Number of cores in C-State : C3 : This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0x80",
         "Unit": "PCU"
     },
     {
         "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
         "PerPkg": "1",
         "PublicDescription": "Number of cores in C-State : C6 and C7 : This is an occupancy event that tracks the number of cores that are in the chosen C-State.  It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
+        "UMask": "0xc0",
         "Unit": "PCU"
     },
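
The added UMask fields give each C-state occupancy sub-event its own encoding rather than leaving all three identical; schematically (values taken from the hunks above):

    # The three occupancy sub-events are now distinguished by umask.
    umask_by_cstate = {"CORES_C0": 0x40, "CORES_C3": 0x80, "CORES_C6": 0xC0}
    for name, umask in umask_by_cstate.items():
        print(f"UNC_P_POWER_STATE_OCCUPANCY.{name}: umask=0x{umask:x}")
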
     {
index a151ba9cccb07c557d450f97b46f47bf62503416..5452a1448ded3bc350ed1a265cb100924cd00d14 100644 (file)
@@ -2,10 +2,10 @@
     "Backend": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Bad": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BadSpec": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "BigFoot": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "BigFootprint": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "BrMispredicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Branches": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
-    "CacheMisses": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "CacheHits": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "CodeGen": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Compute": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Cor": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
@@ -25,7 +25,9 @@
     "L2Evicts": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "LSD": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MachineClears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "Machine_Clears": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "Mem": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
+    "MemOffcore": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBW": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryBound": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "MemoryLat": "Grouping from Top-down Microarchitecture Analysis Metrics spreadsheet",
     "tma_L5_group": "Metrics for top-down breakdown at level 5",
     "tma_L6_group": "Metrics for top-down breakdown at level 6",
     "tma_alu_op_utilization_group": "Metrics contributing to tma_alu_op_utilization category",
+    "tma_assists_group": "Metrics contributing to tma_assists category",
     "tma_backend_bound_group": "Metrics contributing to tma_backend_bound category",
     "tma_bad_speculation_group": "Metrics contributing to tma_bad_speculation category",
+    "tma_branch_mispredicts_group": "Metrics contributing to tma_branch_mispredicts category",
     "tma_branch_resteers_group": "Metrics contributing to tma_branch_resteers category",
     "tma_core_bound_group": "Metrics contributing to tma_core_bound category",
     "tma_dram_bound_group": "Metrics contributing to tma_dram_bound category",
@@ -77,9 +81,9 @@
     "tma_frontend_bound_group": "Metrics contributing to tma_frontend_bound category",
     "tma_heavy_operations_group": "Metrics contributing to tma_heavy_operations category",
     "tma_issue2P": "Metrics related by the issue $issue2P",
-    "tma_issueBC": "Metrics related by the issue $issueBC",
     "tma_issueBM": "Metrics related by the issue $issueBM",
     "tma_issueBW": "Metrics related by the issue $issueBW",
+    "tma_issueComp": "Metrics related by the issue $issueComp",
     "tma_issueD0": "Metrics related by the issue $issueD0",
     "tma_issueFB": "Metrics related by the issue $issueFB",
     "tma_issueFL": "Metrics related by the issue $issueFL",
     "tma_l3_bound_group": "Metrics contributing to tma_l3_bound category",
     "tma_light_operations_group": "Metrics contributing to tma_light_operations category",
     "tma_load_op_utilization_group": "Metrics contributing to tma_load_op_utilization category",
+    "tma_machine_clears_group": "Metrics contributing to tma_machine_clears category",
     "tma_mem_latency_group": "Metrics contributing to tma_mem_latency category",
     "tma_memory_bound_group": "Metrics contributing to tma_memory_bound category",
     "tma_microcode_sequencer_group": "Metrics contributing to tma_microcode_sequencer category",
     "tma_mite_group": "Metrics contributing to tma_mite category",
+    "tma_other_light_ops_group": "Metrics contributing to tma_other_light_ops category",
     "tma_ports_utilization_group": "Metrics contributing to tma_ports_utilization category",
     "tma_ports_utilized_0_group": "Metrics contributing to tma_ports_utilized_0 category",
     "tma_ports_utilized_3m_group": "Metrics contributing to tma_ports_utilized_3m category",
index 55f3048bcfa6df5e2cb355caacad27d81bcfc888..117b18abcaafa08d8f5b9c7b652c6b2b138ffef5 100644 (file)
@@ -19,7 +19,7 @@
         "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
         "EventCode": "0x28",
         "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
-        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchtecture).  This includes high current AVX 512-bit instructions.",
+        "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture).  This includes high current AVX 512-bit instructions.",
         "SampleAfterValue": "200003",
         "UMask": "0x20"
     },
index 541bf1dd167967cdd38c1c19f39438265c4d6aad..4f85d53edec23964856b6f6661acde15a0ecc70e 100644 (file)
         "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
         "EventCode": "0x5e",
         "EventName": "RS_EVENTS.EMPTY_CYCLES",
-        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into stravation periods (e.g. branch mispredictions or i-cache misses)",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
         "SampleAfterValue": "1000003",
         "UMask": "0x1"
     },
         "SampleAfterValue": "10000003",
         "UMask": "0x2"
     },
-    {
-        "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
-        "EventCode": "0xa4",
-        "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
-        "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by branch mispredictions. This event estimates number of operations that were issued but not retired from the speculative path as well as the out-of-order engine recovery past a branch misprediction.",
-        "SampleAfterValue": "10000003",
-        "UMask": "0x8"
-    },
     {
         "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
         "EventName": "TOPDOWN.SLOTS",
index f11860f39c18fe017e02d10d443eb63c18e4d01f..8ae4f2474b25aa8cb21f7653a502cc0a7bc7c813 100644 (file)
         "MetricExpr": "(UOPS_DISPATCHED.PORT_0 + UOPS_DISPATCHED.PORT_1 + UOPS_DISPATCHED.PORT_5 + UOPS_DISPATCHED.PORT_6) / (4 * tma_info_core_core_clks)",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
         "MetricName": "tma_alu_op_utilization",
-        "MetricThreshold": "tma_alu_op_utilization > 0.6",
+        "MetricThreshold": "tma_alu_op_utilization > 0.4",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of slots the CPU retired uops delivered by the Microcode_Sequencer as a result of Assists",
-        "MetricExpr": "100 * ASSISTS.ANY / tma_info_thread_slots",
+        "MetricExpr": "34 * ASSISTS.ANY / tma_info_thread_slots",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
         "MetricName": "tma_assists",
         "MetricThreshold": "tma_assists > 0.1 & (tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1)",
     {
         "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
         "DefaultMetricgroupName": "TopdownL1",
-        "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * cpu@INT_MISC.RECOVERY_CYCLES\\,cmask\\=1\\,edge@ / tma_info_thread_slots",
+        "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 5 * INT_MISC.CLEARS_COUNT / tma_info_thread_slots",
         "MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
         "MetricName": "tma_backend_bound",
         "MetricThreshold": "tma_backend_bound > 0.2",
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring branch instructions.",
         "MetricExpr": "tma_light_operations * BR_INST_RETIRED.ALL_BRANCHES / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Branches;Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_branch_instructions",
         "MetricThreshold": "tma_branch_instructions > 0.1 & tma_light_operations > 0.6",
         "ScaleUnit": "100%"
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(49 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 48 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "(49 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 48 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_contested_accesses",
         "MetricThreshold": "tma_contested_accesses > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
     {
         "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "48 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "MetricExpr": "48 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
         "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
         "MetricName": "tma_data_sharing",
         "MetricThreshold": "tma_data_sharing > 0.05 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricExpr": "(cpu@INST_DECODED.DECODERS\\,cmask\\=1@ - cpu@INST_DECODED.DECODERS\\,cmask\\=2@) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_issueD0;tma_mite_group",
         "MetricName": "tma_decoder0_alone",
-        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
+        "MetricThreshold": "tma_decoder0_alone > 0.1 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles where decoder-0 was the only active decoder. Related metrics: tma_few_uops_instructions",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "(IDQ.DSB_CYCLES_ANY - IDQ.DSB_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_dsb",
-        "MetricThreshold": "tma_dsb > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline.  For example; inefficient utilization of the DSB cache structure or bank conflict when reading from it; are categorized here.",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_l1_bound_group",
         "MetricName": "tma_dtlb_load",
         "MetricThreshold": "tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles where the Data TLB (DTLB) was missed by load accesses. TLBs (Translation Look-aside Buffers) are processor caches for recently used entries out of the Page Tables that are used to map virtual- to physical-addresses by the operating system. This metric approximates the potential delay of demand loads missing the first-level data TLB (assuming worst case scenario with back to back misses to different pages). This includes hitting in the second-level TLB (STLB) as well as performing a hardware page walk on an STLB miss. Sample with: MEM_INST_RETIRED.STLB_MISS_LOADS_PS. Related metrics: tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "MemoryTLB;TopdownL4;tma_L4_group;tma_issueTLB;tma_store_bound_group",
         "MetricName": "tma_dtlb_store",
         "MetricThreshold": "tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs",
+        "PublicDescription": "This metric roughly estimates the fraction of cycles spent handling first-level data TLB store misses.  As with ordinary data caching; focus on improving data locality and reducing working-set size to reduce DTLB overhead.  Additionally; consider using profile-guided optimization (PGO) to collocate frequently-used data on the same page.  Try using larger page sizes for large amounts of frequently-used data. Sample with: MEM_INST_RETIRED.STLB_MISS_STORES_PS. Related metrics: tma_dtlb_load, tma_info_bottleneck_memory_data_tlbs, tma_info_bottleneck_memory_synchronization",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric roughly estimates how often CPU was handling synchronizations due to False Sharing",
-        "MetricExpr": "54 * tma_info_system_average_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
+        "MetricExpr": "54 * tma_info_system_core_frequency * OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM / tma_info_thread_clks",
         "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_store_bound_group",
         "MetricName": "tma_false_sharing",
         "MetricThreshold": "tma_false_sharing > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
         "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
         "MetricName": "tma_fb_full",
         "MetricThreshold": "tma_fb_full > 0.3",
-        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+        "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "max(0, tma_frontend_bound - tma_fetch_latency)",
         "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB",
         "MetricName": "tma_fetch_bandwidth",
-        "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35",
+        "MetricThreshold": "tma_fetch_bandwidth > 0.2",
         "MetricgroupNoGroup": "TopdownL2",
         "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues.  For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_botlnk_l2_dsb_misses, tma_info_frontend_dsb_coverage, tma_info_inst_mix_iptb, tma_lcp",
         "ScaleUnit": "100%"
         "PublicDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired). Note this metric's value may exceed its parent due to use of \"Uops\" CountDomain and FMA double-counting.",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists",
+        "MetricExpr": "34 * ASSISTS.FP / tma_info_thread_slots",
+        "MetricGroup": "HPC;TopdownL5;tma_L5_group;tma_assists_group",
+        "MetricName": "tma_fp_assists",
+        "MetricThreshold": "tma_fp_assists > 0.1",
+        "PublicDescription": "This metric roughly estimates fraction of slots the CPU retired uops as a result of handing Floating Point (FP) Assists. FP Assist may apply when working with very small floating point values (so-called Denormals).",
+        "ScaleUnit": "100%"
+    },
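
As the new description notes, FP assists are typically triggered by denormal (subnormal) inputs; a purely illustrative way to produce one:

    import sys

    # Smallest normal double, then scale below it to get a subnormal.
    smallest_normal = sys.float_info.min      # ~2.2e-308
    subnormal = smallest_normal / 2**10       # subnormal ("denormal") value
    print(subnormal, subnormal > 0.0)         # tiny but non-zero

    # Arithmetic on such values can fall back to microcode assists (what
    # ASSISTS.FP counts); flushing denormals to zero (FTZ/DAZ) is the
    # usual mitigation in HPC code.
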
     {
         "BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
         "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ / (tma_retiring * tma_info_thread_slots)",
         "MetricName": "tma_heavy_operations",
         "MetricThreshold": "tma_heavy_operations > 0.1",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. ([ICL+] Note this may overcount due to approximation using indirect events; [ADL+] .)",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses",
-        "MetricExpr": "ICACHE_16B.IFDATA_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_icache_misses",
         "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses. Sample with: FRONTEND_RETIRED.L2_MISS_PS;FRONTEND_RETIRED.L1I_MISS_PS",
     {
         "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricExpr": "tma_info_bottleneck_mispredictions * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES / 100",
         "MetricGroup": "Bad;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bad_spec_branch_misprediction_cost",
         "PublicDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear). Related metrics: tma_branch_mispredicts, tma_info_bottleneck_mispredictions, tma_mispredicts_resteers"
         "MetricName": "tma_info_bad_spec_ipmispredict",
         "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200"
     },
+    {
+        "BriefDescription": "Speculative to Retired ratio of all clears (covering mispredicts and nukes)",
+        "MetricExpr": "INT_MISC.CLEARS_COUNT / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT)",
+        "MetricGroup": "BrMispredicts",
+        "MetricName": "tma_info_bad_spec_spec_clears_ratio"
+    },
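
The new ratio compares all speculative pipeline clears against their retired causes; a sketch with invented counts:

    # Hedged sketch of tma_info_bad_spec_spec_clears_ratio; counts invented.
    int_misc_clears = 11.0e6    # INT_MISC.CLEARS_COUNT (speculative clears)
    br_misp_retired = 9.0e6     # BR_MISP_RETIRED.ALL_BRANCHES
    machine_clears = 1.0e6      # MACHINE_CLEARS.COUNT
    ratio = int_misc_clears / (br_misp_retired + machine_clears)
    print(ratio)                # 1.1: ~10% more clears than retired causes
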
     {
         "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricThreshold": "tma_info_botlnk_l2_ic_misses > 5",
         "PublicDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck. Related metrics: "
     },
+    {
+        "BriefDescription": "Total pipeline cost of \"useful operations\" - the baseline operations not covered by Branching_Overhead nor Irregular_Overhead.",
+        "MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Ret",
+        "MetricName": "tma_info_bottleneck_base_non_br",
+        "MetricThreshold": "tma_info_bottleneck_base_non_br > 20"
+    },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
-        "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC",
+        "MetricGroup": "BigFootprint;Fed;Frontend;IcMiss;MemoryTLB",
         "MetricName": "tma_info_bottleneck_big_code",
-        "MetricThreshold": "tma_info_bottleneck_big_code > 20",
-        "PublicDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses). Related metrics: tma_info_bottleneck_branching_overhead"
+        "MetricThreshold": "tma_info_bottleneck_big_code > 20"
     },
     {
         "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
-        "MetricExpr": "100 * ((BR_INST_RETIRED.COND + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - BR_INST_RETIRED.COND_TAKEN - 2 * BR_INST_RETIRED.NEAR_CALL)) / tma_info_thread_slots)",
-        "MetricGroup": "Ret;tma_issueBC",
+        "MetricExpr": "100 * ((BR_INST_RETIRED.ALL_BRANCHES + BR_INST_RETIRED.NEAR_CALL) / tma_info_thread_slots)",
+        "MetricGroup": "Ret",
         "MetricName": "tma_info_bottleneck_branching_overhead",
-        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 10",
-        "PublicDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls). Related metrics: tma_info_bottleneck_big_code"
+        "MetricThreshold": "tma_info_bottleneck_branching_overhead > 5"
+    },
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
+        "MetricName": "tma_info_bottleneck_cache_memory_bandwidth",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_bandwidth > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+    },
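
The long tma_info_bottleneck_* expressions all follow one pattern: walk down the tree, weighting each node by its share among its siblings. One term of the bandwidth roll-up above, sketched with invented node values:

    # Hedged sketch of the nested sibling-share weighting; values invented.
    memory_bound = 0.30
    siblings = {"dram": 0.10, "l1": 0.05, "l2": 0.02, "l3": 0.08, "store": 0.05}
    mem_bandwidth, mem_latency = 0.06, 0.04

    dram_share = siblings["dram"] / sum(siblings.values())    # 1/3
    bw_share = mem_bandwidth / (mem_bandwidth + mem_latency)  # 0.6

    term = 100 * memory_bound * dram_share * bw_share
    print(term)   # 6.0 (percent of pipeline cost from DRAM bandwidth)
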
+    {
+        "BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
+        "MetricName": "tma_info_bottleneck_cache_memory_latency",
+        "MetricThreshold": "tma_info_bottleneck_cache_memory_latency > 20",
+        "PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
+    },
+    {
+        "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+        "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+        "MetricGroup": "Cor;tma_issueComp",
+        "MetricName": "tma_info_bottleneck_compute_bound_est",
+        "MetricThreshold": "tma_info_bottleneck_compute_bound_est > 20",
+        "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
     },
     {
         "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
+        "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code",
         "MetricGroup": "Fed;FetchBW;Frontend",
         "MetricName": "tma_info_bottleneck_instruction_fetch_bw",
         "MetricThreshold": "tma_info_bottleneck_instruction_fetch_bw > 20"
     },
     {
-        "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
-        "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
-        "MetricName": "tma_info_bottleneck_memory_bandwidth",
-        "MetricThreshold": "tma_info_bottleneck_memory_bandwidth > 20",
-        "PublicDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
+        "BriefDescription": "Total pipeline cost of irregular execution (e.g",
+        "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+        "MetricGroup": "Bad;Cor;Ret;tma_issueMS",
+        "MetricName": "tma_info_bottleneck_irregular_overhead",
+        "MetricThreshold": "tma_info_bottleneck_irregular_overhead > 10",
+        "PublicDescription": "Total pipeline cost of irregular execution (e.g. FP-assists in HPC, Wait time with work imbalance multithreaded workloads, overhead in system services or virtualized environments). Related metrics: tma_microcode_sequencer, tma_ms_switches"
     },
     {
         "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
         "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB",
         "MetricName": "tma_info_bottleneck_memory_data_tlbs",
         "MetricThreshold": "tma_info_bottleneck_memory_data_tlbs > 20",
-        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store"
+        "PublicDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_synchronization"
     },
     {
-        "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))",
-        "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat",
-        "MetricName": "tma_info_bottleneck_memory_latency",
-        "MetricThreshold": "tma_info_bottleneck_memory_latency > 20",
-        "PublicDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches). Related metrics: tma_l3_hit_latency, tma_mem_latency"
+        "BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+        "MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+        "MetricGroup": "Mem;Offcore;tma_issueTLB",
+        "MetricName": "tma_info_bottleneck_memory_synchronization",
+        "MetricThreshold": "tma_info_bottleneck_memory_synchronization > 10",
+        "PublicDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors). Related metrics: tma_dtlb_load, tma_dtlb_store, tma_info_bottleneck_memory_data_tlbs"
     },
     {
         "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
+        "MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM",
         "MetricName": "tma_info_bottleneck_mispredictions",
         "MetricThreshold": "tma_info_bottleneck_mispredictions > 20",
         "PublicDescription": "Total pipeline cost of Branch Misprediction related bottlenecks. Related metrics: tma_branch_mispredicts, tma_info_bad_spec_branch_misprediction_cost, tma_mispredicts_resteers"
     },
+    {
+        "BriefDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class)",
+        "MetricExpr": "100 - (tma_info_bottleneck_big_code + tma_info_bottleneck_instruction_fetch_bw + tma_info_bottleneck_mispredictions + tma_info_bottleneck_cache_memory_bandwidth + tma_info_bottleneck_cache_memory_latency + tma_info_bottleneck_memory_data_tlbs + tma_info_bottleneck_memory_synchronization + tma_info_bottleneck_compute_bound_est + tma_info_bottleneck_irregular_overhead + tma_info_bottleneck_branching_overhead + tma_info_bottleneck_base_non_br)",
+        "MetricGroup": "Cor;Offcore",
+        "MetricName": "tma_info_bottleneck_other_bottlenecks",
+        "MetricThreshold": "tma_info_bottleneck_other_bottlenecks > 20",
+        "PublicDescription": "Total pipeline cost of remaining bottlenecks (apart from those listed in the Info.Bottlenecks metrics class). Examples include data-dependencies (Core Bound when Low ILP) and other unlisted memory-related stalls."
+    },
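
The "other bottlenecks" bucket is simply the residual after the eleven listed classes; schematically:

    # Hedged sketch: the residual of the listed Info.Bottlenecks classes.
    listed = [20.0, 8.0, 12.0, 6.0, 9.0, 4.0, 3.0, 7.0, 2.0, 5.0, 15.0]  # invented
    other_bottlenecks = 100 - sum(listed)
    print(other_bottlenecks)   # 9.0
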
     {
         "BriefDescription": "Fraction of branches that are CALL or RET",
         "MetricExpr": "(BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN) / BR_INST_RETIRED.ALL_BRANCHES",
     },
     {
         "BriefDescription": "Core actual clocks when any Logical Processor is active on the Physical Core",
-        "MetricExpr": "CPU_CLK_UNHALTED.DISTRIBUTED",
+        "MetricExpr": "(CPU_CLK_UNHALTED.DISTRIBUTED if #SMT_on else tma_info_thread_clks)",
         "MetricGroup": "SMT",
         "MetricName": "tma_info_core_core_clks"
     },
         "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group",
         "MetricName": "tma_info_core_coreipc"
     },
+    {
+        "BriefDescription": "uops Executed per Cycle",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / tma_info_thread_clks",
+        "MetricGroup": "Power",
+        "MetricName": "tma_info_core_epc"
+    },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
         "PublicDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width). Values > 1 are possible due to ([BDW+] Fused-Multiply Add (FMA) counting - common; [ADL+] use all of ADD/MUL/FMA in Scalar or 128/256-bit vectors - less common)."
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
         "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
         "MetricName": "tma_info_core_ilp"
     },
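
The reworked tma_info_core_ilp divides uops executed by cycles in which at least one uop executed (the cmask=1 form), i.e. parallelism measured only over "active" cycles; a sketch with invented counts:

    # Hedged sketch of the per-thread ILP definition; counts invented.
    uops_executed = 8.0e9     # UOPS_EXECUTED.THREAD
    active_cycles = 2.5e9     # same event with cmask=1: cycles with >=1 uop
    ilp = uops_executed / active_cycles
    print(ilp)                # 3.2 uops per active cycle
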
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_iparith",
         "MetricThreshold": "tma_info_inst_mix_iparith < 10",
-        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+        "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting. Approximated prior to BDW."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx128",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx256",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpVector;InsType",
         "MetricName": "tma_info_inst_mix_iparith_avx512",
         "MetricThreshold": "tma_info_inst_mix_iparith_avx512 < 10",
-        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_dp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
         "MetricGroup": "Flops;FpScalar;InsType",
         "MetricName": "tma_info_inst_mix_iparith_scalar_sp",
         "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
-        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+        "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). Values < 1 are possible due to intentional FMA double counting."
     },
     {
         "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
     },
     {
         "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
-        "MetricExpr": "INST_RETIRED.ANY / (cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
+        "MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE)",
         "MetricGroup": "Flops;InsType",
         "MetricName": "tma_info_inst_mix_ipflop",
         "MetricThreshold": "tma_info_inst_mix_ipflop < 10"
         "MetricName": "tma_info_inst_mix_ipload",
         "MetricThreshold": "tma_info_inst_mix_ipload < 3"
     },
+    {
+        "BriefDescription": "Instructions per PAUSE (lower number means higher occurrence rate)",
+        "MetricExpr": "tma_info_inst_mix_instructions / MISC_RETIRED.PAUSE_INST",
+        "MetricGroup": "Flops;FpVector;InsType",
+        "MetricName": "tma_info_inst_mix_ippause"
+    },
     {
         "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)",
         "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES",
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l1d_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l1d_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l2_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l2_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l2_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_access_bw",
         "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_core_l3_cache_access_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_access_bw_2t"
     },
     {
         "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricExpr": "tma_info_memory_l3_cache_fill_bw",
         "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_core_l3_cache_fill_bw"
+        "MetricName": "tma_info_memory_core_l3_cache_fill_bw_2t"
     },
     {
         "BriefDescription": "Fill Buffer (FB) hits per kilo instructions for retired demand loads (L1D misses that merge into ongoing miss-handling entries)",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_fb_hpki"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L1D.REPLACEMENT / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l1d_cache_fill_bw"
+    },
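These fill/access bandwidth expressions all share one shape: each counted event is one 64-byte cache line, so bandwidth is 64 * lines / 1e9 / duration_time in GB/s. A made-up example for the new L1D helper metric:

    L1D_REPLACEMENT = 31_250_000   # L1D lines filled (invented sample count)
    duration_time = 1.0            # seconds
    bw = 64 * L1D_REPLACEMENT / 1e9 / duration_time
    print(bw)                      # 2.0 GB/s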
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki"
     },
     {
         "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l1mpki_load"
     },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "64 * L2_LINES_IN.ALL / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l2_cache_fill_bw"
+    },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * (L2_RQSTS.REFERENCES - L2_RQSTS.MISS) / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_all"
     },
     {
         "BriefDescription": "L2 cache hits per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2hpki_load"
     },
     {
         "BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
         "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "Backend;CacheMisses;Mem",
+        "MetricGroup": "Backend;CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all request types (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem;Offcore",
+        "MetricGroup": "CacheHits;Mem;Offcore",
         "MetricName": "tma_info_memory_l2mpki_all"
     },
     {
         "BriefDescription": "L2 cache ([RKL+] true) misses per kilo instruction for all demand loads  (including speculative)",
         "MetricExpr": "1e3 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
+        "MetricGroup": "CacheHits;Mem",
         "MetricName": "tma_info_memory_l2mpki_load"
     },
     {
-        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
-        "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
-        "MetricGroup": "CacheMisses;Mem",
-        "MetricName": "tma_info_memory_l3mpki"
+        "BriefDescription": "",
+        "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW;Offcore",
+        "MetricName": "tma_info_memory_l3_cache_access_bw"
     },
     {
-        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
-        "MetricGroup": "Mem;MemoryBound;MemoryLat",
-        "MetricName": "tma_info_memory_load_miss_real_latency"
+        "BriefDescription": "",
+        "MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1e9 / duration_time",
+        "MetricGroup": "Mem;MemoryBW",
+        "MetricName": "tma_info_memory_l3_cache_fill_bw"
     },
     {
-        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
-        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
-        "MetricGroup": "Mem;MemoryBW;MemoryBound",
-        "MetricName": "tma_info_memory_mlp",
-        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
+        "BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
+        "MetricExpr": "1e3 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_l3mpki"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss data reads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_data_l2_mlp"
+        "MetricName": "tma_info_memory_latency_data_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l2_miss_latency"
     },
     {
         "BriefDescription": "Average Parallel L2 cache miss demand Loads",
         "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@",
         "MetricGroup": "Memory_BW;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l2_mlp"
+        "MetricName": "tma_info_memory_latency_load_l2_mlp"
     },
     {
         "BriefDescription": "Average Latency for L3 cache miss demand Loads",
         "MetricExpr": "cpu@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,umask\\=0x10@ / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
         "MetricGroup": "Memory_Lat;Offcore",
-        "MetricName": "tma_info_memory_oro_load_l3_miss_latency"
+        "MetricName": "tma_info_memory_latency_load_l3_miss_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l1d_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l1d_cache_fill_bw_1t"
+        "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / (MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT)",
+        "MetricGroup": "Mem;MemoryBound;MemoryLat",
+        "MetricName": "tma_info_memory_load_miss_real_latency"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L2 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l2_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l2_cache_fill_bw_1t"
+        "BriefDescription": "\"Bus lock\" per kilo instruction",
+        "MetricExpr": "1e3 * SQ_MISC.BUS_LOCK / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_bus_lock_pki"
     },
     {
-        "BriefDescription": "Average per-thread data access bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_access_bw",
-        "MetricGroup": "Mem;MemoryBW;Offcore",
-        "MetricName": "tma_info_memory_thread_l3_cache_access_bw_1t"
+        "BriefDescription": "Un-cacheable retired load per kilo instruction",
+        "MetricExpr": "1e3 * MEM_LOAD_MISC_RETIRED.UC / INST_RETIRED.ANY",
+        "MetricGroup": "Mem",
+        "MetricName": "tma_info_memory_mix_uc_load_pki"
     },
     {
-        "BriefDescription": "Average per-thread data fill bandwidth to the L3 cache [GB / sec]",
-        "MetricExpr": "tma_info_memory_core_l3_cache_fill_bw",
-        "MetricGroup": "Mem;MemoryBW",
-        "MetricName": "tma_info_memory_thread_l3_cache_fill_bw_1t"
+        "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss",
+        "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
+        "MetricGroup": "Mem;MemoryBW;MemoryBound",
+        "MetricName": "tma_info_memory_mlp",
+        "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)"
     },
     {
         "BriefDescription": "STLB (2nd level TLB) code speculative misses per kilo instruction (misses of any page-size that complete the page walk)",
         "MetricName": "tma_info_memory_tlb_store_stlb_mpki"
     },
     {
-        "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread",
-        "MetricExpr": "UOPS_EXECUTED.THREAD / cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@",
+        "BriefDescription": "",
+        "MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
         "MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
         "MetricName": "tma_info_pipeline_execute"
     },
+    {
+        "BriefDescription": "Instructions per a microcode Assist invocation",
+        "MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
+        "MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
+        "MetricName": "tma_info_pipeline_ipassist",
+        "MetricThreshold": "tma_info_pipeline_ipassist < 100e3",
+        "PublicDescription": "Instructions per a microcode Assist invocation. See Assists tree node for details (lower number means higher occurrence rate)"
+    },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
-        "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
+        "BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
         "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / duration_time",
         "MetricGroup": "Power;Summary",
-        "MetricName": "tma_info_system_average_frequency"
+        "MetricName": "tma_info_system_core_frequency"
     },
     {
-        "BriefDescription": "Average CPU Utilization",
+        "BriefDescription": "Average CPU Utilization (percentage)",
         "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "tma_info_system_cpu_utilization"
     },
+    {
+        "BriefDescription": "Average number of utilized CPUs",
+        "MetricExpr": "#num_cpus_online * tma_info_system_cpu_utilization",
+        "MetricGroup": "Summary",
+        "MetricName": "tma_info_system_cpus_utilized"
+    },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
         "MetricExpr": "64 * (arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@) / 1e6 / duration_time / 1e3",
-        "MetricGroup": "HPC;Mem;MemoryBW;SoC;tma_issueBW",
+        "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
         "MetricName": "tma_info_system_dram_bw_use",
-        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
+        "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_mem_bandwidth, tma_sq_full"
     },
     {
         "BriefDescription": "Giga Floating Point Operations Per Second",
-        "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
+        "MetricExpr": "(FP_ARITH_INST_RETIRED.SCALAR + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * FP_ARITH_INST_RETIRED.4_FLOPS + 8 * FP_ARITH_INST_RETIRED.8_FLOPS + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1e9 / duration_time",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_system_gflops",
-        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width and AMX engine."
+        "PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width"
     },
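The updated FLOP expression weights each retired FP instruction by the FLOPs it performs: scalar = 1, 128-bit packed double = 2, and the 4_FLOPS/8_FLOPS combined umasks (the masks the old expression spelled out inline as umask 0x18 and 0x60) = 4 and 8, with 512-bit packed single = 16. Evaluating it with invented counter readings:

    counts = {
        'SCALAR': 1_000_000,
        '128B_PACKED_DOUBLE': 250_000,
        '4_FLOPS': 500_000,
        '8_FLOPS': 125_000,
        '512B_PACKED_SINGLE': 0,
    }
    duration_time = 0.5  # seconds
    flops = (counts['SCALAR'] + 2 * counts['128B_PACKED_DOUBLE'] +
             4 * counts['4_FLOPS'] + 8 * counts['8_FLOPS'] +
             16 * counts['512B_PACKED_SINGLE'])
    print(flops / 1e9 / duration_time)  # 0.009 GFLOPS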
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
         "MetricName": "tma_info_system_mem_read_latency",
         "PublicDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches. ([RKL+]memory-controller only)"
     },
-    {
-        "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)",
-        "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / arb@event\\=0x81\\,umask\\=0x1@",
-        "MetricGroup": "Mem;SoC",
-        "MetricName": "tma_info_system_mem_request_latency"
-    },
     {
         "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
         "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / tma_info_core_core_clks",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses",
-        "MetricExpr": "ICACHE_64B.IFTAG_STALL / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
+        "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks",
+        "MetricGroup": "BigFootprint;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group",
         "MetricName": "tma_itlb_misses",
         "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses. Sample with: FRONTEND_RETIRED.STLB_MISS_PS;FRONTEND_RETIRED.ITLB_MISS_PS",
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache",
         "MetricExpr": "max((CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS) / tma_info_thread_clks, 0)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_issueL1;tma_issueMC;tma_memory_bound_group",
         "MetricName": "tma_l1_bound",
         "MetricThreshold": "tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled without loads missing the L1 data cache.  The L1 data cache typically has the shortest latency.  However; in certain cases like loads blocked on older stores; a load might suffer due to high latency even though it is being satisfied by the L1. Another example is loads who miss in the TLB. These cases are characterized by execution unit stalls; while some non-completed demand load lives in the machine without having that demand load missing the L1 cache. Sample with: MEM_LOAD_RETIRED.L1_HIT_PS;MEM_LOAD_RETIRED.FB_HIT_PS. Related metrics: tma_clears_resteers, tma_machine_clears, tma_microcode_sequencer, tma_ms_switches, tma_ports_utilized_1",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads",
         "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / (MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + L1D_PEND_MISS.FB_FULL_PERIODS) * ((CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks)",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l2_bound",
         "MetricThreshold": "tma_l2_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads.  Avoiding cache misses (i.e. L1 misses/L2 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L2_HIT_PS",
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
         "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
-        "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+        "MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
         "MetricThreshold": "tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core.  Avoiding cache misses (i.e. L2 misses/L3 hits) can improve the latency and increase performance. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
-        "MetricExpr": "17.5 * tma_info_system_average_frequency * MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+        "BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
+        "MetricExpr": "17.5 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
         "MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
         "MetricName": "tma_l3_hit_latency",
         "MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric represents fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_memory_latency, tma_mem_latency",
+        "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited).  Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance.  Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_info_bottleneck_cache_memory_latency, tma_mem_latency",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)",
-        "MetricExpr": "ILD_STALL.LCP / tma_info_thread_clks",
+        "MetricExpr": "DECODE.LCP / tma_info_thread_clks",
         "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB",
         "MetricName": "tma_lcp",
         "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
         "MetricName": "tma_light_operations",
         "MetricThreshold": "tma_light_operations > 0.6",
         "MetricgroupNoGroup": "TopdownL2",
-        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST",
+        "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized code running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. ([ICL+] Note this may undercount due to approximation using indirect events; [ADL+] .). Sample with: INST_RETIRED.PREC_DIST",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(LSD.CYCLES_ACTIVE - LSD.CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_lsd",
-        "MetricThreshold": "tma_lsd > 0.15 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_lsd > 0.15 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit.  LSD typically does well sustaining Uop supply. However; in some rare cases; optimal uop-delivery could not be reached for small loops whose size (in terms of number of uops) does not suit well the LSD structure.",
         "ScaleUnit": "100%"
     },
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@) / tma_info_thread_clks",
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
         "MetricName": "tma_mem_bandwidth",
         "MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory (DRAM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
+        "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM).  The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_sq_full",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM)",
+        "BriefDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM)",
         "MetricExpr": "min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD) / tma_info_thread_clks - tma_mem_bandwidth",
         "MetricGroup": "MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
         "MetricName": "tma_mem_latency",
         "MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory (DRAM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_memory_latency, tma_l3_hit_latency",
+        "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM).  This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_info_bottleneck_cache_memory_latency, tma_l3_hit_latency",
         "ScaleUnit": "100%"
     },
     {
     },
     {
         "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
-        "MetricExpr": "tma_retiring * tma_info_thread_slots / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
+        "MetricExpr": "UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
         "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
         "MetricName": "tma_microcode_sequencer",
         "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1",
-        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_ms_switches",
+        "PublicDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit.  The MS is used for CISC instructions not supported by the default decoders (like repeat move strings; or CPUID); or by microcode assists used to address some operation modes (like in Floating Point assists). These cases can often be avoided. Sample with: IDQ.MS_UOPS. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricExpr": "(IDQ.MITE_CYCLES_ANY - IDQ.MITE_CYCLES_OK) / tma_info_core_core_clks / 2",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
         "MetricName": "tma_mite",
-        "MetricThreshold": "tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35)",
+        "MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
         "PublicDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline). This pipeline is used for code that was not pre-cached in the DSB or LSD. For example; inefficiencies due to asymmetric decoders; use of long immediate or LCP can manifest as MITE fetch bandwidth bottleneck. Sample with: FRONTEND_RETIRED.ANY_DSB_MISS",
         "ScaleUnit": "100%"
     },
         "MetricExpr": "(cpu@IDQ.MITE_UOPS\\,cmask\\=4@ - cpu@IDQ.MITE_UOPS\\,cmask\\=5@) / tma_info_thread_clks",
         "MetricGroup": "DSBmiss;FetchBW;TopdownL4;tma_L4_group;tma_mite_group",
         "MetricName": "tma_mite_4wide",
-        "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & (tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_thread_ipc / 5 > 0.35))",
+        "MetricThreshold": "tma_mite_4wide > 0.05 & (tma_mite > 0.1 & tma_fetch_bandwidth > 0.2)",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued",
+        "BriefDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles)",
         "MetricExpr": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH / UOPS_ISSUED.ANY",
         "MetricGroup": "TopdownL5;tma_L5_group;tma_issueMV;tma_ports_utilized_0_group",
         "MetricName": "tma_mixing_vectors",
         "MetricThreshold": "tma_mixing_vectors > 0.05",
-        "PublicDescription": "The Mixing_Vectors metric gives the percentage of injected blend uops out of all uops issued. Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
+        "PublicDescription": "This metric estimates penalty in terms of percentage of([SKL+] injected blend uops out of all Uops Issued -- the Count Domain; [ADL+] cycles). Usually a Mixing_Vectors over 5% is worth investigating. Read more in Appendix B1 of the Optimizations Guide for this topic. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "MetricGroup": "FetchLat;MicroSeq;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueMC;tma_issueMS;tma_issueMV;tma_issueSO",
         "MetricName": "tma_ms_switches",
         "MetricThreshold": "tma_ms_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)",
-        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
+        "PublicDescription": "This metric estimates the fraction of cycles when the CPU was stalled due to switches of uop delivery to the Microcode Sequencer (MS). Commonly used instructions are optimized for delivery by the DSB (decoded i-cache) or MITE (legacy instruction decode) pipelines. Certain operations cannot be handled natively by the execution pipeline; and must be performed by microcode (small programs injected into the execution stream). Switching to the MS too often can negatively impact performance. The MS is designated to deliver long uop flows required by CISC instructions like CPUID; or uncommon conditions like Floating Point Assists when dealing with Denormals. Sample with: IDQ.MS_SWITCHES. Related metrics: tma_clears_resteers, tma_info_bottleneck_irregular_overhead, tma_l1_bound, tma_machine_clears, tma_microcode_sequencer, tma_mixing_vectors, tma_serializing_operation",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions",
         "MetricExpr": "tma_light_operations * INST_RETIRED.NOP / (tma_retiring * tma_info_thread_slots)",
-        "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
+        "MetricGroup": "Pipeline;TopdownL4;tma_L4_group;tma_other_light_ops_group",
         "MetricName": "tma_nop_instructions",
-        "MetricThreshold": "tma_nop_instructions > 0.1 & tma_light_operations > 0.6",
+        "MetricThreshold": "tma_nop_instructions > 0.1 & (tma_other_light_ops > 0.3 & tma_light_operations > 0.6)",
         "PublicDescription": "This metric represents fraction of slots where the CPU was retiring NOP (no op) instructions. Compilers often use NOPs for certain address alignments - e.g. start address of a function or loop body. Sample with: INST_RETIRED.NOP",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
         "MetricConstraint": "NO_GROUP_EVENTS",
-        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions + tma_nop_instructions))",
+        "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_branch_instructions))",
         "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_other_light_ops",
         "MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
         "PublicDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes. May undercount due to FMA double counting",
         "ScaleUnit": "100%"
     },
+    {
+        "BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+        "MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
+        "MetricGroup": "BrMispredicts;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
+        "MetricName": "tma_other_mispredicts",
+        "MetricThreshold": "tma_other_mispredicts > 0.05 & (tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+        "MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
+        "MetricGroup": "Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
+        "MetricName": "tma_other_nukes",
+        "MetricThreshold": "tma_other_nukes > 0.05 & (tma_machine_clears > 0.1 & tma_bad_speculation > 0.15)",
+        "ScaleUnit": "100%"
+    },
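Both new L3 nodes carve the residual out of their parent by an event ratio, floored at 0.0001, presumably so the node never vanishes from the tree even when negligible. For example, with invented counts:

    tma_branch_mispredicts = 0.12              # parent fraction of slots (made up)
    BR_MISP_RETIRED_ALL_BRANCHES = 9_000_000
    INT_MISC_CLEARS_COUNT = 10_500_000
    MACHINE_CLEARS_COUNT = 500_000
    tma_other_mispredicts = max(
        tma_branch_mispredicts *
        (1 - BR_MISP_RETIRED_ALL_BRANCHES /
             (INT_MISC_CLEARS_COUNT - MACHINE_CLEARS_COUNT)), 0.0001)
    print(tma_other_mispredicts)               # 0.012 -> ~1.2% of slots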
     {
         "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)",
         "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks",
         "ScaleUnit": "100%"
     },
     {
-        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)",
+        "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU)",
         "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks",
         "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P",
         "MetricName": "tma_port_6",
         "MetricThreshold": "tma_port_6 > 0.6",
-        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
+        "PublicDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+] Primary Branch and simple ALU). Sample with: UOPS_DISPATCHED.PORT_6. Related metrics: tma_fp_scalar, tma_fp_vector, tma_fp_vector_128b, tma_fp_vector_256b, tma_fp_vector_512b, tma_port_0, tma_port_1, tma_port_5, tma_ports_utilized_2",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
-        "MetricExpr": "((cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
+        "MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
         "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
         "MetricName": "tma_ports_utilization",
         "MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
     },
     {
         "BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
-        "MetricExpr": "cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ / tma_info_thread_clks + tma_serializing_operation * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
+        "MetricExpr": "(cpu@EXE_ACTIVITY.3_PORTS_UTIL\\,umask\\=0x80@ + tma_core_bound * RS_EVENTS.EMPTY_CYCLES) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY) / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_0",
         "MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
         "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
         "MetricName": "tma_ports_utilized_3m",
-        "MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
+        "MetricThreshold": "tma_ports_utilized_3m > 0.4 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise). Sample with: UOPS_EXECUTED.CYCLES_GE_3",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations",
         "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks",
-        "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group",
+        "MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
         "MetricName": "tma_serializing_operation",
-        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))",
+        "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
         "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
         "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
         "MetricExpr": "140 * MISC_RETIRED.PAUSE_INST / tma_info_thread_clks",
-        "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group",
+        "MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
         "MetricName": "tma_slow_pause",
-        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))",
+        "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
         "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions. Sample with: MISC_RETIRED.PAUSE_INST",
         "ScaleUnit": "100%"
     },
         "MetricGroup": "MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
         "MetricName": "tma_sq_full",
         "MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
-        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+        "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_fb_full, tma_info_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth",
         "ScaleUnit": "100%"
     },
     {
     {
         "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears",
         "MetricExpr": "10 * BACLEARS.ANY / tma_info_thread_clks",
-        "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
+        "MetricGroup": "BigFootprint;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group",
         "MetricName": "tma_unknown_branches",
         "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit). Sample with: BACLEARS.ANY",
+        "PublicDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears. These are fetched branches the Branch Prediction Unit was unable to recognize (e.g. first time the branch is fetched or hitting BTB capacity limit) hence called Unknown Branches. Sample with: BACLEARS.ANY",
         "ScaleUnit": "100%"
     },
     {
index eed1b90a277946b8b25120448fb28e14bfc786f6..48f23acc76c0b35a270a4c7c86c18ff010a4efe3 100644 (file)
@@ -25,6 +25,7 @@
     },
     {
         "BriefDescription": "This event is deprecated. Refer to new event UNC_ARB_REQ_TRK_REQUEST.DRD",
+        "Deprecated": "1",
         "EventCode": "0x81",
         "EventName": "UNC_ARB_DAT_REQUESTS.RD",
         "PerPkg": "1",
@@ -33,6 +34,7 @@
     },
     {
         "BriefDescription": "This event is deprecated. Refer to new event UNC_ARB_DAT_OCCUPANCY.ALL",
+        "Deprecated": "1",
         "EventCode": "0x85",
         "EventName": "UNC_ARB_IFA_OCCUPANCY.ALL",
         "PerPkg": "1",
index 53ab050c8fa436f584867c707a91a1ae985aa567..e42efc16723e955a7e3f85b24dfa580d39515237 100755 (executable)
@@ -203,7 +203,7 @@ class JsonEvent:
 
     def llx(x: int) -> str:
       """Convert an int to a string similar to a printf modifier of %#llx."""
-      return '0' if x == 0 else hex(x)
+      return str(x) if x >= 0 and x < 10 else hex(x)
 
     def fixdesc(s: str) -> str:
       """Fix formatting issue for the desc string."""
@@ -294,6 +294,23 @@ class JsonEvent:
       }
       return table[unit] if unit in table else f'uncore_{unit.lower()}'
 
+    def is_zero(val: str) -> bool:
+        try:
+            if val.startswith('0x'):
+                return int(val, 16) == 0
+            else:
+                return int(val) == 0
+        except ValueError:
+            return False
+
+    def canonicalize_value(val: str) -> str:
+        try:
+            if val.startswith('0x'):
+                return llx(int(val, 16))
+            return str(int(val))
+        except ValueError:
+            return val
+
     eventcode = 0
     if 'EventCode' in jd:
       eventcode = int(jd['EventCode'].split(',', 1)[0], 0)
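Taken together, the two helpers normalize event-field strings: is_zero() recognizes both decimal and hex zeros, and canonicalize_value() strips leading zeros while leaving non-numeric strings untouched. A self-contained sketch (simplified: the in-tree version routes hex values through llx(), so single hex digits also come out as decimal):

    def is_zero(val: str) -> bool:
        try:
            if val.startswith('0x'):
                return int(val, 16) == 0
            return int(val) == 0
        except ValueError:
            return False

    def canonicalize_value(val: str) -> str:
        try:
            if val.startswith('0x'):
                return hex(int(val, 16))
            return str(int(val))
        except ValueError:
            return val

    assert is_zero('0x0') and is_zero('00') and not is_zero('cha')
    assert canonicalize_value('0x0010') == '0x10'
    assert canonicalize_value('007') == '7'
    assert canonicalize_value('cha') == 'cha'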
@@ -356,10 +373,14 @@ class JsonEvent:
         ('UMask', 'umask='),
         ('NodeType', 'type='),
         ('RdWrMask', 'rdwrmask='),
+        ('EnAllCores', 'enallcores='),
+        ('EnAllSlices', 'enallslices='),
+        ('SliceId', 'sliceid='),
+        ('ThreadMask', 'threadmask='),
     ]
     for key, value in event_fields:
-      if key in jd and jd[key] != '0':
-        event += ',' + value + jd[key]
+      if key in jd and not is_zero(jd[key]):
+        event += f',{value}{canonicalize_value(jd[key])}'
     if filter:
       event += f',{filter}'
     if msr:
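The net effect on the assembled perf event string: any field whose value parses to zero is now dropped entirely (previously only the literal string '0' was skipped), and surviving values are canonicalized. A simplified illustration with a hypothetical field dict, using inline stand-ins for the helpers above:

    jd = {'EventCode': '0x81', 'UMask': '0x00', 'ThreadMask': '0x03'}
    event = 'event=0x81'
    for key, prefix in (('UMask', 'umask='), ('ThreadMask', 'threadmask=')):
        val = jd.get(key)
        if val is not None and int(val, 0) != 0:   # stand-in for is_zero()
            event += f',{prefix}{int(val, 0):#x}'  # stand-in for canonicalize_value()
    print(event)  # event=0x81,threadmask=0x3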
index 53ba9c3e20e05782eb47e7368795b5869f263be9..c7f9d96760959e0fdf1380afa86ddb8c615fedf7 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 perf-y += builtin-test.o
-perf-y += builtin-test-list.o
+perf-y += tests-scripts.o
 perf-y += parse-events.o
 perf-y += dso-data.o
 perf-y += attr.o
diff --git a/tools/perf/tests/builtin-test-list.c b/tools/perf/tests/builtin-test-list.c
deleted file mode 100644 (file)
index a65b9e5..0000000
+++ /dev/null
@@ -1,207 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <dirent.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <linux/ctype.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/zalloc.h>
-#include <string.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <subcmd/exec-cmd.h>
-#include <subcmd/parse-options.h>
-#include <sys/wait.h>
-#include <sys/stat.h>
-#include "builtin.h"
-#include "builtin-test-list.h"
-#include "color.h"
-#include "debug.h"
-#include "hist.h"
-#include "intlist.h"
-#include "string2.h"
-#include "symbol.h"
-#include "tests.h"
-#include "util/rlimit.h"
-
-
-/*
- * As this is a singleton built once for the run of the process, there is
- * no value in trying to free it and just let it stay around until process
- * exits when it's cleaned up.
- */
-static size_t files_num = 0;
-static struct script_file *files = NULL;
-static int files_max_width = 0;
-
-static const char *shell_tests__dir(char *path, size_t size)
-{
-       const char *devel_dirs[] = { "./tools/perf/tests", "./tests", };
-       char *exec_path;
-       unsigned int i;
-
-       for (i = 0; i < ARRAY_SIZE(devel_dirs); ++i) {
-               struct stat st;
-
-               if (!lstat(devel_dirs[i], &st)) {
-                       scnprintf(path, size, "%s/shell", devel_dirs[i]);
-                       if (!lstat(devel_dirs[i], &st))
-                               return path;
-               }
-       }
-
-       /* Then installed path. */
-       exec_path = get_argv_exec_path();
-       scnprintf(path, size, "%s/tests/shell", exec_path);
-       free(exec_path);
-       return path;
-}
-
-static const char *shell_test__description(char *description, size_t size,
-                                           const char *path, const char *name)
-{
-       FILE *fp;
-       char filename[PATH_MAX];
-       int ch;
-
-       path__join(filename, sizeof(filename), path, name);
-       fp = fopen(filename, "r");
-       if (!fp)
-               return NULL;
-
-       /* Skip first line - should be #!/bin/sh Shebang */
-       do {
-               ch = fgetc(fp);
-       } while (ch != EOF && ch != '\n');
-
-       description = fgets(description, size, fp);
-       fclose(fp);
-
-       /* Assume first char on line is comment marker; everything after it is the desc */
-       return description ? strim(description + 1) : NULL;
-}
-
-/* Is this full file path a shell script */
-static bool is_shell_script(const char *path)
-{
-       const char *ext;
-
-       ext = strrchr(path, '.');
-       if (!ext)
-               return false;
-       if (!strcmp(ext, ".sh")) { /* Has .sh extension */
-               if (access(path, R_OK | X_OK) == 0) /* Is executable */
-                       return true;
-       }
-       return false;
-}
-
-/* Is this file in this dir a shell script (for test purposes) */
-static bool is_test_script(const char *path, const char *name)
-{
-       char filename[PATH_MAX];
-
-       path__join(filename, sizeof(filename), path, name);
-       if (!is_shell_script(filename)) return false;
-       return true;
-}
-
-/* Duplicate a string and fall over and die if we run out of memory */
-static char *strdup_check(const char *str)
-{
-       char *newstr;
-
-       newstr = strdup(str);
-       if (!newstr) {
-               pr_err("Out of memory while duplicating test script string\n");
-               abort();
-       }
-       return newstr;
-}
-
-static void append_script(const char *dir, const char *file, const char *desc)
-{
-       struct script_file *files_tmp;
-       size_t files_num_tmp;
-       int width;
-
-       files_num_tmp = files_num + 1;
-       if (files_num_tmp >= SIZE_MAX) {
-               pr_err("Too many script files\n");
-               abort();
-       }
-       /* Realloc is good enough, though we could realloc by chunks, not that
-        * anyone will ever measure performance here */
-       files_tmp = realloc(files,
-                           (files_num_tmp + 1) * sizeof(struct script_file));
-       if (files_tmp == NULL) {
-               pr_err("Out of memory while building test list\n");
-               abort();
-       }
-       /* Add file to end and NULL terminate the struct array */
-       files = files_tmp;
-       files_num = files_num_tmp;
-       files[files_num - 1].dir = strdup_check(dir);
-       files[files_num - 1].file = strdup_check(file);
-       files[files_num - 1].desc = strdup_check(desc);
-       files[files_num].dir = NULL;
-       files[files_num].file = NULL;
-       files[files_num].desc = NULL;
-
-       width = strlen(desc); /* Track max width of desc */
-       if (width > files_max_width)
-               files_max_width = width;
-}
-
-static void append_scripts_in_dir(const char *path)
-{
-       struct dirent **entlist;
-       struct dirent *ent;
-       int n_dirs, i;
-       char filename[PATH_MAX];
-
-       /* List files, sorted by alpha */
-       n_dirs = scandir(path, &entlist, NULL, alphasort);
-       if (n_dirs == -1)
-               return;
-       for (i = 0; i < n_dirs && (ent = entlist[i]); i++) {
-               if (ent->d_name[0] == '.')
-                       continue; /* Skip hidden files */
-               if (is_test_script(path, ent->d_name)) { /* It's a test */
-                       char bf[256];
-                       const char *desc = shell_test__description
-                               (bf, sizeof(bf), path, ent->d_name);
-
-                       if (desc) /* It has a desc line - valid script */
-                               append_script(path, ent->d_name, desc);
-               } else if (is_directory(path, ent)) { /* Scan the subdir */
-                       path__join(filename, sizeof(filename),
-                                  path, ent->d_name);
-                       append_scripts_in_dir(filename);
-               }
-       }
-       for (i = 0; i < n_dirs; i++) /* Clean up */
-               zfree(&entlist[i]);
-       free(entlist);
-}
-
-const struct script_file *list_script_files(void)
-{
-       char path_dir[PATH_MAX];
-       const char *path;
-
-       if (files)
-               return files; /* Singleton - we already know our list */
-
-       path = shell_tests__dir(path_dir, sizeof(path_dir)); /* Walk  dir */
-       append_scripts_in_dir(path);
-
-       return files;
-}
-
-int list_script_max_width(void)
-{
-       list_script_files(); /* Ensure we have scanned all scripts */
-       return files_max_width;
-}
diff --git a/tools/perf/tests/builtin-test-list.h b/tools/perf/tests/builtin-test-list.h
deleted file mode 100644 (file)
index eb81f3a..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-struct script_file {
-       char *dir;
-       char *file;
-       char *desc;
-};
-
-/* List available script tests to run - singleton - never freed */
-const struct script_file *list_script_files(void);
-/* Get maximum width of description string */
-int list_script_max_width(void);
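The deleted singleton above is superseded by tests-scripts.c, which turns each
discovered shell script into an ordinary struct test_suite at runtime (wired up
later in builtin-test.c via create_script_test_suites()). Below is a minimal
sketch of that wrapping, using local stand-ins for the perf structs and a
hypothetical suite_for_script() helper; the real code in tests-scripts.c may
differ, only the script-exit-code convention is taken from the deleted
shell_test__run() above:

    #include <stdlib.h>
    #include <sys/wait.h>

    struct test_suite;
    struct test_case {
            const char *name;
            const char *desc;
            int (*run_case)(struct test_suite *suite, int subtest);
    };
    struct test_suite {
            const char *desc;
            struct test_case *test_cases;
            void *priv;                     /* path of the script to run */
    };

    enum { SKETCH_OK = 0, SKETCH_FAIL = -1, SKETCH_SKIP = -2 }; /* mirrors TEST_* */

    /* Mirrors the deleted shell_test__run(): a script exiting 2 means "skip". */
    static int script_case_run(struct test_suite *suite, int subtest)
    {
            int err = system(suite->priv);

            (void)subtest;
            if (err == -1 || !WIFEXITED(err))
                    return SKETCH_FAIL;
            if (WEXITSTATUS(err) == 2)      /* scripts exit 2 to signal a skip */
                    return SKETCH_SKIP;
            return WEXITSTATUS(err) == 0 ? SKETCH_OK : SKETCH_FAIL;
    }

    /* Hypothetical helper: wrap one script in a suite; caller owns the memory. */
    static struct test_suite *suite_for_script(const char *path, const char *desc)
    {
            struct test_case *cases = calloc(2, sizeof(*cases)); /* NULL terminated */
            struct test_suite *suite = calloc(1, sizeof(*suite));

            if (!cases || !suite)
                    abort();
            cases[0].name = desc;
            cases[0].desc = desc;
            cases[0].run_case = script_case_run;
            suite->desc = desc;
            suite->test_cases = cases;
            suite->priv = (void *)path;
            return suite;
    }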
index 4a5973f9bb9b370f1bc966f04e1efdd7b03ef64d..d13ee7683d9d835495eeaf3413c3cea1dd56c313 100644 (file)
@@ -6,6 +6,7 @@
  */
 #include <fcntl.h>
 #include <errno.h>
+#include <poll.h>
 #include <unistd.h>
 #include <string.h>
 #include <stdlib.h>
 #include "debug.h"
 #include "color.h"
 #include <subcmd/parse-options.h>
+#include <subcmd/run-command.h>
 #include "string2.h"
 #include "symbol.h"
 #include "util/rlimit.h"
+#include "util/strbuf.h"
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <subcmd/exec-cmd.h>
 #include <linux/zalloc.h>
 
-#include "builtin-test-list.h"
+#include "tests-scripts.h"
 
+/*
+ * Command line option to not fork, running the tests in the same process
+ * and making them easier to debug.
+ */
 static bool dont_fork;
+/* Fork the tests in parallel and then wait for their completion. */
+static bool parallel;
 const char *dso_to_test;
 const char *test_objdump_path = "objdump";
 
@@ -130,6 +139,7 @@ static struct test_suite *generic_tests[] = {
 static struct test_suite **tests[] = {
        generic_tests,
        arch_tests,
+       NULL, /* shell tests created at runtime. */
 };
 
 static struct test_workload *workloads[] = {
@@ -208,76 +218,36 @@ static bool perf_test__matches(const char *desc, int curr, int argc, const char
        return false;
 }
 
-static int run_test(struct test_suite *test, int subtest)
-{
-       int status, err = -1, child = dont_fork ? 0 : fork();
-       char sbuf[STRERR_BUFSIZE];
-
-       if (child < 0) {
-               pr_err("failed to fork test: %s\n",
-                       str_error_r(errno, sbuf, sizeof(sbuf)));
-               return -1;
-       }
-
-       if (!child) {
-               if (!dont_fork) {
-                       pr_debug("test child forked, pid %d\n", getpid());
-
-                       if (verbose <= 0) {
-                               int nullfd = open("/dev/null", O_WRONLY);
-
-                               if (nullfd >= 0) {
-                                       close(STDERR_FILENO);
-                                       close(STDOUT_FILENO);
-
-                                       dup2(nullfd, STDOUT_FILENO);
-                                       dup2(STDOUT_FILENO, STDERR_FILENO);
-                                       close(nullfd);
-                               }
-                       } else {
-                               signal(SIGSEGV, sighandler_dump_stack);
-                               signal(SIGFPE, sighandler_dump_stack);
-                       }
-               }
-
-               err = test_function(test, subtest)(test, subtest);
-               if (!dont_fork)
-                       exit(err);
-       }
-
-       if (!dont_fork) {
-               wait(&status);
+struct child_test {
+       struct child_process process;
+       struct test_suite *test;
+       int test_num;
+       int subtest;
+};
 
-               if (WIFEXITED(status)) {
-                       err = (signed char)WEXITSTATUS(status);
-                       pr_debug("test child finished with %d\n", err);
-               } else if (WIFSIGNALED(status)) {
-                       err = -1;
-                       pr_debug("test child interrupted\n");
-               }
-       }
+static int run_test_child(struct child_process *process)
+{
+       struct child_test *child = container_of(process, struct child_test, process);
+       int err;
 
-       return err;
+       pr_debug("--- start ---\n");
+       pr_debug("test child forked, pid %d\n", getpid());
+       err = test_function(child->test, child->subtest)(child->test, child->subtest);
+       pr_debug("---- end(%d) ----\n", err);
+       fflush(NULL);
+       return -err;
 }
 
-#define for_each_test(j, k, t)                 \
-       for (j = 0, k = 0; j < ARRAY_SIZE(tests); j++, k = 0)   \
-               while ((t = tests[j][k++]) != NULL)
-
-static int test_and_print(struct test_suite *t, int subtest)
+static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width)
 {
-       int err;
-
-       pr_debug("\n--- start ---\n");
-       err = run_test(t, subtest);
-       pr_debug("---- end ----\n");
+       if (has_subtests(t)) {
+               int subw = width > 2 ? width - 2 : width;
 
-       if (!has_subtests(t))
-               pr_debug("%s:", t->desc);
-       else
-               pr_debug("%s subtest %d:", t->desc, subtest + 1);
+               pr_info("%3d.%1d: %-*s:", i + 1, subtest + 1, subw, test_description(t, subtest));
+       } else
+               pr_info("%3d: %-*s:", i + 1, width, test_description(t, subtest));
 
-       switch (err) {
+       switch (result) {
        case TEST_OK:
                pr_info(" Ok\n");
                break;
@@ -296,99 +266,186 @@ static int test_and_print(struct test_suite *t, int subtest)
                break;
        }
 
-       return err;
+       return 0;
 }
 
-struct shell_test {
-       const char *dir;
-       const char *file;
-};
-
-static int shell_test__run(struct test_suite *test, int subdir __maybe_unused)
+static int finish_test(struct child_test *child_test, int width)
 {
-       int err;
-       char script[PATH_MAX];
-       struct shell_test *st = test->priv;
+       struct test_suite *t = child_test->test;
+       int i = child_test->test_num;
+       int subi = child_test->subtest;
+       int out = child_test->process.out;
+       int err = child_test->process.err;
+       bool out_done = out <= 0;
+       bool err_done = err <= 0;
+       struct strbuf out_output = STRBUF_INIT;
+       struct strbuf err_output = STRBUF_INIT;
+       int ret;
 
-       path__join(script, sizeof(script) - 3, st->dir, st->file);
+       /*
+        * For test suites with subtests, display the suite name ahead of the
+        * subtest names.
+        */
+       if (has_subtests(t) && subi == 0)
+               pr_info("%3d: %-*s:\n", i + 1, width, test_description(t, -1));
+
+       /*
+        * Busy loop reading from the child's stdout and stderr that are set to
+        * be non-blocking until EOF.
+        */
+       if (!out_done)
+               fcntl(out, F_SETFL, O_NONBLOCK);
+       if (!err_done)
+               fcntl(err, F_SETFL, O_NONBLOCK);
+       if (verbose > 1) {
+               if (has_subtests(t))
+                       pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi));
+               else
+                       pr_info("%3d: %s:\n", i + 1, test_description(t, -1));
+       }
+       while (!out_done || !err_done) {
+               struct pollfd pfds[2] = {
+                       { .fd = out,
+                         .events = POLLIN | POLLERR | POLLHUP | POLLNVAL,
+                       },
+                       { .fd = err,
+                         .events = POLLIN | POLLERR | POLLHUP | POLLNVAL,
+                       },
+               };
+               char buf[512];
+               ssize_t len;
 
-       if (verbose > 0)
-               strncat(script, " -v", sizeof(script) - strlen(script) - 1);
+               /* Poll to avoid excessive spinning; the timeout is set to 1000ms. */
+               poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/1000);
+               if (!out_done && pfds[0].revents) {
+                       errno = 0;
+                       len = read(out, buf, sizeof(buf) - 1);
 
-       err = system(script);
-       if (!err)
-               return TEST_OK;
+                       if (len <= 0) {
+                               out_done = errno != EAGAIN;
+                       } else {
+                               buf[len] = '\0';
+                               if (verbose > 1)
+                                       fprintf(stdout, "%s", buf);
+                               else
+                                       strbuf_addstr(&out_output, buf);
+                       }
+               }
+               if (!err_done && pfds[1].revents) {
+                       errno = 0;
+                       len = read(err, buf, sizeof(buf) - 1);
 
-       return WEXITSTATUS(err) == 2 ? TEST_SKIP : TEST_FAIL;
+                       if (len <= 0) {
+                               err_done = errno != EAGAIN;
+                       } else {
+                               buf[len] = '\0';
+                               if (verbose > 1)
+                                       fprintf(stdout, "%s", buf);
+                               else
+                                       strbuf_addstr(&err_output, buf);
+                       }
+               }
+       }
+       /* Clean up child process. */
+       ret = finish_command(&child_test->process);
+       if (verbose == 1 && ret == TEST_FAIL) {
+               /* Add header for test that was skipped above. */
+               if (has_subtests(t))
+                       pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi));
+               else
+                       pr_info("%3d: %s:\n", i + 1, test_description(t, -1));
+               fprintf(stdout, "%s", out_output.buf);
+               fprintf(stderr, "%s", err_output.buf);
+       }
+       strbuf_release(&out_output);
+       strbuf_release(&err_output);
+       print_test_result(t, i, subi, ret, width);
+       if (out > 0)
+               close(out);
+       if (err > 0)
+               close(err);
+       return 0;
 }
 
-static int run_shell_tests(int argc, const char *argv[], int i, int width,
-                               struct intlist *skiplist)
+static int start_test(struct test_suite *test, int i, int subi, struct child_test **child,
+                     int width)
 {
-       struct shell_test st;
-       const struct script_file *files, *file;
+       int err;
 
-       files = list_script_files();
-       if (!files)
+       *child = NULL;
+       if (dont_fork) {
+               pr_debug("--- start ---\n");
+               err = test_function(test, subi)(test, subi);
+               pr_debug("---- end ----\n");
+               print_test_result(test, i, subi, err, width);
                return 0;
-       for (file = files; file->dir; file++) {
-               int curr = i++;
-               struct test_case test_cases[] = {
-                       {
-                               .desc = file->desc,
-                               .run_case = shell_test__run,
-                       },
-                       { .name = NULL, }
-               };
-               struct test_suite test_suite = {
-                       .desc = test_cases[0].desc,
-                       .test_cases = test_cases,
-                       .priv = &st,
-               };
-               st.dir = file->dir;
-
-               if (test_suite.desc == NULL ||
-                   !perf_test__matches(test_suite.desc, curr, argc, argv))
-                       continue;
-
-               st.file = file->file;
-               pr_info("%3d: %-*s:", i, width, test_suite.desc);
-
-               if (intlist__find(skiplist, i)) {
-                       color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
-                       continue;
-               }
+       }
 
-               test_and_print(&test_suite, 0);
+       *child = zalloc(sizeof(**child));
+       if (!*child)
+               return -ENOMEM;
+
+       (*child)->test = test;
+       (*child)->test_num = i;
+       (*child)->subtest = subi;
+       (*child)->process.pid = -1;
+       (*child)->process.no_stdin = 1;
+       if (verbose <= 0) {
+               (*child)->process.no_stdout = 1;
+               (*child)->process.no_stderr = 1;
+       } else {
+               (*child)->process.out = -1;
+               (*child)->process.err = -1;
        }
-       return 0;
+       (*child)->process.no_exec_cmd = run_test_child;
+       err = start_command(&(*child)->process);
+       if (err || parallel)
+               return err;
+       return finish_test(*child, width);
 }
 
+#define for_each_test(j, k, t)                                 \
+       for (j = 0, k = 0; j < ARRAY_SIZE(tests); j++, k = 0)   \
+               while ((t = tests[j][k++]) != NULL)
+
 static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 {
        struct test_suite *t;
        unsigned int j, k;
        int i = 0;
-       int width = list_script_max_width();
+       int width = 0;
+       size_t num_tests = 0;
+       struct child_test **child_tests;
+       int child_test_num = 0;
 
        for_each_test(j, k, t) {
                int len = strlen(test_description(t, -1));
 
                if (width < len)
                        width = len;
+
+               if (has_subtests(t)) {
+                       for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) {
+                               len = strlen(test_description(t, subi));
+                               if (width < len)
+                                       width = len;
+                               num_tests++;
+                       }
+               } else {
+                       num_tests++;
+               }
        }
+       child_tests = calloc(num_tests, sizeof(*child_tests));
+       if (!child_tests)
+               return -ENOMEM;
 
        for_each_test(j, k, t) {
                int curr = i++;
-               int subi;
 
                if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) {
                        bool skip = true;
-                       int subn;
-
-                       subn = num_subtests(t);
 
-                       for (subi = 0; subi < subn; subi++) {
+                       for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) {
                                if (perf_test__matches(test_description(t, subi),
                                                        curr, argc, argv))
                                        skip = false;
@@ -398,74 +455,45 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
                                continue;
                }
 
-               pr_info("%3d: %-*s:", i, width, test_description(t, -1));
-
                if (intlist__find(skiplist, i)) {
+                       pr_info("%3d: %-*s:", curr + 1, width, test_description(t, -1));
                        color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
                        continue;
                }
 
                if (!has_subtests(t)) {
-                       test_and_print(t, -1);
-               } else {
-                       int subn = num_subtests(t);
-                       /*
-                        * minus 2 to align with normal testcases.
-                        * For subtest we print additional '.x' in number.
-                        * for example:
-                        *
-                        * 35: Test LLVM searching and compiling                        :
-                        * 35.1: Basic BPF llvm compiling test                          : Ok
-                        */
-                       int subw = width > 2 ? width - 2 : width;
-
-                       if (subn <= 0) {
-                               color_fprintf(stderr, PERF_COLOR_YELLOW,
-                                             " Skip (not compiled in)\n");
-                               continue;
-                       }
-                       pr_info("\n");
-
-                       for (subi = 0; subi < subn; subi++) {
-                               int len = strlen(test_description(t, subi));
+                       int err = start_test(t, curr, -1, &child_tests[child_test_num++], width);
 
-                               if (subw < len)
-                                       subw = len;
+                       if (err) {
+                               /* TODO: if parallel, waitpid() the already forked children. */
+                               free(child_tests);
+                               return err;
                        }
+               } else {
+                       for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) {
+                               int err;
 
-                       for (subi = 0; subi < subn; subi++) {
                                if (!perf_test__matches(test_description(t, subi),
                                                        curr, argc, argv))
                                        continue;
 
-                               pr_info("%3d.%1d: %-*s:", i, subi + 1, subw,
-                                       test_description(t, subi));
-                               test_and_print(t, subi);
+                               err = start_test(t, curr, subi, &child_tests[child_test_num++],
+                                                width);
+                               if (err)
+                                       return err;
                        }
                }
        }
+       for (i = 0; i < child_test_num; i++) {
+               if (parallel) {
+                       int ret = finish_test(child_tests[i], width);
 
-       return run_shell_tests(argc, argv, i, width, skiplist);
-}
-
-static int perf_test__list_shell(int argc, const char **argv, int i)
-{
-       const struct script_file *files, *file;
-
-       files = list_script_files();
-       if (!files)
-               return 0;
-       for (file = files; file->dir; file++) {
-               int curr = i++;
-               struct test_suite t = {
-                       .desc = file->desc
-               };
-
-               if (!perf_test__matches(t.desc, curr, argc, argv))
-                       continue;
-
-               pr_info("%3d: %s\n", i, t.desc);
+                       if (ret)
+                               return ret;
+               }
+               free(child_tests[i]);
        }
+       free(child_tests);
        return 0;
 }
 
@@ -492,9 +520,6 @@ static int perf_test__list(int argc, const char **argv)
                                        test_description(t, subi));
                }
        }
-
-       perf_test__list_shell(argc, argv, i);
-
        return 0;
 }
 
@@ -536,6 +561,8 @@ int cmd_test(int argc, const char **argv)
                    "be more verbose (show symbol address, etc)"),
        OPT_BOOLEAN('F', "dont-fork", &dont_fork,
                    "Do not fork for testcase"),
+       OPT_BOOLEAN('p', "parallel", &parallel,
+                   "Run the tests in parallel"),
        OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"),
        OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"),
        OPT_STRING(0, "objdump", &test_objdump_path, "path",
@@ -554,6 +581,7 @@ int cmd_test(int argc, const char **argv)
        /* Unbuffered output */
        setvbuf(stdout, NULL, _IONBF, 0);
 
+       tests[2] = create_script_test_suites();
        argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0);
        if (argc >= 1 && !strcmp(argv[0], "list"))
                return perf_test__list(argc - 1, argv + 1);
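The new finish_test() above has to drain a child's stdout and stderr while the
child may still be running; otherwise a chatty test could fill the pipe and
deadlock against the parent's wait. Here is a standalone sketch of the same
drain pattern (non-blocking fds plus poll()), with error handling trimmed; the
buffer size and the 1000ms timeout are simply the values the diff uses:

    #include <errno.h>
    #include <fcntl.h>
    #include <poll.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    static void drain_child_output(int out_fd, int err_fd)
    {
            bool out_done = out_fd < 0, err_done = err_fd < 0;

            if (!out_done)
                    fcntl(out_fd, F_SETFL, O_NONBLOCK);
            if (!err_done)
                    fcntl(err_fd, F_SETFL, O_NONBLOCK);

            while (!out_done || !err_done) {
                    struct pollfd pfds[2] = {
                            { .fd = out_fd, .events = POLLIN | POLLHUP },
                            { .fd = err_fd, .events = POLLIN | POLLHUP },
                    };
                    char buf[512];
                    ssize_t len;

                    /* Bounded wait so a quiet child does not busy-spin us. */
                    poll(pfds, 2, /*timeout=*/1000);

                    if (!out_done && pfds[0].revents) {
                            errno = 0;
                            len = read(out_fd, buf, sizeof(buf));
                            if (len <= 0)
                                    out_done = errno != EAGAIN; /* EOF or hard error */
                            else
                                    fwrite(buf, 1, len, stdout);
                    }
                    if (!err_done && pfds[1].revents) {
                            errno = 0;
                            len = read(err_fd, buf, sizeof(buf));
                            if (len <= 0)
                                    err_done = errno != EAGAIN;
                            else
                                    fwrite(buf, 1, len, stderr);
                    }
            }
    }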
index 9c1a1f18db7506077581cbc29e11f8529a1b9c9a..31966ff856f8c5dfe3e7da4988c3f40f6a440d6f 100644 (file)
@@ -127,8 +127,7 @@ static int expand_group_events(void)
        parse_events_error__init(&err);
        ret = parse_events(evlist, event_str, &err);
        if (ret < 0) {
-               pr_debug("failed to parse event '%s', err %d, str '%s'\n",
-                        event_str, ret, err.str);
+               pr_debug("failed to parse event '%s', err %d\n", event_str, ret);
                parse_events_error__print(&err, event_str);
                goto out;
        }
index 8a4da7eb637a8abd38f047238c6433e9929f9a2d..a1f8adf853675095eba7558f8dcf405f5a41f595 100644 (file)
@@ -83,6 +83,7 @@ make_no_libelf      := NO_LIBELF=1
 make_no_libunwind   := NO_LIBUNWIND=1
 make_no_libdw_dwarf_unwind := NO_LIBDW_DWARF_UNWIND=1
 make_no_backtrace   := NO_BACKTRACE=1
+make_no_libcapstone := NO_CAPSTONE=1
 make_no_libnuma     := NO_LIBNUMA=1
 make_no_libaudit    := NO_LIBAUDIT=1
 make_no_libbionic   := NO_LIBBIONIC=1
@@ -122,7 +123,7 @@ make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
 make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
 make_minimal        += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1
-make_minimal        += NO_LIBCAP=1 NO_SYSCALL_TABLE=1
+make_minimal        += NO_LIBCAP=1 NO_SYSCALL_TABLE=1 NO_CAPSTONE=1
 
 # $(run) contains all available tests
 run := make_pure
@@ -152,6 +153,7 @@ run += make_no_libelf
 run += make_no_libunwind
 run += make_no_libdw_dwarf_unwind
 run += make_no_backtrace
+run += make_no_libcapstone
 run += make_no_libnuma
 run += make_no_libaudit
 run += make_no_libbionic
index bb3fbfe5a73e2302155fe40a953102e15f0dd103..b15417a0d617ff5e78d328bc9b49361699bc6bb8 100644 (file)
@@ -156,6 +156,9 @@ static int test__maps__merge_in(struct test_suite *t __maybe_unused, int subtest
        TEST_ASSERT_VAL("merge check failed", !ret);
 
        maps__zput(maps);
+       map__zput(map_kcore1);
+       map__zput(map_kcore2);
+       map__zput(map_kcore3);
        return TEST_OK;
 }
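The three added map__zput() calls release the references the test itself took
on the kcore maps. A plain put would drop the refcount, but the zput form also
clears the caller's pointer so a stale handle cannot be dereferenced or put a
second time. A generic sketch of the idiom follows; perf's real macros live in
the map/maps headers and may differ:

    #include <stdlib.h>

    struct obj { int refcnt; };

    static void obj__put(struct obj *o)
    {
            if (o && --o->refcnt == 0)
                    free(o);
    }

    /* Drop the reference and clear the caller's pointer in one step. */
    #define obj__zput(o) do { obj__put(o); (o) = NULL; } while (0)

After obj__zput(map), the local variable is NULL, so an accidental later put or
dereference fails loudly instead of silently corrupting the refcount.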
 
index fbdf710d5eea06047784aef245438b87442451d9..feb5727584d141632b76a4b82be3976dc305a2bc 100644 (file)
@@ -2506,11 +2506,10 @@ static int test_event(const struct evlist_test *e)
        parse_events_error__init(&err);
        ret = parse_events(evlist, e->name, &err);
        if (ret) {
-               pr_debug("failed to parse event '%s', err %d, str '%s'\n",
-                        e->name, ret, err.str);
+               pr_debug("failed to parse event '%s', err %d\n", e->name, ret);
                parse_events_error__print(&err, e->name);
                ret = TEST_FAIL;
-               if (err.str && strstr(err.str, "can't access trace events"))
+               if (parse_events_error__contains(&err, "can't access trace events"))
                        ret = TEST_SKIP;
        } else {
                ret = e->check(evlist);
@@ -2535,8 +2534,8 @@ static int test_event_fake_pmu(const char *str)
        ret = __parse_events(evlist, str, /*pmu_filter=*/NULL, &err,
                             &perf_pmu__fake, /*warn_if_reordered=*/true);
        if (ret) {
-               pr_debug("failed to parse event '%s', err %d, str '%s'\n",
-                        str, ret, err.str);
+               pr_debug("failed to parse event '%s', err %d\n",
+                        str, ret);
                parse_events_error__print(&err, str);
        }
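Both hunks above stop printing err.str directly, and the skip check now goes
through parse_events_error__contains(). The call shape — error state plus a
substring, truthy on match — is taken from the diff; the sketch below of how
such a predicate could walk a list of accumulated messages is an assumption,
not the actual util/parse-events.c implementation:

    #include <stdbool.h>
    #include <string.h>

    struct error_msg {
            struct error_msg *next;
            char *str;
    };

    /* Assumed shape: one parse may accumulate several messages. */
    struct parse_events_error {
            struct error_msg *msgs;
    };

    static bool parse_events_error__contains(const struct parse_events_error *err,
                                             const char *needle)
    {
            for (const struct error_msg *m = err->msgs; m; m = m->next) {
                    if (m->str && strstr(m->str, needle))
                            return true;
            }
            return false;
    }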
 
index a56d32905743a00aa49d62fde542b426ee96ab5d..47a7c32775401bcfb49f12dbdde6a2d96c9d8efc 100644 (file)
@@ -70,7 +70,7 @@ static const struct perf_pmu_test_event segment_reg_loads_any = {
        .event = {
                .pmu = "default_core",
                .name = "segment_reg_loads.any",
-               .event = "event=0x6,period=200000,umask=0x80",
+               .event = "event=6,period=200000,umask=0x80",
                .desc = "Number of segment register loads",
                .topic = "other",
        },
@@ -82,7 +82,7 @@ static const struct perf_pmu_test_event dispatch_blocked_any = {
        .event = {
                .pmu = "default_core",
                .name = "dispatch_blocked.any",
-               .event = "event=0x9,period=200000,umask=0x20",
+               .event = "event=9,period=200000,umask=0x20",
                .desc = "Memory cluster signals to block micro-op dispatch for any reason",
                .topic = "other",
        },
@@ -94,11 +94,11 @@ static const struct perf_pmu_test_event eist_trans = {
        .event = {
                .pmu = "default_core",
                .name = "eist_trans",
-               .event = "event=0x3a,period=200000,umask=0x0",
+               .event = "event=0x3a,period=200000",
                .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
                .topic = "other",
        },
-       .alias_str = "event=0x3a,period=0x30d40,umask=0",
+       .alias_str = "event=0x3a,period=0x30d40",
        .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
 };
 
@@ -128,7 +128,7 @@ static const struct perf_pmu_test_event *core_events[] = {
 static const struct perf_pmu_test_event uncore_hisi_ddrc_flux_wcmd = {
        .event = {
                .name = "uncore_hisi_ddrc.flux_wcmd",
-               .event = "event=0x2",
+               .event = "event=2",
                .desc = "DDRC write commands",
                .topic = "uncore",
                .long_desc = "DDRC write commands",
@@ -156,13 +156,13 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = {
 static const struct perf_pmu_test_event uncore_hyphen = {
        .event = {
                .name = "event-hyphen",
-               .event = "event=0xe0,umask=0x00",
+               .event = "event=0xe0",
                .desc = "UNC_CBO_HYPHEN",
                .topic = "uncore",
                .long_desc = "UNC_CBO_HYPHEN",
                .pmu = "uncore_cbox",
        },
-       .alias_str = "event=0xe0,umask=0",
+       .alias_str = "event=0xe0",
        .alias_long_desc = "UNC_CBO_HYPHEN",
        .matching_pmu = "uncore_cbox_0",
 };
@@ -170,13 +170,13 @@ static const struct perf_pmu_test_event uncore_hyphen = {
 static const struct perf_pmu_test_event uncore_two_hyph = {
        .event = {
                .name = "event-two-hyph",
-               .event = "event=0xc0,umask=0x00",
+               .event = "event=0xc0",
                .desc = "UNC_CBO_TWO_HYPH",
                .topic = "uncore",
                .long_desc = "UNC_CBO_TWO_HYPH",
                .pmu = "uncore_cbox",
        },
-       .alias_str = "event=0xc0,umask=0",
+       .alias_str = "event=0xc0",
        .alias_long_desc = "UNC_CBO_TWO_HYPH",
        .matching_pmu = "uncore_cbox_0",
 };
@@ -184,7 +184,7 @@ static const struct perf_pmu_test_event uncore_two_hyph = {
 static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = {
        .event = {
                .name = "uncore_hisi_l3c.rd_hit_cpipe",
-               .event = "event=0x7",
+               .event = "event=7",
                .desc = "Total read hits",
                .topic = "uncore",
                .long_desc = "Total read hits",
@@ -265,7 +265,7 @@ static const struct perf_pmu_test_event sys_ccn_pmu_read_cycles = {
 static const struct perf_pmu_test_event sys_cmn_pmu_hnf_cache_miss = {
        .event = {
                .name = "sys_cmn_pmu.hnf_cache_miss",
-               .event = "eventid=0x1,type=0x5",
+               .event = "eventid=1,type=5",
                .desc = "Counts total cache misses in first lookup result (high priority)",
                .topic = "uncore",
                .pmu = "uncore_sys_cmn_pmu",
diff --git a/tools/perf/tests/shell/base_probe/settings.sh b/tools/perf/tests/shell/base_probe/settings.sh
new file mode 100644 (file)
index 0000000..123621c
--- /dev/null
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+#      settings.sh of perf_probe test
+#      Author: Michael Petlan <mpetlan@redhat.com>
+#      Author: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+#
+
+export TEST_NAME="perf_probe"
+
+export MY_ARCH=`arch`
+
+if [ -n "$PERFSUITE_RUN_DIR" ]; then
+       # when $PERFSUITE_RUN_DIR is set to something, all the logs and temp files will be placed there
+       # --> the $PERFSUITE_RUN_DIR/perf_something/examples and $PERFSUITE_RUN_DIR/perf_something/logs
+       #     dirs will be used for that
+       export PERFSUITE_RUN_DIR=`readlink -f $PERFSUITE_RUN_DIR`
+       export CURRENT_TEST_DIR="$PERFSUITE_RUN_DIR/$TEST_NAME"
+       export MAKE_TARGET_DIR="$CURRENT_TEST_DIR/examples"
+       test -d "$MAKE_TARGET_DIR" || mkdir -p "$MAKE_TARGET_DIR"
+       export LOGS_DIR="$PERFSUITE_RUN_DIR/$TEST_NAME/logs"
+       test -d "$LOGS_DIR" || mkdir -p "$LOGS_DIR"
+else
+       # when $PERFSUITE_RUN_DIR is not set, logs will be placed here
+       export CURRENT_TEST_DIR="."
+       export LOGS_DIR="."
+fi
+
+check_kprobes_available()
+{
+       test -e /sys/kernel/debug/tracing/kprobe_events
+}
+
+check_uprobes_available()
+{
+       test -e /sys/kernel/debug/tracing/uprobe_events
+}
+
+clear_all_probes()
+{
+       echo 0 > /sys/kernel/debug/tracing/events/enable
+       check_kprobes_available && echo > /sys/kernel/debug/tracing/kprobe_events
+       check_uprobes_available && echo > /sys/kernel/debug/tracing/uprobe_events
+}
+
+check_sdt_support()
+{
+       $CMD_PERF list sdt | grep sdt > /dev/null 2> /dev/null
+}
diff --git a/tools/perf/tests/shell/base_probe/test_adding_kernel.sh b/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
new file mode 100755 (executable)
index 0000000..a5d707e
--- /dev/null
@@ -0,0 +1,278 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+#
+#      test_adding_kernel of perf_probe test
+#      Author: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+#      Author: Michael Petlan <mpetlan@redhat.com>
+#
+#      Description:
+#
+#              This test tests adding of probes, their correct listing
+#              and removing.
+#
+
+# include working environment
+. ../common/init.sh
+. ./settings.sh
+
+# shellcheck disable=SC2034 # the variable is later used after the working environment is included
+THIS_TEST_NAME=`basename $0 .sh`
+TEST_RESULT=0
+
+TEST_PROBE=${TEST_PROBE:-"inode_permission"}
+
+check_kprobes_available
+if [ $? -ne 0 ]; then
+       print_overall_skipped
+       exit 0
+fi
+
+
+### basic probe adding
+
+for opt in "" "-a" "--add"; do
+       clear_all_probes
+       $CMD_PERF probe $opt $TEST_PROBE 2> $LOGS_DIR/adding_kernel_add$opt.err
+       PERF_EXIT_CODE=$?
+
+       ../common/check_all_patterns_found.pl "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE" < $LOGS_DIR/adding_kernel_add$opt.err
+       CHECK_EXIT_CODE=$?
+
+       print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "adding probe $TEST_PROBE :: $opt"
+       (( TEST_RESULT += $? ))
+done
+
+
+### listing added probe :: perf list
+
+# any added probes should appear in perf-list output
+$CMD_PERF list probe:\* > $LOGS_DIR/adding_kernel_list.log
+PERF_EXIT_CODE=$?
+
+../common/check_all_lines_matched.pl "$RE_LINE_EMPTY" "List of pre-defined events" "probe:${TEST_PROBE}(?:_\d+)?\s+\[Tracepoint event\]" "Metric Groups:" < $LOGS_DIR/adding_kernel_list.log
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "listing added probe :: perf list"
+(( TEST_RESULT += $? ))
+
+
+### listing added probe :: perf probe -l
+
+# '-l' should list all the added probes as well
+$CMD_PERF probe -l > $LOGS_DIR/adding_kernel_list-l.log
+PERF_EXIT_CODE=$?
+
+../common/check_all_patterns_found.pl "\s*probe:${TEST_PROBE}(?:_\d+)?\s+\(on ${TEST_PROBE}(?:[:\+]$RE_NUMBER_HEX)?@.+\)" < $LOGS_DIR/adding_kernel_list-l.log
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "listing added probe :: perf probe -l"
+(( TEST_RESULT += $? ))
+
+
+### using added probe
+
+$CMD_PERF stat -e probe:$TEST_PROBE\* -o $LOGS_DIR/adding_kernel_using_probe.log -- cat /proc/uptime > /dev/null
+PERF_EXIT_CODE=$?
+
+REGEX_STAT_HEADER="\s*Performance counter stats for \'cat /proc/uptime\':"
+REGEX_STAT_VALUES="\s*\d+\s+probe:$TEST_PROBE"
+# the value should be nonzero
+REGEX_STAT_VALUE_NONZERO="\s*[1-9][0-9]*\s+probe:$TEST_PROBE"
+REGEX_STAT_TIME="\s*$RE_NUMBER\s+seconds (?:time elapsed|user|sys)"
+../common/check_all_lines_matched.pl "$REGEX_STAT_HEADER" "$REGEX_STAT_VALUES" "$REGEX_STAT_TIME" "$RE_LINE_COMMENT" "$RE_LINE_EMPTY" < $LOGS_DIR/adding_kernel_using_probe.log
+CHECK_EXIT_CODE=$?
+../common/check_all_patterns_found.pl "$REGEX_STAT_HEADER" "$REGEX_STAT_VALUE_NONZERO" "$REGEX_STAT_TIME" < $LOGS_DIR/adding_kernel_using_probe.log
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "using added probe"
+(( TEST_RESULT += $? ))
+
+
+### removing added probe
+
+# '-d' should remove the probe
+$CMD_PERF probe -d $TEST_PROBE\* 2> $LOGS_DIR/adding_kernel_removing.err
+PERF_EXIT_CODE=$?
+
+../common/check_all_lines_matched.pl "Removed event: probe:$TEST_PROBE" < $LOGS_DIR/adding_kernel_removing.err
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "deleting added probe"
+(( TEST_RESULT += $? ))
+
+
+### listing removed probe
+
+# removed probes should NOT appear in perf-list output
+$CMD_PERF list probe:\* > $LOGS_DIR/adding_kernel_list_removed.log
+PERF_EXIT_CODE=$?
+
+../common/check_all_lines_matched.pl "$RE_LINE_EMPTY" "List of pre-defined events" "Metric Groups:" < $LOGS_DIR/adding_kernel_list_removed.log
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "listing removed probe (should NOT be listed)"
+(( TEST_RESULT += $? ))
+
+
+### dry run
+
+# the '-n' switch should run it in dry mode
+$CMD_PERF probe -n --add $TEST_PROBE 2> $LOGS_DIR/adding_kernel_dryrun.err
+PERF_EXIT_CODE=$?
+
+# check for the output (should be the same as usual)
+../common/check_all_patterns_found.pl "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE" < $LOGS_DIR/adding_kernel_dryrun.err
+CHECK_EXIT_CODE=$?
+
+# check that no probe was actually added
+! ( $CMD_PERF probe -l | grep "probe:$TEST_PROBE" )
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "dry run :: adding probe"
+(( TEST_RESULT += $? ))
+
+
+### force-adding probes
+
+# when using '--force' a probe should be added even if it is already there
+$CMD_PERF probe --add $TEST_PROBE 2> $LOGS_DIR/adding_kernel_forceadd_01.err
+PERF_EXIT_CODE=$?
+
+../common/check_all_patterns_found.pl "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE" < $LOGS_DIR/adding_kernel_forceadd_01.err
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "force-adding probes :: first probe adding"
+(( TEST_RESULT += $? ))
+
+# adding existing probe without '--force' should fail
+! $CMD_PERF probe --add $TEST_PROBE 2> $LOGS_DIR/adding_kernel_forceadd_02.err
+PERF_EXIT_CODE=$?
+
+../common/check_all_patterns_found.pl "Error: event \"$TEST_PROBE\" already exists." "Error: Failed to add events." < $LOGS_DIR/adding_kernel_forceadd_02.err
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "force-adding probes :: second probe adding (without force)"
+(( TEST_RESULT += $? ))
+
+# adding existing probe with '--force' should pass
+NO_OF_PROBES=`$CMD_PERF probe -l | wc -l`
+$CMD_PERF probe --force --add $TEST_PROBE 2> $LOGS_DIR/adding_kernel_forceadd_03.err
+PERF_EXIT_CODE=$?
+
+../common/check_all_patterns_found.pl "Added new events?:" "probe:${TEST_PROBE}_${NO_OF_PROBES}" "on $TEST_PROBE" < $LOGS_DIR/adding_kernel_forceadd_03.err
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "force-adding probes :: second probe adding (with force)"
+(( TEST_RESULT += $? ))
+
+
+### using doubled probe
+
+# since they are the same, they should produce the same results
+$CMD_PERF stat -e probe:$TEST_PROBE -e probe:${TEST_PROBE}_${NO_OF_PROBES} -x';' -o $LOGS_DIR/adding_kernel_using_two.log -- bash -c 'cat /proc/cpuinfo > /dev/null'
+PERF_EXIT_CODE=$?
+
+REGEX_LINE="$RE_NUMBER;+probe:${TEST_PROBE}_?(?:$NO_OF_PROBES)?;$RE_NUMBER;$RE_NUMBER"
+../common/check_all_lines_matched.pl "$REGEX_LINE" "$RE_LINE_EMPTY" "$RE_LINE_COMMENT" < $LOGS_DIR/adding_kernel_using_two.log
+CHECK_EXIT_CODE=$?
+
+VALUE_1=`grep "$TEST_PROBE;" $LOGS_DIR/adding_kernel_using_two.log | awk -F';' '{print $1}'`
+VALUE_2=`grep "${TEST_PROBE}_${NO_OF_PROBES};" $LOGS_DIR/adding_kernel_using_two.log | awk -F';' '{print $1}'`
+
+test $VALUE_1 -eq $VALUE_2
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "using doubled probe"
+(( TEST_RESULT += $? ))
+
+### removing multiple probes
+
+# using wildcards should remove all matching probes
+$CMD_PERF probe --del \* 2> $LOGS_DIR/adding_kernel_removing_wildcard.err
+PERF_EXIT_CODE=$?
+
+../common/check_all_lines_matched.pl "Removed event: probe:$TEST_PROBE" "Removed event: probe:${TEST_PROBE}_1" < $LOGS_DIR/adding_kernel_removing_wildcard.err
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "removing multiple probes"
+(( TEST_RESULT += $? ))
+
+
+### wildcard adding support
+
+$CMD_PERF probe -nf --max-probes=512 -a 'vfs_* $params' 2> $LOGS_DIR/adding_kernel_adding_wildcard.err
+PERF_EXIT_CODE=$?
+
+../common/check_all_patterns_found.pl "probe:vfs_mknod" "probe:vfs_create" "probe:vfs_rmdir" "probe:vfs_link" "probe:vfs_write" < $LOGS_DIR/adding_kernel_adding_wildcard.err
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "wildcard adding support"
+(( TEST_RESULT += $? ))
+
+
+### non-existing variable
+
+# perf probe should survive a non-existing variable probing attempt
+{ $CMD_PERF probe 'vfs_read somenonexistingrandomstuffwhichisalsoprettylongorevenlongertoexceed64' ; } 2> $LOGS_DIR/adding_kernel_nonexisting.err
+PERF_EXIT_CODE=$?
+
+# the exit code should be neither 0 (unexpected success) nor 139 (segfault)
+test $PERF_EXIT_CODE -ne 139 -a $PERF_EXIT_CODE -ne 0
+PERF_EXIT_CODE=$?
+
+# check that the error message is reasonable
+../common/check_all_patterns_found.pl "Failed to find" "somenonexistingrandomstuffwhichisalsoprettylongorevenlongertoexceed64" < $LOGS_DIR/adding_kernel_nonexisting.err
+CHECK_EXIT_CODE=$?
+../common/check_all_patterns_found.pl "in this function|at this address" "Error" "Failed to add events" < $LOGS_DIR/adding_kernel_nonexisting.err
+(( CHECK_EXIT_CODE += $? ))
+../common/check_all_lines_matched.pl "Failed to find" "Error" "Probe point .+ not found" "optimized out" "Use.+\-\-range option to show.+location range" < $LOGS_DIR/adding_kernel_nonexisting.err
+(( CHECK_EXIT_CODE += $? ))
+../common/check_no_patterns_found.pl "$RE_SEGFAULT" < $LOGS_DIR/adding_kernel_nonexisting.err
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "non-existing variable"
+(( TEST_RESULT += $? ))
+
+
+### function with return value
+
+# adding probe with return value
+$CMD_PERF probe --add "$TEST_PROBE%return \$retval" 2> $LOGS_DIR/adding_kernel_func_retval_add.err
+PERF_EXIT_CODE=$?
+
+../common/check_all_patterns_found.pl "Added new events?:" "probe:$TEST_PROBE" "on $TEST_PROBE%return with \\\$retval" < $LOGS_DIR/adding_kernel_func_retval_add.err
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "function with retval :: add"
+(( TEST_RESULT += $? ))
+
+# recording some data
+$CMD_PERF record -e probe:$TEST_PROBE\* -o $CURRENT_TEST_DIR/perf.data -- cat /proc/cpuinfo > /dev/null 2> $LOGS_DIR/adding_kernel_func_retval_record.err
+PERF_EXIT_CODE=$?
+
+../common/check_all_patterns_found.pl "$RE_LINE_RECORD1" "$RE_LINE_RECORD2" < $LOGS_DIR/adding_kernel_func_retval_record.err
+CHECK_EXIT_CODE=$?
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "function with retval :: record"
+(( TEST_RESULT += $? ))
+
+# perf script should report the function calls with the correct arg values
+$CMD_PERF script -i $CURRENT_TEST_DIR/perf.data > $LOGS_DIR/adding_kernel_func_retval_script.log
+PERF_EXIT_CODE=$?
+
+REGEX_SCRIPT_LINE="\s*cat\s+$RE_NUMBER\s+\[$RE_NUMBER\]\s+$RE_NUMBER:\s+probe:$TEST_PROBE\w*:\s+\($RE_NUMBER_HEX\s+<\-\s+$RE_NUMBER_HEX\)\s+arg1=$RE_NUMBER_HEX"
+../common/check_all_lines_matched.pl "$REGEX_SCRIPT_LINE" < $LOGS_DIR/adding_kernel_func_retval_script.log
+CHECK_EXIT_CODE=$?
+../common/check_all_patterns_found.pl "$REGEX_SCRIPT_LINE" < $LOGS_DIR/adding_kernel_func_retval_script.log
+(( CHECK_EXIT_CODE += $? ))
+
+print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "function with retval :: script"
+(( TEST_RESULT += $? ))
+
+
+clear_all_probes
+
+# print overall results
+print_overall_results "$TEST_RESULT"
+exit $?
diff --git a/tools/perf/tests/shell/common/check_all_lines_matched.pl b/tools/perf/tests/shell/common/check_all_lines_matched.pl
new file mode 100755 (executable)
index 0000000..fded489
--- /dev/null
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+# SPDX-License-Identifier: GPL-2.0
+
+@regexps = @ARGV;
+
+$max_printed_lines = 20;
+$max_printed_lines = $ENV{TESTLOG_ERR_MSG_MAX_LINES} if (defined $ENV{TESTLOG_ERR_MSG_MAX_LINES});
+
+$quiet = 1;
+$quiet = 0 if (defined $ENV{TESTLOG_VERBOSITY} && $ENV{TESTLOG_VERBOSITY} ge 2);
+
+$passed = 1;
+$lines_printed = 0;
+
+while (<STDIN>)
+{
+       s/\n//;
+
+       $line_matched = 0;
+       for $r (@regexps)
+       {
+               if (/$r/)
+               {
+                       $line_matched = 1;
+                       last;
+               }
+       }
+
+       unless ($line_matched)
+       {
+               if ($lines_printed++ < $max_printed_lines)
+               {
+                       print "Line did not match any pattern: \"$_\"\n" unless $quiet;
+               }
+               $passed = 0;
+       }
+}
+
+exit ($passed == 0);
diff --git a/tools/perf/tests/shell/common/check_all_patterns_found.pl b/tools/perf/tests/shell/common/check_all_patterns_found.pl
new file mode 100755 (executable)
index 0000000..11bdf1d
--- /dev/null
@@ -0,0 +1,34 @@
+#!/usr/bin/perl
+# SPDX-License-Identifier: GPL-2.0
+
+@regexps = @ARGV;
+
+$quiet = 1;
+$quiet = 0 if (defined $ENV{TESTLOG_VERBOSITY} && $ENV{TESTLOG_VERBOSITY} ge 2);
+
+%found = ();
+$passed = 1;
+
+while (<STDIN>)
+{
+       s/\n//;
+
+       for $r (@regexps)
+       {
+               if (/$r/)
+               {
+                       $found{$r} = 1; # FIXME: maybe add counters -- how many times was the regexp matched
+               }
+       }
+}
+
+for $r (@regexps)
+{
+       unless (exists $found{$r})
+       {
+               print "Regexp not found: \"$r\"\n" unless $quiet;
+               $passed = 0;
+       }
+}
+
+exit ($passed == 0);
diff --git a/tools/perf/tests/shell/common/check_no_patterns_found.pl b/tools/perf/tests/shell/common/check_no_patterns_found.pl
new file mode 100755 (executable)
index 0000000..770999e
--- /dev/null
@@ -0,0 +1,34 @@
+#!/usr/bin/perl
+# SPDX-License-Identifier: GPL-2.0
+
+@regexps = @ARGV;
+
+$quiet = 1;
+$quiet = 0 if (defined $ENV{TESTLOG_VERBOSITY} && $ENV{TESTLOG_VERBOSITY} ge 2);
+
+%found = ();
+$passed = 1;
+
+while (<STDIN>)
+{
+       s/\n//;
+
+       for $r (@regexps)
+       {
+               if (/$r/)
+               {
+                       $found{$r} = 1;
+               }
+       }
+}
+
+for $r (@regexps)
+{
+       if (exists $found{$r})
+       {
+               print "Regexp found: \"$r\"\n" unless $quiet;
+               $passed = 0;
+       }
+}
+
+exit ($passed == 0);
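The three Perl checkers above are variations on one loop: stream stdin, test
each line against the regexps given in argv, and exit non-zero on the relevant
failure condition (a line matching nothing, a pattern never seen, or a
forbidden pattern seen). For illustration, here is a C analogue of
check_all_lines_matched.pl using POSIX regcomp()/regexec(); note that the
patterns in patterns.sh below use Perl syntax such as \d and (?:...) that
POSIX ERE does not accept, so this sketch demonstrates the control flow only,
not a drop-in replacement:

    #include <regex.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(int argc, char **argv)
    {
            int npat = argc - 1, passed = 1;
            regex_t *res = calloc(npat ? npat : 1, sizeof(*res));
            char line[4096];

            if (!res)
                    return 2;
            for (int i = 0; i < npat; i++) {
                    if (regcomp(&res[i], argv[i + 1], REG_EXTENDED | REG_NOSUB))
                            return 2;       /* bad pattern */
            }
            while (fgets(line, sizeof(line), stdin)) {
                    int matched = 0;

                    line[strcspn(line, "\n")] = '\0';
                    for (int i = 0; i < npat && !matched; i++)
                            matched = regexec(&res[i], line, 0, NULL, 0) == 0;
                    if (!matched) {
                            fprintf(stderr, "Line did not match any pattern: \"%s\"\n", line);
                            passed = 0;
                    }
            }
            return passed ? 0 : 1;
    }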
diff --git a/tools/perf/tests/shell/common/init.sh b/tools/perf/tests/shell/common/init.sh
new file mode 100644 (file)
index 0000000..aadeaf7
--- /dev/null
@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+#      init.sh
+#      Author: Michael Petlan <mpetlan@redhat.com>
+#
+#      Description:
+#
+#              This file should be used for initialization of basic functions
+#      for checking, reporting results etc.
+#
+#
+
+
+. ../common/settings.sh
+. ../common/patterns.sh
+
+THIS_TEST_NAME=`basename $0 .sh`
+
+_echo()
+{
+       test "$TESTLOG_VERBOSITY" -ne 0 && echo -e "$@"
+}
+
+print_results()
+{
+       PERF_RETVAL="$1"; shift
+       CHECK_RETVAL="$1"; shift
+       FAILURE_REASON=""
+       TASK_COMMENT="$@"
+       if [ $PERF_RETVAL -eq 0 -a $CHECK_RETVAL -eq 0 ]; then
+               _echo "$MPASS-- [ PASS ] --$MEND $TEST_NAME :: $THIS_TEST_NAME :: $TASK_COMMENT"
+               return 0
+       else
+               if [ $PERF_RETVAL -ne 0 ]; then
+                       FAILURE_REASON="command exitcode"
+               fi
+               if [ $CHECK_RETVAL -ne 0 ]; then
+                       test -n "$FAILURE_REASON" && FAILURE_REASON="$FAILURE_REASON + "
+                       FAILURE_REASON="$FAILURE_REASON""output regexp parsing"
+               fi
+               _echo "$MFAIL-- [ FAIL ] --$MEND $TEST_NAME :: $THIS_TEST_NAME :: $TASK_COMMENT ($FAILURE_REASON)"
+               return 1
+       fi
+}
+
+print_overall_results()
+{
+       RETVAL="$1"; shift
+       if [ $RETVAL -eq 0 ]; then
+               _echo "$MALLPASS## [ PASS ] ##$MEND $TEST_NAME :: $THIS_TEST_NAME SUMMARY"
+       else
+               _echo "$MALLFAIL## [ FAIL ] ##$MEND $TEST_NAME :: $THIS_TEST_NAME SUMMARY :: $RETVAL failures found"
+       fi
+       return $RETVAL
+}
+
+print_testcase_skipped()
+{
+       TASK_COMMENT="$@"
+       _echo "$MSKIP-- [ SKIP ] --$MEND $TEST_NAME :: $THIS_TEST_NAME :: $TASK_COMMENT :: testcase skipped"
+       return 0
+}
+
+print_overall_skipped()
+{
+       _echo "$MSKIP## [ SKIP ] ##$MEND $TEST_NAME :: $THIS_TEST_NAME :: testcase skipped"
+       return 0
+}
+
+print_warning()
+{
+       WARN_COMMENT="$@"
+       _echo "$MWARN-- [ WARN ] --$MEND $TEST_NAME :: $THIS_TEST_NAME :: $WARN_COMMENT"
+       return 0
+}
+
+# this function should skip a testcase if the testsuite is not run in
+# a runmode that fits the testcase --> if the suite runs in BASIC mode
+# all STANDARD and EXPERIMENTAL testcases will be skipped; if the suite
+# runs in STANDARD mode, all EXPERIMENTAL testcases will be skipped and
+# if the suite runs in EXPERIMENTAL mode, nothing is skipped
+consider_skipping()
+{
+       TESTCASE_RUNMODE="$1"
+       # the runmode of a testcase needs to be at least the current suite's runmode
+       if [ $PERFTOOL_TESTSUITE_RUNMODE -lt $TESTCASE_RUNMODE ]; then
+               print_overall_skipped
+               exit 0
+       fi
+}
+
+detect_baremetal()
+{
+       # return values:
+       # 0 = bare metal
+       # 1 = virtualization detected
+       # 2 = unknown state
+       VIRT=`systemd-detect-virt 2>/dev/null`
+       test $? -eq 127 && return 2
+       test "$VIRT" = "none"
+}
+
+detect_intel()
+{
+       # return values:
+       # 0 = is Intel
+       # 1 = is not Intel or unknown
+       grep "vendor_id" < /proc/cpuinfo | grep -q "GenuineIntel"
+}
+
+detect_amd()
+{
+       # return values:
+       # 0 = is AMD
+       # 1 = is not AMD or unknown
+       grep "vendor_id" < /proc/cpuinfo | grep -q "AMD"
+}
diff --git a/tools/perf/tests/shell/common/patterns.sh b/tools/perf/tests/shell/common/patterns.sh
new file mode 100644 (file)
index 0000000..21dab25
--- /dev/null
@@ -0,0 +1,268 @@
+# SPDX-License-Identifier: GPL-2.0
+
+export RE_NUMBER="[0-9\.]+"
+# Number
+# Examples:
+#    123.456
+
+
+export RE_NUMBER_HEX="[0-9A-Fa-f]+"
+# Hexadecimal number
+# Examples:
+#    1234
+#    a58d
+#    aBcD
+#    deadbeef
+
+
+export RE_DATE_YYYYMMDD="[0-9]{4}-(?:(?:01|03|05|07|08|10|12)-(?:[0-2][0-9]|3[0-1])|02-[0-2][0-9]|(?:(?:04|06|09|11)-(?:[0-2][0-9]|30)))"
+# Date in YYYY-MM-DD form
+# Examples:
+#    1990-02-29
+#    0015-07-31
+#    2456-12-31
+#!   2012-13-01
+#!   1963-09-31
+
+
+export RE_TIME="(?:[0-1][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]"
+# Time
+# Examples:
+#    15:12:27
+#    23:59:59
+#!   24:00:00
+#!   11:25:60
+#!   17:60:15
+
+
+export RE_DATE_TIME="\w+\s+\w+\s+$RE_NUMBER\s+$RE_TIME\s+$RE_NUMBER"
+# Time and date
+# Examples:
+#    Wed Feb 12 10:46:26 2020
+#    Mon Mar  2 13:27:06 2020
+#!   St úno 12 10:57:21 CET 2020
+#!   Po úno 14 15:17:32 2010
+
+
+export RE_ADDRESS="0x$RE_NUMBER_HEX"
+# Memory address
+# Examples:
+#    0x123abc
+#    0xffffffff9abe8ae8
+#    0x0
+
+
+export RE_ADDRESS_NOT_NULL="0x[0-9A-Fa-f]*[1-9A-Fa-f]+[0-9A-Fa-f]*"
+# Memory address (not NULL)
+# Examples:
+#    0xffffffff9abe8ae8
+#!   0x0
+#!   0x0000000000000000
+
+export RE_PROCESS_PID="[^\/]+\/\d+"
+# A process with PID
+# Examples:
+#    sleep/4102
+#    test_overhead./866185
+#    in:imjournal/1096
+#    random#$& test/866607
+
+export RE_EVENT_ANY="[\w\-\:\/_=,]+"
+# Name of any event (universal)
+# Examples:
+#    cpu-cycles
+#    cpu/event=12,umask=34/
+#    r41e1
+#    nfs:nfs_getattr_enter
+
+
+export RE_EVENT="[\w\-:_]+"
+# Name of a usual event
+# Examples:
+#    cpu-cycles
+
+
+export RE_EVENT_RAW="r$RE_NUMBER_HEX"
+# Specification of a raw event
+# Examples:
+#    r41e1
+#    r1a
+
+
+export RE_EVENT_CPU="cpu/(\w+=$RE_NUMBER_HEX,?)+/p*"
+# Specification of a CPU event
+# Examples:
+#    cpu/event=12,umask=34/pp
+
+
+export RE_EVENT_UNCORE="uncore/[\w_]+/"
+# Specification of an uncore event
+# Examples:
+#    uncore/qhl_request_local_reads/
+
+
+export RE_EVENT_SUBSYSTEM="[\w\-]+:[\w\-]+"
+# Name of an event from subsystem
+# Examples:
+#    ext4:ext4_ordered_write_end
+#    sched:sched_switch
+
+
+export RE_FILE_NAME="[\w\+\.-]+"
+# A filename
+# Examples:
+#    libstdc++.so.6
+#!   some/path
+
+
+export RE_PATH_ABSOLUTE="(?:\/$RE_FILE_NAME)+"
+# A full filepath
+# Examples:
+#    /usr/lib64/somelib.so.5.4.0
+#    /lib/modules/4.3.0-rc5/kernel/fs/xfs/xfs.ko
+#    /usr/bin/mv
+#!   some/relative/path
+#!   ./some/relative/path
+
+
+export RE_PATH="(?:$RE_FILE_NAME)?$RE_PATH_ABSOLUTE"
+# A filepath
+# Examples:
+#    /usr/lib64/somelib.so.5.4.0
+#    /lib/modules/4.3.0-rc5/kernel/fs/xfs/xfs.ko
+#    ./.emacs
+#    src/fs/file.c
+
+
+export RE_DSO="(?:$RE_PATH_ABSOLUTE(?: \(deleted\))?|\[kernel\.kallsyms\]|\[unknown\]|\[vdso\]|\[kernel\.vmlinux\][\.\w]*)"
+# A DSO name in various result tables
+# Examples:
+#    /usr/lib64/somelib.so.5.4.0
+#    /usr/bin/somebinary (deleted)
+#    /lib/modules/4.3.0-rc5/kernel/fs/xfs/xfs.ko
+#    [kernel.kallsyms]
+#    [kernel.vmlinux]
+#    [vdso]
+#    [unknown]
+
+
+export RE_LINE_COMMENT="^#.*"
+# A comment line
+# Examples:
+#    # Started on Thu Sep 10 11:43:00 2015
+
+
+export RE_LINE_EMPTY="^\s*$"
+# An empty line with possible whitespaces
+# Examples:
+#
+
+
+export RE_LINE_RECORD1="^\[\s+perf\s+record:\s+Woken up $RE_NUMBER times? to write data\s+\].*$"
+# The first line of perf-record "OK" output
+# Examples:
+#    [ perf record: Woken up 1 times to write data ]
+
+
+export RE_LINE_RECORD2="^\[\s+perf\s+record:\s+Captured and wrote $RE_NUMBER\s*MB\s+(?:[\w\+\.-]*(?:$RE_PATH)?\/)?perf\.data(?:\.\d+)?\s*\(~?$RE_NUMBER samples\)\s+\].*$"
+# The second line of perf-record "OK" output
+# Examples:
+#    [ perf record: Captured and wrote 0.405 MB perf.data (109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB perf.data (~109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB /some/temp/dir/perf.data (109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB ./perf.data (109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB ./perf.data.3 (109 samples) ]
+
+
+export RE_LINE_RECORD2_TOLERANT="^\[\s+perf\s+record:\s+Captured and wrote $RE_NUMBER\s*MB\s+(?:[\w\+\.-]*(?:$RE_PATH)?\/)?perf\.data(?:\.\d+)?\s*(?:\(~?$RE_NUMBER samples\))?\s+\].*$"
+# The second line of perf-record "OK" output, even no samples is OK here
+# Examples:
+#    [ perf record: Captured and wrote 0.405 MB perf.data (109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB perf.data (~109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB /some/temp/dir/perf.data (109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB ./perf.data (109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB ./perf.data.3 (109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB perf.data ]
+
+
+export RE_LINE_RECORD2_TOLERANT_FILENAME="^\[\s+perf\s+record:\s+Captured and wrote $RE_NUMBER\s*MB\s+(?:[\w\+\.-]*(?:$RE_PATH)?\/)?perf\w*\.data(?:\.\d+)?\s*\(~?$RE_NUMBER samples\)\s+\].*$"
+# The second line of perf-record "OK" output
+# Examples:
+#    [ perf record: Captured and wrote 0.405 MB perf.data (109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB perf_ls.data (~109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB perf_aNyCaSe.data (109 samples) ]
+#    [ perf record: Captured and wrote 0.405 MB ./perfdata.data.3 (109 samples) ]
+#!    [ perf record: Captured and wrote 0.405 MB /some/temp/dir/my_own.data (109 samples) ]
+#!    [ perf record: Captured and wrote 0.405 MB ./UPPERCASE.data (109 samples) ]
+#!    [ perf record: Captured and wrote 0.405 MB ./aNyKiNDoF.data.3 (109 samples) ]
+#!    [ perf record: Captured and wrote 0.405 MB perf.data ]
+
+
+export RE_LINE_TRACE_FULL="^\s*$RE_NUMBER\s*\(\s*$RE_NUMBER\s*ms\s*\):\s*$RE_PROCESS_PID\s+.*\)\s+=\s+(:?\-?$RE_NUMBER|0x$RE_NUMBER_HEX).*$"
+# A line of perf-trace output
+# Examples:
+#    0.115 ( 0.005 ms): sleep/4102 open(filename: 0xd09e2ab2, flags: CLOEXEC                             ) = 3
+#    0.157 ( 0.005 ms): sleep/4102 mmap(len: 3932736, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3   ) = 0x7f89d0605000
+#!    0.115 ( 0.005 ms): sleep/4102 open(filename: 0xd09e2ab2, flags: CLOEXEC                             ) =
+
+export RE_LINE_TRACE_ONE_PROC="^\s*$RE_NUMBER\s*\(\s*$RE_NUMBER\s*ms\s*\):\s*\w+\(.*\)\s+=\s+(?:\-?$RE_NUMBER|0x$RE_NUMBER_HEX).*$"
+# A line of perf-trace output
+# Examples:
+#    0.115 ( 0.005 ms): open(filename: 0xd09e2ab2, flags: CLOEXEC                             ) = 3
+#    0.157 ( 0.005 ms): mmap(len: 3932736, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3   ) = 0x7f89d0605000
+#!    0.115 ( 0.005 ms): open(filename: 0xd09e2ab2, flags: CLOEXEC                             ) =
+
+export RE_LINE_TRACE_CONTINUED="^\s*(:?$RE_NUMBER|\?)\s*\(\s*($RE_NUMBER\s*ms\s*)?\):\s*($RE_PROCESS_PID\s*)?\.\.\.\s*\[continued\]:\s+\w+\(\).*\s+=\s+(?:\-?$RE_NUMBER|0x$RE_NUMBER_HEX).*$"
+# A line of perf-trace output
+# Examples:
+#    0.000 ( 0.000 ms):  ... [continued]: nanosleep()) = 0
+#    0.000 ( 0.000 ms):  ... [continued]: nanosleep()) = 0x00000000
+#    ? (         ): packagekitd/94838  ... [continued]: poll())                                             = 0 (Timeout)
+#!    0.000 ( 0.000 ms):  ... [continued]: nanosleep()) =
+
+export RE_LINE_TRACE_UNFINISHED="^\s*$RE_NUMBER\s*\(\s*\):\s*$RE_PROCESS_PID\s+.*\)\s+\.\.\.\s*$"
+# A line of perf-trace output
+# Examples:
+#    901.040 (         ): in:imjournal/1096 ppoll(ufds: 0x7f701a5adb70, nfds: 1, tsp: 0x7f701a5adaf0, sigsetsize: 8) ...
+#    613.727 (         ): gmain/1099 poll(ufds: 0x56248f6b64b0, nfds: 2, timeout_msecs: 3996)           ...
+
+export RE_LINE_TRACE_SUMMARY_HEADER="\s*syscall\s+calls\s+(?:errors\s+)?total\s+min\s+avg\s+max\s+stddev"
+# A header of a perf-trace summary table
+# Examples:
+#    syscall            calls    total       min       avg       max      stddev
+#    syscall            calls  errors  total       min       avg       max       stddev
+
+
+export RE_LINE_TRACE_SUMMARY_CONTENT="^\s*\w+\s+(?:$RE_NUMBER\s+){5,6}$RE_NUMBER%"
+# A line of a perf-trace summary table
+# Examples:
+#    open                   3     0.017     0.005     0.006     0.007     10.90%
+#    openat                 2      0     0.017     0.008     0.009     0.010     12.29%
+
+
+export RE_LINE_REPORT_CONTENT="^\s+$RE_NUMBER%\s+\w+\s+\S+\s+\S+\s+\S+" # FIXME
+# A line from typical perf report --stdio output
+# Example:
+#     100.00%  sleep    [kernel.vmlinux]  [k] syscall_return_slowpath
+
+
+export RE_TASK="\s+[\w~\/ \.\+:#-]+(?:\[-1(?:\/\d+)?\]|\[\d+(?:\/\d+)?\])"
+# A name of a task used for perf sched timehist -s
+# Examples:
+#     sleep[62755]
+#     runtest.sh[62762]
+#     gmain[705/682]
+#     xfsaild/dm-0[495]
+#     kworker/u8:1-ev[62714]
+#     :-1[-1/62756]
+#     :-1[-1]
+#     :-1[62756]
+
+
+export RE_SEGFAULT=".*(?:Segmentation\sfault|SIGSEGV|\score\s|dumped|segfault).*"
+# Possible variations of the segfault message
+# Examples:
+#     /bin/bash: line 1:    32 Segmentation fault      timeout 15s
+#     Segmentation fault (core dumped)
+#     Program terminated with signal SIGSEGV
+#!     WARNING: 12323431 isn't a 'cpu_core', please use a CPU list in the 'cpu_core' range (0-15)
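
A test consuming these exported patterns might check perf-record output along these lines (an illustrative sketch, not part of the patch; the sourced file name and the use of grep -P are assumptions, PCRE being implied by the (?:...) constructs):

    . ../common/patterns.sh        # assumed location of the RE_* definitions above
    output=$(perf record -o perf.data true 2>&1)
    echo "$output" | grep -P -q "$RE_LINE_RECORD2" \
        && echo "record output OK" \
        || { echo "record output malformed"; exit 1; }
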
diff --git a/tools/perf/tests/shell/common/settings.sh b/tools/perf/tests/shell/common/settings.sh
new file mode 100644 (file)
index 0000000..361641d
--- /dev/null
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+#      settings.sh
+#      Author: Michael Petlan <mpetlan@redhat.com>
+#
+#      Description:
+#
+#              This file contains global settings for the whole testsuite.
+#      Its purpose is to make it easy to change settings used across
+#      many test files in one place, e.g. the usual sample command
+#      which is used in all of the tests.
+#
+#              This file is intended to be sourced in the tests.
+#
+
+#### which perf to use in the testing
+export CMD_PERF=${CMD_PERF:-`which perf`}
+
+#### basic programs examined by perf
+export CMD_BASIC_SLEEP="sleep 0.1"
+export CMD_QUICK_SLEEP="sleep 0.01"
+export CMD_LONGER_SLEEP="sleep 2"
+export CMD_DOUBLE_LONGER_SLEEP="sleep 4"
+export CMD_VERY_LONG_SLEEP="sleep 30"
+export CMD_SIMPLE="true"
+
+#### testsuite run mode
+# define constants:
+export RUNMODE_BASIC=0
+export RUNMODE_STANDARD=1
+export RUNMODE_EXPERIMENTAL=2
+# default runmode is STANDARD
+export PERFTOOL_TESTSUITE_RUNMODE=${PERFTOOL_TESTSUITE_RUNMODE:-$RUNMODE_STANDARD}
+
+#### common settings
+export TESTLOG_VERBOSITY=${TESTLOG_VERBOSITY:-2}
+export TESTLOG_FORCE_COLOR=${TESTLOG_FORCE_COLOR:-n}
+export TESTLOG_ERR_MSG_MAX_LINES=${TESTLOG_ERR_MSG_MAX_LINES:-20}
+export TESTLOG_CLEAN=${TESTLOG_CLEAN:-y}
+
+#### other environment-related settings
+export TEST_IGNORE_MISSING_PMU=${TEST_IGNORE_MISSING_PMU:-n}
+
+#### clear locale
+export LC_ALL=C
+
+#### colors
+if [ -t 1 -o "$TESTLOG_FORCE_COLOR" = "y" ]; then
+       export MPASS="\e[32m"
+       export MALLPASS="\e[1;32m"
+       export MFAIL="\e[31m"
+       export MALLFAIL="\e[1;31m"
+       export MWARN="\e[1;35m"
+       export MSKIP="\e[33m"
+       export MHIGH="\e[1;33m"
+       export MEND="\e[m"
+else
+       export MPASS=""
+       export MALLPASS=""
+       export MFAIL=""
+       export MALLFAIL=""
+       export MWARN=""
+       export MSKIP=""
+       export MHIGH=""
+       export MEND=""
+fi
+
+
+#### test parametrization
+if [ ! -d ./common ]; then
+       # set parameters based on runmode
+       if [ -f ../common/parametrization.$PERFTOOL_TESTSUITE_RUNMODE.sh ]; then
+               . ../common/parametrization.$PERFTOOL_TESTSUITE_RUNMODE.sh
+       fi
+       # if some parameters haven't been set until now, set them to default
+       if [ -f ../common/parametrization.sh ]; then
+               . ../common/parametrization.sh
+       fi
+fi
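
A parametrization file consumed by the block above might look like this minimal sketch (the variable name is hypothetical); because the generic parametrization.sh also uses the ${VAR:-default} form, whatever the runmode-specific file sets here survives the second sourcing:

    # ../common/parametrization.2.sh -- overrides for RUNMODE_EXPERIMENTAL
    export PARAM_GENERAL_SAMPLE_COMMAND=${PARAM_GENERAL_SAMPLE_COMMAND:-"$CMD_LONGER_SLEEP"}
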
index 5d59c32ae3e7ba63350e431c5d0bbfa29734853c..561c93b75d77a57a28531a60d91d833cd689f24a 100644 (file)
@@ -3,7 +3,7 @@
 
 perf_has_symbol()
 {
-       if perf test -vv "Symbols" 2>&1 | grep "[[:space:]]$1$"; then
+       if perf test -vv -F "Symbols" 2>&1 | grep "[[:space:]]$1$"; then
                echo "perf does have symbol '$1'"
                return 0
        fi
index ea55d5ea1ced45d9734c2a78884efdd0518ca581..abc1fd737782291304aef0c04ef7c161b5adbba4 100644 (file)
@@ -15,6 +15,7 @@ ap.add_argument('--event', action='store_true')
 ap.add_argument('--per-core', action='store_true')
 ap.add_argument('--per-thread', action='store_true')
 ap.add_argument('--per-cache', action='store_true')
+ap.add_argument('--per-cluster', action='store_true')
 ap.add_argument('--per-die', action='store_true')
 ap.add_argument('--per-node', action='store_true')
 ap.add_argument('--per-socket', action='store_true')
@@ -49,6 +50,7 @@ def check_json_output(expected_items):
       'cgroup': lambda x: True,
       'cpu': lambda x: isint(x),
       'cache': lambda x: True,
+      'cluster': lambda x: True,
       'die': lambda x: True,
       'event': lambda x: True,
       'event-runtime': lambda x: isfloat(x),
@@ -88,7 +90,7 @@ try:
     expected_items = 7
   elif args.interval or args.per_thread or args.system_wide_no_aggr:
     expected_items = 8
-  elif args.per_core or args.per_socket or args.per_node or args.per_die or args.per_cache:
+  elif args.per_core or args.per_socket or args.per_node or args.per_die or args.per_cluster or args.per_cache:
     expected_items = 9
   else:
     # If no option is specified, don't check the number of items.
index 50a34a9cc04004a9aebba708ab1bb9437b315cbe..a2d235252183b5516c78603bf00e14733585605a 100644 (file)
@@ -1,4 +1,4 @@
-#SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0
 import re
 import csv
 import json
@@ -6,36 +6,61 @@ import argparse
 from pathlib import Path
 import subprocess
 
+
+class TestError:
+    def __init__(self, metric: list[str], wl: str, value: list[float], low: float, up=float('nan'), description=str()):
+        self.metric: list = metric  # multiple metrics in relationship type tests
+        self.workloads = [wl]  # multiple workloads possible
+        self.collectedValue: list = value
+        self.valueLowBound = low
+        self.valueUpBound = up
+        self.description = description
+
+    def __repr__(self) -> str:
+        if len(self.metric) > 1:
+            return "\nMetric Relationship Error: \tThe collected value of metric {0}\n\
+                \tis {1} in workload(s): {2} \n\
+                \tbut expected value range is [{3}, {4}]\n\
+                \tRelationship rule description: \'{5}\'".format(self.metric, self.collectedValue, self.workloads,
+                                                                 self.valueLowBound, self.valueUpBound, self.description)
+        elif len(self.collectedValue) == 0:
+            return "\nNo Metric Value Error: \tMetric {0} returns with no value \n\
+                    \tworkload(s): {1}".format(self.metric, self.workloads)
+        else:
+            return "\nWrong Metric Value Error: \tThe collected value of metric {0}\n\
+                    \tis {1} in workload(s): {2}\n\
+                    \tbut expected value range is [{3}, {4}]"\
+                        .format(self.metric, self.collectedValue, self.workloads,
+                                self.valueLowBound, self.valueUpBound)
+
+
 class Validator:
     def __init__(self, rulefname, reportfname='', t=5, debug=False, datafname='', fullrulefname='', workload='true', metrics=''):
         self.rulefname = rulefname
         self.reportfname = reportfname
         self.rules = None
-        self.collectlist:str = metrics
+        self.collectlist: str = metrics
         self.metrics = self.__set_metrics(metrics)
         self.skiplist = set()
         self.tolerance = t
 
         self.workloads = [x for x in workload.split(",") if x]
-        self.wlidx = 0 # idx of current workloads
-        self.allresults = dict() # metric results of all workload
-        self.allignoremetrics = dict() # metrics with no results or negative results
-        self.allfailtests = dict()
+        self.wlidx = 0  # idx of the current workload
+        self.allresults = dict()  # metric results of all workloads
         self.alltotalcnt = dict()
         self.allpassedcnt = dict()
-        self.allerrlist = dict()
 
-        self.results = dict() # metric results of current workload
+        self.results = dict()  # metric results of current workload
         # vars for test pass/failure statistics
-        self.ignoremetrics= set() # metrics with no results or negative results, neg result counts as a failed test
-        self.failtests = dict()
+        # metrics with no results or negative results; a negative result counts as a failed test
+        self.ignoremetrics = set()
         self.totalcnt = 0
         self.passedcnt = 0
         # vars for errors
         self.errlist = list()
 
         # vars for Rule Generator
-        self.pctgmetrics = set() # Percentage rule
+        self.pctgmetrics = set()  # Percentage rule
 
         # vars for debug
         self.datafname = datafname
@@ -69,10 +94,10 @@ class Validator:
                       ensure_ascii=True,
                       indent=4)
 
-    def get_results(self, idx:int = 0):
+    def get_results(self, idx: int = 0):
         return self.results[idx]
 
-    def get_bounds(self, lb, ub, error, alias={}, ridx:int = 0) -> list:
+    def get_bounds(self, lb, ub, error, alias={}, ridx: int = 0) -> list:
         """
         Get bounds and tolerance from lb, ub, and error.
         If missing lb, use 0.0; missing ub, use float('inf'); missing error, use self.tolerance.
@@ -85,7 +110,7 @@ class Validator:
                   tolerance, denormalized based on the upper bound value
         """
         # init ubv and lbv to invalid values
-        def get_bound_value (bound, initval, ridx):
+        def get_bound_value(bound, initval, ridx):
             val = initval
             if isinstance(bound, int) or isinstance(bound, float):
                 val = bound
@@ -113,10 +138,10 @@ class Validator:
 
         return lbv, ubv, denormerr
 
-    def get_value(self, name:str, ridx:int = 0) -> list:
+    def get_value(self, name: str, ridx: int = 0) -> list:
         """
         Get value of the metric from self.results.
-        If result of this metric is not provided, the metric name will be added into self.ignoremetics and self.errlist.
+        If the result of this metric is not provided, the metric name will be added into self.ignoremetrics.
         All future test(s) on this metric will fail.
 
         @param name: name of the metric
@@ -142,7 +167,7 @@ class Validator:
         Check if metrics value are non-negative.
         One metric is counted as one test.
         Failure: when metric value is negative or not provided.
-        Metrics with negative value will be added into the self.failtests['PositiveValueTest'] and self.ignoremetrics.
+        Metrics with negative value will be added into self.ignoremetrics.
         """
         negmetric = dict()
         pcnt = 0
@@ -155,25 +180,27 @@ class Validator:
             else:
                 pcnt += 1
             tcnt += 1
+        # The first collect_perf() round runs these metrics with the simple
+        # workload "true". Give the metrics a second chance with a longer
+        # workload if fewer than 20 metrics failed the positive test.
         if len(rerun) > 0 and len(rerun) < 20:
             second_results = dict()
             self.second_test(rerun, second_results)
             for name, val in second_results.items():
-                if name not in negmetric: continue
+                if name not in negmetric:
+                    continue
                 if val >= 0:
                     del negmetric[name]
                     pcnt += 1
 
-        self.failtests['PositiveValueTest']['Total Tests'] = tcnt
-        self.failtests['PositiveValueTest']['Passed Tests'] = pcnt
         if len(negmetric.keys()):
             self.ignoremetrics.update(negmetric.keys())
-            negmessage = ["{0}(={1:.4f})".format(name, val) for name, val in negmetric.items()]
-            self.failtests['PositiveValueTest']['Failed Tests'].append({'NegativeValue': negmessage})
+            self.errlist.extend(
+                [TestError([m], self.workloads[self.wlidx], negmetric[m], 0) for m in negmetric.keys()])
 
         return
 
-    def evaluate_formula(self, formula:str, alias:dict, ridx:int = 0):
+    def evaluate_formula(self, formula: str, alias: dict, ridx: int = 0):
         """
         Evaluate the value of formula.
 
@@ -187,10 +214,11 @@ class Validator:
         sign = "+"
         f = str()
 
-        #TODO: support parenthesis?
+        # TODO: support parenthesis?
         for i in range(len(formula)):
             if i+1 == len(formula) or formula[i] in ('+', '-', '*', '/'):
-                s = alias[formula[b:i]] if i+1 < len(formula) else alias[formula[b:]]
+                s = alias[formula[b:i]] if i + 1 < len(formula) \
+                    else alias[formula[b:]]
                 v = self.get_value(s, ridx)
                 if not v:
                     errs.append(s)
@@ -228,49 +256,49 @@ class Validator:
         alias = dict()
         for m in rule['Metrics']:
             alias[m['Alias']] = m['Name']
-        lbv, ubv, t = self.get_bounds(rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'], alias, ridx=rule['RuleIndex'])
-        val, f = self.evaluate_formula(rule['Formula'], alias, ridx=rule['RuleIndex'])
+        lbv, ubv, t = self.get_bounds(
+            rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'], alias, ridx=rule['RuleIndex'])
+        val, f = self.evaluate_formula(
+            rule['Formula'], alias, ridx=rule['RuleIndex'])
+
+        lb = rule['RangeLower']
+        ub = rule['RangeUpper']
+        if isinstance(lb, str):
+            if lb in alias:
+                lb = alias[lb]
+        if isinstance(ub, str):
+            if ub in alias:
+                ub = alias[ub]
+
         if val == -1:
-            self.failtests['RelationshipTest']['Failed Tests'].append({'RuleIndex': rule['RuleIndex'], 'Description':f})
+            self.errlist.append(TestError([m['Name'] for m in rule['Metrics']], self.workloads[self.wlidx], [],
+                                lb, ub, rule['Description']))
         elif not self.check_bound(val, lbv, ubv, t):
-            lb = rule['RangeLower']
-            ub = rule['RangeUpper']
-            if isinstance(lb, str):
-                if lb in alias:
-                    lb = alias[lb]
-            if isinstance(ub, str):
-                if ub in alias:
-                    ub = alias[ub]
-            self.failtests['RelationshipTest']['Failed Tests'].append({'RuleIndex': rule['RuleIndex'], 'Formula':f,
-                                                                       'RangeLower': lb, 'LowerBoundValue': self.get_value(lb),
-                                                                       'RangeUpper': ub, 'UpperBoundValue':self.get_value(ub),
-                                                                       'ErrorThreshold': t, 'CollectedValue': val})
+            self.errlist.append(TestError([m['Name'] for m in rule['Metrics']], self.workloads[self.wlidx], [val],
+                                lb, ub, rule['Description']))
         else:
             self.passedcnt += 1
-            self.failtests['RelationshipTest']['Passed Tests'] += 1
         self.totalcnt += 1
-        self.failtests['RelationshipTest']['Total Tests'] += 1
 
         return
 
-
     # Single Metric Test
-    def single_test(self, rule:dict):
+    def single_test(self, rule: dict):
         """
         Validate if the metrics are in the required value range.
         eg. lower_bound <= metrics_value <= upper_bound
         One metric is counted as one test in this type of test.
         One rule may include one or more metrics.
         Failure: when the metric value not provided or the value is outside the bounds.
-        This test updates self.total_cnt and records failed tests in self.failtest['SingleMetricTest'].
+        This test updates self.total_cnt.
 
         @param rule: dict with metrics to validate and the value range requirement
         """
-        lbv, ubv, t = self.get_bounds(rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'])
+        lbv, ubv, t = self.get_bounds(
+            rule['RangeLower'], rule['RangeUpper'], rule['ErrorThreshold'])
         metrics = rule['Metrics']
         passcnt = 0
         totalcnt = 0
-        faillist = list()
         failures = dict()
         rerun = list()
         for m in metrics:
@@ -286,25 +314,20 @@ class Validator:
             second_results = dict()
             self.second_test(rerun, second_results)
             for name, val in second_results.items():
-                if name not in failures: continue
+                if name not in failures:
+                    continue
                 if self.check_bound(val, lbv, ubv, t):
                     passcnt += 1
                     del failures[name]
                 else:
-                    failures[name] = val
+                    failures[name] = [val]
                     self.results[0][name] = val
 
         self.totalcnt += totalcnt
         self.passedcnt += passcnt
-        self.failtests['SingleMetricTest']['Total Tests'] += totalcnt
-        self.failtests['SingleMetricTest']['Passed Tests'] += passcnt
         if len(failures.keys()) != 0:
-            faillist = [{'MetricName':name, 'CollectedValue':val} for name, val in failures.items()]
-            self.failtests['SingleMetricTest']['Failed Tests'].append({'RuleIndex':rule['RuleIndex'],
-                                                                       'RangeLower': rule['RangeLower'],
-                                                                       'RangeUpper': rule['RangeUpper'],
-                                                                       'ErrorThreshold':rule['ErrorThreshold'],
-                                                                       'Failure':faillist})
+            self.errlist.extend([TestError([name], self.workloads[self.wlidx], val,
+                                rule['RangeLower'], rule['RangeUpper']) for name, val in failures.items()])
 
         return
 
@@ -312,19 +335,11 @@ class Validator:
         """
         Create final report and write into a JSON file.
         """
-        alldata = list()
-        for i in range(0, len(self.workloads)):
-            reportstas = {"Total Rule Count": self.alltotalcnt[i], "Passed Rule Count": self.allpassedcnt[i]}
-            data = {"Metric Validation Statistics": reportstas, "Tests in Category": self.allfailtests[i],
-                    "Errors":self.allerrlist[i]}
-            alldata.append({"Workload": self.workloads[i], "Report": data})
-
-        json_str = json.dumps(alldata, indent=4)
-        print("Test validation finished. Final report: ")
-        print(json_str)
+        print(self.errlist)
 
         if self.debug:
-            allres = [{"Workload": self.workloads[i], "Results": self.allresults[i]} for i in range(0, len(self.workloads))]
+            allres = [{"Workload": self.workloads[i], "Results": self.allresults[i]}
+                      for i in range(0, len(self.workloads))]
             self.json_dump(allres, self.datafname)
 
     def check_rule(self, testtype, metric_list):
@@ -342,13 +357,13 @@ class Validator:
         return True
 
     # Start of Collector and Converter
-    def convert(self, data: list, metricvalues:dict):
+    def convert(self, data: list, metricvalues: dict):
         """
         Convert collected metric data from the -j output to dict of {metric_name:value}.
         """
         for json_string in data:
             try:
-                result =json.loads(json_string)
+                result = json.loads(json_string)
                 if "metric-unit" in result and result["metric-unit"] != "(null)" and result["metric-unit"] != "":
                     name = result["metric-unit"].split("  ")[1] if len(result["metric-unit"].split("  ")) > 1 \
                         else result["metric-unit"]
@@ -365,9 +380,10 @@ class Validator:
         print(" ".join(command))
         cmd = subprocess.run(command, stderr=subprocess.PIPE, encoding='utf-8')
         data = [x+'}' for x in cmd.stderr.split('}\n') if x]
+        if data[0][0] != '{':
+            # Drop any non-JSON noise printed before the first record
+            data[0] = data[0][data[0].find('{'):]
         return data
 
-
     def collect_perf(self, workload: str):
         """
         Collect metric data with "perf stat -M" on given workload with -a and -j.
@@ -385,14 +401,18 @@ class Validator:
             if rule["TestType"] == "RelationshipTest":
                 metrics = [m["Name"] for m in rule["Metrics"]]
                 if not any(m not in collectlist[0] for m in metrics):
-                    collectlist[rule["RuleIndex"]] = [",".join(list(set(metrics)))]
+                    collectlist[rule["RuleIndex"]] = [
+                        ",".join(list(set(metrics)))]
 
         for idx, metrics in collectlist.items():
-            if idx == 0: wl = "true"
-            else: wl = workload
+            if idx == 0:
+                wl = "true"
+            else:
+                wl = workload
             for metric in metrics:
                 data = self._run_perf(metric, wl)
-                if idx not in self.results: self.results[idx] = dict()
+                if idx not in self.results:
+                    self.results[idx] = dict()
                 self.convert(data, self.results[idx])
         return
 
@@ -412,7 +432,8 @@ class Validator:
         2) create metric name list
         """
         command = ['perf', 'list', '-j', '--details', 'metrics']
-        cmd = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8')
+        cmd = subprocess.run(command, stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE, encoding='utf-8')
         try:
             data = json.loads(cmd.stdout)
             for m in data:
@@ -453,12 +474,12 @@ class Validator:
         rules = data['RelationshipRules']
         self.skiplist = set([name.lower() for name in data['SkipList']])
         self.rules = self.remove_unsupported_rules(rules)
-        pctgrule = {'RuleIndex':0,
-                    'TestType':'SingleMetricTest',
-                    'RangeLower':'0',
+        pctgrule = {'RuleIndex': 0,
+                    'TestType': 'SingleMetricTest',
+                    'RangeLower': '0',
                     'RangeUpper': '100',
                     'ErrorThreshold': self.tolerance,
-                    'Description':'Metrics in percent unit have value with in [0, 100]',
+                    'Description': 'Metrics in percent unit have value within [0, 100]',
                     'Metrics': [{'Name': m.lower()} for m in self.pctgmetrics]}
         self.rules.append(pctgrule)
 
@@ -469,8 +490,9 @@ class Validator:
             idx += 1
 
         if self.debug:
-            #TODO: need to test and generate file name correctly
-            data = {'RelationshipRules':self.rules, 'SupportedMetrics': [{"MetricName": name} for name in self.metrics]}
+            # TODO: need to test and generate file name correctly
+            data = {'RelationshipRules': self.rules, 'SupportedMetrics': [
+                {"MetricName": name} for name in self.metrics]}
             self.json_dump(data, self.fullrulefname)
 
         return
@@ -482,20 +504,17 @@ class Validator:
         @param key: key to the dictionaries (index of self.workloads).
         '''
         self.allresults[key] = self.results
-        self.allignoremetrics[key] = self.ignoremetrics
-        self.allfailtests[key] = self.failtests
         self.alltotalcnt[key] = self.totalcnt
         self.allpassedcnt[key] = self.passedcnt
-        self.allerrlist[key] = self.errlist
 
-    #Initialize data structures before data validation of each workload
+    # Initialize data structures before data validation of each workload
     def _init_data(self):
 
-        testtypes = ['PositiveValueTest', 'RelationshipTest', 'SingleMetricTest']
+        testtypes = ['PositiveValueTest',
+                     'RelationshipTest', 'SingleMetricTest']
         self.results = dict()
-        self.ignoremetrics= set()
+        self.ignoremetrics = set()
         self.errlist = list()
-        self.failtests = {k:{'Total Tests':0, 'Passed Tests':0, 'Failed Tests':[]} for k in testtypes}
         self.totalcnt = 0
         self.passedcnt = 0
 
@@ -525,32 +544,33 @@ class Validator:
                 testtype = r['TestType']
                 if not self.check_rule(testtype, r['Metrics']):
                     continue
-                if  testtype == 'RelationshipTest':
+                if testtype == 'RelationshipTest':
                     self.relationship_test(r)
                 elif testtype == 'SingleMetricTest':
                     self.single_test(r)
                 else:
                     print("Unsupported Test Type: ", testtype)
-                    self.errlist.append("Unsupported Test Type from rule: " + r['RuleIndex'])
-            self._storewldata(i)
             print("Workload: ", self.workloads[i])
-            print("Total metrics collected: ", self.failtests['PositiveValueTest']['Total Tests'])
-            print("Non-negative metric count: ", self.failtests['PositiveValueTest']['Passed Tests'])
             print("Total Test Count: ", self.totalcnt)
             print("Passed Test Count: ", self.passedcnt)
-
+            self._storewldata(i)
         self.create_report()
-        return sum(self.alltotalcnt.values()) != sum(self.allpassedcnt.values())
+        return len(self.errlist) > 0
 # End of Class Validator
 
 
 def main() -> None:
-    parser = argparse.ArgumentParser(description="Launch metric value validation")
-
-    parser.add_argument("-rule", help="Base validation rule file", required=True)
-    parser.add_argument("-output_dir", help="Path for validator output file, report file", required=True)
-    parser.add_argument("-debug", help="Debug run, save intermediate data to files", action="store_true", default=False)
-    parser.add_argument("-wl", help="Workload to run while data collection", default="true")
+    parser = argparse.ArgumentParser(
+        description="Launch metric value validation")
+
+    parser.add_argument(
+        "-rule", help="Base validation rule file", required=True)
+    parser.add_argument(
+        "-output_dir", help="Path for validator output file, report file", required=True)
+    parser.add_argument("-debug", help="Debug run, save intermediate data to files",
+                        action="store_true", default=False)
+    parser.add_argument(
+        "-wl", help="Workload to run while data collection", default="true")
     parser.add_argument("-m", help="Metric list to validate", default="")
     args = parser.parse_args()
     outpath = Path(args.output_dir)
@@ -559,8 +579,8 @@ def main() -> None:
     datafile = Path.joinpath(outpath, 'perf_data.json')
 
     validator = Validator(args.rule, reportf, debug=args.debug,
-                        datafname=datafile, fullrulefname=fullrule, workload=args.wl,
-                        metrics=args.m)
+                          datafname=datafile, fullrulefname=fullrule, workload=args.wl,
+                          metrics=args.m)
     ret = validator.test()
 
     return ret
@@ -569,6 +589,3 @@ def main() -> None:
 if __name__ == "__main__":
     import sys
     sys.exit(main())
-
-
-
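
With the report rework above, failures are printed via TestError.__repr__ and the exit status reduces to whether errlist is non-empty. An illustrative invocation (the script name is assumed from the wrapper's $pythonvalidator; paths and workload are placeholders):

    python3 perf_metric_validation.py -rule metric_rules.json -output_dir /tmp/val -wl "perf bench sched pipe"
    echo $?    # 0: all rules passed, 1: TestError entries were printed
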
index 3cc158a643269688aa8682d8fe3f987691163008..c81d6a9f7983d369f7ee2dff907f2288b0ce3d9e 100644 (file)
@@ -97,6 +97,18 @@ check_per_cache_instance()
        echo "[Success]"
 }
 
+check_per_cluster()
+{
+       echo -n "Checking $1 output: per cluster "
+       if ParanoidAndNotRoot 0
+       then
+               echo "[Skip] paranoid and not root"
+               return
+       fi
+       perf stat --per-cluster -a $2 true
+       echo "[Success]"
+}
+
 check_per_die()
 {
        echo -n "Checking $1 output: per die "
diff --git a/tools/perf/tests/shell/perftool-testsuite_probe.sh b/tools/perf/tests/shell/perftool-testsuite_probe.sh
new file mode 100755 (executable)
index 0000000..a0fec33
--- /dev/null
@@ -0,0 +1,23 @@
+#!/bin/bash
+# perftool-testsuite_probe
+# SPDX-License-Identifier: GPL-2.0
+
+test -d "$(dirname "$0")/base_probe" || exit 2
+cd "$(dirname "$0")/base_probe" || exit 2
+status=0
+
+PERFSUITE_RUN_DIR=$(mktemp -d /tmp/"$(basename "$0" .sh)".XXX)
+export PERFSUITE_RUN_DIR
+
+for testcase in setup.sh test_*; do                  # skip setup.sh if not present or not executable
+     test -x "$testcase" || continue
+     ./"$testcase"
+     (( status += $? ))
+done
+
+if ! [ "$PERFTEST_KEEP_LOGS" = "y" ]; then
+       rm -rf "$PERFSUITE_RUN_DIR"
+fi
+
+test $status -ne 0 && exit 1
+exit 0
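
Illustrative usage of the driver above (not part of the patch): exit 2 means the base_probe directory is missing (skip), exit 1 means at least one testcase failed, and PERFTEST_KEEP_LOGS preserves the run directory:

    PERFTEST_KEEP_LOGS=y ./perftool-testsuite_probe.sh
    echo $?    # 0 pass, 1 fail, 2 skip; logs remain under $PERFSUITE_RUN_DIR
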
index f1818fa6d9ce931c956d8383e07f764f6c4236dd..fc2d8cc6e5e0b1e5b47378b4de3e079cacc32674 100755 (executable)
@@ -42,6 +42,7 @@ function commachecker()
        ;; "--per-socket")      exp=8
        ;; "--per-node")        exp=8
        ;; "--per-die")         exp=8
+       ;; "--per-cluster")     exp=8
        ;; "--per-cache")       exp=8
        esac
 
@@ -79,6 +80,7 @@ then
        check_system_wide_no_aggr "CSV" "$perf_cmd"
        check_per_core "CSV" "$perf_cmd"
        check_per_cache_instance "CSV" "$perf_cmd"
+       check_per_cluster "CSV" "$perf_cmd"
        check_per_die "CSV" "$perf_cmd"
        check_per_socket "CSV" "$perf_cmd"
 else
index 3bc900533a5d65e5f7c3495022802857da517388..2b9c6212dffc6f2a59a0e495b0628c38d34221e7 100755 (executable)
@@ -122,6 +122,18 @@ check_per_cache_instance()
        echo "[Success]"
 }
 
+check_per_cluster()
+{
+       echo -n "Checking json output: per cluster "
+       if ParanoidAndNotRoot 0
+       then
+               echo "[Skip] paranoia and not root"
+               return
+       fi
+       perf stat -j --per-cluster -a true 2>&1 | $PYTHON $pythonchecker --per-cluster
+       echo "[Success]"
+}
+
 check_per_die()
 {
        echo -n "Checking json output: per die "
@@ -200,6 +212,7 @@ then
        check_system_wide_no_aggr
        check_per_core
        check_per_cache_instance
+       check_per_cluster
        check_per_die
        check_per_socket
 else
index 4fcdd1a9142c682c357568600f9def3e05d7aaae..cbf2894b2c846a0495e56aa7728366c91cfe85cb 100755 (executable)
@@ -13,7 +13,7 @@ stat_output=$(mktemp /tmp/__perf_test.stat_output.std.XXXXX)
 
 event_name=(cpu-clock task-clock context-switches cpu-migrations page-faults stalled-cycles-frontend stalled-cycles-backend cycles instructions branches branch-misses)
 event_metric=("CPUs utilized" "CPUs utilized" "/sec" "/sec" "/sec" "frontend cycles idle" "backend cycles idle" "GHz" "insn per cycle" "/sec" "of all branches")
-skip_metric=("stalled cycles per insn" "tma_")
+skip_metric=("stalled cycles per insn" "tma_" "retiring" "frontend_bound" "bad_speculation" "backend_bound")
 
 cleanup() {
   rm -f "${stat_output}"
@@ -40,6 +40,7 @@ function commachecker()
        ;; "--per-node")        prefix=3
        ;; "--per-die")         prefix=3
        ;; "--per-cache")       prefix=3
+       ;; "--per-cluster")     prefix=3
        esac
 
        while read line
@@ -99,6 +100,7 @@ then
        check_system_wide_no_aggr "STD" "$perf_cmd"
        check_per_core "STD" "$perf_cmd"
        check_per_cache_instance "STD" "$perf_cmd"
+       check_per_cluster "STD" "$perf_cmd"
        check_per_die "STD" "$perf_cmd"
        check_per_socket "STD" "$perf_cmd"
 else
index a87bb2814b4c6de8395219d268eaa15fb21b6e15..2d920987477468a8a021871a94bd4ed226b34278 100755 (executable)
@@ -4,19 +4,19 @@
 
 set -e
 
-# check whether $2 is within +/- 10% of $1
+# check whether $2 is within +/- 20% of $1
 compare_number()
 {
        first_num=$1
        second_num=$2
 
-       # upper bound is first_num * 110%
-       upper=$(expr $first_num + $first_num / 10 )
-       # lower bound is first_num * 90%
-       lower=$(expr $first_num - $first_num / 10 )
+       # upper bound is first_num * 120%
+       upper=$(expr $first_num + $first_num / 5 )
+       # lower bound is first_num * 80%
+       lower=$(expr $first_num - $first_num / 5 )
 
        if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
-               echo "The difference between $first_num and $second_num are greater than 10%."
+               echo "The difference between $first_num and $second_num are greater than 20%."
                exit 1
        fi
 }
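
Since expr does integer arithmetic, the window is exact only when first_num is divisible by 5; a worked example (illustrative):

    first_num=100
    upper=$(expr $first_num + $first_num / 5)    # 100 + 20 = 120
    lower=$(expr $first_num - $first_num / 5)    # 100 - 20 = 80
    # for first_num=7 the bounds truncate to [6, 8], slightly tighter than a true +/-20%
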
index 7ca172599aa6cdac7adb47d16716ff3ba3746e63..279f19c5919ae47f2eea8809b6e10d5cd6cd03b5 100755 (executable)
@@ -19,6 +19,8 @@ echo "Output will be stored in: $tmpdir"
 $PYTHON $pythonvalidator -rule $rulefile -output_dir $tmpdir -wl "${workload}"
 ret=$?
 rm -rf $tmpdir
-
+if [ $ret -ne 0 ]; then
+       echo "Metric validation return with erros. Please check metrics reported with errors."
+fi
 exit $ret
 
index e342e6c8aa50c41ddb86730e263c321907800d73..83b53591b1eaccd6223b465a91095cba6c535388 100755 (executable)
@@ -8,6 +8,12 @@ shelldir=$(dirname "$0")
 
 lscpu | grep -q "aarch64" || exit 2
 
+if perf version --build-options | grep HAVE_DWARF_UNWIND_SUPPORT | grep -q OFF
+then
+  echo "Skipping, no dwarf unwind support"
+  exit 2
+fi
+
 skip_test_missing_symbol leafloop
 
 PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
index 16e1c5502b094ced65b4c7104b638a06e9cc6bd7..d208105919ed3821aa304a38af15b4a43de9fdf2 100644 (file)
@@ -41,6 +41,30 @@ static void exit_test_info(struct test_info *ti)
        machine__delete(ti->machine);
 }
 
+struct dso_map {
+       struct dso *dso;
+       struct map *map;
+};
+
+static int find_map_cb(struct map *map, void *d)
+{
+       struct dso_map *data = d;
+
+       if (map__dso(map) != data->dso)
+               return 0;
+       data->map = map;
+       return 1;
+}
+
+static struct map *find_module_map(struct machine *machine, struct dso *dso)
+{
+       struct dso_map data = { .dso = dso };
+
+       machine__for_each_kernel_map(machine, find_map_cb, &data);
+
+       return data.map;
+}
+
 static void get_test_dso_filename(char *filename, size_t max_sz)
 {
        if (dso_to_test)
@@ -51,6 +75,26 @@ static void get_test_dso_filename(char *filename, size_t max_sz)
 
 static int create_map(struct test_info *ti, char *filename, struct map **map_p)
 {
+       struct dso *dso = machine__findnew_dso(ti->machine, filename);
+
+       /*
+        * If 'filename' matches a current kernel module, must use a kernel
+        * map. Find the one that already exists.
+        */
+       if (dso && dso->kernel) {
+               *map_p = find_module_map(ti->machine, dso);
+               dso__put(dso);
+               if (!*map_p) {
+                       pr_debug("Failed to find map for current kernel module %s",
+                                filename);
+                       return TEST_FAIL;
+               }
+               map__get(*map_p);
+               return TEST_OK;
+       }
+
+       dso__put(dso);
+
        /* Create a dummy map at 0x100000 */
        *map_p = map__new(ti->machine, 0x100000, 0xffffffff, 0, NULL,
                          PROT_EXEC, 0, NULL, filename, ti->thread);
@@ -97,6 +141,26 @@ static int test_dso(struct dso *dso)
        return ret;
 }
 
+static int subdivided_dso_cb(struct dso *dso, struct machine *machine __maybe_unused, void *d)
+{
+       struct dso *text_dso = d;
+
+       if (dso != text_dso && strstarts(dso->short_name, text_dso->short_name))
+               if (test_dso(dso) != TEST_OK)
+                       return -1;
+
+       return 0;
+}
+
+static int process_subdivided_dso(struct machine *machine, struct dso *dso)
+{
+       int ret;
+
+       ret = machine__for_each_dso(machine, subdivided_dso_cb, dso);
+
+       return ret < 0 ? TEST_FAIL : TEST_OK;
+}
+
 static int test_file(struct test_info *ti, char *filename)
 {
        struct map *map = NULL;
@@ -124,6 +188,10 @@ static int test_file(struct test_info *ti, char *filename)
        }
 
        ret = test_dso(dso);
+
+       /* Module dso is split into many dsos by section */
+       if (ret == TEST_OK && dso->kernel)
+               ret = process_subdivided_dso(ti->machine, dso);
 out_put:
        map__put(map);
 
diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c
new file mode 100644 (file)
index 0000000..e2042b3
--- /dev/null
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/zalloc.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <subcmd/exec-cmd.h>
+#include <subcmd/parse-options.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <api/io.h>
+#include "builtin.h"
+#include "tests-scripts.h"
+#include "color.h"
+#include "debug.h"
+#include "hist.h"
+#include "intlist.h"
+#include "string2.h"
+#include "symbol.h"
+#include "tests.h"
+#include "util/rlimit.h"
+#include "util/util.h"
+
+static int shell_tests__dir_fd(void)
+{
+       char path[PATH_MAX], *exec_path;
+       static const char * const devel_dirs[] = { "./tools/perf/tests/shell", "./tests/shell", };
+
+       for (size_t i = 0; i < ARRAY_SIZE(devel_dirs); ++i) {
+               int fd = open(devel_dirs[i], O_PATH);
+
+               if (fd >= 0)
+                       return fd;
+       }
+
+       /* Then installed path. */
+       exec_path = get_argv_exec_path();
+       scnprintf(path, sizeof(path), "%s/tests/shell", exec_path);
+       free(exec_path);
+       return open(path, O_PATH);
+}
+
+static char *shell_test__description(int dir_fd, const char *name)
+{
+       struct io io;
+       char buf[128], desc[256];
+       int ch, pos = 0;
+
+       io__init(&io, openat(dir_fd, name, O_RDONLY), buf, sizeof(buf));
+       if (io.fd < 0)
+               return NULL;
+
+       /* Skip first line - should be #!/bin/sh Shebang */
+       if (io__get_char(&io) != '#')
+               goto err_out;
+       if (io__get_char(&io) != '!')
+               goto err_out;
+       do {
+               ch = io__get_char(&io);
+               if (ch < 0)
+                       goto err_out;
+       } while (ch != '\n');
+
+       do {
+               ch = io__get_char(&io);
+               if (ch < 0)
+                       goto err_out;
+       } while (ch == '#' || isspace(ch));
+       while (ch > 0 && ch != '\n') {
+               desc[pos++] = ch;
+               if (pos >= (int)sizeof(desc) - 1)
+                       break;
+               ch = io__get_char(&io);
+       }
+       while (pos > 0 && isspace(desc[--pos]))
+               ;
+       desc[++pos] = '\0';
+       close(io.fd);
+       return strdup(desc);
+err_out:
+       close(io.fd);
+       return NULL;
+}
+
+/* Is this full file path a shell script */
+static bool is_shell_script(int dir_fd, const char *path)
+{
+       const char *ext;
+
+       ext = strrchr(path, '.');
+       if (!ext)
+               return false;
+       if (!strcmp(ext, ".sh")) { /* Has .sh extension */
+               if (faccessat(dir_fd, path, R_OK | X_OK, 0) == 0) /* Is executable */
+                       return true;
+       }
+       return false;
+}
+
+/* Is this file in this dir a shell script (for test purposes) */
+static bool is_test_script(int dir_fd, const char *name)
+{
+       return is_shell_script(dir_fd, name);
+}
+
+/* Duplicate a string and fall over and die if we run out of memory */
+static char *strdup_check(const char *str)
+{
+       char *newstr;
+
+       newstr = strdup(str);
+       if (!newstr) {
+               pr_err("Out of memory while duplicating test script string\n");
+               abort();
+       }
+       return newstr;
+}
+
+static int shell_test__run(struct test_suite *test, int subtest __maybe_unused)
+{
+       const char *file = test->priv;
+       int err;
+       char *cmd = NULL;
+
+       if (asprintf(&cmd, "%s%s", file, verbose ? " -v" : "") < 0)
+               return TEST_FAIL;
+       err = system(cmd);
+       free(cmd);
+       if (!err)
+               return TEST_OK;
+
+       return WEXITSTATUS(err) == 2 ? TEST_SKIP : TEST_FAIL;
+}
+
+static void append_script(int dir_fd, const char *name, char *desc,
+                         struct test_suite ***result,
+                         size_t *result_sz)
+{
+       char filename[PATH_MAX], link[128];
+       struct test_suite *test_suite, **result_tmp;
+       struct test_case *tests;
+       ssize_t len;
+
+       snprintf(link, sizeof(link), "/proc/%d/fd/%d", getpid(), dir_fd);
+       len = readlink(link, filename, sizeof(filename));
+       if (len < 0) {
+               pr_err("Failed to readlink %s", link);
+               return;
+       }
+       filename[len++] = '/';
+       strcpy(&filename[len], name);
+
+       tests = calloc(2, sizeof(*tests));
+       if (!tests) {
+               pr_err("Out of memory while building script test suite list\n");
+               return;
+       }
+       tests[0].name = strdup_check(name);
+       tests[0].desc = strdup_check(desc);
+       tests[0].run_case = shell_test__run;
+
+       test_suite = zalloc(sizeof(*test_suite));
+       if (!test_suite) {
+               pr_err("Out of memory while building script test suite list\n");
+               free(tests);
+               return;
+       }
+       test_suite->desc = desc;
+       test_suite->test_cases = tests;
+       test_suite->priv = strdup_check(filename);
+       /*
+        * Realloc is good enough, though we could realloc by chunks, not that
+        * anyone will ever measure performance here.
+        */
+       result_tmp = realloc(*result, (*result_sz + 1) * sizeof(*result_tmp));
+       if (result_tmp == NULL) {
+               pr_err("Out of memory while building script test suite list\n");
+               free(tests);
+               free(test_suite);
+               return;
+       }
+       /* Add file to end and NULL terminate the struct array */
+       *result = result_tmp;
+       (*result)[*result_sz] = test_suite;
+       (*result_sz)++;
+}
+
+static void append_scripts_in_dir(int dir_fd,
+                                 struct test_suite ***result,
+                                 size_t *result_sz)
+{
+       struct dirent **entlist;
+       struct dirent *ent;
+       int n_dirs, i;
+
+       /* List files, sorted by alpha */
+       n_dirs = scandirat(dir_fd, ".", &entlist, NULL, alphasort);
+       if (n_dirs == -1)
+               return;
+       for (i = 0; i < n_dirs && (ent = entlist[i]); i++) {
+               int fd;
+
+               if (ent->d_name[0] == '.')
+                       continue; /* Skip hidden files */
+               if (is_test_script(dir_fd, ent->d_name)) { /* It's a test */
+                       char *desc = shell_test__description(dir_fd, ent->d_name);
+
+                       if (desc) /* It has a desc line - valid script */
+                               append_script(dir_fd, ent->d_name, desc, result, result_sz);
+                       continue;
+               }
+               if (ent->d_type != DT_DIR) {
+                       struct stat st;
+
+                       if (ent->d_type != DT_UNKNOWN)
+                               continue;
+                       fstatat(dir_fd, ent->d_name, &st, 0);
+                       if (!S_ISDIR(st.st_mode))
+                               continue;
+               }
+               fd = openat(dir_fd, ent->d_name, O_PATH);
+               append_scripts_in_dir(fd, result, result_sz);
+       }
+       for (i = 0; i < n_dirs; i++) /* Clean up */
+               zfree(&entlist[i]);
+       free(entlist);
+}
+
+struct test_suite **create_script_test_suites(void)
+{
+       struct test_suite **result = NULL, **result_tmp;
+       size_t result_sz = 0;
+       int dir_fd = shell_tests__dir_fd(); /* Walk dir */
+
+       /*
+        * Append scripts if fd is good, otherwise return a NULL terminated zero
+        * length array.
+        */
+       if (dir_fd >= 0)
+               append_scripts_in_dir(dir_fd, &result, &result_sz);
+
+       result_tmp = realloc(result, (result_sz + 1) * sizeof(*result_tmp));
+       if (result_tmp == NULL) {
+               pr_err("Out of memory while building script test suite list\n");
+               abort();
+       }
+       /* NULL terminate the test suite array. */
+       result = result_tmp;
+       result[result_sz] = NULL;
+       if (dir_fd >= 0)
+               close(dir_fd);
+       return result;
+}
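
A script this scanner registers must be an executable *.sh file whose first line is a shebang and whose next comment line becomes the test description; a toy example (illustrative only, not part of the patch):

    #!/bin/bash
    # toy perf smoke test (this line becomes the suite description)
    perf --version > /dev/null || exit 1    # any status other than 0 or 2 -> TEST_FAIL
    exit 0                                  # 0 -> TEST_OK; exit 2 would yield TEST_SKIP
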
diff --git a/tools/perf/tests/tests-scripts.h b/tools/perf/tests/tests-scripts.h
new file mode 100644 (file)
index 0000000..b553ad2
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TESTS_SCRIPTS_H
+#define TESTS_SCRIPTS_H
+
+#include "tests.h"
+
+struct test_suite **create_script_test_suites(void);
+
+#endif /* TESTS_SCRIPTS_H */
index dad3d7414142d1befc3d6eebe48d81a39ace153a..3aa7701ee0e939f70d9e8aef73095c65e6a68266 100644 (file)
@@ -4,11 +4,17 @@
 
 #include <stdbool.h>
 
+enum {
+       TEST_OK   =  0,
+       TEST_FAIL = -1,
+       TEST_SKIP = -2,
+};
+
 #define TEST_ASSERT_VAL(text, cond)                                     \
 do {                                                                    \
        if (!(cond)) {                                                   \
                pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
-               return -1;                                               \
+               return TEST_FAIL;                                        \
        }                                                                \
 } while (0)
 
@@ -17,16 +23,10 @@ do {                                                                         \
        if (val != expected) {                                           \
                pr_debug("FAILED %s:%d %s (%d != %d)\n",                 \
                         __FILE__, __LINE__, text, val, expected);       \
-               return -1;                                               \
+               return TEST_FAIL;                                        \
        }                                                                \
 } while (0)
 
-enum {
-       TEST_OK   =  0,
-       TEST_FAIL = -1,
-       TEST_SKIP = -2,
-};
-
 struct test_suite;
 
 typedef int (*test_fnptr)(struct test_suite *, int);
index 7fa6f7c568e2f9b0721c21621fc04b92617bd94e..e9ecd30a5c058076b9eb8f1aa4f5950e3663cbea 100644 (file)
@@ -46,9 +46,9 @@ static int test__thread_maps_share(struct test_suite *test __maybe_unused, int s
        TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(maps__refcnt(maps)), 4);
 
        /* test the maps pointer is shared */
-       TEST_ASSERT_VAL("maps don't match", RC_CHK_EQUAL(maps, thread__maps(t1)));
-       TEST_ASSERT_VAL("maps don't match", RC_CHK_EQUAL(maps, thread__maps(t2)));
-       TEST_ASSERT_VAL("maps don't match", RC_CHK_EQUAL(maps, thread__maps(t3)));
+       TEST_ASSERT_VAL("maps don't match", maps__equal(maps, thread__maps(t1)));
+       TEST_ASSERT_VAL("maps don't match", maps__equal(maps, thread__maps(t2)));
+       TEST_ASSERT_VAL("maps don't match", maps__equal(maps, thread__maps(t3)));
 
        /*
         * Verify the other leader was created by previous call.
@@ -73,7 +73,7 @@ static int test__thread_maps_share(struct test_suite *test __maybe_unused, int s
        other_maps = thread__maps(other);
        TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(maps__refcnt(other_maps)), 2);
 
-       TEST_ASSERT_VAL("maps don't match", RC_CHK_EQUAL(other_maps, thread__maps(other_leader)));
+       TEST_ASSERT_VAL("maps don't match", maps__equal(other_maps, thread__maps(other_leader)));
 
        /* release thread group */
        thread__put(t3);
index 822f893e67d5f6643f5f1794801b87630f64fca1..fecbf851bb2e1faf2219d1ce2b30604105257bc4 100644 (file)
@@ -131,9 +131,10 @@ static int test__vmlinux_matches_kallsyms_cb1(struct map *map, void *data)
        struct map *pair = maps__find_by_name(args->kallsyms.kmaps,
                                        (dso->kernel ? dso->short_name : dso->name));
 
-       if (pair)
+       if (pair) {
                map__set_priv(pair, 1);
-       else {
+               map__put(pair);
+       } else {
                if (!args->header_printed) {
                        pr_info("WARN: Maps only in vmlinux:\n");
                        args->header_printed = true;
@@ -151,10 +152,8 @@ static int test__vmlinux_matches_kallsyms_cb2(struct map *map, void *data)
        u64 mem_end = map__unmap_ip(args->vmlinux_map, map__end(map));
 
        pair = maps__find(args->kallsyms.kmaps, mem_start);
-       if (pair == NULL || map__priv(pair))
-               return 0;
 
-       if (map__start(pair) == mem_start) {
+       if (pair != NULL && !map__priv(pair) && map__start(pair) == mem_start) {
                struct dso *dso = map__dso(map);
 
                if (!args->header_printed) {
@@ -170,6 +169,7 @@ static int test__vmlinux_matches_kallsyms_cb2(struct map *map, void *data)
                pr_info(" %s\n", dso->name);
                map__set_priv(pair, 1);
        }
+       map__put(pair);
        return 0;
 }
 
index 7cb2d6678039749813c405ac1f515c7796ce8b53..5f60e515b12ec0156a2089f6b16014dc80c5b1de 100644 (file)
@@ -83,7 +83,7 @@ int res_sample_browse(struct res_sample *res_samples, int num_res,
                     r->tid ? "--tid " : "",
                     r->tid ? (sprintf(tidbuf, "%d", r->tid), tidbuf) : "",
                     extra_format,
-                    rstype == A_ASM ? "-F +insn --xed" :
+                    rstype == A_ASM ? "-F +disasm" :
                     rstype == A_SOURCE ? "-F +srcline,+srccode" : "",
                     symbol_conf.inline_name ? "--inline" : "",
                     "--show-lost-events ",
index 50d45054ed6c1b435faf5cb4634ceae6eea03491..e437d7889de624f5a09d526ab11ec7f39c69ce5e 100644 (file)
@@ -107,7 +107,7 @@ static int list_scripts(char *script_name, bool *custom,
        if (evsel)
                attr_to_script(scriptc.extra_format, &evsel->core.attr);
        add_script_option("Show individual samples", "", &scriptc);
-       add_script_option("Show individual samples with assembler", "-F +insn --xed",
+       add_script_option("Show individual samples with assembler", "-F +disasm",
                          &scriptc);
        add_script_option("Show individual samples with source", "-F +srcline,+srccode",
                          &scriptc);
index 394861245fd3e48ff1cc43ae14b97dd2213dc64e..93ce3d47e47e6c07d889d485c57597d3bfa5e489 100644 (file)
@@ -28,21 +28,29 @@ static const char *const col_names[] = {
 static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym,
                                 struct disasm_line *dl, int evidx)
 {
+       struct annotation *notes;
        struct sym_hist *symhist;
+       struct sym_hist_entry *entry;
        double percent = 0.0;
        const char *markup;
        int ret = 0;
+       u64 nr_samples = 0;
 
        strcpy(buf, "");
 
        if (dl->al.offset == (s64) -1)
                return 0;
 
-       symhist = annotation__histogram(symbol__annotation(sym), evidx);
-       if (!symbol_conf.event_group && !symhist->addr[dl->al.offset].nr_samples)
+       notes = symbol__annotation(sym);
+       symhist = annotation__histogram(notes, evidx);
+       entry = annotated_source__hist_entry(notes->src, evidx, dl->al.offset);
+       if (entry)
+               nr_samples = entry->nr_samples;
+
+       if (!symbol_conf.event_group && nr_samples == 0)
                return 0;
 
-       percent = 100.0 * symhist->addr[dl->al.offset].nr_samples / symhist->nr_samples;
+       percent = 100.0 * nr_samples / symhist->nr_samples;
 
        markup = perf_gtk__get_percent_color(percent);
        if (markup)
index 8027f450fa3e489e04769f42a146e4438350dbbb..e0a723e2450386874dca7cd311afc44e1a33dabc 100644 (file)
@@ -32,6 +32,7 @@ perf-y += perf_regs.o
 perf-y += perf-regs-arch/
 perf-y += path.o
 perf-y += print_binary.o
+perf-y += print_insn.o
 perf-y += rlimit.o
 perf-y += argv_split.o
 perf-y += rbtree.o
@@ -71,6 +72,7 @@ perf-y += ordered-events.o
 perf-y += namespaces.o
 perf-y += comm.o
 perf-y += thread.o
+perf-y += threads.o
 perf-y += thread_map.o
 perf-y += parse-events-flex.o
 perf-y += parse-events-bison.o
index f22b4f18271c96e406ce5109648e215c08372b86..30c4d19fcf112f5a7354711e30cd84f682fab9f0 100644 (file)
@@ -9,10 +9,12 @@
 #include <stdlib.h>
 #include <inttypes.h>
 
+#include "annotate.h"
 #include "annotate-data.h"
 #include "debuginfo.h"
 #include "debug.h"
 #include "dso.h"
+#include "dwarf-regs.h"
 #include "evsel.h"
 #include "evlist.h"
 #include "map.h"
@@ -192,7 +194,8 @@ static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die)
 }
 
 /* The type info will be saved in @type_die */
-static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
+static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset,
+                         bool is_pointer)
 {
        Dwarf_Word size;
 
@@ -204,14 +207,18 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
        }
 
        /*
-        * It expects a pointer type for a memory access.
-        * Convert to a real type it points to.
+        * Usually it expects a pointer type for a memory access.
+        * Convert it to the real type it points to.  But global and
+        * local variables are accessed directly without a pointer.
         */
-       if (dwarf_tag(type_die) != DW_TAG_pointer_type ||
-           die_get_real_type(type_die, type_die) == NULL) {
-               pr_debug("no pointer or no type\n");
-               ann_data_stat.no_typeinfo++;
-               return -1;
+       if (is_pointer) {
+               if ((dwarf_tag(type_die) != DW_TAG_pointer_type &&
+                    dwarf_tag(type_die) != DW_TAG_array_type) ||
+                   die_get_real_type(type_die, type_die) == NULL) {
+                       pr_debug("no pointer or no type\n");
+                       ann_data_stat.no_typeinfo++;
+                       return -1;
+               }
        }
 
        /* Get the size of the actual type */
@@ -232,13 +239,18 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
 }
 
 /* The result will be saved in @type_die */
-static int find_data_type_die(struct debuginfo *di, u64 pc,
-                             int reg, int offset, Dwarf_Die *type_die)
+static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr,
+                             const char *var_name, struct annotated_op_loc *loc,
+                             Dwarf_Die *type_die)
 {
        Dwarf_Die cu_die, var_die;
        Dwarf_Die *scopes = NULL;
+       int reg, offset;
        int ret = -1;
        int i, nr_scopes;
+       int fbreg = -1;
+       bool is_fbreg = false;
+       int fb_offset = 0;
 
        /* Get a compile_unit for this address */
        if (!find_cu_die(di, pc, &cu_die)) {
@@ -247,19 +259,81 @@ static int find_data_type_die(struct debuginfo *di, u64 pc,
                return -1;
        }
 
+       reg = loc->reg1;
+       offset = loc->offset;
+
+       if (reg == DWARF_REG_PC) {
+               if (die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) {
+                       ret = check_variable(&var_die, type_die, offset,
+                                            /*is_pointer=*/false);
+                       loc->offset = offset;
+                       goto out;
+               }
+
+               if (var_name && die_find_variable_at(&cu_die, var_name, pc,
+                                                    &var_die)) {
+                       ret = check_variable(&var_die, type_die, 0,
+                                            /*is_pointer=*/false);
+                       /* loc->offset will be updated by the caller */
+                       goto out;
+               }
+       }
+
        /* Get a list of nested scopes - i.e. (inlined) functions and blocks. */
        nr_scopes = die_get_scopes(&cu_die, pc, &scopes);
 
+       if (reg != DWARF_REG_PC && dwarf_hasattr(&scopes[0], DW_AT_frame_base)) {
+               Dwarf_Attribute attr;
+               Dwarf_Block block;
+
+               /* Check if 'reg' is assigned as the frame base register */
+               if (dwarf_attr(&scopes[0], DW_AT_frame_base, &attr) != NULL &&
+                   dwarf_formblock(&attr, &block) == 0 && block.length == 1) {
+                       switch (*block.data) {
+                       case DW_OP_reg0 ... DW_OP_reg31:
+                               fbreg = *block.data - DW_OP_reg0;
+                               break;
+                       case DW_OP_call_frame_cfa:
+                               if (die_get_cfa(di->dbg, pc, &fbreg,
+                                               &fb_offset) < 0)
+                                       fbreg = -1;
+                               break;
+                       default:
+                               break;
+                       }
+               }
+       }
+
+retry:
+       is_fbreg = (reg == fbreg);
+       if (is_fbreg)
+               offset = loc->offset - fb_offset;
+
        /* Search from the inner-most scope to the outer */
        for (i = nr_scopes - 1; i >= 0; i--) {
-               /* Look up variables/parameters in this scope */
-               if (!die_find_variable_by_reg(&scopes[i], pc, reg, &var_die))
-                       continue;
+               if (reg == DWARF_REG_PC) {
+                       if (!die_find_variable_by_addr(&scopes[i], pc, addr,
+                                                      &var_die, &offset))
+                               continue;
+               } else {
+                       /* Look up variables/parameters in this scope */
+                       if (!die_find_variable_by_reg(&scopes[i], pc, reg,
+                                                     &offset, is_fbreg, &var_die))
+                               continue;
+               }
 
                /* Found a variable, see if it's correct */
-               ret = check_variable(&var_die, type_die, offset);
+               ret = check_variable(&var_die, type_die, offset,
+                                    reg != DWARF_REG_PC && !is_fbreg);
+               loc->offset = offset;
                goto out;
        }
+
+       if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) {
+               reg = loc->reg2;
+               goto retry;
+       }
+
        if (ret < 0)
                ann_data_stat.no_var++;
 
@@ -272,15 +346,22 @@ out:
  * find_data_type - Return a data type at the location
  * @ms: map and symbol at the location
  * @ip: instruction address of the memory access
- * @reg: register that holds the base address
- * @offset: offset from the base address
+ * @loc: instruction operand location
+ * @addr: data address of the memory access
+ * @var_name: global variable name
  *
  * This functions searches the debug information of the binary to get the data
- * type it accesses.  The exact location is expressed by (ip, reg, offset).
+ * type it accesses.  The exact location is expressed by (@ip, reg, offset)
+ * for pointer variables or (@ip, @addr) for global variables.  Note that for
+ * global variables, @loc->offset may be updated after finding the start of
+ * the variable.  If it cannot find a global variable by address, it tries to
+ * find a declaration of the variable using @var_name.  In that case,
+ * @loc->offset won't be updated.
+ *
  * It returns %NULL if not found.
  */
 struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
-                                          int reg, int offset)
+                                          struct annotated_op_loc *loc, u64 addr,
+                                          const char *var_name)
 {
        struct annotated_data_type *result = NULL;
        struct dso *dso = map__dso(ms->map);
@@ -300,7 +381,7 @@ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
         * a file address for DWARF processing.
         */
        pc = map__rip_2objdump(ms->map, ip);
-       if (find_data_type_die(di, pc, reg, offset, &type_die) < 0)
+       if (find_data_type_die(di, pc, addr, var_name, loc, &type_die) < 0)
                goto out;
 
        result = dso__findnew_data_type(dso, &type_die);
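
A minimal sketch of how a caller uses the reworked interface (hypothetical values; the in-tree caller is hist_entry__get_data_type() in annotate.c further below):

	/* sketch: look up the type behind "mov 0x10(%rax),%rcx" on x86-64 */
	struct annotated_op_loc loc = {
		.reg1    = 0,      /* DWARF register number of %rax */
		.reg2    = -1,
		.offset  = 0x10,
		.mem_ref = true,
	};
	struct annotated_data_type *type;

	/* @addr and @var_name only matter for PC-relative (global) accesses */
	type = find_data_type(ms, ip, &loc, /*addr=*/0, /*var_name=*/NULL);
	if (type != NULL)
		pr_debug("type: %s\n", type->self.type_name);
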
index 8e73096c01d1ab2fd532b0019c1c95d694feb5c8..1b0db8e8c40e6b706ad0adb4a31bda3859ba8488 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/rbtree.h>
 #include <linux/types.h>
 
+struct annotated_op_loc;
 struct evsel;
 struct map_symbol;
 
@@ -69,6 +70,7 @@ struct annotated_data_type {
 };
 
 extern struct annotated_data_type unknown_type;
+extern struct annotated_data_type stackop_type;
 
 /**
  * struct annotated_data_stat - Debug statistics
@@ -105,7 +107,8 @@ extern struct annotated_data_stat ann_data_stat;
 
 /* Returns data type at the location (ip, reg, offset) */
 struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
-                                          int reg, int offset);
+                                          struct annotated_op_loc *loc, u64 addr,
+                                          const char *var_name);
 
 /* Update type access histogram at the given offset */
 int annotated_data_type__update_samples(struct annotated_data_type *adt,
@@ -119,7 +122,8 @@ void annotated_data_type__tree_delete(struct rb_root *root);
 
 static inline struct annotated_data_type *
 find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused,
-              int reg __maybe_unused, int offset __maybe_unused)
+              struct annotated_op_loc *loc __maybe_unused,
+              u64 addr __maybe_unused, const char *var_name __maybe_unused)
 {
        return NULL;
 }
index 9b70ab110ce79f24da580611f1a2098726f1ae12..ac002d907d81801c941876731e29acfc8dcf97b3 100644 (file)
@@ -37,6 +37,8 @@
 #include "util/sharded_mutex.h"
 #include "arch/common.h"
 #include "namespaces.h"
+#include "thread.h"
+#include "hashmap.h"
 #include <regex.h>
 #include <linux/bitops.h>
 #include <linux/kernel.h>
@@ -107,6 +109,14 @@ static struct ins_ops ret_ops;
 struct annotated_data_stat ann_data_stat;
 LIST_HEAD(ann_insn_stat);
 
+/* Pseudo data types */
+struct annotated_data_type stackop_type = {
+       .self = {
+               .type_name = (char *)"(stack operation)",
+               .children = LIST_HEAD_INIT(stackop_type.self.children),
+       },
+};
+
 static int arch__grow_instructions(struct arch *arch)
 {
        struct ins *new_instructions;
@@ -854,6 +864,17 @@ bool arch__is(struct arch *arch, const char *name)
        return !strcmp(arch->name, name);
 }
 
+/* symbol histogram: key = offset << 16 | evsel->core.idx */
+static size_t sym_hist_hash(long key, void *ctx __maybe_unused)
+{
+       return (key >> 16) + (key & 0xffff);
+}
+
+static bool sym_hist_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+       return key1 == key2;
+}
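
Both halves of the packed key can be recovered cheaply, which is what sym_hist_hash() exploits to spread the buckets. A sketch of the scheme (it assumes event indexes stay below 65536):

	long key  = (offset << 16) | evsel->core.idx;   /* build the key */

	u64 off   = key >> 16;                          /* offset within the symbol */
	int evidx = key & 0xffff;                       /* event index */
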
+
 static struct annotated_source *annotated_source__new(void)
 {
        struct annotated_source *src = zalloc(sizeof(*src));
@@ -868,38 +889,25 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src
 {
        if (src == NULL)
                return;
+
+       hashmap__free(src->samples);
        zfree(&src->histograms);
        free(src);
 }
 
 static int annotated_source__alloc_histograms(struct annotated_source *src,
-                                             size_t size, int nr_hists)
+                                             int nr_hists)
 {
-       size_t sizeof_sym_hist;
-
-       /*
-        * Add buffer of one element for zero length symbol.
-        * When sample is taken from first instruction of
-        * zero length symbol, perf still resolves it and
-        * shows symbol name in perf report and allows to
-        * annotate it.
-        */
-       if (size == 0)
-               size = 1;
+       src->nr_histograms   = nr_hists;
+       src->histograms      = calloc(nr_hists, sizeof(*src->histograms));
 
-       /* Check for overflow when calculating sizeof_sym_hist */
-       if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry))
+       if (src->histograms == NULL)
                return -1;
 
-       sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry));
-
-       /* Check for overflow in zalloc argument */
-       if (sizeof_sym_hist > SIZE_MAX / nr_hists)
-               return -1;
+       src->samples = hashmap__new(sym_hist_hash, sym_hist_equal, NULL);
+       if (src->samples == NULL)
+               zfree(&src->histograms);
 
-       src->sizeof_sym_hist = sizeof_sym_hist;
-       src->nr_histograms   = nr_hists;
-       src->histograms      = calloc(nr_hists, sizeof_sym_hist) ;
        return src->histograms ? 0 : -1;
 }
 
@@ -910,7 +918,8 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
        annotation__lock(notes);
        if (notes->src != NULL) {
                memset(notes->src->histograms, 0,
-                      notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+                      notes->src->nr_histograms * sizeof(*notes->src->histograms));
+               hashmap__clear(notes->src->samples);
        }
        if (notes->branch && notes->branch->cycles_hist) {
                memset(notes->branch->cycles_hist, 0,
@@ -974,8 +983,10 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms,
                                      struct perf_sample *sample)
 {
        struct symbol *sym = ms->sym;
-       unsigned offset;
+       long hash_key;
+       u64 offset;
        struct sym_hist *h;
+       struct sym_hist_entry *entry;
 
        pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map__unmap_ip(ms->map, addr));
 
@@ -993,15 +1004,26 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms,
                         __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC);
                return -ENOMEM;
        }
+
+       hash_key = offset << 16 | evidx;
+       if (!hashmap__find(src->samples, hash_key, &entry)) {
+               entry = zalloc(sizeof(*entry));
+               if (entry == NULL)
+                       return -ENOMEM;
+
+               if (hashmap__add(src->samples, hash_key, entry) < 0)
+                       return -ENOMEM;
+       }
+
        h->nr_samples++;
-       h->addr[offset].nr_samples++;
        h->period += sample->period;
-       h->addr[offset].period += sample->period;
+       entry->nr_samples++;
+       entry->period += sample->period;
 
        pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
                  ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n",
                  sym->start, sym->name, addr, addr - sym->start, evidx,
-                 h->addr[offset].nr_samples, h->addr[offset].period);
+                 entry->nr_samples, entry->period);
        return 0;
 }
 
@@ -1047,8 +1069,7 @@ struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists)
 
        if (notes->src->histograms == NULL) {
 alloc_histograms:
-               annotated_source__alloc_histograms(notes->src, symbol__size(sym),
-                                                  nr_hists);
+               annotated_source__alloc_histograms(notes->src, nr_hists);
        }
 
        return notes->src;
@@ -2321,17 +2342,25 @@ out_remove_tmp:
        return err;
 }
 
-static void calc_percent(struct sym_hist *sym_hist,
-                        struct hists *hists,
+static void calc_percent(struct annotation *notes,
+                        struct evsel *evsel,
                         struct annotation_data *data,
                         s64 offset, s64 end)
 {
+       struct hists *hists = evsel__hists(evsel);
+       int evidx = evsel->core.idx;
+       struct sym_hist *sym_hist = annotation__histogram(notes, evidx);
        unsigned int hits = 0;
        u64 period = 0;
 
        while (offset < end) {
-               hits   += sym_hist->addr[offset].nr_samples;
-               period += sym_hist->addr[offset].period;
+               struct sym_hist_entry *entry;
+
+               entry = annotated_source__hist_entry(notes->src, evidx, offset);
+               if (entry) {
+                       hits   += entry->nr_samples;
+                       period += entry->period;
+               }
                ++offset;
        }
 
@@ -2368,16 +2397,13 @@ static void annotation__calc_percent(struct annotation *notes,
                end  = next ? next->offset : len;
 
                for_each_group_evsel(evsel, leader) {
-                       struct hists *hists = evsel__hists(evsel);
                        struct annotation_data *data;
-                       struct sym_hist *sym_hist;
 
                        BUG_ON(i >= al->data_nr);
 
-                       sym_hist = annotation__histogram(notes, evsel->core.idx);
                        data = &al->data[i++];
 
-                       calc_percent(sym_hist, hists, data, al->offset, end);
+                       calc_percent(notes, evsel, data, al->offset, end);
                }
        }
 }
@@ -2572,14 +2598,19 @@ static void print_summary(struct rb_root *root, const char *filename)
 
 static void symbol__annotate_hits(struct symbol *sym, struct evsel *evsel)
 {
+       int evidx = evsel->core.idx;
        struct annotation *notes = symbol__annotation(sym);
-       struct sym_hist *h = annotation__histogram(notes, evsel->core.idx);
+       struct sym_hist *h = annotation__histogram(notes, evidx);
        u64 len = symbol__size(sym), offset;
 
-       for (offset = 0; offset < len; ++offset)
-               if (h->addr[offset].nr_samples != 0)
+       for (offset = 0; offset < len; ++offset) {
+               struct sym_hist_entry *entry;
+
+               entry = annotated_source__hist_entry(notes->src, evidx, offset);
+               if (entry && entry->nr_samples != 0)
                        printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
-                              sym->start + offset, h->addr[offset].nr_samples);
+                              sym->start + offset, entry->nr_samples);
+       }
        printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples);
 }
 
@@ -2797,7 +2828,7 @@ void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
        struct annotation *notes = symbol__annotation(sym);
        struct sym_hist *h = annotation__histogram(notes, evidx);
 
-       memset(h, 0, notes->src->sizeof_sym_hist);
+       memset(h, 0, sizeof(*notes->src->histograms) * notes->src->nr_histograms);
 }
 
 void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
@@ -2808,8 +2839,14 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
 
        h->nr_samples = 0;
        for (offset = 0; offset < len; ++offset) {
-               h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8;
-               h->nr_samples += h->addr[offset].nr_samples;
+               struct sym_hist_entry *entry;
+
+               entry = annotated_source__hist_entry(notes->src, evidx, offset);
+               if (entry == NULL)
+                       continue;
+
+               entry->nr_samples = entry->nr_samples * 7 / 8;
+               h->nr_samples += entry->nr_samples;
        }
 }
 
@@ -3563,8 +3600,22 @@ static int extract_reg_offset(struct arch *arch, const char *str,
        if (regname == NULL)
                return -1;
 
-       op_loc->reg = get_dwarf_regnum(regname, 0);
+       op_loc->reg1 = get_dwarf_regnum(regname, 0);
        free(regname);
+
+       /* Get the second register */
+       if (op_loc->multi_regs) {
+               p = strchr(p + 1, arch->objdump.register_char);
+               if (p == NULL)
+                       return -1;
+
+               regname = strdup(p);
+               if (regname == NULL)
+                       return -1;
+
+               op_loc->reg2 = get_dwarf_regnum(regname, 0);
+               free(regname);
+       }
        return 0;
 }
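
To illustrate the parsing above: with the x86 objdump register character '%' (taken from arch->objdump.register_char), an operand like "8(%rbx,%rcx,4)" yields the base register from the first search and, when multi_regs is set, the index register from the second. A sketch:

	const char *str = "8(%rbx,%rcx,4)";
	const char *p1  = strchr(str, '%');      /* "%rbx,%rcx,4)" -> reg1 = rbx */
	const char *p2  = strchr(p1 + 1, '%');   /* "%rcx,4)"      -> reg2 = rcx */
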
 
@@ -3577,14 +3628,20 @@ static int extract_reg_offset(struct arch *arch, const char *str,
  * Get detailed location info (register and offset) in the instruction.
  * It needs both source and target operand and whether it accesses a
  * memory location.  The offset field is meaningful only when the
- * corresponding mem flag is set.
+ * corresponding mem flag is set.  The reg2 field is meaningful only
+ * when multi_regs flag is set.
  *
  * Some examples on x86:
  *
- *   mov  (%rax), %rcx   # src_reg = rax, src_mem = 1, src_offset = 0
- *                       # dst_reg = rcx, dst_mem = 0
+ *   mov  (%rax), %rcx   # src_reg1 = rax, src_mem = 1, src_offset = 0
+ *                       # dst_reg1 = rcx, dst_mem = 0
+ *
+ *   mov  0x18, %r8      # src_reg1 = -1, src_mem = 0
+ *                       # dst_reg1 = r8, dst_mem = 0
  *
- *   mov  0x18, %r8      # src_reg = -1, dst_reg = r8
+ *   mov  %rsi, 8(%rbx,%rcx,4)  # src_reg1 = rsi, src_mem = 0, dst_multi_regs = 0
+ *                              # dst_reg1 = rbx, dst_reg2 = rcx, dst_mem = 1
+ *                              # dst_multi_regs = 1, dst_offset = 8
  */
 int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl,
                               struct annotated_insn_loc *loc)
@@ -3605,24 +3662,29 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl,
 
        for_each_insn_op_loc(loc, i, op_loc) {
                const char *insn_str = ops->source.raw;
+               bool multi_regs = ops->source.multi_regs;
 
-               if (i == INSN_OP_TARGET)
+               if (i == INSN_OP_TARGET) {
                        insn_str = ops->target.raw;
+                       multi_regs = ops->target.multi_regs;
+               }
 
                /* Invalidate the register by default */
-               op_loc->reg = -1;
+               op_loc->reg1 = -1;
+               op_loc->reg2 = -1;
 
                if (insn_str == NULL)
                        continue;
 
                if (strchr(insn_str, arch->objdump.memory_ref_char)) {
                        op_loc->mem_ref = true;
+                       op_loc->multi_regs = multi_regs;
                        extract_reg_offset(arch, insn_str, op_loc);
                } else {
                        char *s = strdup(insn_str);
 
                        if (s) {
-                               op_loc->reg = get_dwarf_regnum(s, 0);
+                               op_loc->reg1 = get_dwarf_regnum(s, 0);
                                free(s);
                        }
                }
@@ -3660,8 +3722,17 @@ static struct disasm_line *find_disasm_line(struct symbol *sym, u64 ip)
        notes = symbol__annotation(sym);
 
        list_for_each_entry(dl, &notes->src->source, al.node) {
-               if (sym->start + dl->al.offset == ip)
+               if (sym->start + dl->al.offset == ip) {
+                       /*
+                        * llvm-objdump places "lock" in a separate line and
+                        * in that case, we want to get the next line.
+                        */
+                       if (!strcmp(dl->ins.name, "lock") && *dl->ops.raw == '\0') {
+                               ip++;
+                               continue;
+                       }
                        return dl;
+               }
        }
        return NULL;
 }
@@ -3690,6 +3761,42 @@ static struct annotated_item_stat *annotate_data_stat(struct list_head *head,
        return istat;
 }
 
+static bool is_stack_operation(struct arch *arch, struct disasm_line *dl)
+{
+       if (arch__is(arch, "x86")) {
+               if (!strncmp(dl->ins.name, "push", 4) ||
+                   !strncmp(dl->ins.name, "pop", 3) ||
+                   !strncmp(dl->ins.name, "ret", 3))
+                       return true;
+       }
+
+       return false;
+}
+
+u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset,
+                       struct disasm_line *dl)
+{
+       struct annotation *notes;
+       struct disasm_line *next;
+       u64 addr;
+
+       notes = symbol__annotation(ms->sym);
+       /*
+        * PC-relative addressing starts from the next instruction address,
+        * but the IP is for the current instruction.  Since disasm_line
+        * doesn't have the instruction size, calculate it using the next
+        * disasm_line.  If it's the last one, we can use the symbol's end
+        * address directly.
+        */
+       if (&dl->al.node == notes->src->source.prev)
+               addr = ms->sym->end + offset;
+       else {
+               next = list_next_entry(dl, al.node);
+               addr = ip + (next->al.offset - dl->al.offset) + offset;
+       }
+       return map__rip_2objdump(ms->map, addr);
+}
+
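A worked example of the calculation above (a hypothetical x86 case): a 7-byte "mov 0x1234(%rip),%rax" at symbol offset 0x10 has its next disasm_line at offset 0x17, so:

	u64 insn_size = 0x17 - 0x10;              /* size from the next disasm_line */
	u64 addr      = ip + insn_size + 0x1234;  /* %rip points past the insn */

map__rip_2objdump() then converts the result to a file address so it matches what DWARF expects.
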
 /**
  * hist_entry__get_data_type - find data type for given hist entry
  * @he: hist entry
@@ -3709,7 +3816,9 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
        struct annotated_op_loc *op_loc;
        struct annotated_data_type *mem_type;
        struct annotated_item_stat *istat;
-       u64 ip = he->ip;
+       u64 ip = he->ip, addr = 0;
+       const char *var_name = NULL;
+       int var_offset;
        int i;
 
        ann_data_stat.total++;
@@ -3742,6 +3851,7 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
                return NULL;
        }
 
+retry:
        istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
        if (istat == NULL) {
                ann_data_stat.no_insn++;
@@ -3754,16 +3864,51 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
                return NULL;
        }
 
+       if (is_stack_operation(arch, dl)) {
+               istat->good++;
+               he->mem_type_off = 0;
+               return &stackop_type;
+       }
+
        for_each_insn_op_loc(&loc, i, op_loc) {
                if (!op_loc->mem_ref)
                        continue;
 
-               mem_type = find_data_type(ms, ip, op_loc->reg, op_loc->offset);
+               /* Recalculate IP because of LOCK prefix or insn fusion */
+               ip = ms->sym->start + dl->al.offset;
+
+               var_offset = op_loc->offset;
+
+               /* PC-relative addressing */
+               if (op_loc->reg1 == DWARF_REG_PC) {
+                       struct addr_location al;
+                       struct symbol *var;
+                       u64 map_addr;
+
+                       addr = annotate_calc_pcrel(ms, ip, op_loc->offset, dl);
+                       /* Kernel symbols might be relocated */
+                       map_addr = addr + map__reloc(ms->map);
+
+                       addr_location__init(&al);
+                       var = thread__find_symbol_fb(he->thread, he->cpumode,
+                                                    map_addr, &al);
+                       if (var) {
+                               var_name = var->name;
+                               /* Calculate type offset from the start of variable */
+                               var_offset = map_addr - map__unmap_ip(al.map, var->start);
+                       }
+                       addr_location__exit(&al);
+               }
+
+               mem_type = find_data_type(ms, ip, op_loc, addr, var_name);
                if (mem_type)
                        istat->good++;
                else
                        istat->bad++;
 
+               if (mem_type && var_name)
+                       op_loc->offset = var_offset;
+
                if (symbol_conf.annotate_data_sample) {
                        annotated_data_type__update_samples(mem_type, evsel,
                                                            op_loc->offset,
@@ -3774,6 +3919,20 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
                return mem_type;
        }
 
+       /*
+        * Some instructions can be fused, and the actual memory access may
+        * come from the previous instruction.
+        */
+       if (dl->al.offset > 0) {
+               struct disasm_line *prev_dl;
+
+               prev_dl = list_prev_entry(dl, al.node);
+               if (ins__is_fused(arch, prev_dl->ins.name, dl->ins.name)) {
+                       dl = prev_dl;
+                       goto retry;
+               }
+       }
+
        ann_data_stat.no_mem_ops++;
        istat->bad++;
        return NULL;
index dba50762c6e807198880909a7a058e78bc7f9e21..13cc659e508c7958618d92f7d2819c2191a11963 100644 (file)
@@ -12,6 +12,7 @@
 #include "symbol_conf.h"
 #include "mutex.h"
 #include "spark.h"
+#include "hashmap.h"
 
 struct hist_browser_timer;
 struct hist_entry;
@@ -238,12 +239,42 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r
 size_t disasm__fprintf(struct list_head *head, FILE *fp);
 void symbol__calc_percent(struct symbol *sym, struct evsel *evsel);
 
+/**
+ * struct sym_hist - symbol histogram information for an event
+ *
+ * @nr_samples: Total number of samples.
+ * @period: Sum of sample periods.
+ */
 struct sym_hist {
        u64                   nr_samples;
        u64                   period;
-       struct sym_hist_entry addr[];
 };
 
+/**
+ * struct cyc_hist - (CPU) cycle histogram for a basic block
+ *
+ * @start: Start address of current block (if known).
+ * @cycles: Sum of cycles for the longest basic block.
+ * @cycles_aggr: Total cycles for this address.
+ * @cycles_max: Max cycles for this address.
+ * @cycles_min: Min cycles for this address.
+ * @cycles_spark: History of cycles for the longest basic block.
+ * @num: Number of samples for the longest basic block.
+ * @num_aggr: Total number of samples for this address.
+ * @have_start: Whether the current branch info has a start address.
+ * @reset: Number of resets due to a different start address.
+ *
+ * If the sample has branch_stack and cycles info, it can construct basic blocks
+ * between two adjacent branches.  It'd have start and end addresses but
+ * sometimes the start address may not be available.  So the cycles are
+ * accounted at the end address.  If multiple basic blocks end at the same
+ * address, it will take the longest one.
+ *
+ * The @start, @cycles, @cycles_spark and @num fields are used for the longest
+ * block only.  Other fields are used for all cases.
+ *
+ * See __symbol__account_cycles().
+ */
 struct cyc_hist {
        u64     start;
        u64     cycles;
@@ -258,18 +289,24 @@ struct cyc_hist {
        u16     reset;
 };
 
-/** struct annotated_source - symbols with hits have this attached as in sannotation
+/**
+ * struct annotated_source - symbols with hits have this attached in their annotation
  *
- * @histograms: Array of addr hit histograms per event being monitored
- * nr_histograms: This may not be the same as evsel->evlist->core.nr_entries if
+ * @source: List head for annotated_line (embedded in disasm_line).
+ * @histograms: Array of symbol histograms per event to maintain the total number
+ *             of samples and period.
+ * @nr_histograms: This may not be the same as evsel->evlist->core.nr_entries if
  *               we have more than a group in a evlist, where we will want
  *               to see each group separately, that is why symbol__annotate2()
  *               sets src->nr_histograms to evsel->nr_members.
- * @lines: If 'print_lines' is specified, per source code line percentages
- * @source: source parsed from a disassembler like objdump -dS
- * @cyc_hist: Average cycles per basic block
+ * @offsets: Array of annotation_line to be accessed by offset.
+ * @samples: Hash map of sym_hist_entry.  Keyed by event index and offset in symbol.
+ * @nr_entries: Number of annotated_line in the source list.
+ * @nr_asm_entries: Number of annotated_line with actual asm instruction in the
+ *                 source list.
+ * @max_line_len: Maximum length of objdump output in an annotated_line.
  *
- * lines is allocated, percentages calculated and all sorted by percentage
+ * disasm_lines are allocated, percentages calculated and all sorted by percentage
  * when the annotation is about to be presented, so the percentages are for
  * one of the entries in the histogram array, i.e. for the event/counter being
  * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate
@@ -277,15 +314,33 @@ struct cyc_hist {
  */
 struct annotated_source {
        struct list_head        source;
-       size_t                  sizeof_sym_hist;
        struct sym_hist         *histograms;
        struct annotation_line  **offsets;
+       struct hashmap          *samples;
        int                     nr_histograms;
        int                     nr_entries;
        int                     nr_asm_entries;
        u16                     max_line_len;
 };
 
+/**
+ * struct annotated_branch - basic block and IPC information for a symbol.
+ *
+ * @hit_cycles: Total executed cycles.
+ * @hit_insn: Total number of instructions executed.
+ * @total_insn: Number of instructions in the function.
+ * @cover_insn: Number of distinct, actually executed instructions.
+ * @cycles_hist: Array of cyc_hist for each instruction.
+ * @max_coverage: Maximum number of covered basic blocks (used for block-range).
+ *
+ * This struct is used in two different places when the sample has branch stack
+ * and cycles information.  annotation__compute_ipc() calculates the average
+ * IPC using @hit_insn / @hit_cycles.  The actual coverage can be calculated
+ * using @cover_insn / @total_insn.  The @cycles_hist can give the IPC for each
+ * (longest) basic block ending at the given address.
+ * process_basic_block() calculates coverage of instructions (or basic blocks)
+ * in the function.
+ */
 struct annotated_branch {
        u64                     hit_cycles;
        u64                     hit_insn;
@@ -346,7 +401,7 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m
 
 static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
 {
-       return ((void *)src->histograms) + (src->sizeof_sym_hist * idx);
+       return &src->histograms[idx];
 }
 
 static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
@@ -354,6 +409,17 @@ static inline struct sym_hist *annotation__histogram(struct annotation *notes, i
        return annotated_source__histogram(notes->src, idx);
 }
 
+static inline struct sym_hist_entry *
+annotated_source__hist_entry(struct annotated_source *src, int idx, u64 offset)
+{
+       struct sym_hist_entry *entry;
+       long key = offset << 16 | idx;
+
+       if (!hashmap__find(src->samples, key, &entry))
+               return NULL;
+       return entry;
+}
+
 static inline struct annotation *symbol__annotation(struct symbol *sym)
 {
        return (void *)sym - symbol_conf.priv_size;
@@ -442,14 +508,18 @@ int annotate_check_args(void);
 
 /**
  * struct annotated_op_loc - Location info of instruction operand
- * @reg: Register in the operand
+ * @reg1: First register in the operand
+ * @reg2: Second register in the operand
  * @offset: Memory access offset in the operand
  * @mem_ref: Whether the operand accesses memory
+ * @multi_regs: Whether the second register is used
  */
 struct annotated_op_loc {
-       int reg;
+       int reg1;
+       int reg2;
        int offset;
        bool mem_ref;
+       bool multi_regs;
 };
 
 enum annotated_insn_ops {
@@ -487,4 +557,8 @@ struct annotated_item_stat {
 };
 extern struct list_head ann_insn_stat;
 
+/* Calculate PC-relative address */
+u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset,
+                       struct disasm_line *dl);
+
 #endif /* __PERF_ANNOTATE_H */
index 3573e0b7ef3eda83ba635868f303bfd30df7153e..83709146a48ac8f0b1cdada5e5cd1199096e9559 100644 (file)
@@ -63,6 +63,7 @@ static int machine__process_bpf_event_load(struct machine *machine,
                        dso->bpf_prog.id = id;
                        dso->bpf_prog.sub_id = i;
                        dso->bpf_prog.env = env;
+                       map__put(map);
                }
        }
        return 0;
index 31ff19afc20c1b857a4397185926007dacb75e71..b4cb3fe5cc25486560506625b3ba4d24bab1baae 100644 (file)
@@ -179,6 +179,123 @@ int lock_contention_prepare(struct lock_contention *con)
        return 0;
 }
 
+/*
+ * Run the BPF program directly using BPF_PROG_TEST_RUN to update the end
+ * timestamp in ktime so that the deltas can be calculated easily.
+ */
+static void mark_end_timestamp(void)
+{
+       DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+               .flags = BPF_F_TEST_RUN_ON_CPU,
+       );
+       int prog_fd = bpf_program__fd(skel->progs.end_timestamp);
+
+       bpf_prog_test_run_opts(prog_fd, &opts);
+}
+
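Since bpf_prog_test_run_opts() runs the program synchronously, end_ts in the skeleton's BSS is valid as soon as mark_end_timestamp() returns; the program itself (end_timestamp, added to lock_contention.bpf.c below) just stores bpf_ktime_get_ns(). A usage sketch:

	mark_end_timestamp();
	pr_debug("contention end timestamp: %llu\n",
		 (unsigned long long)skel->bss->end_ts);
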
+static void update_lock_stat(int map_fd, int pid, u64 end_ts,
+                            enum lock_aggr_mode aggr_mode,
+                            struct tstamp_data *ts_data)
+{
+       u64 delta;
+       struct contention_key stat_key = {};
+       struct contention_data stat_data;
+
+       if (ts_data->timestamp >= end_ts)
+               return;
+
+       delta = end_ts - ts_data->timestamp;
+
+       switch (aggr_mode) {
+       case LOCK_AGGR_CALLER:
+               stat_key.stack_id = ts_data->stack_id;
+               break;
+       case LOCK_AGGR_TASK:
+               stat_key.pid = pid;
+               break;
+       case LOCK_AGGR_ADDR:
+               stat_key.lock_addr_or_cgroup = ts_data->lock;
+               break;
+       case LOCK_AGGR_CGROUP:
+               /* TODO */
+               return;
+       default:
+               return;
+       }
+
+       if (bpf_map_lookup_elem(map_fd, &stat_key, &stat_data) < 0)
+               return;
+
+       stat_data.total_time += delta;
+       stat_data.count++;
+
+       if (delta > stat_data.max_time)
+               stat_data.max_time = delta;
+       if (delta < stat_data.min_time)
+               stat_data.min_time = delta;
+
+       bpf_map_update_elem(map_fd, &stat_key, &stat_data, BPF_EXIST);
+}
+
+/*
+ * Account entries in the tstamp map (which didn't see the corresponding
+ * lock:contention_end tracepoint) using end_ts.
+ */
+static void account_end_timestamp(struct lock_contention *con)
+{
+       int ts_fd, stat_fd;
+       int *prev_key, key;
+       u64 end_ts = skel->bss->end_ts;
+       int total_cpus;
+       enum lock_aggr_mode aggr_mode = con->aggr_mode;
+       struct tstamp_data ts_data, *cpu_data;
+
+       /* Iterate per-task tstamp map (key = TID) */
+       ts_fd = bpf_map__fd(skel->maps.tstamp);
+       stat_fd = bpf_map__fd(skel->maps.lock_stat);
+
+       prev_key = NULL;
+       while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) {
+               if (bpf_map_lookup_elem(ts_fd, &key, &ts_data) == 0) {
+                       int pid = key;
+
+                       if (aggr_mode == LOCK_AGGR_TASK && con->owner)
+                               pid = ts_data.flags;
+
+                       update_lock_stat(stat_fd, pid, end_ts, aggr_mode,
+                                        &ts_data);
+               }
+
+               prev_key = &key;
+       }
+
+       /* Now check the per-cpu tstamp map, which doesn't have a TID. */
+       if (aggr_mode == LOCK_AGGR_TASK || aggr_mode == LOCK_AGGR_CGROUP)
+               return;
+
+       total_cpus = cpu__max_cpu().cpu;
+       ts_fd = bpf_map__fd(skel->maps.tstamp_cpu);
+
+       cpu_data = calloc(total_cpus, sizeof(*cpu_data));
+       if (cpu_data == NULL)
+               return;
+
+       prev_key = NULL;
+       while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) {
+               if (bpf_map_lookup_elem(ts_fd, &key, cpu_data) < 0)
+                       goto next;
+
+               for (int i = 0; i < total_cpus; i++) {
+                       update_lock_stat(stat_fd, -1, end_ts, aggr_mode,
+                                        &cpu_data[i]);
+               }
+
+next:
+               prev_key = &key;
+       }
+       free(cpu_data);
+}
+
 int lock_contention_start(void)
 {
        skel->bss->enabled = 1;
@@ -188,6 +305,7 @@ int lock_contention_start(void)
 int lock_contention_stop(void)
 {
        skel->bss->enabled = 0;
+       mark_end_timestamp();
        return 0;
 }
 
@@ -210,7 +328,7 @@ static const char *lock_contention_get_name(struct lock_contention *con,
 
                /* do not update idle comm which contains CPU number */
                if (pid) {
-                       struct thread *t = __machine__findnew_thread(machine, /*pid=*/-1, pid);
+                       struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid);
 
                        if (t == NULL)
                                return name;
@@ -301,8 +419,10 @@ int lock_contention_read(struct lock_contention *con)
        if (stack_trace == NULL)
                return -1;
 
+       account_end_timestamp(con);
+
        if (con->aggr_mode == LOCK_AGGR_TASK) {
-               struct thread *idle = __machine__findnew_thread(machine,
+               struct thread *idle = machine__findnew_thread(machine,
                                                                /*pid=*/0,
                                                                /*tid=*/0);
                thread__set_comm(idle, "swapper", /*timestamp=*/0);
index 52c270330ae0d2f362aad1fa71f5ad5cc3c0b824..2872f9bc07850bd7ed34336d61c38fa7c485e84b 100644 (file)
@@ -6,7 +6,7 @@
  * payload expected by the 'perf trace' beautifiers.
  */
 
-#include <linux/bpf.h>
+#include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <linux/limits.h>
 
 
 #define MAX_CPUS  4096
 
-// FIXME: These should come from system headers
-#ifndef bool
-typedef char bool;
-#endif
-typedef int pid_t;
-typedef long long int __s64;
-typedef __s64 time64_t;
-
-struct timespec64 {
-       time64_t        tv_sec;
-       long int        tv_nsec;
-};
-
 /* bpf-output associated map */
 struct __augmented_syscalls__ {
        __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
index 95cd8414f6ef85042885f9404097685dc940dc61..fb54bd38e7d0947cd1e1acef59977d46b0082028 100644 (file)
 #define LCB_F_PERCPU   (1U << 4)
 #define LCB_F_MUTEX    (1U << 5)
 
-struct tstamp_data {
-       __u64 timestamp;
-       __u64 lock;
-       __u32 flags;
-       __s32 stack_id;
-};
-
 /* callstack storage  */
 struct {
        __uint(type, BPF_MAP_TYPE_STACK_TRACE);
@@ -140,6 +133,8 @@ int perf_subsys_id = -1;
 /* determine the key of lock stat */
 int aggr_mode;
 
+__u64 end_ts;
+
 /* error stat */
 int task_fail;
 int stack_fail;
@@ -559,4 +554,11 @@ int BPF_PROG(collect_lock_syms)
        return 0;
 }
 
+SEC("raw_tp/bpf_test_finish")
+int BPF_PROG(end_timestamp)
+{
+       end_ts = bpf_ktime_get_ns();
+       return 0;
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
index 08482daf61beef1e2c55df5756a64464cddb8005..36af11faad03c19a9a198c99e0620d6d93f77de5 100644 (file)
@@ -3,6 +3,13 @@
 #ifndef UTIL_BPF_SKEL_LOCK_DATA_H
 #define UTIL_BPF_SKEL_LOCK_DATA_H
 
+struct tstamp_data {
+       u64 timestamp;
+       u64 lock;
+       u32 flags;
+       u32 stack_id;
+};
+
 struct contention_key {
        u32 stack_id;
        u32 pid;
index ab84a6e1da5eedb3f6877c0f83212cba3c55c1e4..e9028235d7717b59dcab75014dba7a6c16022760 100644 (file)
@@ -20,6 +20,13 @@ typedef __s64 s64;
 
 typedef int pid_t;
 
+typedef __s64 time64_t;
+
+struct timespec64 {
+        time64_t        tv_sec;
+        long int        tv_nsec;
+};
+
 enum cgroup_subsys_id {
        perf_event_cgrp_id  = 8,
 };
index 8262f69118dbbd6ee13ad32855886a49d23f4ab6..7517d16c02ec976adb3387f5a75bac71bf812934 100644 (file)
@@ -1157,7 +1157,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
                if (al->map == NULL)
                        goto out;
        }
-       if (RC_CHK_EQUAL(al->maps, machine__kernel_maps(machine))) {
+       if (maps__equal(al->maps, machine__kernel_maps(machine))) {
                if (machine__is_host(machine)) {
                        al->cpumode = PERF_RECORD_MISC_KERNEL;
                        al->level = 'k';
index 0581ee0fa5f270b4eb6fa4ae5776afb056804099..356e30c42cd838301809b05aebe49083ae547a67 100644 (file)
@@ -222,6 +222,8 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
                return a->socket - b->socket;
        else if (a->die != b->die)
                return a->die - b->die;
+       else if (a->cluster != b->cluster)
+               return a->cluster - b->cluster;
        else if (a->cache_lvl != b->cache_lvl)
                return a->cache_lvl - b->cache_lvl;
        else if (a->cache != b->cache)
@@ -309,6 +311,30 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
        return id;
 }
 
+int cpu__get_cluster_id(struct perf_cpu cpu)
+{
+       int value, ret = cpu__get_topology_int(cpu.cpu, "cluster_id", &value);
+
+       return ret ?: value;
+}
+
+struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data)
+{
+       int cluster = cpu__get_cluster_id(cpu);
+       struct aggr_cpu_id id;
+
+       /* There is no cluster_id on legacy systems. */
+       if (cluster == -1)
+               cluster = 0;
+
+       id = aggr_cpu_id__die(cpu, data);
+       if (aggr_cpu_id__is_empty(&id))
+               return id;
+
+       id.cluster = cluster;
+       return id;
+}
+
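cpu__get_cluster_id() reads the value through cpu__get_topology_int(). A standalone sketch of what that boils down to (error handling elided; the sysfs path is the one documented in the header change below):

	/* e.g. /sys/devices/system/cpu/cpu0/topology/cluster_id */
	int cluster = -1;
	FILE *fp = fopen("/sys/devices/system/cpu/cpu0/topology/cluster_id", "r");

	if (fp) {
		if (fscanf(fp, "%d", &cluster) != 1)
			cluster = -1;    /* legacy systems lack cluster_id */
		fclose(fp);
	}
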
 int cpu__get_core_id(struct perf_cpu cpu)
 {
        int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value);
@@ -320,8 +346,8 @@ struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data)
        struct aggr_cpu_id id;
        int core = cpu__get_core_id(cpu);
 
-       /* aggr_cpu_id__die returns a struct with socket and die set. */
-       id = aggr_cpu_id__die(cpu, data);
+       /* aggr_cpu_id__cluster returns a struct with socket, die and cluster set. */
+       id = aggr_cpu_id__cluster(cpu, data);
        if (aggr_cpu_id__is_empty(&id))
                return id;
 
@@ -683,6 +709,7 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b
                a->node == b->node &&
                a->socket == b->socket &&
                a->die == b->die &&
+               a->cluster == b->cluster &&
                a->cache_lvl == b->cache_lvl &&
                a->cache == b->cache &&
                a->core == b->core &&
@@ -695,6 +722,7 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
                a->node == -1 &&
                a->socket == -1 &&
                a->die == -1 &&
+               a->cluster == -1 &&
                a->cache_lvl == -1 &&
                a->cache == -1 &&
                a->core == -1 &&
@@ -708,6 +736,7 @@ struct aggr_cpu_id aggr_cpu_id__empty(void)
                .node = -1,
                .socket = -1,
                .die = -1,
+               .cluster = -1,
                .cache_lvl = -1,
                .cache = -1,
                .core = -1,
index 9df2aeb34d3d421374e29fc8f4b1571676322e10..26cf76c693f5ea3e6861f727126f731f92e33525 100644 (file)
@@ -20,6 +20,8 @@ struct aggr_cpu_id {
        int socket;
        /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */
        int die;
+       /** The cluster id as read from /sys/devices/system/cpu/cpuX/topology/cluster_id. */
+       int cluster;
        /** The cache level as read from /sys/devices/system/cpu/cpuX/cache/indexY/level */
        int cache_lvl;
        /**
@@ -86,6 +88,11 @@ int cpu__get_socket_id(struct perf_cpu cpu);
  * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU.
  */
 int cpu__get_die_id(struct perf_cpu cpu);
+/**
+ * cpu__get_cluster_id - Returns the cluster id as read from
+ * /sys/devices/system/cpu/cpuX/topology/cluster_id for the given CPU
+ */
+int cpu__get_cluster_id(struct perf_cpu cpu);
 /**
  * cpu__get_core_id - Returns the core id as read from
  * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU.
@@ -127,9 +134,15 @@ struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data);
  */
 struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data);
 /**
- * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket
- * populated with the core, die and socket for cpu. The function signature is
- * compatible with aggr_cpu_id_get_t.
+ * aggr_cpu_id__cluster - Create an aggr_cpu_id with cluster, die and socket
+ * populated with the cluster, die and socket for cpu. The function signature
+ * is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the core, cluster, die and
+ * socket populated with the core, cluster, die and socket for cpu. The function
+ * signature is compatible with aggr_cpu_id_get_t.
  */
 struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data);
 /**
index 5bb3c2ba95ca29545f595228a18deb88aba9253c..09d57efd2d9db4142fdea2c4330dc1f716153a1f 100644 (file)
@@ -284,7 +284,9 @@ static void output_headers(struct perf_session *session, struct convert_json *c)
        output_json_key_string(out, true, 2, "os-release", header->env.os_release);
        output_json_key_string(out, true, 2, "arch", header->env.arch);
 
-       output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc);
+       if (header->env.cpu_desc)
+               output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc);
+
        output_json_key_string(out, true, 2, "cpuid", header->env.cpuid);
        output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online);
        output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail);
index c29d8a382b1963db452f60aba6509d47fd6436aa..08c4bfbd817f2b0b18c8b8d3d37193e671ce69d5 100644 (file)
@@ -413,7 +413,7 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
 }
 
 ssize_t perf_data__write(struct perf_data *data,
-                             void *buf, size_t size)
+                        void *buf, size_t size)
 {
        if (data->use_stdio) {
                if (fwrite(buf, size, 1, data->file.fptr) == 1)
@@ -424,14 +424,12 @@ ssize_t perf_data__write(struct perf_data *data,
 }
 
 int perf_data__switch(struct perf_data *data,
-                          const char *postfix,
-                          size_t pos, bool at_exit,
-                          char **new_filepath)
+                     const char *postfix,
+                     size_t pos, bool at_exit,
+                     char **new_filepath)
 {
        int ret;
 
-       if (check_pipe(data))
-               return -EINVAL;
        if (perf_data__is_read(data))
                return -EINVAL;
 
index effcc195d7e9c0404cb6a90db236a4cf1761c6d7..110f3ebde30fdb4611e9ca813285d87eb857d606 100644 (file)
@@ -80,7 +80,7 @@ int perf_data__open(struct perf_data *data);
 void perf_data__close(struct perf_data *data);
 ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size);
 ssize_t perf_data__write(struct perf_data *data,
-                             void *buf, size_t size);
+                        void *buf, size_t size);
 ssize_t perf_data_file__write(struct perf_data_file *file,
                              void *buf, size_t size);
 /*
@@ -91,8 +91,8 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
  * Return value is fd of new output.
  */
 int perf_data__switch(struct perf_data *data,
-                          const char *postfix,
-                          size_t pos, bool at_exit, char **new_filepath);
+                     const char *postfix,
+                     size_t pos, bool at_exit, char **new_filepath);
 
 int perf_data__create_dir(struct perf_data *data, int nr);
 int perf_data__open_dir(struct perf_data *data);
index e282b4ceb4d25fd560a972acb50689010bd01a85..c39ee0fcb8cf1d158f635b134dfd930d8d0ccca8 100644 (file)
@@ -33,6 +33,7 @@
 #endif
 
 int verbose;
+int debug_kmaps;
 int debug_peo_args;
 bool dump_trace = false, quiet = false;
 int debug_ordered_events;
@@ -229,6 +230,7 @@ static struct sublevel_option debug_opts[] = {
        { .name = "stderr",             .value_ptr = &redirect_to_stderr},
        { .name = "data-convert",       .value_ptr = &debug_data_convert },
        { .name = "perf-event-open",    .value_ptr = &debug_peo_args },
+       { .name = "kmaps",              .value_ptr = &debug_kmaps },
        { .name = NULL, }
 };
 
@@ -267,6 +269,7 @@ int perf_quiet_option(void)
        /* For debug variables that are used as bool types, set to 0. */
        redirect_to_stderr = 0;
        debug_peo_args = 0;
+       debug_kmaps = 0;
 
        return 0;
 }
index de8870980d44abc3f4a52add52affbdaefe11448..35a7a5ae762e6f5be3da0265b6e542c5ec4a8969 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/compiler.h>
 
 extern int verbose;
+extern int debug_kmaps;
 extern int debug_peo_args;
 extern bool quiet, dump_trace;
 extern int debug_ordered_events;
index 7aa5fee0da1906a073ac9423305bf26deb569761..2791126069b4f8f7ef52816758c2e23c5bb1a246 100644 (file)
@@ -1272,11 +1272,67 @@ struct find_var_data {
        unsigned reg;
        /* Access offset, set for global data */
        int offset;
+       /* True if the current register is the frame base */
+       bool is_fbreg;
 };
 
 /* Max number of registers DW_OP_regN supports */
 #define DWARF_OP_DIRECT_REGS  32
 
+static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data,
+                            u64 addr_offset, u64 addr_type)
+{
+       Dwarf_Die type_die;
+       Dwarf_Word size;
+
+       if (addr_offset == addr_type) {
+               /* Update offset relative to the start of the variable */
+               data->offset = 0;
+               return true;
+       }
+
+       if (die_get_real_type(die_mem, &type_die) == NULL)
+               return false;
+
+       if (dwarf_aggregate_size(&type_die, &size) < 0)
+               return false;
+
+       if (addr_offset >= addr_type + size)
+               return false;
+
+       /* Update offset relative to the start of the variable */
+       data->offset = addr_offset - addr_type;
+       return true;
+}
+
+static bool check_allowed_ops(Dwarf_Op *ops, size_t nops)
+{
+       /* The first op is checked separately */
+       ops++;
+       nops--;
+
+       /*
+        * It needs to make sure the location expression matches the given
+        * register and offset exactly.  Thus it rejects any complex expressions
+        * and only allows a few selected operators that don't change the
+        * location.
+        */
+       while (nops) {
+               switch (ops->atom) {
+               case DW_OP_stack_value:
+               case DW_OP_deref_size:
+               case DW_OP_deref:
+               case DW_OP_piece:
+                       break;
+               default:
+                       return false;
+               }
+               ops++;
+               nops--;
+       }
+       return true;
+}
+
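For example, a plain frame-base location such as "DW_OP_fbreg -16" is a single operation and always passes (only trailing operators are inspected), while a trailing arithmetic operator would be rejected because it moves the computed location. A sketch using the elfutils Dwarf_Op layout:

	Dwarf_Op fbreg_only[] = {
		{ .atom = DW_OP_fbreg, .number = (Dwarf_Word)-16 },
	};
	check_allowed_ops(fbreg_only, 1);    /* true */

	Dwarf_Op fbreg_plus[] = {
		{ .atom = DW_OP_fbreg,       .number = (Dwarf_Word)-16 },
		{ .atom = DW_OP_plus_uconst, .number = 8 },
	};
	check_allowed_ops(fbreg_plus, 2);    /* false: the location would move */
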
 /* Only checks direct child DIEs in the given scope. */
 static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
 {
@@ -1301,13 +1357,35 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
                if (start > data->pc)
                        break;
 
+               /* Local variables accessed using frame base register */
+               if (data->is_fbreg && ops->atom == DW_OP_fbreg &&
+                   data->offset >= (int)ops->number &&
+                   check_allowed_ops(ops, nops) &&
+                   match_var_offset(die_mem, data, data->offset, ops->number))
+                       return DIE_FIND_CB_END;
+
                /* Only match with a simple case */
                if (data->reg < DWARF_OP_DIRECT_REGS) {
-                       if (ops->atom == (DW_OP_reg0 + data->reg) && nops == 1)
+                       /* pointer variables saved in a register 0 to 31 */
+                       if (ops->atom == (DW_OP_reg0 + data->reg) &&
+                           check_allowed_ops(ops, nops))
+                               return DIE_FIND_CB_END;
+
+                       /* Local variables accessed by a register + offset */
+                       if (ops->atom == (DW_OP_breg0 + data->reg) &&
+                           check_allowed_ops(ops, nops) &&
+                           match_var_offset(die_mem, data, data->offset, ops->number))
                                return DIE_FIND_CB_END;
                } else {
+                       /* pointer variables saved in a register 32 or above */
                        if (ops->atom == DW_OP_regx && ops->number == data->reg &&
-                           nops == 1)
+                           check_allowed_ops(ops, nops))
+                               return DIE_FIND_CB_END;
+
+                       /* Local variables accessed by a register + offset */
+                       if (ops->atom == DW_OP_bregx && data->reg == ops->number &&
+                           check_allowed_ops(ops, nops) &&
+                           match_var_offset(die_mem, data, data->offset, ops->number2))
                                return DIE_FIND_CB_END;
                }
        }
@@ -1319,18 +1397,29 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg)
  * @sc_die: a scope DIE
  * @pc: the program address to find
  * @reg: the register number to find
+ * @poffset: pointer to the offset, updated in the frame-base (fbreg) case
+ * @is_fbreg: whether the current register is the frame base
  * @die_mem: a buffer to save the resulting DIE
  *
- * Find the variable DIE accessed by the given register.
+ * Find the variable DIE accessed by the given register.  It'll update @poffset
+ * when the variable is on the stack.
  */
 Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg,
+                                   int *poffset, bool is_fbreg,
                                    Dwarf_Die *die_mem)
 {
        struct find_var_data data = {
                .pc = pc,
                .reg = reg,
+               .offset = *poffset,
+               .is_fbreg = is_fbreg,
        };
-       return die_find_child(sc_die, __die_find_var_reg_cb, &data, die_mem);
+       Dwarf_Die *result;
+
+       result = die_find_child(sc_die, __die_find_var_reg_cb, &data, die_mem);
+       if (result)
+               *poffset = data.offset;
+       return result;
 }
 
 /* Only checks direct child DIEs in the given scope */
@@ -1341,8 +1430,6 @@ static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg)
        ptrdiff_t off = 0;
        Dwarf_Attribute attr;
        Dwarf_Addr base, start, end;
-       Dwarf_Word size;
-       Dwarf_Die type_die;
        Dwarf_Op *ops;
        size_t nops;
 
@@ -1359,24 +1446,9 @@ static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg)
                if (data->addr < ops->number)
                        continue;
 
-               if (data->addr == ops->number) {
-                       /* Update offset relative to the start of the variable */
-                       data->offset = 0;
+               if (check_allowed_ops(ops, nops) &&
+                   match_var_offset(die_mem, data, data->addr, ops->number))
                        return DIE_FIND_CB_END;
-               }
-
-               if (die_get_real_type(die_mem, &type_die) == NULL)
-                       continue;
-
-               if (dwarf_aggregate_size(&type_die, &size) < 0)
-                       continue;
-
-               if (data->addr >= ops->number + size)
-                       continue;
-
-               /* Update offset relative to the start of the variable */
-               data->offset = data->addr - ops->number;
-               return DIE_FIND_CB_END;
        }
        return DIE_FIND_CB_SIBLING;
 }
@@ -1407,7 +1479,74 @@ Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr pc,
                *offset = data.offset;
        return result;
 }
-#endif
+#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
+
+#ifdef HAVE_DWARF_CFI_SUPPORT
+static int reg_from_dwarf_op(Dwarf_Op *op)
+{
+       switch (op->atom) {
+       case DW_OP_reg0 ... DW_OP_reg31:
+               return op->atom - DW_OP_reg0;
+       case DW_OP_breg0 ... DW_OP_breg31:
+               return op->atom - DW_OP_breg0;
+       case DW_OP_regx:
+       case DW_OP_bregx:
+               return op->number;
+       default:
+               break;
+       }
+       return -1;
+}
+
+static int offset_from_dwarf_op(Dwarf_Op *op)
+{
+       switch (op->atom) {
+       case DW_OP_reg0 ... DW_OP_reg31:
+       case DW_OP_regx:
+               return 0;
+       case DW_OP_breg0 ... DW_OP_breg31:
+               return op->number;
+       case DW_OP_bregx:
+               return op->number2;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * die_get_cfa - Get frame base information
+ * @dwarf: a Dwarf info
+ * @pc: program address
+ * @preg: pointer for saved register
+ * @poffset: pointer for saved offset
+ *
+ * This function gets register and offset for CFA (Canonical Frame Address)
+ * by searching the CIE/FDE info.  The CFA usually points to the start address
+ * of the current stack frame and local variables can be located using an offset
+ * from the CFA.  The @preg and @poffset will be updated if it returns 0.
+ */
+int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset)
+{
+       Dwarf_CFI *cfi;
+       Dwarf_Frame *frame = NULL;
+       Dwarf_Op *ops = NULL;
+       size_t nops;
+
+       cfi = dwarf_getcfi(dwarf);
+       if (cfi == NULL)
+               return -1;
+
+       if (!dwarf_cfi_addrframe(cfi, pc, &frame) &&
+           !dwarf_frame_cfa(frame, &ops, &nops) &&
+           check_allowed_ops(ops, nops)) {
+               *preg = reg_from_dwarf_op(ops);
+               *poffset = offset_from_dwarf_op(ops);
+               return 0;
+       }
+       return -1;
+}
+#endif /* HAVE_DWARF_CFI_SUPPORT */
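
A hedged usage sketch for the CFA helper (hypothetical caller; dwarf and pc are
assumed to be at hand): the returned register/offset pair describes where the
frame base lives at pc.

        int reg, offset;

        if (die_get_cfa(dwarf, pc, &reg, &offset) == 0) {
                /* CFA = contents of reg + offset; stack variables can
                 * then be located as offsets relative to the CFA. */
                pr_debug("CFA: reg %d, offset %d\n", reg, offset);
        }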
 
 /*
  * die_has_loclist - Check if DW_AT_location of @vr_die is a location list
index 4e64caac6df83ea5ba292225894206b450d63222..85dd527ae1f70d9ba890456e3e81c200c995c87e 100644 (file)
@@ -142,6 +142,7 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
 
 /* Find a variable saved in the 'reg' at given address */
 Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg,
+                                   int *poffset, bool is_fbreg,
                                    Dwarf_Die *die_mem);
 
 /* Find a (global) variable located in the 'addr' */
@@ -161,6 +162,8 @@ static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused,
 static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused,
                                                  Dwarf_Addr pc __maybe_unused,
                                                  int reg __maybe_unused,
+                                                 int *poffset __maybe_unused,
+                                                 bool is_fbreg __maybe_unused,
                                                  Dwarf_Die *die_mem __maybe_unused)
 {
        return NULL;
@@ -177,4 +180,19 @@ static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unu
 
 #endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */
 
+#ifdef HAVE_DWARF_CFI_SUPPORT
+
+/* Get the frame base information from CFA */
+int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset);
+
+#else /* HAVE_DWARF_CFI_SUPPORT */
+
+static inline int die_get_cfa(Dwarf *dwarf __maybe_unused, u64 pc __maybe_unused,
+                             int *preg __maybe_unused, int *poffset __maybe_unused)
+{
+       return -1;
+}
+
+#endif /* HAVE_DWARF_CFI_SUPPORT */
+
 #endif /* _DWARF_AUX_H */
index 7c527e65c1864b524c8dfc1d844fac1f6e3ee1a7..2a2c37cc40b7828ef8a92624e1d885ed9a326fc9 100644 (file)
@@ -12,6 +12,7 @@ struct perf_cpu_map;
 struct cpu_topology_map {
        int     socket_id;
        int     die_id;
+       int     cluster_id;
        int     core_id;
 };
 
index 68f45e9e63b6e4f8fcdf6476dd0b2f9c3789dd3a..198903157f9e6f591894955a7c39ae4e0750a693 100644 (file)
@@ -511,7 +511,7 @@ size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *ma
                struct addr_location al;
 
                addr_location__init(&al);
-               al.map = map__get(maps__find(machine__kernel_maps(machine), tp->addr));
+               al.map = maps__find(machine__kernel_maps(machine), tp->addr);
                if (al.map && map__load(al.map) >= 0) {
                        al.addr = map__map_ip(al.map, tp->addr);
                        al.sym = map__find_symbol(al.map, al.addr);
@@ -641,7 +641,7 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
                return NULL;
        }
        al->maps = maps__get(maps);
-       al->map = map__get(maps__find(maps, al->addr));
+       al->map = maps__find(maps, al->addr);
        if (al->map != NULL) {
                /*
                 * Kernel maps might be changed when loading symbols so loading
index 6d7c9c58a9bcb8b7ed70e38286026cac16543163..3536404e9447bf65228b944c940eed8a9b127799 100644 (file)
@@ -2363,7 +2363,6 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
        data->period = evsel->core.attr.sample_period;
        data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
        data->misc    = event->header.misc;
-       data->id = -1ULL;
        data->data_src = PERF_MEM_DATA_SRC_NONE;
        data->vcpu = -1;
 
@@ -2851,6 +2850,39 @@ u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const
        return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
 }
 
+char evsel__taskstate(struct evsel *evsel, struct perf_sample *sample, const char *name)
+{
+       static struct tep_format_field *prev_state_field;
+       static const char *states;
+       struct tep_format_field *field;
+       unsigned long long val;
+       unsigned int bit;
+       char state = '?'; /* '?' denotes unknown task state */
+
+       field = evsel__field(evsel, name);
+
+       if (!field)
+               return state;
+
+       if (!states || field != prev_state_field) {
+               states = parse_task_states(field);
+               if (!states)
+                       return state;
+               prev_state_field = field;
+       }
+
+       /*
+	 * Note: since the kernel exposes TASK_REPORT_MAX to userspace
+	 * to denote the 'preempted' state, we might as well report
+	 * 'R' for this case, which makes sense to users as well.
+        *
+        * We can change this if we have a good reason in the future.
+        */
+       val = evsel__intval(evsel, sample, name);
+       bit = val ? ffs(val) : 0;
+       state = (!bit || bit > strlen(states)) ? 'R' : states[bit-1];
+       return state;
+}
 #endif
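
For illustration only (not part of the patch): a sched_switch handler could map
the raw state bits to the usual one-letter code, "prev_state" being the
conventional field name on that tracepoint.

        /* Returns 'R', 'S', 'D', ... or '?' when the field or its state
         * string table cannot be resolved. */
        char state = evsel__taskstate(evsel, sample, "prev_state");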
 
 bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
index efbb6e848287f3f6b4f9f0aca779b2a6590ec42f..517cff431de2002eee9cc4f279a9f3f0578cf7da 100644 (file)
@@ -339,6 +339,7 @@ struct perf_sample;
 void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name);
 u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name);
 u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const char *name);
+char evsel__taskstate(struct evsel *evsel, struct perf_sample *sample, const char *name);
 
 static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sample, const char *name)
 {
index 7be23b3ac082178aed0debbbc842be83f14f7a00..b8875aac8f8709039681dd4bfce26df9acea731c 100644 (file)
@@ -500,7 +500,25 @@ double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const
        tmp = evlist__new();
        if (!tmp)
                return NAN;
-       ret = parse_event(tmp, id) ? 0 : 1;
+
+       if (strchr(id, '@')) {
+               char *tmp_id, *p;
+
+               tmp_id = strdup(id);
+               if (!tmp_id) {
+                       ret = NAN;
+                       goto out;
+               }
+               p = strchr(tmp_id, '@');
+               *p = '/';
+               p = strrchr(tmp_id, '@');
+               *p = '/';
+               ret = parse_event(tmp, tmp_id) ? 0 : 1;
+               free(tmp_id);
+       } else {
+               ret = parse_event(tmp, id) ? 0 : 1;
+       }
+out:
        evlist__delete(tmp);
        return ret;
 }
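
Since '/' already means division in metric expressions, event modifiers are
spelled with '@' and translated back before parsing; only the first and last
'@' are rewritten. A hypothetical input:

        /* id as seen by expr__has_event():         "cpu@cycles@"
         * after the first/last '@' -> '/' rewrite: "cpu/cycles/"
         * which parse_event() accepts as an event specification. */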
index 0feef0726c48cbf4f8aa23d05aa5f5de1fc274de..a2fc43159ee9562c756bcb3545106f8078ff459b 100644 (file)
@@ -94,6 +94,14 @@ static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx)
        }
        return LITERAL;
 }
+
+static int nan_value(yyscan_t scanner)
+{
+       YYSTYPE *yylval = expr_get_lval(scanner);
+
+       yylval->num = NAN;
+       return NUMBER;
+}
 %}
 
 number         ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)(e-?[0-9]+)?
@@ -115,6 +123,7 @@ else                { return ELSE; }
 source_count   { return SOURCE_COUNT; }
 has_event      { return HAS_EVENT; }
 strcmp_cpuid_str       { return STRCMP_CPUID_STR; }
+NaN            { return nan_value(yyscanner); }
 {literal}      { return literal(yyscanner, sctx); }
 {number}       { return value(yyscanner); }
 {symbol}       { return str(yyscanner, ID, sctx->runtime); }
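
With the new rule a metric expression can mention NaN directly; a hypothetical
metric might guard a ratio like this (illustrative parser input, not from the
patch):

        cycles / instructions if has_event(instructions) else NaN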
index b397a769006f45ac1b7716f4efdb2147c86977ae..527517db31821ffa3d80c349c7433dcdc4f7cdfe 100644 (file)
@@ -43,9 +43,6 @@
 #include <linux/string.h>
 #include <linux/zalloc.h>
 
-static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
-                                    struct thread *th, bool lock);
-
 static struct dso *machine__kernel_dso(struct machine *machine)
 {
        return map__dso(machine->vmlinux_map);
@@ -58,35 +55,6 @@ static void dsos__init(struct dsos *dsos)
        init_rwsem(&dsos->lock);
 }
 
-static void machine__threads_init(struct machine *machine)
-{
-       int i;
-
-       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
-               struct threads *threads = &machine->threads[i];
-               threads->entries = RB_ROOT_CACHED;
-               init_rwsem(&threads->lock);
-               threads->nr = 0;
-               threads->last_match = NULL;
-       }
-}
-
-static int thread_rb_node__cmp_tid(const void *key, const struct rb_node *nd)
-{
-       int to_find = (int) *((pid_t *)key);
-
-       return to_find - (int)thread__tid(rb_entry(nd, struct thread_rb_node, rb_node)->thread);
-}
-
-static struct thread_rb_node *thread_rb_node__find(const struct thread *th,
-                                                  struct rb_root *tree)
-{
-       pid_t to_find = thread__tid(th);
-       struct rb_node *nd = rb_find(&to_find, tree, thread_rb_node__cmp_tid);
-
-       return rb_entry(nd, struct thread_rb_node, rb_node);
-}
-
 static int machine__set_mmap_name(struct machine *machine)
 {
        if (machine__is_host(machine))
@@ -120,7 +88,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
        RB_CLEAR_NODE(&machine->rb_node);
        dsos__init(&machine->dsos);
 
-       machine__threads_init(machine);
+       threads__init(&machine->threads);
 
        machine->vdso_info = NULL;
        machine->env = NULL;
@@ -221,27 +189,11 @@ static void dsos__exit(struct dsos *dsos)
 
 void machine__delete_threads(struct machine *machine)
 {
-       struct rb_node *nd;
-       int i;
-
-       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
-               struct threads *threads = &machine->threads[i];
-               down_write(&threads->lock);
-               nd = rb_first_cached(&threads->entries);
-               while (nd) {
-                       struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node);
-
-                       nd = rb_next(nd);
-                       __machine__remove_thread(machine, trb, trb->thread, false);
-               }
-               up_write(&threads->lock);
-       }
+       threads__remove_all_threads(&machine->threads);
 }
 
 void machine__exit(struct machine *machine)
 {
-       int i;
-
        if (machine == NULL)
                return;
 
@@ -254,12 +206,7 @@ void machine__exit(struct machine *machine)
        zfree(&machine->current_tid);
        zfree(&machine->kallsyms_filename);
 
-       machine__delete_threads(machine);
-       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
-               struct threads *threads = &machine->threads[i];
-
-               exit_rwsem(&threads->lock);
-       }
+       threads__exit(&machine->threads);
 }
 
 void machine__delete(struct machine *machine)
@@ -440,7 +387,7 @@ static struct thread *findnew_guest_code(struct machine *machine,
                return NULL;
 
        /* Assume maps are set up if there are any */
-       if (maps__nr_maps(thread__maps(thread)))
+       if (!maps__empty(thread__maps(thread)))
                return thread;
 
        host_thread = machine__find_thread(host_machine, -1, pid);
@@ -526,7 +473,7 @@ static void machine__update_thread_pid(struct machine *machine,
        if (thread__pid(th) == thread__tid(th))
                return;
 
-       leader = __machine__findnew_thread(machine, thread__pid(th), thread__pid(th));
+       leader = machine__findnew_thread(machine, thread__pid(th), thread__pid(th));
        if (!leader)
                goto out_err;
 
@@ -560,160 +507,56 @@ out_err:
        goto out_put;
 }
 
-/*
- * Front-end cache - TID lookups come in blocks,
- * so most of the time we dont have to look up
- * the full rbtree:
- */
-static struct thread*
-__threads__get_last_match(struct threads *threads, struct machine *machine,
-                         int pid, int tid)
-{
-       struct thread *th;
-
-       th = threads->last_match;
-       if (th != NULL) {
-               if (thread__tid(th) == tid) {
-                       machine__update_thread_pid(machine, th, pid);
-                       return thread__get(th);
-               }
-               thread__put(threads->last_match);
-               threads->last_match = NULL;
-       }
-
-       return NULL;
-}
-
-static struct thread*
-threads__get_last_match(struct threads *threads, struct machine *machine,
-                       int pid, int tid)
-{
-       struct thread *th = NULL;
-
-       if (perf_singlethreaded)
-               th = __threads__get_last_match(threads, machine, pid, tid);
-
-       return th;
-}
-
-static void
-__threads__set_last_match(struct threads *threads, struct thread *th)
-{
-       thread__put(threads->last_match);
-       threads->last_match = thread__get(th);
-}
-
-static void
-threads__set_last_match(struct threads *threads, struct thread *th)
-{
-       if (perf_singlethreaded)
-               __threads__set_last_match(threads, th);
-}
-
 /*
  * Caller must eventually drop thread->refcnt returned with a successful
  * lookup/new thread inserted.
  */
-static struct thread *____machine__findnew_thread(struct machine *machine,
-                                                 struct threads *threads,
-                                                 pid_t pid, pid_t tid,
-                                                 bool create)
+static struct thread *__machine__findnew_thread(struct machine *machine,
+                                               pid_t pid,
+                                               pid_t tid,
+                                               bool create)
 {
-       struct rb_node **p = &threads->entries.rb_root.rb_node;
-       struct rb_node *parent = NULL;
-       struct thread *th;
-       struct thread_rb_node *nd;
-       bool leftmost = true;
+       struct thread *th = threads__find(&machine->threads, tid);
+       bool created;
 
-       th = threads__get_last_match(threads, machine, pid, tid);
-       if (th)
+       if (th) {
+               machine__update_thread_pid(machine, th, pid);
                return th;
-
-       while (*p != NULL) {
-               parent = *p;
-               th = rb_entry(parent, struct thread_rb_node, rb_node)->thread;
-
-               if (thread__tid(th) == tid) {
-                       threads__set_last_match(threads, th);
-                       machine__update_thread_pid(machine, th, pid);
-                       return thread__get(th);
-               }
-
-               if (tid < thread__tid(th))
-                       p = &(*p)->rb_left;
-               else {
-                       p = &(*p)->rb_right;
-                       leftmost = false;
-               }
        }
-
        if (!create)
                return NULL;
 
-       th = thread__new(pid, tid);
-       if (th == NULL)
-               return NULL;
-
-       nd = malloc(sizeof(*nd));
-       if (nd == NULL) {
-               thread__put(th);
-               return NULL;
-       }
-       nd->thread = th;
-
-       rb_link_node(&nd->rb_node, parent, p);
-       rb_insert_color_cached(&nd->rb_node, &threads->entries, leftmost);
-       /*
-        * We have to initialize maps separately after rb tree is updated.
-        *
-        * The reason is that we call machine__findnew_thread within
-        * thread__init_maps to find the thread leader and that would screwed
-        * the rb tree.
-        */
-       if (thread__init_maps(th, machine)) {
-               pr_err("Thread init failed thread %d\n", pid);
-               rb_erase_cached(&nd->rb_node, &threads->entries);
-               RB_CLEAR_NODE(&nd->rb_node);
-               free(nd);
-               thread__put(th);
-               return NULL;
-       }
-       /*
-        * It is now in the rbtree, get a ref
-        */
-       threads__set_last_match(threads, th);
-       ++threads->nr;
-
-       return thread__get(th);
-}
+       th = threads__findnew(&machine->threads, pid, tid, &created);
+       if (created) {
+               /*
+                * We have to initialize maps separately after rb tree is
+                * updated.
+                *
+                * The reason is that we call machine__findnew_thread within
+		 * thread__init_maps to find the thread leader and that would
+		 * screw up the rb tree.
+                */
+               if (thread__init_maps(th, machine)) {
+                       pr_err("Thread init failed thread %d\n", pid);
+                       threads__remove(&machine->threads, th);
+                       thread__put(th);
+                       return NULL;
+               }
+	} else {
+		machine__update_thread_pid(machine, th, pid);
+	}
 
-struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
-{
-       return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
+       return th;
 }
 
-struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
-                                      pid_t tid)
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
 {
-       struct threads *threads = machine__threads(machine, tid);
-       struct thread *th;
-
-       down_write(&threads->lock);
-       th = __machine__findnew_thread(machine, pid, tid);
-       up_write(&threads->lock);
-       return th;
+       return __machine__findnew_thread(machine, pid, tid, /*create=*/true);
 }
 
 struct thread *machine__find_thread(struct machine *machine, pid_t pid,
                                    pid_t tid)
 {
-       struct threads *threads = machine__threads(machine, tid);
-       struct thread *th;
-
-       down_read(&threads->lock);
-       th =  ____machine__findnew_thread(machine, threads, pid, tid, false);
-       up_read(&threads->lock);
-       return th;
+       return __machine__findnew_thread(machine, pid, tid, /*create=*/false);
 }
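
A hedged usage sketch (hypothetical caller): both entry points now funnel into
the same helper, differing only in whether a missing thread is created, and the
returned reference must be dropped.

        /* Caller owns the returned reference and must drop it. */
        struct thread *th = machine__findnew_thread(machine, pid, tid);

        if (th != NULL) {
                /* ... use th ... */
                thread__put(th);
        }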
 
 /*
@@ -896,7 +739,6 @@ static int machine__process_ksymbol_register(struct machine *machine,
        struct symbol *sym;
        struct dso *dso;
        struct map *map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr);
-       bool put_map = false;
        int err = 0;
 
        if (!map) {
@@ -913,12 +755,6 @@ static int machine__process_ksymbol_register(struct machine *machine,
                        err = -ENOMEM;
                        goto out;
                }
-               /*
-                * The inserted map has a get on it, we need to put to release
-                * the reference count here, but do it after all accesses are
-                * done.
-                */
-               put_map = true;
                if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) {
                        dso->binary_type = DSO_BINARY_TYPE__OOL;
                        dso->data.file_size = event->ksymbol.len;
@@ -952,8 +788,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
        }
        dso__insert_symbol(dso, sym);
 out:
-       if (put_map)
-               map__put(map);
+       map__put(map);
        return err;
 }
 
@@ -977,7 +812,7 @@ static int machine__process_ksymbol_unregister(struct machine *machine,
                if (sym)
                        dso__delete_symbol(dso, sym);
        }
-
+       map__put(map);
        return 0;
 }
 
@@ -1005,11 +840,11 @@ int machine__process_text_poke(struct machine *machine, union perf_event *event,
                perf_event__fprintf_text_poke(event, machine, stdout);
 
        if (!event->text_poke.new_len)
-               return 0;
+               goto out;
 
        if (cpumode != PERF_RECORD_MISC_KERNEL) {
                pr_debug("%s: unsupported cpumode - ignoring\n", __func__);
-               return 0;
+               goto out;
        }
 
        if (dso) {
@@ -1032,7 +867,8 @@ int machine__process_text_poke(struct machine *machine, union perf_event *event,
                pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n",
                         event->text_poke.addr);
        }
-
+out:
+       map__put(map);
        return 0;
 }
 
@@ -1120,29 +956,30 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
        return printed;
 }
 
-size_t machine__fprintf(struct machine *machine, FILE *fp)
-{
-       struct rb_node *nd;
-       size_t ret;
-       int i;
-
-       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
-               struct threads *threads = &machine->threads[i];
-
-               down_read(&threads->lock);
+struct machine_fprintf_cb_args {
+       FILE *fp;
+       size_t printed;
+};
 
-               ret = fprintf(fp, "Threads: %u\n", threads->nr);
+static int machine_fprintf_cb(struct thread *thread, void *data)
+{
+       struct machine_fprintf_cb_args *args = data;
 
-               for (nd = rb_first_cached(&threads->entries); nd;
-                    nd = rb_next(nd)) {
-                       struct thread *pos = rb_entry(nd, struct thread_rb_node, rb_node)->thread;
+       /* TODO: handle fprintf errors. */
+       args->printed += thread__fprintf(thread, args->fp);
+       return 0;
+}
 
-                       ret += thread__fprintf(pos, fp);
-               }
+size_t machine__fprintf(struct machine *machine, FILE *fp)
+{
+       struct machine_fprintf_cb_args args = {
+               .fp = fp,
+               .printed = 0,
+       };
+       size_t ret = fprintf(fp, "Threads: %zu\n", threads__nr(&machine->threads));
 
-               up_read(&threads->lock);
-       }
-       return ret;
+       machine__for_each_thread(machine, machine_fprintf_cb, &args);
+       return ret + args.printed;
 }
 
 static struct dso *machine__get_kernel(struct machine *machine)
@@ -1300,9 +1137,10 @@ static int machine__map_x86_64_entry_trampolines_cb(struct map *map, void *data)
                return 0;
 
        dest_map = maps__find(args->kmaps, map__pgoff(map));
-       if (dest_map != map)
+       if (RC_CHK_ACCESS(dest_map) != RC_CHK_ACCESS(map))
                map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map)));
 
+       map__put(dest_map);
        args->found = true;
        return 0;
 }
@@ -1543,8 +1381,10 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo
                return 0;
 
        long_name = strdup(path);
-       if (long_name == NULL)
+       if (long_name == NULL) {
+               map__put(map);
                return -ENOMEM;
+       }
 
        dso = map__dso(map);
        dso__set_long_name(dso, long_name, true);
@@ -1558,7 +1398,7 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo
                dso->symtab_type++;
                dso->comp = m->comp;
        }
-
+       map__put(map);
        return 0;
 }
 
@@ -1765,8 +1605,10 @@ int machine__create_kernel_maps(struct machine *machine)
                struct map *next = maps__find_next_entry(machine__kernel_maps(machine),
                                                         machine__kernel_map(machine));
 
-               if (next)
+               if (next) {
                        machine__set_kernel_mmap(machine, start, map__start(next));
+                       map__put(next);
+               }
        }
 
 out_put:
@@ -2060,36 +1902,9 @@ out_problem:
        return 0;
 }
 
-static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
-                                    struct thread *th, bool lock)
-{
-       struct threads *threads = machine__threads(machine, thread__tid(th));
-
-       if (!nd)
-               nd = thread_rb_node__find(th, &threads->entries.rb_root);
-
-       if (threads->last_match && RC_CHK_EQUAL(threads->last_match, th))
-               threads__set_last_match(threads, NULL);
-
-       if (lock)
-               down_write(&threads->lock);
-
-       BUG_ON(refcount_read(thread__refcnt(th)) == 0);
-
-       thread__put(nd->thread);
-       rb_erase_cached(&nd->rb_node, &threads->entries);
-       RB_CLEAR_NODE(&nd->rb_node);
-       --threads->nr;
-
-       free(nd);
-
-       if (lock)
-               up_write(&threads->lock);
-}
-
 void machine__remove_thread(struct machine *machine, struct thread *th)
 {
-       return __machine__remove_thread(machine, NULL, th, true);
+       return threads__remove(&machine->threads, th);
 }
 
 int machine__process_fork_event(struct machine *machine, union perf_event *event,
@@ -3223,23 +3038,7 @@ int machine__for_each_thread(struct machine *machine,
                             int (*fn)(struct thread *thread, void *p),
                             void *priv)
 {
-       struct threads *threads;
-       struct rb_node *nd;
-       int rc = 0;
-       int i;
-
-       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
-               threads = &machine->threads[i];
-               for (nd = rb_first_cached(&threads->entries); nd;
-                    nd = rb_next(nd)) {
-                       struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node);
-
-                       rc = fn(trb->thread, priv);
-                       if (rc != 0)
-                               return rc;
-               }
-       }
-       return rc;
+       return threads__for_each_thread(&machine->threads, fn, priv);
 }
 
 int machines__for_each_thread(struct machines *machines,
@@ -3263,6 +3062,36 @@ int machines__for_each_thread(struct machines *machines,
        return rc;
 }
 
+static int thread_list_cb(struct thread *thread, void *data)
+{
+       struct list_head *list = data;
+       struct thread_list *entry = malloc(sizeof(*entry));
+
+       if (!entry)
+               return -ENOMEM;
+
+       entry->thread = thread__get(thread);
+       list_add_tail(&entry->list, list);
+       return 0;
+}
+
+int machine__thread_list(struct machine *machine, struct list_head *list)
+{
+       return machine__for_each_thread(machine, thread_list_cb, list);
+}
+
+void thread_list__delete(struct list_head *list)
+{
+       struct thread_list *pos, *next;
+
+       list_for_each_entry_safe(pos, next, list, list) {
+               thread__zput(pos->thread);
+               list_del(&pos->list);
+               free(pos);
+       }
+}
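
A usage sketch for the new helpers (hypothetical caller): snapshot the
machine's threads into a reference-counted list, walk it without holding any
machine-internal locks, then release it.

        LIST_HEAD(threads);
        struct thread_list *pos;

        if (machine__thread_list(machine, &threads) == 0) {
                list_for_each_entry(pos, &threads, list)
                        pr_debug("tid %d\n", thread__tid(pos->thread));
        }
        /* Safe even on error: frees whatever was added before failing. */
        thread_list__delete(&threads);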
+
 pid_t machine__get_current_tid(struct machine *machine, int cpu)
 {
        if (cpu < 0 || (size_t)cpu >= machine->current_tid_sz)
index 1279acda6a8a1b9779b6db48050578d6c299cec3..e28c787616fe4ea3cd6b0ed30aece17cfd0b92e3 100644 (file)
@@ -7,6 +7,7 @@
 #include "maps.h"
 #include "dsos.h"
 #include "rwsem.h"
+#include "threads.h"
 
 struct addr_location;
 struct branch_stack;
@@ -28,16 +29,6 @@ extern const char *ref_reloc_sym_names[];
 
 struct vdso_info;
 
-#define THREADS__TABLE_BITS    8
-#define THREADS__TABLE_SIZE    (1 << THREADS__TABLE_BITS)
-
-struct threads {
-       struct rb_root_cached  entries;
-       struct rw_semaphore    lock;
-       unsigned int           nr;
-       struct thread          *last_match;
-};
-
 struct machine {
        struct rb_node    rb_node;
        pid_t             pid;
@@ -48,7 +39,7 @@ struct machine {
        char              *root_dir;
        char              *mmap_name;
        char              *kallsyms_filename;
-       struct threads    threads[THREADS__TABLE_SIZE];
+       struct threads    threads;
        struct vdso_info  *vdso_info;
        struct perf_env   *env;
        struct dsos       dsos;
@@ -69,12 +60,6 @@ struct machine {
        bool              trampolines_mapped;
 };
 
-static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
-{
-       /* Cast it to handle tid == -1 */
-       return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
-}
-
 /*
  * The main kernel (vmlinux) map
  */
@@ -220,7 +205,6 @@ bool machine__is(struct machine *machine, const char *arch);
 bool machine__normalized_is(struct machine *machine, const char *arch);
 int machine__nr_cpus_avail(struct machine *machine);
 
-struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
 struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
 
 struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id);
@@ -280,6 +264,16 @@ int machines__for_each_thread(struct machines *machines,
                              int (*fn)(struct thread *thread, void *p),
                              void *priv);
 
+struct thread_list {
+       struct list_head         list;
+       struct thread           *thread;
+};
+
+/* Make a list of struct thread_list based on threads in the machine. */
+int machine__thread_list(struct machine *machine, struct list_head *list);
+/* Free up the nodes within the thread_list list. */
+void thread_list__delete(struct list_head *list);
+
 pid_t machine__get_current_tid(struct machine *machine, int cpu);
 int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
                             pid_t tid);
index 54c67cb7ecefa441608e383476c6953563272f5a..14a5ea70d81e114366b1e1d16697be437dea6d12 100644 (file)
@@ -168,6 +168,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
                if (dso == NULL)
                        goto out_delete;
 
+               assert(!dso->kernel);
                map__init(result, start, start + len, pgoff, dso);
 
                if (anon || no_dso) {
@@ -552,10 +553,6 @@ u64 map__rip_2objdump(struct map *map, u64 rip)
        if (dso->rel)
                return rip - map__pgoff(map);
 
-       /*
-        * kernel modules also have DSO_TYPE_USER in dso->kernel,
-        * but all kernel modules are ET_REL, so won't get here.
-        */
        if (dso->kernel == DSO_SPACE__USER)
                return rip + dso->text_offset;
 
@@ -584,10 +581,6 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
        if (dso->rel)
                return map__unmap_ip(map, ip + map__pgoff(map));
 
-       /*
-        * kernel modules also have DSO_TYPE_USER in dso->kernel,
-        * but all kernel modules are ET_REL, so won't get here.
-        */
        if (dso->kernel == DSO_SPACE__USER)
                return map__unmap_ip(map, ip - dso->text_offset);
 
index 0334fc18d9c65897c5e76111d8cb3a6f8a4a53ba..ce13145a9f8e8d012e30bdc1973409f16c781aff 100644 (file)
 #include "dso.h"
 #include "map.h"
 #include "maps.h"
+#include "rwsem.h"
 #include "thread.h"
 #include "ui/ui.h"
 #include "unwind.h"
+#include <internal/rc_check.h>
 
-struct map_rb_node {
-       struct rb_node rb_node;
-       struct map *map;
+/*
+ * Locking/sorting note:
+ *
+ * Sorting is done under the write lock; iteration and binary searching happen
+ * under the read lock and require the maps to be sorted. There is a race
+ * between sorting releasing the write lock and acquiring the read lock for
+ * iteration/searching, where another thread could insert and break the sorting
+ * of the maps. In practice inserting maps should be rare, meaning the race
+ * shouldn't lead to livelock. Removal of maps doesn't break being sorted.
+ */
+
+DECLARE_RC_STRUCT(maps) {
+       struct rw_semaphore lock;
+       /**
+        * @maps_by_address: array of maps sorted by their starting address if
+        * maps_by_address_sorted is true.
+        */
+       struct map       **maps_by_address;
+       /**
+        * @maps_by_name: optional array of maps sorted by their dso name if
+        * maps_by_name_sorted is true.
+        */
+       struct map       **maps_by_name;
+       struct machine   *machine;
+#ifdef HAVE_LIBUNWIND_SUPPORT
+       void            *addr_space;
+       const struct unwind_libunwind_ops *unwind_libunwind_ops;
+#endif
+       refcount_t       refcnt;
+       /**
+        * @nr_maps: number of maps_by_address, and possibly maps_by_name,
+        * entries that contain maps.
+        */
+       unsigned int     nr_maps;
+       /**
+        * @nr_maps_allocated: number of entries in maps_by_address and possibly
+        * maps_by_name.
+        */
+       unsigned int     nr_maps_allocated;
+       /**
+        * @last_search_by_name_idx: cache of last found by name entry's index
+        * as frequent searches for the same dso name are common.
+        */
+       unsigned int     last_search_by_name_idx;
+       /** @maps_by_address_sorted: is maps_by_address sorted. */
+       bool             maps_by_address_sorted;
+       /** @maps_by_name_sorted: is maps_by_name sorted. */
+       bool             maps_by_name_sorted;
+	/** @ends_broken: whether the maps contain a map whose end value is unset/unsorted. */
+       bool             ends_broken;
 };
 
-#define maps__for_each_entry(maps, map) \
-       for (map = maps__first(maps); map; map = map_rb_node__next(map))
+static void check_invariants(const struct maps *maps __maybe_unused)
+{
+#ifndef NDEBUG
+       assert(RC_CHK_ACCESS(maps)->nr_maps <= RC_CHK_ACCESS(maps)->nr_maps_allocated);
+       for (unsigned int i = 0; i < RC_CHK_ACCESS(maps)->nr_maps; i++) {
+               struct map *map = RC_CHK_ACCESS(maps)->maps_by_address[i];
+
+               /* Check map is well-formed. */
+               assert(map__end(map) == 0 || map__start(map) <= map__end(map));
+               /* Expect at least 1 reference count. */
+               assert(refcount_read(map__refcnt(map)) > 0);
+
+               if (map__dso(map) && map__dso(map)->kernel)
+                       assert(RC_CHK_EQUAL(map__kmap(map)->kmaps, maps));
+
+               if (i > 0) {
+                       struct map *prev = RC_CHK_ACCESS(maps)->maps_by_address[i - 1];
+
+                       /* If addresses are sorted... */
+                       if (RC_CHK_ACCESS(maps)->maps_by_address_sorted) {
+                               /* Maps should be in start address order. */
+                               assert(map__start(prev) <= map__start(map));
+                               /*
+                                * If the ends of maps aren't broken (during
+                                * construction) then they should be ordered
+                                * too.
+                                */
+                               if (!RC_CHK_ACCESS(maps)->ends_broken) {
+                                       assert(map__end(prev) <= map__end(map));
+                                       assert(map__end(prev) <= map__start(map) ||
+                                              map__start(prev) == map__start(map));
+                               }
+                       }
+               }
+       }
+       if (RC_CHK_ACCESS(maps)->maps_by_name) {
+               for (unsigned int i = 0; i < RC_CHK_ACCESS(maps)->nr_maps; i++) {
+                       struct map *map = RC_CHK_ACCESS(maps)->maps_by_name[i];
 
-#define maps__for_each_entry_safe(maps, map, next) \
-       for (map = maps__first(maps), next = map_rb_node__next(map); map; \
-            map = next, next = map_rb_node__next(map))
+                       /*
+                        * Maps by name maps should be in maps_by_address, so
+                        * the reference count should be higher.
+                        */
+                       assert(refcount_read(map__refcnt(map)) > 1);
+               }
+       }
+#endif
+}
 
-static struct rb_root *maps__entries(struct maps *maps)
+static struct map **maps__maps_by_address(const struct maps *maps)
 {
-       return &RC_CHK_ACCESS(maps)->entries;
+       return RC_CHK_ACCESS(maps)->maps_by_address;
 }
 
-static struct rw_semaphore *maps__lock(struct maps *maps)
+static void maps__set_maps_by_address(struct maps *maps, struct map **new)
 {
-       return &RC_CHK_ACCESS(maps)->lock;
+       RC_CHK_ACCESS(maps)->maps_by_address = new;
+}
+
+static struct map ***maps__maps_by_name_addr(struct maps *maps)
+{
+       return &RC_CHK_ACCESS(maps)->maps_by_name;
+}
+
+static void maps__set_nr_maps_allocated(struct maps *maps, unsigned int nr_maps_allocated)
+{
+       RC_CHK_ACCESS(maps)->nr_maps_allocated = nr_maps_allocated;
+}
+
+static void maps__set_nr_maps(struct maps *maps, unsigned int nr_maps)
+{
+       RC_CHK_ACCESS(maps)->nr_maps = nr_maps;
 }
 
-static struct map **maps__maps_by_name(struct maps *maps)
+/* Not in the header, to aid reference counting. */
+static struct map **maps__maps_by_name(const struct maps *maps)
 {
        return RC_CHK_ACCESS(maps)->maps_by_name;
 }
 
-static struct map_rb_node *maps__first(struct maps *maps)
+static void maps__set_maps_by_name(struct maps *maps, struct map **new)
 {
-       struct rb_node *first = rb_first(maps__entries(maps));
+       RC_CHK_ACCESS(maps)->maps_by_name = new;
 
-       if (first)
-               return rb_entry(first, struct map_rb_node, rb_node);
-       return NULL;
 }
 
-static struct map_rb_node *map_rb_node__next(struct map_rb_node *node)
+static bool maps__maps_by_address_sorted(const struct maps *maps)
+{
+       return RC_CHK_ACCESS(maps)->maps_by_address_sorted;
+}
+
+static void maps__set_maps_by_address_sorted(struct maps *maps, bool value)
 {
-       struct rb_node *next;
+       RC_CHK_ACCESS(maps)->maps_by_address_sorted = value;
+}
 
-       if (!node)
-               return NULL;
+static bool maps__maps_by_name_sorted(const struct maps *maps)
+{
+       return RC_CHK_ACCESS(maps)->maps_by_name_sorted;
+}
 
-       next = rb_next(&node->rb_node);
+static void maps__set_maps_by_name_sorted(struct maps *maps, bool value)
+{
+       RC_CHK_ACCESS(maps)->maps_by_name_sorted = value;
+}
 
-       if (!next)
-               return NULL;
+struct machine *maps__machine(const struct maps *maps)
+{
+       return RC_CHK_ACCESS(maps)->machine;
+}
 
-       return rb_entry(next, struct map_rb_node, rb_node);
+unsigned int maps__nr_maps(const struct maps *maps)
+{
+       return RC_CHK_ACCESS(maps)->nr_maps;
 }
 
-static struct map_rb_node *maps__find_node(struct maps *maps, struct map *map)
+refcount_t *maps__refcnt(struct maps *maps)
 {
-       struct map_rb_node *rb_node;
+       return &RC_CHK_ACCESS(maps)->refcnt;
+}
 
-       maps__for_each_entry(maps, rb_node) {
-               if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map))
-                       return rb_node;
-       }
-       return NULL;
+#ifdef HAVE_LIBUNWIND_SUPPORT
+void *maps__addr_space(const struct maps *maps)
+{
+       return RC_CHK_ACCESS(maps)->addr_space;
+}
+
+void maps__set_addr_space(struct maps *maps, void *addr_space)
+{
+       RC_CHK_ACCESS(maps)->addr_space = addr_space;
+}
+
+const struct unwind_libunwind_ops *maps__unwind_libunwind_ops(const struct maps *maps)
+{
+       return RC_CHK_ACCESS(maps)->unwind_libunwind_ops;
+}
+
+void maps__set_unwind_libunwind_ops(struct maps *maps, const struct unwind_libunwind_ops *ops)
+{
+       RC_CHK_ACCESS(maps)->unwind_libunwind_ops = ops;
+}
+#endif
+
+static struct rw_semaphore *maps__lock(struct maps *maps)
+{
+       /*
+        * When the lock is acquired or released the maps invariants should
+        * hold.
+        */
+       check_invariants(maps);
+       return &RC_CHK_ACCESS(maps)->lock;
 }
 
 static void maps__init(struct maps *maps, struct machine *machine)
 {
-       refcount_set(maps__refcnt(maps), 1);
        init_rwsem(maps__lock(maps));
-       RC_CHK_ACCESS(maps)->entries = RB_ROOT;
+       RC_CHK_ACCESS(maps)->maps_by_address = NULL;
+       RC_CHK_ACCESS(maps)->maps_by_name = NULL;
        RC_CHK_ACCESS(maps)->machine = machine;
-       RC_CHK_ACCESS(maps)->last_search_by_name = NULL;
+#ifdef HAVE_LIBUNWIND_SUPPORT
+       RC_CHK_ACCESS(maps)->addr_space = NULL;
+       RC_CHK_ACCESS(maps)->unwind_libunwind_ops = NULL;
+#endif
+       refcount_set(maps__refcnt(maps), 1);
        RC_CHK_ACCESS(maps)->nr_maps = 0;
-       RC_CHK_ACCESS(maps)->maps_by_name = NULL;
+       RC_CHK_ACCESS(maps)->nr_maps_allocated = 0;
+       RC_CHK_ACCESS(maps)->last_search_by_name_idx = 0;
+       RC_CHK_ACCESS(maps)->maps_by_address_sorted = true;
+       RC_CHK_ACCESS(maps)->maps_by_name_sorted = false;
+}
+
+static void maps__exit(struct maps *maps)
+{
+       struct map **maps_by_address = maps__maps_by_address(maps);
+       struct map **maps_by_name = maps__maps_by_name(maps);
+
+       for (unsigned int i = 0; i < maps__nr_maps(maps); i++) {
+               map__zput(maps_by_address[i]);
+               if (maps_by_name)
+                       map__zput(maps_by_name[i]);
+       }
+       zfree(&maps_by_address);
+       zfree(&maps_by_name);
+       unwind__finish_access(maps);
+}
+
+struct maps *maps__new(struct machine *machine)
+{
+       struct maps *result;
+       RC_STRUCT(maps) *maps = zalloc(sizeof(*maps));
+
+       if (ADD_RC_CHK(result, maps))
+               maps__init(result, machine);
+
+       return result;
+}
+
+static void maps__delete(struct maps *maps)
+{
+       maps__exit(maps);
+       RC_CHK_FREE(maps);
+}
+
+struct maps *maps__get(struct maps *maps)
+{
+       struct maps *result;
+
+       if (RC_CHK_GET(result, maps))
+               refcount_inc(maps__refcnt(maps));
+
+       return result;
+}
+
+void maps__put(struct maps *maps)
+{
+       if (maps && refcount_dec_and_test(maps__refcnt(maps)))
+               maps__delete(maps);
+       else
+               RC_CHK_PUT(maps);
 }
 
 static void __maps__free_maps_by_name(struct maps *maps)
@@ -92,219 +296,330 @@ static void __maps__free_maps_by_name(struct maps *maps)
                map__put(maps__maps_by_name(maps)[i]);
 
        zfree(&RC_CHK_ACCESS(maps)->maps_by_name);
-       RC_CHK_ACCESS(maps)->nr_maps_allocated = 0;
 }
 
-static int __maps__insert(struct maps *maps, struct map *map)
+static int map__start_cmp(const void *a, const void *b)
 {
-       struct rb_node **p = &maps__entries(maps)->rb_node;
-       struct rb_node *parent = NULL;
-       const u64 ip = map__start(map);
-       struct map_rb_node *m, *new_rb_node;
-
-       new_rb_node = malloc(sizeof(*new_rb_node));
-       if (!new_rb_node)
-               return -ENOMEM;
-
-       RB_CLEAR_NODE(&new_rb_node->rb_node);
-       new_rb_node->map = map__get(map);
-
-       while (*p != NULL) {
-               parent = *p;
-               m = rb_entry(parent, struct map_rb_node, rb_node);
-               if (ip < map__start(m->map))
-                       p = &(*p)->rb_left;
-               else
-                       p = &(*p)->rb_right;
+       const struct map *map_a = *(const struct map * const *)a;
+       const struct map *map_b = *(const struct map * const *)b;
+       u64 map_a_start = map__start(map_a);
+       u64 map_b_start = map__start(map_b);
+
+       if (map_a_start == map_b_start) {
+               u64 map_a_end = map__end(map_a);
+               u64 map_b_end = map__end(map_b);
+
+		if (map_a_end == map_b_end) {
+                       /* Ensure maps with the same addresses have a fixed order. */
+                       if (RC_CHK_ACCESS(map_a) == RC_CHK_ACCESS(map_b))
+                               return 0;
+                       return (intptr_t)RC_CHK_ACCESS(map_a) > (intptr_t)RC_CHK_ACCESS(map_b)
+                               ? 1 : -1;
+               }
+               return map_a_end > map_b_end ? 1 : -1;
        }
-
-       rb_link_node(&new_rb_node->rb_node, parent, p);
-       rb_insert_color(&new_rb_node->rb_node, maps__entries(maps));
-       return 0;
+       return map_a_start > map_b_start ? 1 : -1;
 }
 
-int maps__insert(struct maps *maps, struct map *map)
+static void __maps__sort_by_address(struct maps *maps)
 {
-       int err;
-       const struct dso *dso = map__dso(map);
+       if (maps__maps_by_address_sorted(maps))
+               return;
+
+       qsort(maps__maps_by_address(maps),
+               maps__nr_maps(maps),
+               sizeof(struct map *),
+               map__start_cmp);
+       maps__set_maps_by_address_sorted(maps, true);
+}
 
+static void maps__sort_by_address(struct maps *maps)
+{
        down_write(maps__lock(maps));
-       err = __maps__insert(maps, map);
-       if (err)
-               goto out;
-
-       ++RC_CHK_ACCESS(maps)->nr_maps;
+       __maps__sort_by_address(maps);
+       up_write(maps__lock(maps));
+}
 
-       if (dso && dso->kernel) {
-               struct kmap *kmap = map__kmap(map);
+static int map__strcmp(const void *a, const void *b)
+{
+       const struct map *map_a = *(const struct map * const *)a;
+       const struct map *map_b = *(const struct map * const *)b;
+       const struct dso *dso_a = map__dso(map_a);
+       const struct dso *dso_b = map__dso(map_b);
+       int ret = strcmp(dso_a->short_name, dso_b->short_name);
 
-               if (kmap)
-                       kmap->kmaps = maps;
-               else
-                       pr_err("Internal error: kernel dso with non kernel map\n");
+       if (ret == 0 && RC_CHK_ACCESS(map_a) != RC_CHK_ACCESS(map_b)) {
+               /* Ensure distinct but name equal maps have an order. */
+               return map__start_cmp(a, b);
        }
+       return ret;
+}
 
+static int maps__sort_by_name(struct maps *maps)
+{
+       int err = 0;
+       down_write(maps__lock(maps));
+       if (!maps__maps_by_name_sorted(maps)) {
+               struct map **maps_by_name = maps__maps_by_name(maps);
 
-       /*
-        * If we already performed some search by name, then we need to add the just
-        * inserted map and resort.
-        */
-       if (maps__maps_by_name(maps)) {
-               if (maps__nr_maps(maps) > RC_CHK_ACCESS(maps)->nr_maps_allocated) {
-                       int nr_allocate = maps__nr_maps(maps) * 2;
-                       struct map **maps_by_name = realloc(maps__maps_by_name(maps),
-                                                           nr_allocate * sizeof(map));
-
-                       if (maps_by_name == NULL) {
-                               __maps__free_maps_by_name(maps);
+               if (!maps_by_name) {
+                       maps_by_name = malloc(RC_CHK_ACCESS(maps)->nr_maps_allocated *
+                                       sizeof(*maps_by_name));
+                       if (!maps_by_name)
                                err = -ENOMEM;
-                               goto out;
-                       }
+                       else {
+                               struct map **maps_by_address = maps__maps_by_address(maps);
+                               unsigned int n = maps__nr_maps(maps);
 
-                       RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name;
-                       RC_CHK_ACCESS(maps)->nr_maps_allocated = nr_allocate;
+                               maps__set_maps_by_name(maps, maps_by_name);
+                               for (unsigned int i = 0; i < n; i++)
+                                       maps_by_name[i] = map__get(maps_by_address[i]);
+                       }
+               }
+               if (!err) {
+                       qsort(maps_by_name,
+                               maps__nr_maps(maps),
+                               sizeof(struct map *),
+                               map__strcmp);
+                       maps__set_maps_by_name_sorted(maps, true);
                }
-               maps__maps_by_name(maps)[maps__nr_maps(maps) - 1] = map__get(map);
-               __maps__sort_by_name(maps);
        }
- out:
        up_write(maps__lock(maps));
        return err;
 }
 
-static void __maps__remove(struct maps *maps, struct map_rb_node *rb_node)
+static unsigned int maps__by_address_index(const struct maps *maps, const struct map *map)
 {
-       rb_erase_init(&rb_node->rb_node, maps__entries(maps));
-       map__put(rb_node->map);
-       free(rb_node);
+       struct map **maps_by_address = maps__maps_by_address(maps);
+
+       if (maps__maps_by_address_sorted(maps)) {
+               struct map **mapp =
+                       bsearch(&map, maps__maps_by_address(maps), maps__nr_maps(maps),
+                               sizeof(*mapp), map__start_cmp);
+
+               if (mapp)
+                       return mapp - maps_by_address;
+       } else {
+               for (unsigned int i = 0; i < maps__nr_maps(maps); i++) {
+                       if (RC_CHK_ACCESS(maps_by_address[i]) == RC_CHK_ACCESS(map))
+                               return i;
+               }
+       }
+	pr_err("Map missing from maps\n");
+       return -1;
 }
 
-void maps__remove(struct maps *maps, struct map *map)
+static unsigned int maps__by_name_index(const struct maps *maps, const struct map *map)
 {
-       struct map_rb_node *rb_node;
-
-       down_write(maps__lock(maps));
-       if (RC_CHK_ACCESS(maps)->last_search_by_name == map)
-               RC_CHK_ACCESS(maps)->last_search_by_name = NULL;
-
-       rb_node = maps__find_node(maps, map);
-       assert(rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map));
-       __maps__remove(maps, rb_node);
-       if (maps__maps_by_name(maps))
-               __maps__free_maps_by_name(maps);
-       --RC_CHK_ACCESS(maps)->nr_maps;
-       up_write(maps__lock(maps));
+       struct map **maps_by_name = maps__maps_by_name(maps);
+
+       if (maps__maps_by_name_sorted(maps)) {
+               struct map **mapp =
+                       bsearch(&map, maps_by_name, maps__nr_maps(maps),
+                               sizeof(*mapp), map__strcmp);
+
+               if (mapp)
+                       return mapp - maps_by_name;
+       } else {
+               for (unsigned int i = 0; i < maps__nr_maps(maps); i++) {
+                       if (RC_CHK_ACCESS(maps_by_name[i]) == RC_CHK_ACCESS(map))
+                               return i;
+               }
+       }
+	pr_err("Map missing from maps\n");
+       return -1;
 }
 
-static void __maps__purge(struct maps *maps)
+static int __maps__insert(struct maps *maps, struct map *new)
 {
-       struct map_rb_node *pos, *next;
+       struct map **maps_by_address = maps__maps_by_address(maps);
+       struct map **maps_by_name = maps__maps_by_name(maps);
+       const struct dso *dso = map__dso(new);
+       unsigned int nr_maps = maps__nr_maps(maps);
+       unsigned int nr_allocate = RC_CHK_ACCESS(maps)->nr_maps_allocated;
+
+       if (nr_maps + 1 > nr_allocate) {
+               nr_allocate = !nr_allocate ? 32 : nr_allocate * 2;
+
+               maps_by_address = realloc(maps_by_address, nr_allocate * sizeof(new));
+               if (!maps_by_address)
+                       return -ENOMEM;
+
+               maps__set_maps_by_address(maps, maps_by_address);
+               if (maps_by_name) {
+                       maps_by_name = realloc(maps_by_name, nr_allocate * sizeof(new));
+                       if (!maps_by_name) {
+                               /*
+                                * If by name fails, just disable by name and it will
+                                * recompute next time it is required.
+                                */
+                               __maps__free_maps_by_name(maps);
+                       }
+                       maps__set_maps_by_name(maps, maps_by_name);
+               }
+               RC_CHK_ACCESS(maps)->nr_maps_allocated = nr_allocate;
+       }
+       /* Insert the value at the end. */
+       maps_by_address[nr_maps] = map__get(new);
+       if (maps_by_name)
+               maps_by_name[nr_maps] = map__get(new);
 
-       if (maps__maps_by_name(maps))
-               __maps__free_maps_by_name(maps);
+       nr_maps++;
+       RC_CHK_ACCESS(maps)->nr_maps = nr_maps;
 
-       maps__for_each_entry_safe(maps, pos, next) {
-               rb_erase_init(&pos->rb_node,  maps__entries(maps));
-               map__put(pos->map);
-               free(pos);
+       /*
+        * Recompute if things are sorted. If things are inserted in a sorted
+        * manner, for example by processing /proc/pid/maps, then no
+        * sorting/resorting will be necessary.
+        */
+       if (nr_maps == 1) {
+               /* If there's just 1 entry then maps are sorted. */
+               maps__set_maps_by_address_sorted(maps, true);
+               maps__set_maps_by_name_sorted(maps, maps_by_name != NULL);
+       } else {
+               /* Sorted if maps were already sorted and this map starts after the last one. */
+               maps__set_maps_by_address_sorted(maps,
+                       maps__maps_by_address_sorted(maps) &&
+                       map__end(maps_by_address[nr_maps - 2]) <= map__start(new));
+               maps__set_maps_by_name_sorted(maps, false);
        }
+       if (map__end(new) < map__start(new))
+               RC_CHK_ACCESS(maps)->ends_broken = true;
+       if (dso && dso->kernel) {
+               struct kmap *kmap = map__kmap(new);
+
+               if (kmap)
+                       kmap->kmaps = maps;
+               else
+                       pr_err("Internal error: kernel dso with non kernel map\n");
+       }
+       return 0;
 }
 
-static void maps__exit(struct maps *maps)
+int maps__insert(struct maps *maps, struct map *map)
 {
+       int ret;
+
        down_write(maps__lock(maps));
-       __maps__purge(maps);
+       ret = __maps__insert(maps, map);
        up_write(maps__lock(maps));
+       return ret;
 }
 
-bool maps__empty(struct maps *maps)
-{
-       return !maps__first(maps);
-}
-
-struct maps *maps__new(struct machine *machine)
+static void __maps__remove(struct maps *maps, struct map *map)
 {
-       struct maps *result;
-       RC_STRUCT(maps) *maps = zalloc(sizeof(*maps));
-
-       if (ADD_RC_CHK(result, maps))
-               maps__init(result, machine);
+       struct map **maps_by_address = maps__maps_by_address(maps);
+       struct map **maps_by_name = maps__maps_by_name(maps);
+       unsigned int nr_maps = maps__nr_maps(maps);
+       unsigned int address_idx;
+
+       /* Slide later mappings over the one to remove */
+       address_idx = maps__by_address_index(maps, map);
+       map__put(maps_by_address[address_idx]);
+       memmove(&maps_by_address[address_idx],
+               &maps_by_address[address_idx + 1],
+               (nr_maps - address_idx - 1) * sizeof(*maps_by_address));
+
+       if (maps_by_name) {
+               unsigned int name_idx = maps__by_name_index(maps, map);
+
+               map__put(maps_by_name[name_idx]);
+               memmove(&maps_by_name[name_idx],
+                       &maps_by_name[name_idx + 1],
+			(nr_maps - name_idx - 1) * sizeof(*maps_by_name));
+       }
 
-       return result;
+       --RC_CHK_ACCESS(maps)->nr_maps;
 }
 
-static void maps__delete(struct maps *maps)
+void maps__remove(struct maps *maps, struct map *map)
 {
-       maps__exit(maps);
-       unwind__finish_access(maps);
-       RC_CHK_FREE(maps);
+       down_write(maps__lock(maps));
+       __maps__remove(maps, map);
+       up_write(maps__lock(maps));
 }
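
Illustrative pairing of the array-based insert/remove (hypothetical caller):
insertion appends under the write lock and takes its own reference, removal
slides the later entries down over the removed slot.

        /* maps__insert takes its own reference on map. */
        if (maps__insert(maps, map) == 0) {
                /* ... */
                maps__remove(maps, map);        /* drops the maps' reference */
        }
        map__put(map);                          /* drop the caller's reference */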
 
-struct maps *maps__get(struct maps *maps)
+bool maps__empty(struct maps *maps)
 {
-       struct maps *result;
+       bool res;
 
-       if (RC_CHK_GET(result, maps))
-               refcount_inc(maps__refcnt(maps));
+       down_read(maps__lock(maps));
+       res = maps__nr_maps(maps) == 0;
+       up_read(maps__lock(maps));
 
-       return result;
+       return res;
 }
 
-void maps__put(struct maps *maps)
+bool maps__equal(struct maps *a, struct maps *b)
 {
-       if (maps && refcount_dec_and_test(maps__refcnt(maps)))
-               maps__delete(maps);
-       else
-               RC_CHK_PUT(maps);
+       return RC_CHK_EQUAL(a, b);
 }
 
 int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data)
 {
-       struct map_rb_node *pos;
+       bool done = false;
        int ret = 0;
 
-       down_read(maps__lock(maps));
-       maps__for_each_entry(maps, pos) {
-               ret = cb(pos->map, data);
-               if (ret)
-                       break;
+       /* See locking/sorting note. */
+       while (!done) {
+               down_read(maps__lock(maps));
+               if (maps__maps_by_address_sorted(maps)) {
+                       /*
+                        * maps__for_each_map callbacks may buggily/unsafely
+                        * insert into maps_by_address. Deliberately reload
+                        * maps__nr_maps and maps_by_address on each iteration
+                        * to avoid using memory freed by maps__insert growing
+                        * the array; such buggy inserts may still cause maps
+                        * to be skipped or repeated.
+                        */
+                       for (unsigned int i = 0; i < maps__nr_maps(maps); i++) {
+                               struct map **maps_by_address = maps__maps_by_address(maps);
+                               struct map *map = maps_by_address[i];
+
+                               ret = cb(map, data);
+                               if (ret)
+                                       break;
+                       }
+                       done = true;
+               }
+               up_read(maps__lock(maps));
+               if (!done)
+                       maps__sort_by_address(maps);
        }
-       up_read(maps__lock(maps));
        return ret;
 }
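
This loop is the "locking/sorting note" pattern used throughout the new code: try the work under the read lock; if the array turns out to be unsorted, drop the lock, sort (which internally takes the write lock), and retry. A self-contained sketch of the same shape, assuming a pthread rwlock in place of perf's rwsem wrappers:

    #include <pthread.h>
    #include <stdbool.h>

    struct sorted_array {
            pthread_rwlock_t lock;
            bool sorted;
            /* ... entries ... */
    };

    static void read_when_sorted(struct sorted_array *a)
    {
            bool done = false;

            while (!done) {
                    pthread_rwlock_rdlock(&a->lock);
                    if (a->sorted) {
                            /* ... iterate or bsearch safely here ... */
                            done = true;
                    }
                    pthread_rwlock_unlock(&a->lock);
                    if (!done) {
                            pthread_rwlock_wrlock(&a->lock);
                            if (!a->sorted) {
                                    /* ... qsort the entries ... */
                                    a->sorted = true;
                            }
                            pthread_rwlock_unlock(&a->lock);
                            /* Retry: another writer may have raced in. */
                    }
            }
    }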
 
 void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data)
 {
-       struct map_rb_node *pos, *next;
-       unsigned int start_nr_maps;
+       struct map **maps_by_address;
 
        down_write(maps__lock(maps));
 
-       start_nr_maps = maps__nr_maps(maps);
-       maps__for_each_entry_safe(maps, pos, next)      {
-               if (cb(pos->map, data)) {
-                       __maps__remove(maps, pos);
-                       --RC_CHK_ACCESS(maps)->nr_maps;
-               }
+       maps_by_address = maps__maps_by_address(maps);
+       for (unsigned int i = 0; i < maps__nr_maps(maps);) {
+               if (cb(maps_by_address[i], data))
+                       __maps__remove(maps, maps_by_address[i]);
+               else
+                       i++;
        }
-       if (maps__maps_by_name(maps) && start_nr_maps != maps__nr_maps(maps))
-               __maps__free_maps_by_name(maps);
-
        up_write(maps__lock(maps));
 }
 
 struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp)
 {
        struct map *map = maps__find(maps, addr);
+       struct symbol *result = NULL;
 
        /* Ensure map is loaded before using map->map_ip */
-       if (map != NULL && map__load(map) >= 0) {
-               if (mapp != NULL)
-                       *mapp = map;
-               return map__find_symbol(map, map__map_ip(map, addr));
-       }
+       if (map != NULL && map__load(map) >= 0)
+               result = map__find_symbol(map, map__map_ip(map, addr));
+
+       if (mapp)
+               *mapp = map;
+       else
+               map__put(map);
 
-       return NULL;
+       return result;
 }
 
 struct maps__find_symbol_by_name_args {
@@ -393,24 +708,28 @@ size_t maps__fprintf(struct maps *maps, FILE *fp)
  * Find first map where end > map->start.
  * Same as find_vma() in kernel.
  */
-static struct rb_node *first_ending_after(struct maps *maps, const struct map *map)
+static unsigned int first_ending_after(struct maps *maps, const struct map *map)
 {
-       struct rb_root *root;
-       struct rb_node *next, *first;
+       struct map **maps_by_address = maps__maps_by_address(maps);
+       int low = 0, high = (int)maps__nr_maps(maps) - 1, first = high + 1;
+
+       assert(maps__maps_by_address_sorted(maps));
+       if (low <= high && map__end(maps_by_address[0]) > map__start(map))
+               return 0;
 
-       root = maps__entries(maps);
-       next = root->rb_node;
-       first = NULL;
-       while (next) {
-               struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node);
+       while (low <= high) {
+               int mid = (low + high) / 2;
+               struct map *pos = maps_by_address[mid];
 
-               if (map__end(pos->map) > map__start(map)) {
-                       first = next;
-                       if (map__start(pos->map) <= map__start(map))
+               if (map__end(pos) > map__start(map)) {
+                       first = mid;
+                       if (map__start(pos) <= map__start(map)) {
+                               /* Entry overlaps map. */
                                break;
-                       next = next->rb_left;
+                       }
+                       high = mid - 1;
                } else
-                       next = next->rb_right;
+                       low = mid + 1;
        }
        return first;
 }
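
A worked example of the search above: with address-sorted maps whose ends are [10, 20, 30, 40] and a new map starting at 25, the function returns index 2, the first entry whose end (30) exceeds 25. The same lower-bound search over plain integers, as a standalone sketch (the perf version additionally breaks out early once the found entry also overlaps the new map's start):

    /* First index i with end[i] > start; end[] ascending; returns n if none. */
    static unsigned int first_ending_after_sketch(const unsigned int *end,
                                                  unsigned int n,
                                                  unsigned int start)
    {
            int low = 0, high = (int)n - 1, first = (int)n;

            while (low <= high) {
                    int mid = (low + high) / 2;

                    if (end[mid] > start) {
                            first = mid;
                            high = mid - 1; /* an earlier entry may also qualify */
                    } else {
                            low = mid + 1;
                    }
            }
            return first;
    }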
@@ -419,171 +738,249 @@ static struct rb_node *first_ending_after(struct maps *maps, const struct map *m
 * Adds new to maps; if new overlaps existing entries then the existing maps are
 * adjusted or removed so that new fits without overlapping any entries.
  */
-int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
+static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
 {
-
-       struct rb_node *next;
+       struct map **maps_by_address;
        int err = 0;
        FILE *fp = debug_file();
 
-       down_write(maps__lock(maps));
+sort_again:
+       if (!maps__maps_by_address_sorted(maps))
+               __maps__sort_by_address(maps);
 
-       next = first_ending_after(maps, new);
-       while (next && !err) {
-               struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node);
-               next = rb_next(&pos->rb_node);
+       maps_by_address = maps__maps_by_address(maps);
+       /*
+        * Iterate through entries where the end of the existing entry is
+        * greater-than the new map's start.
+        */
+       for (unsigned int i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) {
+               struct map *pos = maps_by_address[i];
+               struct map *before = NULL, *after = NULL;
 
                /*
                 * Stop if current map starts after map->end.
                 * Maps are ordered by start: next will not overlap for sure.
                 */
-               if (map__start(pos->map) >= map__end(new))
+               if (map__start(pos) >= map__end(new))
                        break;
 
-               if (verbose >= 2) {
-
-                       if (use_browser) {
-                               pr_debug("overlapping maps in %s (disable tui for more info)\n",
-                                        map__dso(new)->name);
-                       } else {
-                               pr_debug("overlapping maps:\n");
-                               map__fprintf(new, fp);
-                               map__fprintf(pos->map, fp);
-                       }
+               if (use_browser) {
+                       pr_debug("overlapping maps in %s (disable tui for more info)\n",
+                               map__dso(new)->name);
+               } else if (verbose >= 2) {
+                       pr_debug("overlapping maps:\n");
+                       map__fprintf(new, fp);
+                       map__fprintf(pos, fp);
                }
 
-               rb_erase_init(&pos->rb_node, maps__entries(maps));
                /*
                 * Now check if we need to create new maps for areas not
                 * overlapped by the new map:
                 */
-               if (map__start(new) > map__start(pos->map)) {
-                       struct map *before = map__clone(pos->map);
+               if (map__start(new) > map__start(pos)) {
+                       /* Map starts within existing map. Need to shorten the existing map. */
+                       before = map__clone(pos);
 
                        if (before == NULL) {
                                err = -ENOMEM;
-                               goto put_map;
+                               goto out_err;
                        }
-
                        map__set_end(before, map__start(new));
-                       err = __maps__insert(maps, before);
-                       if (err) {
-                               map__put(before);
-                               goto put_map;
-                       }
 
                        if (verbose >= 2 && !use_browser)
                                map__fprintf(before, fp);
-                       map__put(before);
                }
-
-               if (map__end(new) < map__end(pos->map)) {
-                       struct map *after = map__clone(pos->map);
+               if (map__end(new) < map__end(pos)) {
+                       /* The new map isn't as long as the existing map. */
+                       after = map__clone(pos);
 
                        if (after == NULL) {
+                               map__zput(before);
                                err = -ENOMEM;
-                               goto put_map;
+                               goto out_err;
                        }
 
                        map__set_start(after, map__end(new));
-                       map__add_pgoff(after, map__end(new) - map__start(pos->map));
-                       assert(map__map_ip(pos->map, map__end(new)) ==
-                               map__map_ip(after, map__end(new)));
-                       err = __maps__insert(maps, after);
-                       if (err) {
-                               map__put(after);
-                               goto put_map;
-                       }
+                       map__add_pgoff(after, map__end(new) - map__start(pos));
+                       assert(map__map_ip(pos, map__end(new)) ==
+                              map__map_ip(after, map__end(new)));
+
                        if (verbose >= 2 && !use_browser)
                                map__fprintf(after, fp);
-                       map__put(after);
                }
-put_map:
-               map__put(pos->map);
-               free(pos);
+               /*
+                * If adding one entry, for `before` or `after`, we can replace
+                * the existing entry. If both `before` and `after` are
+                * necessary then an insert is needed. If the new entry
+                * entirely overlaps the existing entry, it can just be removed.
+                */
+               if (before) {
+                       map__put(maps_by_address[i]);
+                       maps_by_address[i] = before;
+                       /* Maps are still ordered, go to next one. */
+                       i++;
+                       if (after) {
+                               __maps__insert(maps, after);
+                               map__put(after);
+                               if (!maps__maps_by_address_sorted(maps)) {
+                                       /*
+                                        * Sorting broken so invariants don't
+                                        * hold, sort and go again.
+                                        */
+                                       goto sort_again;
+                               }
+                               /*
+                                * Maps are still ordered, skip after and go to
+                                * next one (terminate loop).
+                                */
+                               i++;
+                       }
+               } else if (after) {
+                       map__put(maps_by_address[i]);
+                       maps_by_address[i] = after;
+                       /* Maps are ordered, go to next one. */
+                       i++;
+               } else {
+                       __maps__remove(maps, pos);
+                       /*
+                        * Maps are ordered but no need to increase `i` as the
+                        * later maps were moved down.
+                        */
+               }
+               check_invariants(maps);
        }
        /* Add the map. */
-       err = __maps__insert(maps, new);
-       up_write(maps__lock(maps));
+       __maps__insert(maps, new);
+out_err:
        return err;
 }
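
To make the before/after split concrete: if an existing map covers [0x1000, 0x5000) and the new map covers [0x2000, 0x3000), then `before` becomes [0x1000, 0x2000), `after` becomes [0x3000, 0x5000) with its pgoff advanced by 0x2000, and the new map sits between them. The split arithmetic as a hedged sketch over plain structs (not the perf map API):

    struct range { unsigned long start, end, pgoff; };

    /* Split pos around new; the caller checks each side is non-empty. */
    static void split_around(const struct range *pos, const struct range *new,
                             struct range *before, struct range *after)
    {
            *before = *pos;
            before->end = new->start;              /* [pos->start, new->start) */

            *after = *pos;
            after->start = new->end;               /* [new->end, pos->end) */
            after->pgoff += new->end - pos->start; /* keep file offset in sync */
    }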
 
-int maps__copy_from(struct maps *maps, struct maps *parent)
+int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
 {
        int err;
-       struct map_rb_node *rb_node;
 
+       down_write(maps__lock(maps));
+       err = __maps__fixup_overlap_and_insert(maps, new);
+       up_write(maps__lock(maps));
+       return err;
+}
+
+int maps__copy_from(struct maps *dest, struct maps *parent)
+{
+       /* Note, if struct map were immutable then cloning could use ref counts. */
+       struct map **parent_maps_by_address;
+       int err = 0;
+       unsigned int n;
+
+       down_write(maps__lock(dest));
        down_read(maps__lock(parent));
 
-       maps__for_each_entry(parent, rb_node) {
-               struct map *new = map__clone(rb_node->map);
+       parent_maps_by_address = maps__maps_by_address(parent);
+       n = maps__nr_maps(parent);
+       if (maps__nr_maps(dest) == 0) {
+               /* No existing mappings so just copy from parent to avoid reallocs in insert. */
+               unsigned int nr_maps_allocated = RC_CHK_ACCESS(parent)->nr_maps_allocated;
+               struct map **dest_maps_by_address =
+                       malloc(nr_maps_allocated * sizeof(struct map *));
+               struct map **dest_maps_by_name = NULL;
 
-               if (new == NULL) {
+               if (!dest_maps_by_address)
                        err = -ENOMEM;
-                       goto out_unlock;
+               else {
+                       if (maps__maps_by_name(parent)) {
+                               dest_maps_by_name =
+                                       malloc(nr_maps_allocated * sizeof(struct map *));
+                       }
+
+                       RC_CHK_ACCESS(dest)->maps_by_address = dest_maps_by_address;
+                       RC_CHK_ACCESS(dest)->maps_by_name = dest_maps_by_name;
+                       RC_CHK_ACCESS(dest)->nr_maps_allocated = nr_maps_allocated;
                }
 
-               err = unwind__prepare_access(maps, new, NULL);
-               if (err)
-                       goto out_unlock;
+               for (unsigned int i = 0; !err && i < n; i++) {
+                       struct map *pos = parent_maps_by_address[i];
+                       struct map *new = map__clone(pos);
 
-               err = maps__insert(maps, new);
-               if (err)
-                       goto out_unlock;
+                       if (!new)
+                               err = -ENOMEM;
+                       else {
+                               err = unwind__prepare_access(dest, new, NULL);
+                               if (!err) {
+                                       dest_maps_by_address[i] = new;
+                                       if (dest_maps_by_name)
+                                               dest_maps_by_name[i] = map__get(new);
+                                       RC_CHK_ACCESS(dest)->nr_maps = i + 1;
+                               }
+                       }
+                       if (err)
+                               map__put(new);
+               }
+               maps__set_maps_by_address_sorted(dest, maps__maps_by_address_sorted(parent));
+               if (!err) {
+                       RC_CHK_ACCESS(dest)->last_search_by_name_idx =
+                               RC_CHK_ACCESS(parent)->last_search_by_name_idx;
+                       maps__set_maps_by_name_sorted(dest,
+                                               dest_maps_by_name &&
+                                               maps__maps_by_name_sorted(parent));
+               } else {
+                       RC_CHK_ACCESS(dest)->last_search_by_name_idx = 0;
+                       maps__set_maps_by_name_sorted(dest, false);
+               }
+       } else {
+               /* Unexpected: copying into a maps that already contains entries. */
+               for (unsigned int i = 0; !err && i < n; i++) {
+                       struct map *pos = parent_maps_by_address[i];
+                       struct map *new = map__clone(pos);
 
-               map__put(new);
+                       if (!new)
+                               err = -ENOMEM;
+                       else {
+                               err = unwind__prepare_access(dest, new, NULL);
+                               if (!err)
+                                       err = __maps__insert(dest, new);
+                       }
+                       map__put(new);
+               }
        }
-
-       err = 0;
-out_unlock:
        up_read(maps__lock(parent));
+       up_write(maps__lock(dest));
        return err;
 }
 
-struct map *maps__find(struct maps *maps, u64 ip)
+static int map__addr_cmp(const void *key, const void *entry)
 {
-       struct rb_node *p;
-       struct map_rb_node *m;
-
+       const u64 ip = *(const u64 *)key;
+       const struct map *map = *(const struct map * const *)entry;
 
-       down_read(maps__lock(maps));
-
-       p = maps__entries(maps)->rb_node;
-       while (p != NULL) {
-               m = rb_entry(p, struct map_rb_node, rb_node);
-               if (ip < map__start(m->map))
-                       p = p->rb_left;
-               else if (ip >= map__end(m->map))
-                       p = p->rb_right;
-               else
-                       goto out;
-       }
-
-       m = NULL;
-out:
-       up_read(maps__lock(maps));
-       return m ? m->map : NULL;
+       if (ip < map__start(map))
+               return -1;
+       if (ip >= map__end(map))
+               return 1;
+       return 0;
 }
 
-static int map__strcmp(const void *a, const void *b)
+struct map *maps__find(struct maps *maps, u64 ip)
 {
-       const struct map *map_a = *(const struct map **)a;
-       const struct map *map_b = *(const struct map **)b;
-       const struct dso *dso_a = map__dso(map_a);
-       const struct dso *dso_b = map__dso(map_b);
-       int ret = strcmp(dso_a->short_name, dso_b->short_name);
-
-       if (ret == 0 && map_a != map_b) {
-               /*
-                * Ensure distinct but name equal maps have an order in part to
-                * aid reference counting.
-                */
-               ret = (int)map__start(map_a) - (int)map__start(map_b);
-               if (ret == 0)
-                       ret = (int)((intptr_t)map_a - (intptr_t)map_b);
+       struct map *result = NULL;
+       bool done = false;
+
+       /* See locking/sorting note. */
+       while (!done) {
+               down_read(maps__lock(maps));
+               if (maps__maps_by_address_sorted(maps)) {
+                       struct map **mapp =
+                               bsearch(&ip, maps__maps_by_address(maps), maps__nr_maps(maps),
+                                       sizeof(*mapp), map__addr_cmp);
+
+                       if (mapp)
+                               result = map__get(*mapp);
+                       done = true;
+               }
+               up_read(maps__lock(maps));
+               if (!done)
+                       maps__sort_by_address(maps);
        }
-
-       return ret;
+       return result;
 }
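
The bsearch() relies on the libc contract that the comparator's first argument is the key and the second a pointer to an array element; since the array stores struct map pointers, the entry is dereferenced once as a struct map * const *. The same pattern as a self-contained sketch over integer ranges:

    #include <stdlib.h>

    struct range { unsigned long start, end; };

    static int addr_cmp(const void *key, const void *entry)
    {
            unsigned long ip = *(const unsigned long *)key;
            const struct range *r = *(const struct range * const *)entry;

            if (ip < r->start)
                    return -1;
            if (ip >= r->end)
                    return 1;
            return 0;
    }

    /* ranges[] holds pointers sorted by start, non-overlapping. */
    static struct range *range_find(struct range **ranges, size_t n,
                                    unsigned long ip)
    {
            struct range **pos = bsearch(&ip, ranges, n, sizeof(*ranges),
                                         addr_cmp);

            return pos ? *pos : NULL;
    }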
 
 static int map__strcmp_name(const void *name, const void *b)
@@ -593,126 +990,113 @@ static int map__strcmp_name(const void *name, const void *b)
        return strcmp(name, dso->short_name);
 }
 
-void __maps__sort_by_name(struct maps *maps)
-{
-       qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp);
-}
-
-static int map__groups__sort_by_name_from_rbtree(struct maps *maps)
-{
-       struct map_rb_node *rb_node;
-       struct map **maps_by_name = realloc(maps__maps_by_name(maps),
-                                           maps__nr_maps(maps) * sizeof(struct map *));
-       int i = 0;
-
-       if (maps_by_name == NULL)
-               return -1;
-
-       up_read(maps__lock(maps));
-       down_write(maps__lock(maps));
-
-       RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name;
-       RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps);
-
-       maps__for_each_entry(maps, rb_node)
-               maps_by_name[i++] = map__get(rb_node->map);
-
-       __maps__sort_by_name(maps);
-
-       up_write(maps__lock(maps));
-       down_read(maps__lock(maps));
-
-       return 0;
-}
-
-static struct map *__maps__find_by_name(struct maps *maps, const char *name)
+struct map *maps__find_by_name(struct maps *maps, const char *name)
 {
-       struct map **mapp;
-
-       if (maps__maps_by_name(maps) == NULL &&
-           map__groups__sort_by_name_from_rbtree(maps))
-               return NULL;
+       struct map *result = NULL;
+       bool done = false;
 
-       mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps),
-                      sizeof(*mapp), map__strcmp_name);
-       if (mapp)
-               return *mapp;
-       return NULL;
-}
+       /* See locking/sorting note. */
+       while (!done) {
+               unsigned int i;
 
-struct map *maps__find_by_name(struct maps *maps, const char *name)
-{
-       struct map_rb_node *rb_node;
-       struct map *map;
+               down_read(maps__lock(maps));
 
-       down_read(maps__lock(maps));
+               /* First, check the last found entry. */
+               i = RC_CHK_ACCESS(maps)->last_search_by_name_idx;
+               if (i < maps__nr_maps(maps) && maps__maps_by_name(maps)) {
+                       struct dso *dso = map__dso(maps__maps_by_name(maps)[i]);
 
+                       if (dso && strcmp(dso->short_name, name) == 0) {
+                               result = map__get(maps__maps_by_name(maps)[i]);
+                               done = true;
+                       }
+               }
 
-       if (RC_CHK_ACCESS(maps)->last_search_by_name) {
-               const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name);
+               /* Second, search the name-sorted array. */
+               if (!done && maps__maps_by_name_sorted(maps)) {
+                       struct map **mapp =
+                               bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps),
+                                       sizeof(*mapp), map__strcmp_name);
 
-               if (strcmp(dso->short_name, name) == 0) {
-                       map = RC_CHK_ACCESS(maps)->last_search_by_name;
-                       goto out_unlock;
+                       if (mapp) {
+                               result = map__get(*mapp);
+                               i = mapp - maps__maps_by_name(maps);
+                               RC_CHK_ACCESS(maps)->last_search_by_name_idx = i;
+                       }
+                       done = true;
                }
-       }
-       /*
-        * If we have maps->maps_by_name, then the name isn't in the rbtree,
-        * as maps->maps_by_name mirrors the rbtree when lookups by name are
-        * made.
-        */
-       map = __maps__find_by_name(maps, name);
-       if (map || maps__maps_by_name(maps) != NULL)
-               goto out_unlock;
-
-       /* Fallback to traversing the rbtree... */
-       maps__for_each_entry(maps, rb_node) {
-               struct dso *dso;
-
-               map = rb_node->map;
-               dso = map__dso(map);
-               if (strcmp(dso->short_name, name) == 0) {
-                       RC_CHK_ACCESS(maps)->last_search_by_name = map;
-                       goto out_unlock;
+               up_read(maps__lock(maps));
+               if (!done) {
+                       /* Sort and retry binary search. */
+                       if (maps__sort_by_name(maps)) {
+                               /*
+                                * Memory allocation failed, so do a linear
+                                * search through the address sorted maps.
+                                */
+                               struct map **maps_by_address;
+                               unsigned int n;
+
+                               down_read(maps__lock(maps));
+                               maps_by_address = maps__maps_by_address(maps);
+                               n = maps__nr_maps(maps);
+                               for (i = 0; i < n; i++) {
+                                       struct map *pos = maps_by_address[i];
+                                       struct dso *dso = map__dso(pos);
+
+                                       if (dso && strcmp(dso->short_name, name) == 0) {
+                                               result = map__get(pos);
+                                               break;
+                                       }
+                               }
+                               up_read(maps__lock(maps));
+                               done = true;
+                       }
                }
        }
-       map = NULL;
-
-out_unlock:
-       up_read(maps__lock(maps));
-       return map;
+       return result;
 }
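
The lookup above tries three tiers in increasing cost: the cached last_search_by_name_idx, a binary search of the name-sorted array, and, only if sorting fails with ENOMEM, a linear scan of the address-sorted array. The tiering as a self-contained sketch over plain strings (hypothetical structure, not the perf API):

    #include <stdlib.h>
    #include <string.h>

    struct names {
            char **by_name;   /* lazily sorted copy, NULL on alloc failure */
            char **by_addr;   /* always present, insertion order */
            size_t n;
            size_t last_idx;  /* tier 1: cache of the last hit */
    };

    static int strp_cmp(const void *key, const void *entry)
    {
            return strcmp(key, *(char * const *)entry);
    }

    static char *find_name(struct names *ns, const char *name)
    {
            /* Tier 1: O(1) check of the previously found index. */
            if (ns->by_name && ns->last_idx < ns->n &&
                !strcmp(ns->by_name[ns->last_idx], name))
                    return ns->by_name[ns->last_idx];

            /* Tier 2: O(log n) binary search of the sorted copy. */
            if (ns->by_name) {
                    char **pos = bsearch(name, ns->by_name, ns->n,
                                         sizeof(*pos), strp_cmp);

                    if (pos) {
                            ns->last_idx = pos - ns->by_name;
                            return *pos;
                    }
                    return NULL;
            }

            /* Tier 3: allocation failed, fall back to an O(n) scan. */
            for (size_t i = 0; i < ns->n; i++) {
                    if (!strcmp(ns->by_addr[i], name))
                            return ns->by_addr[i];
            }
            return NULL;
    }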
 
 struct map *maps__find_next_entry(struct maps *maps, struct map *map)
 {
-       struct map_rb_node *rb_node = maps__find_node(maps, map);
-       struct map_rb_node *next = map_rb_node__next(rb_node);
+       unsigned int i;
+       struct map *result = NULL;
 
-       if (next)
-               return next->map;
+       down_read(maps__lock(maps));
+       i = maps__by_address_index(maps, map);
+       if (i < maps__nr_maps(maps))
+               result = map__get(maps__maps_by_address(maps)[i]);
 
-       return NULL;
+       up_read(maps__lock(maps));
+       return result;
 }
 
 void maps__fixup_end(struct maps *maps)
 {
-       struct map_rb_node *prev = NULL, *curr;
+       struct map **maps_by_address;
+       unsigned int n;
 
        down_write(maps__lock(maps));
+       if (!maps__maps_by_address_sorted(maps))
+               __maps__sort_by_address(maps);
 
-       maps__for_each_entry(maps, curr) {
-               if (prev && (!map__end(prev->map) || map__end(prev->map) > map__start(curr->map)))
-                       map__set_end(prev->map, map__start(curr->map));
+       maps_by_address = maps__maps_by_address(maps);
+       n = maps__nr_maps(maps);
+       for (unsigned int i = 1; i < n; i++) {
+               struct map *prev = maps_by_address[i - 1];
+               struct map *curr = maps_by_address[i];
 
-               prev = curr;
+               if (!map__end(prev) || map__end(prev) > map__start(curr))
+                       map__set_end(prev, map__start(curr));
        }
 
        /*
         * We still don't have the actual symbols, so guess the
         * last map's final address.
         */
-       if (curr && !map__end(curr->map))
-               map__set_end(curr->map, ~0ULL);
+       if (n > 0 && !map__end(maps_by_address[n - 1]))
+               map__set_end(maps_by_address[n - 1], ~0ULL);
+
+       RC_CHK_ACCESS(maps)->ends_broken = false;
 
        up_write(maps__lock(maps));
 }
@@ -723,117 +1107,93 @@ void maps__fixup_end(struct maps *maps)
  */
 int maps__merge_in(struct maps *kmaps, struct map *new_map)
 {
-       struct map_rb_node *rb_node;
-       struct rb_node *first;
-       bool overlaps;
-       LIST_HEAD(merged);
-       int err = 0;
-
-       down_read(maps__lock(kmaps));
-       first = first_ending_after(kmaps, new_map);
-       rb_node = first ? rb_entry(first, struct map_rb_node, rb_node) : NULL;
-       overlaps = rb_node && map__start(rb_node->map) < map__end(new_map);
-       up_read(maps__lock(kmaps));
+       unsigned int first_after_, kmaps__nr_maps;
+       struct map **kmaps_maps_by_address;
+       struct map **merged_maps_by_address;
+       unsigned int merged_nr_maps_allocated;
+
+       /* First try under a read lock. */
+       while (true) {
+               down_read(maps__lock(kmaps));
+               if (maps__maps_by_address_sorted(kmaps))
+                       break;
 
-       if (!overlaps)
-               return maps__insert(kmaps, new_map);
+               up_read(maps__lock(kmaps));
 
-       maps__for_each_entry(kmaps, rb_node) {
-               struct map *old_map = rb_node->map;
+               /* The first_ending_after() binary search requires sorted maps. Sort and try again. */
+               maps__sort_by_address(kmaps);
+       }
+       first_after_ = first_ending_after(kmaps, new_map);
+       kmaps_maps_by_address = maps__maps_by_address(kmaps);
 
-               /* no overload with this one */
-               if (map__end(new_map) < map__start(old_map) ||
-                   map__start(new_map) >= map__end(old_map))
-                       continue;
+       if (first_after_ >= maps__nr_maps(kmaps) ||
+           map__start(kmaps_maps_by_address[first_after_]) >= map__end(new_map)) {
+               /* No overlap so regular insert suffices. */
+               up_read(maps__lock(kmaps));
+               return maps__insert(kmaps, new_map);
+       }
+       up_read(maps__lock(kmaps));
 
-               if (map__start(new_map) < map__start(old_map)) {
-                       /*
-                        * |new......
-                        *       |old....
-                        */
-                       if (map__end(new_map) < map__end(old_map)) {
-                               /*
-                                * |new......|     -> |new..|
-                                *       |old....| ->       |old....|
-                                */
-                               map__set_end(new_map, map__start(old_map));
-                       } else {
-                               /*
-                                * |new.............| -> |new..|       |new..|
-                                *       |old....|    ->       |old....|
-                                */
-                               struct map_list_node *m = map_list_node__new();
+       /* Plain insert with a read-lock failed, try again now with the write lock. */
+       down_write(maps__lock(kmaps));
+       if (!maps__maps_by_address_sorted(kmaps))
+               __maps__sort_by_address(kmaps);
+
+       first_after_ = first_ending_after(kmaps, new_map);
+       kmaps_maps_by_address = maps__maps_by_address(kmaps);
+       kmaps__nr_maps = maps__nr_maps(kmaps);
+
+       if (first_after_ >= kmaps__nr_maps ||
+           map__start(kmaps_maps_by_address[first_after_]) >= map__end(new_map)) {
+               /* No overlap so regular insert suffices. */
+               int ret = __maps__insert(kmaps, new_map);
+               up_write(maps__lock(kmaps));
+               return ret;
+       }
+       /* Array to merge into, possibly 1 more for the sake of new_map. */
+       merged_nr_maps_allocated = RC_CHK_ACCESS(kmaps)->nr_maps_allocated;
+       if (kmaps__nr_maps + 1 == merged_nr_maps_allocated)
+               merged_nr_maps_allocated++;
+
+       merged_maps_by_address = malloc(merged_nr_maps_allocated * sizeof(*merged_maps_by_address));
+       if (!merged_maps_by_address) {
+               up_write(maps__lock(kmaps));
+               return -ENOMEM;
+       }
+       maps__set_maps_by_address(kmaps, merged_maps_by_address);
+       maps__set_maps_by_address_sorted(kmaps, true);
+       zfree(maps__maps_by_name_addr(kmaps));
+       maps__set_maps_by_name_sorted(kmaps, true);
+       maps__set_nr_maps_allocated(kmaps, merged_nr_maps_allocated);
 
-                               if (!m) {
-                                       err = -ENOMEM;
-                                       goto out;
-                               }
+       /* Copy entries before the new_map that can't overlap. */
+       for (unsigned int i = 0; i < first_after_; i++)
+               merged_maps_by_address[i] = map__get(kmaps_maps_by_address[i]);
 
-                               m->map = map__clone(new_map);
-                               if (!m->map) {
-                                       free(m);
-                                       err = -ENOMEM;
-                                       goto out;
-                               }
+       maps__set_nr_maps(kmaps, first_after_);
 
-                               map__set_end(m->map, map__start(old_map));
-                               list_add_tail(&m->node, &merged);
-                               map__add_pgoff(new_map, map__end(old_map) - map__start(new_map));
-                               map__set_start(new_map, map__end(old_map));
-                       }
-               } else {
-                       /*
-                        *      |new......
-                        * |old....
-                        */
-                       if (map__end(new_map) < map__end(old_map)) {
-                               /*
-                                *      |new..|   -> x
-                                * |old.........| -> |old.........|
-                                */
-                               map__put(new_map);
-                               new_map = NULL;
-                               break;
-                       } else {
-                               /*
-                                *      |new......| ->         |new...|
-                                * |old....|        -> |old....|
-                                */
-                               map__add_pgoff(new_map, map__end(old_map) - map__start(new_map));
-                               map__set_start(new_map, map__end(old_map));
-                       }
-               }
-       }
+       /* Add the new map; it will be split when the later overlapping mappings are added. */
+       __maps__insert(kmaps, new_map);
 
-out:
-       while (!list_empty(&merged)) {
-               struct map_list_node *old_node;
+       /* Insert mappings after new_map, splitting new_map in the process. */
+       for (unsigned int i = first_after_; i < kmaps__nr_maps; i++)
+               __maps__fixup_overlap_and_insert(kmaps, kmaps_maps_by_address[i]);
 
-               old_node = list_entry(merged.next, struct map_list_node, node);
-               list_del_init(&old_node->node);
-               if (!err)
-                       err = maps__insert(kmaps, old_node->map);
-               map__put(old_node->map);
-               free(old_node);
-       }
+       /* Release the old maps that have been merged into kmaps. */
+       for (unsigned int i = 0; i < kmaps__nr_maps; i++)
+               map__zput(kmaps_maps_by_address[i]);
 
-       if (new_map) {
-               if (!err)
-                       err = maps__insert(kmaps, new_map);
-               map__put(new_map);
-       }
-       return err;
+       free(kmaps_maps_by_address);
+       up_write(maps__lock(kmaps));
+       return 0;
 }
 
 void maps__load_first(struct maps *maps)
 {
-       struct map_rb_node *first;
-
        down_read(maps__lock(maps));
 
-       first = maps__first(maps);
-       if (first)
-               map__load(first->map);
+       if (maps__nr_maps(maps) > 0)
+               map__load(maps__maps_by_address(maps)[0]);
 
        up_read(maps__lock(maps));
 }
index d836d04c940229a70a30561ade8dc40edb02b4f0..d9aa62ed968ac43d16b67279b512c9393c8b219b 100644 (file)
@@ -3,45 +3,15 @@
 #define __PERF_MAPS_H
 
 #include <linux/refcount.h>
-#include <linux/rbtree.h>
 #include <stdio.h>
 #include <stdbool.h>
 #include <linux/types.h>
-#include "rwsem.h"
-#include <internal/rc_check.h>
 
 struct ref_reloc_sym;
 struct machine;
 struct map;
 struct maps;
 
-struct map_list_node {
-       struct list_head node;
-       struct map *map;
-};
-
-static inline struct map_list_node *map_list_node__new(void)
-{
-       return malloc(sizeof(struct map_list_node));
-}
-
-struct map *maps__find(struct maps *maps, u64 addr);
-
-DECLARE_RC_STRUCT(maps) {
-       struct rb_root      entries;
-       struct rw_semaphore lock;
-       struct machine   *machine;
-       struct map       *last_search_by_name;
-       struct map       **maps_by_name;
-       refcount_t       refcnt;
-       unsigned int     nr_maps;
-       unsigned int     nr_maps_allocated;
-#ifdef HAVE_LIBUNWIND_SUPPORT
-       void                            *addr_space;
-       const struct unwind_libunwind_ops *unwind_libunwind_ops;
-#endif
-};
-
 #define KMAP_NAME_LEN 256
 
 struct kmap {
@@ -65,36 +35,22 @@ static inline void __maps__zput(struct maps **map)
 
 #define maps__zput(map) __maps__zput(&map)
 
+bool maps__equal(struct maps *a, struct maps *b);
+
 /* Iterate over map calling cb for each entry. */
 int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data);
 /* Iterate over map removing an entry if cb returns true. */
 void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data);
 
-static inline struct machine *maps__machine(struct maps *maps)
-{
-       return RC_CHK_ACCESS(maps)->machine;
-}
-
-static inline unsigned int maps__nr_maps(const struct maps *maps)
-{
-       return RC_CHK_ACCESS(maps)->nr_maps;
-}
-
-static inline refcount_t *maps__refcnt(struct maps *maps)
-{
-       return &RC_CHK_ACCESS(maps)->refcnt;
-}
+struct machine *maps__machine(const struct maps *maps);
+unsigned int maps__nr_maps(const struct maps *maps); /* Test only. */
+refcount_t *maps__refcnt(struct maps *maps); /* Test only. */
 
 #ifdef HAVE_LIBUNWIND_SUPPORT
-static inline void *maps__addr_space(struct maps *maps)
-{
-       return RC_CHK_ACCESS(maps)->addr_space;
-}
-
-static inline const struct unwind_libunwind_ops *maps__unwind_libunwind_ops(const struct maps *maps)
-{
-       return RC_CHK_ACCESS(maps)->unwind_libunwind_ops;
-}
+void *maps__addr_space(const struct maps *maps);
+void maps__set_addr_space(struct maps *maps, void *addr_space);
+const struct unwind_libunwind_ops *maps__unwind_libunwind_ops(const struct maps *maps);
+void maps__set_unwind_libunwind_ops(struct maps *maps, const struct unwind_libunwind_ops *ops);
 #endif
 
 size_t maps__fprintf(struct maps *maps, FILE *fp);
@@ -102,6 +58,7 @@ size_t maps__fprintf(struct maps *maps, FILE *fp);
 int maps__insert(struct maps *maps, struct map *map);
 void maps__remove(struct maps *maps, struct map *map);
 
+struct map *maps__find(struct maps *maps, u64 addr);
 struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp);
 struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp);
 
@@ -117,8 +74,6 @@ struct map *maps__find_next_entry(struct maps *maps, struct map *map);
 
 int maps__merge_in(struct maps *kmaps, struct map *new_map);
 
-void __maps__sort_by_name(struct maps *maps);
-
 void maps__fixup_end(struct maps *maps);
 
 void maps__load_first(struct maps *maps);
index 3a2e3687878c1862c64d0f723496a76ceb2f8229..637cbd4a7bfb45836513bd257c56ffcaab3540f1 100644 (file)
 
 unsigned int perf_mem_events__loads_ldlat = 30;
 
-#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a }
 
-static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
-       E("ldlat-loads",        "cpu/mem-loads,ldlat=%u/P",     "cpu/events/mem-loads"),
-       E("ldlat-stores",       "cpu/mem-stores/P",             "cpu/events/mem-stores"),
-       E(NULL,                 NULL,                           NULL),
+struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+       E("ldlat-loads",        "%s/mem-loads,ldlat=%u/P",      "mem-loads",    true,   0),
+       E("ldlat-stores",       "%s/mem-stores/P",              "mem-stores",   false,  0),
+       E(NULL,                 NULL,                           NULL,           false,  0),
 };
 #undef E
 
 static char mem_loads_name[100];
-static bool mem_loads_name__init;
+static char mem_stores_name[100];
 
-struct perf_mem_event * __weak perf_mem_events__ptr(int i)
+struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i)
 {
-       if (i >= PERF_MEM_EVENTS__MAX)
+       if (i >= PERF_MEM_EVENTS__MAX || !pmu)
                return NULL;
 
-       return &perf_mem_events[i];
+       return &pmu->mem_events[i];
 }
 
-const char * __weak perf_mem_events__name(int i, const char *pmu_name  __maybe_unused)
+static struct perf_pmu *perf_pmus__scan_mem(struct perf_pmu *pmu)
 {
-       struct perf_mem_event *e = perf_mem_events__ptr(i);
+       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+               if (pmu->mem_events)
+                       return pmu;
+       }
+       return NULL;
+}
+
+struct perf_pmu *perf_mem_events_find_pmu(void)
+{
+       /*
+        * The current perf mem doesn't support per-PMU configuration.
+        * The exact same configuration is applied to all the
+        * mem_events supported PMUs.
+        * Return the first mem_events supported PMU.
+        *
+        * Notes: The only case which may support multiple mem_events
+        * supported PMUs is Intel hybrid. The exact same mem_events
+        * are shared among the PMUs, so configuring only the first
+        * PMU is good enough.
+        */
+       return perf_pmus__scan_mem(NULL);
+}
+
+/**
+ * perf_pmu__mem_events_num_mem_pmus - Get the number of mem PMUs starting from the given pmu
+ * @pmu: Start pmu. If it's NULL, search the entire PMU list.
+ */
+int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu)
+{
+       int num = 0;
+
+       while ((pmu = perf_pmus__scan_mem(pmu)) != NULL)
+               num++;
+
+       return num;
+}
 
+static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu)
+{
+       struct perf_mem_event *e;
+
+       if (i >= PERF_MEM_EVENTS__MAX || !pmu)
+               return NULL;
+
+       e = &pmu->mem_events[i];
        if (!e)
                return NULL;
 
-       if (i == PERF_MEM_EVENTS__LOAD) {
-               if (!mem_loads_name__init) {
-                       mem_loads_name__init = true;
-                       scnprintf(mem_loads_name, sizeof(mem_loads_name),
-                                 e->name, perf_mem_events__loads_ldlat);
+       if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) {
+               if (e->ldlat) {
+                       if (!e->aux_event) {
+                               /* ARM and most of Intel */
+                               scnprintf(mem_loads_name, sizeof(mem_loads_name),
+                                         e->name, pmu->name,
+                                         perf_mem_events__loads_ldlat);
+                       } else {
+                               /* Intel with mem-loads-aux event */
+                               scnprintf(mem_loads_name, sizeof(mem_loads_name),
+                                         e->name, pmu->name, pmu->name,
+                                         perf_mem_events__loads_ldlat);
+                       }
+               } else {
+                       if (!e->aux_event) {
+                               /* AMD and POWER */
+                               scnprintf(mem_loads_name, sizeof(mem_loads_name),
+                                         e->name, pmu->name);
+                       } else
+                               return NULL;
                }
+
                return mem_loads_name;
        }
 
-       return e->name;
+       if (i == PERF_MEM_EVENTS__STORE) {
+               scnprintf(mem_stores_name, sizeof(mem_stores_name),
+                         e->name, pmu->name);
+               return mem_stores_name;
+       }
+
+       return NULL;
 }
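
As a worked example of the name construction (PMU name and latency assumed, not probed from a real system): the ldlat template consumes the PMU name plus the threshold, while the store template consumes only the PMU name. The aux-event Intel variant passes pmu->name twice because its arch-provided template names the PMU in two places, hence the second scnprintf() call shape above.

    #include <stdio.h>

    int main(void)
    {
            char loads[100], stores[100];

            snprintf(loads, sizeof(loads), "%s/mem-loads,ldlat=%u/P", "cpu", 30u);
            /* loads == "cpu/mem-loads,ldlat=30/P" */

            snprintf(stores, sizeof(stores), "%s/mem-stores/P", "cpu");
            /* stores == "cpu/mem-stores/P" */

            printf("%s\n%s\n", loads, stores);
            return 0;
    }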
 
-__weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused)
+bool is_mem_loads_aux_event(struct evsel *leader)
 {
-       return false;
+       struct perf_pmu *pmu = leader->pmu;
+       struct perf_mem_event *e;
+
+       if (!pmu || !pmu->mem_events)
+               return false;
+
+       e = &pmu->mem_events[PERF_MEM_EVENTS__LOAD];
+       if (!e->aux_event)
+               return false;
+
+       return leader->core.attr.config == e->aux_event;
 }
 
-int perf_mem_events__parse(const char *str)
+int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str)
 {
        char *tok, *saveptr = NULL;
        bool found = false;
@@ -79,7 +154,7 @@ int perf_mem_events__parse(const char *str)
 
        while (tok) {
                for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-                       struct perf_mem_event *e = perf_mem_events__ptr(j);
+                       struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
 
                        if (!e->tag)
                                continue;
@@ -100,19 +175,21 @@ int perf_mem_events__parse(const char *str)
        return -1;
 }
 
-static bool perf_mem_event__supported(const char *mnt, struct perf_pmu *pmu,
+static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu,
                                      struct perf_mem_event *e)
 {
-       char sysfs_name[100];
        char path[PATH_MAX];
        struct stat st;
 
-       scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name);
-       scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name);
+       if (!e->event_name)
+               return true;
+
+       scnprintf(path, PATH_MAX, "%s/devices/%s/events/%s", mnt, pmu->name, e->event_name);
+
        return !stat(path, &st);
 }
 
-int perf_mem_events__init(void)
+int perf_pmu__mem_events_init(struct perf_pmu *pmu)
 {
        const char *mnt = sysfs__mount();
        bool found = false;
@@ -122,8 +199,7 @@ int perf_mem_events__init(void)
                return -ENOENT;
 
        for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-               struct perf_mem_event *e = perf_mem_events__ptr(j);
-               struct perf_pmu *pmu = NULL;
+               struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
 
                /*
                 * If the event entry isn't valid, skip initialization
@@ -132,103 +208,66 @@ int perf_mem_events__init(void)
                if (!e->tag)
                        continue;
 
-               /*
-                * Scan all PMUs not just core ones, since perf mem/c2c on
-                * platforms like AMD uses IBS OP PMU which is independent
-                * of core PMU.
-                */
-               while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-                       e->supported |= perf_mem_event__supported(mnt, pmu, e);
-                       if (e->supported) {
-                               found = true;
-                               break;
-                       }
-               }
+               e->supported |= perf_pmu__mem_events_supported(mnt, pmu, e);
+               if (e->supported)
+                       found = true;
        }
 
        return found ? 0 : -ENOENT;
 }
 
-void perf_mem_events__list(void)
+void perf_pmu__mem_events_list(struct perf_pmu *pmu)
 {
        int j;
 
        for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-               struct perf_mem_event *e = perf_mem_events__ptr(j);
+               struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j);
 
                fprintf(stderr, "%-*s%-*s%s",
                        e->tag ? 13 : 0,
                        e->tag ? : "",
                        e->tag && verbose > 0 ? 25 : 0,
-                       e->tag && verbose > 0 ? perf_mem_events__name(j, NULL) : "",
+                       e->tag && verbose > 0 ? perf_pmu__mem_events_name(j, pmu) : "",
                        e->supported ? ": available\n" : "");
        }
 }
 
-static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
-                                                   int idx)
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr)
 {
        const char *mnt = sysfs__mount();
        struct perf_pmu *pmu = NULL;
-
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-               if (!perf_mem_event__supported(mnt, pmu, e)) {
-                       pr_err("failed: event '%s' not supported\n",
-                              perf_mem_events__name(idx, pmu->name));
-               }
-       }
-}
-
-int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
-                                char **rec_tmp, int *tmp_nr)
-{
-       const char *mnt = sysfs__mount();
-       int i = *argv_nr, k = 0;
        struct perf_mem_event *e;
+       int i = *argv_nr;
+       const char *s;
+       char *copy;
 
-       for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-               e = perf_mem_events__ptr(j);
-               if (!e->record)
-                       continue;
+       while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
+               for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+                       e = perf_pmu__mem_events_ptr(pmu, j);
+
+                       if (!e->record)
+                               continue;
 
-               if (perf_pmus__num_mem_pmus() == 1) {
                        if (!e->supported) {
                                pr_err("failed: event '%s' not supported\n",
-                                      perf_mem_events__name(j, NULL));
+                                       perf_pmu__mem_events_name(j, pmu));
                                return -1;
                        }
 
-                       rec_argv[i++] = "-e";
-                       rec_argv[i++] = perf_mem_events__name(j, NULL);
-               } else {
-                       struct perf_pmu *pmu = NULL;
+                       s = perf_pmu__mem_events_name(j, pmu);
+                       if (!s || !perf_pmu__mem_events_supported(mnt, pmu, e))
+                               continue;
 
-                       if (!e->supported) {
-                               perf_mem_events__print_unsupport_hybrid(e, j);
+                       copy = strdup(s);
+                       if (!copy)
                                return -1;
-                       }
-
-                       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-                               const char *s = perf_mem_events__name(j, pmu->name);
-
-                               if (!perf_mem_event__supported(mnt, pmu, e))
-                                       continue;
 
-                               rec_argv[i++] = "-e";
-                               if (s) {
-                                       char *copy = strdup(s);
-                                       if (!copy)
-                                               return -1;
-
-                                       rec_argv[i++] = copy;
-                                       rec_tmp[k++] = copy;
-                               }
-                       }
+                       rec_argv[i++] = "-e";
+                       rec_argv[i++] = copy;
                }
        }
 
        *argv_nr = i;
-       *tmp_nr = k;
        return 0;
 }
 
index b40ad6ea93fcdb70d0111b20abb8cbab475ec120..15d5f0320d2779027c55fbe5eb2d0bf96b93744e 100644 (file)
 struct perf_mem_event {
        bool            record;
        bool            supported;
+       bool            ldlat;
+       u32             aux_event;
        const char      *tag;
        const char      *name;
-       const char      *sysfs_name;
+       const char      *event_name;
 };
 
 struct mem_info {
@@ -34,17 +36,18 @@ enum {
 };
 
 extern unsigned int perf_mem_events__loads_ldlat;
+extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
 
-int perf_mem_events__parse(const char *str);
-int perf_mem_events__init(void);
+int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str);
+int perf_pmu__mem_events_init(struct perf_pmu *pmu);
 
-const char *perf_mem_events__name(int i, const char *pmu_name);
-struct perf_mem_event *perf_mem_events__ptr(int i);
+struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i);
+struct perf_pmu *perf_mem_events_find_pmu(void);
+int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu);
 bool is_mem_loads_aux_event(struct evsel *leader);
 
-void perf_mem_events__list(void);
-int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
-                                char **rec_tmp, int *tmp_nr);
+void perf_pmu__mem_events_list(struct perf_pmu *pmu);
+int perf_mem_events__record_args(const char **rec_argv, int *argv_nr);
 
 int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
index 966cca5a3e88cd94b78c27ce8429f820f1fa86b9..79ef6095ab28919c564e0d6000a4f503129e8305 100644 (file)
@@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
        if (!metric_events)
                return NULL;
 
+       if (evsel && evsel->metric_leader)
+               me.evsel = evsel->metric_leader;
        nd = rblist__find(metric_events, &me);
        if (nd)
                return container_of(nd, struct metric_event, nd);
@@ -350,25 +352,23 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids,
        return 0;
 }
 
-static bool match_metric(const char *n, const char *list)
+static bool match_metric(const char *metric_or_groups, const char *sought)
 {
        int len;
        char *m;
 
-       if (!list)
+       if (!sought)
                return false;
-       if (!strcmp(list, "all"))
+       if (!strcmp(sought, "all"))
                return true;
-       if (!n)
-               return !strcasecmp(list, "No_group");
-       len = strlen(list);
-       m = strcasestr(n, list);
-       if (!m)
-               return false;
-       if ((m == n || m[-1] == ';' || m[-1] == ' ') &&
-           (m[len] == 0 || m[len] == ';'))
+       if (!metric_or_groups)
+               return !strcasecmp(sought, "No_group");
+       len = strlen(sought);
+       if (!strncasecmp(metric_or_groups, sought, len) &&
+           (metric_or_groups[len] == 0 || metric_or_groups[len] == ';'))
                return true;
-       return false;
+       m = strchr(metric_or_groups, ';');
+       return m && match_metric(m + 1, sought);
 }
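
The rewritten matcher walks the ';'-separated group list recursively: a case-insensitive prefix match counts only if it ends exactly at a ';' or at the end of the string. Illustrative checks (group names invented, written as they would appear in a test helper within this file):

    #include <assert.h>

    static void match_metric_examples(void)
    {
            /* Hit on the first segment, case-insensitively. */
            assert(match_metric("TopdownL1;tma_frontend_bound_group", "topdownl1"));
            /* No hit: "tma" is only a prefix of the second segment. */
            assert(!match_metric("TopdownL1;tma_frontend_bound_group", "tma"));
            /* "all" matches any metric or group list. */
            assert(match_metric("TopdownL1;tma_frontend_bound_group", "all"));
    }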
 
 static bool match_pm_metric(const struct pmu_metric *pm, const char *pmu, const char *metric)
index 66eabcea424274580abe69fd5226b16931a67dec..6f8b0fa176891f6042871984c07e292b77a656a0 100644 (file)
@@ -2181,50 +2181,53 @@ int parse_event(struct evlist *evlist, const char *str)
        return ret;
 }
 
+struct parse_events_error_entry {
+       /** @list: The list the error is part of. */
+       struct list_head list;
+       /** @idx: index in the parsed string */
+       int   idx;
+       /** @str: string to display at the index */
+       char *str;
+       /** @help: optional help string */
+       char *help;
+};
+
 void parse_events_error__init(struct parse_events_error *err)
 {
-       bzero(err, sizeof(*err));
+       INIT_LIST_HEAD(&err->list);
 }
 
 void parse_events_error__exit(struct parse_events_error *err)
 {
-       zfree(&err->str);
-       zfree(&err->help);
-       zfree(&err->first_str);
-       zfree(&err->first_help);
+       struct parse_events_error_entry *pos, *tmp;
+
+       list_for_each_entry_safe(pos, tmp, &err->list, list) {
+               zfree(&pos->str);
+               zfree(&pos->help);
+               list_del_init(&pos->list);
+               free(pos);
+       }
 }
 
 void parse_events_error__handle(struct parse_events_error *err, int idx,
                                char *str, char *help)
 {
+       struct parse_events_error_entry *entry;
+
        if (WARN(!str || !err, "WARNING: failed to provide error string or struct\n"))
                goto out_free;
-       switch (err->num_errors) {
-       case 0:
-               err->idx = idx;
-               err->str = str;
-               err->help = help;
-               break;
-       case 1:
-               err->first_idx = err->idx;
-               err->idx = idx;
-               err->first_str = err->str;
-               err->str = str;
-               err->first_help = err->help;
-               err->help = help;
-               break;
-       default:
-               pr_debug("Multiple errors dropping message: %s (%s)\n",
-                       err->str, err->help ?: "<no help>");
-               free(err->str);
-               err->str = str;
-               free(err->help);
-               err->help = help;
-               break;
+
+       entry = zalloc(sizeof(*entry));
+       if (!entry) {
+               pr_err("Failed to allocate memory for event parsing error: %s (%s)\n",
+                       str, help ?: "<no help>");
+               goto out_free;
        }
-       err->num_errors++;
+       entry->idx = idx;
+       entry->str = str;
+       entry->help = help;
+       list_add(&entry->list, &err->list);
        return;
-
 out_free:
        free(str);
        free(help);
@@ -2294,19 +2297,34 @@ static void __parse_events_error__print(int err_idx, const char *err_str,
        }
 }
 
-void parse_events_error__print(struct parse_events_error *err,
+void parse_events_error__print(const struct parse_events_error *err,
                               const char *event)
 {
-       if (!err->num_errors)
-               return;
+       struct parse_events_error_entry *pos;
+       bool first = true;
+
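+       /* Print every accumulated error, blank-line separated. */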
+       list_for_each_entry(pos, &err->list, list) {
+               if (!first)
+                       fputs("\n", stderr);
+               __parse_events_error__print(pos->idx, pos->str, pos->help, event);
+               first = false;
+       }
+}
 
-       __parse_events_error__print(err->idx, err->str, err->help, event);
+/*
+ * In the list of errors err, do any of the error strings (str) contain the
+ * given needle string?
+ */
+bool parse_events_error__contains(const struct parse_events_error *err,
+                                 const char *needle)
+{
+       struct parse_events_error_entry *pos;
 
-       if (err->num_errors > 1) {
-               fputs("\nInitial error:\n", stderr);
-               __parse_events_error__print(err->first_idx, err->first_str,
-                                       err->first_help, event);
+       list_for_each_entry(pos, &err->list, list) {
+               if (strstr(pos->str, needle) != NULL)
+                       return true;
        }
+       return false;
 }
 
 #undef MAX_WIDTH
index 63c0a36a4bf11b2662a128f7d5592143e9eac76b..809359e8544ef35a27459570a089156cb1d7c803 100644 (file)
@@ -130,13 +130,8 @@ struct parse_events_term {
 };
 
 struct parse_events_error {
-       int   num_errors;       /* number of errors encountered */
-       int   idx;      /* index in the parsed string */
-       char *str;      /* string to display at the index */
-       char *help;     /* optional help string */
-       int   first_idx;/* as above, but for the first encountered error */
-       char *first_str;
-       char *first_help;
+       /** @list: The head of a list of errors. */
+       struct list_head list;
 };
 
 /* A wrapper around a list of terms for the sake of better type safety. */
@@ -247,9 +242,10 @@ void parse_events_error__init(struct parse_events_error *err);
 void parse_events_error__exit(struct parse_events_error *err);
 void parse_events_error__handle(struct parse_events_error *err, int idx,
                                char *str, char *help);
-void parse_events_error__print(struct parse_events_error *err,
+void parse_events_error__print(const struct parse_events_error *err,
                               const char *event);
-
+bool parse_events_error__contains(const struct parse_events_error *err,
+                                 const char *needle);
 #ifdef HAVE_LIBELF_SUPPORT
 /*
  * If the probe point starts with '%',
index de098caf0c1c2ec8b65dc927f11e14e7817e4f23..d70f5d84af92d197d7f82e7f868dedff369e251c 100644 (file)
@@ -536,8 +536,6 @@ tracepoint_name opt_event_config
        list = alloc_list();
        if (!list)
                YYNOMEM;
-       if (error)
-               error->idx = @1.first_column;
 
        err = parse_events_add_tracepoint(list, &parse_state->idx, $1.sys, $1.event,
                                        error, $2, &@1);
index a4a100425b3a29bcb11765e833d9fb9e2d66da38..cda1c620968e514d1c9f9a889370fef5b22c00db 100644 (file)
@@ -46,22 +46,18 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
 
                        if (!strcmp(s, "?")) {
                                fprintf(stderr, "available registers: ");
-#ifdef HAVE_PERF_REGS_SUPPORT
-                               for (r = sample_reg_masks; r->name; r++) {
+                               for (r = arch__sample_reg_masks(); r->name; r++) {
                                        if (r->mask & mask)
                                                fprintf(stderr, "%s ", r->name);
                                }
-#endif
                                fputc('\n', stderr);
                                /* just printing available regs */
                                goto error;
                        }
-#ifdef HAVE_PERF_REGS_SUPPORT
-                       for (r = sample_reg_masks; r->name; r++) {
+                       for (r = arch__sample_reg_masks(); r->name; r++) {
                                if ((r->mask & mask) && !strcasecmp(s, r->name))
                                        break;
                        }
-#endif
                        if (!r || !r->name) {
                                ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n",
                                            s, intr ? "-I" : "--user-regs=");
index 696566c54768775f8dafd75ab1692b1ad0242ca7..9dcda80d310f014b4809d3c358c333789b8ca34e 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#ifdef HAVE_PERF_REGS_SUPPORT
-
 #include "../perf_regs.h"
 #include "../../../arch/arm64/include/uapi/asm/perf_regs.h"
 
@@ -92,5 +90,3 @@ uint64_t __perf_reg_sp_arm64(void)
 {
        return PERF_REG_ARM64_SP;
 }
-
-#endif
index 700fd07cd2aa32097960e369053938a70f11367f..e29d130a587aa700806c5afae523c3aba5f80190 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#ifdef HAVE_PERF_REGS_SUPPORT
-
 #include "../perf_regs.h"
 #include "../../../arch/arm/include/uapi/asm/perf_regs.h"
 
@@ -56,5 +54,3 @@ uint64_t __perf_reg_sp_arm(void)
 {
        return PERF_REG_ARM_SP;
 }
-
-#endif
index a2841094e096e75af4c42e22d81bdc2b08ccb0a7..75b461ef2eba83163cf55bbd3552531699b61294 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#ifdef HAVE_PERF_REGS_SUPPORT
-
 #include "../perf_regs.h"
 #include "../../arch/csky/include/uapi/asm/perf_regs.h"
 
@@ -96,5 +94,3 @@ uint64_t __perf_reg_sp_csky(void)
 {
        return PERF_REG_CSKY_SP;
 }
-
-#endif
index a9ba0f934123db0ebc7fdec70735984afbf6a537..043f97f4e3ac4a74ca3940a849a327308375ee37 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#ifdef HAVE_PERF_REGS_SUPPORT
-
 #include "../perf_regs.h"
 #include "../../../arch/loongarch/include/uapi/asm/perf_regs.h"
 
@@ -87,5 +85,3 @@ uint64_t __perf_reg_sp_loongarch(void)
 {
        return PERF_REG_LOONGARCH_R3;
 }
-
-#endif
index 5a45830cfbf58bf0dccd24a1b3650f4713905a6d..793178fc3c787f83bd3434857350e14846487da2 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#ifdef HAVE_PERF_REGS_SUPPORT
-
 #include "../perf_regs.h"
 #include "../../../arch/mips/include/uapi/asm/perf_regs.h"
 
@@ -83,5 +81,3 @@ uint64_t __perf_reg_sp_mips(void)
 {
        return PERF_REG_MIPS_R29;
 }
-
-#endif
index 1f0d682db74a76396ab0469ed8014b891aafbecb..08636bb09a3a67e46882f26c3d14a3d12b542ec1 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#ifdef HAVE_PERF_REGS_SUPPORT
-
 #include "../perf_regs.h"
 #include "../../../arch/powerpc/include/uapi/asm/perf_regs.h"
 
@@ -141,5 +139,3 @@ uint64_t __perf_reg_sp_powerpc(void)
 {
        return PERF_REG_POWERPC_R1;
 }
-
-#endif
index e432630be4c56e625ca2e26db5b01b52d7ec742f..337b687c655d04061758308988dd6450a78b9fe0 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#ifdef HAVE_PERF_REGS_SUPPORT
-
 #include "../perf_regs.h"
 #include "../../../arch/riscv/include/uapi/asm/perf_regs.h"
 
@@ -88,5 +86,3 @@ uint64_t __perf_reg_sp_riscv(void)
 {
        return PERF_REG_RISCV_SP;
 }
-
-#endif
index 1c7a46db778c88fd6f753257d8298fe6609658f4..d69bba88108000c28dbe97022fdc4b81784bf24b 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#ifdef HAVE_PERF_REGS_SUPPORT
-
 #include "../perf_regs.h"
 #include "../../../arch/s390/include/uapi/asm/perf_regs.h"
 
@@ -92,5 +90,3 @@ uint64_t __perf_reg_sp_s390(void)
 {
        return PERF_REG_S390_R15;
 }
-
-#endif
index 873c620f063426c5bb2a877533ba9bd24f14f9af..708954a9d35d7bfcf842dbd169e05a075e279ebd 100644 (file)
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#ifdef HAVE_PERF_REGS_SUPPORT
-
 #include "../perf_regs.h"
 #include "../../../arch/x86/include/uapi/asm/perf_regs.h"
 
@@ -94,5 +92,3 @@ uint64_t __perf_reg_sp_x86(void)
 {
        return PERF_REG_X86_SP;
 }
-
-#endif
index e2275856b5704e46e57327dcd6d161c9a8d6c373..44b90bbf2d077003e9a47496a1ade15498eeabd6 100644 (file)
@@ -21,7 +21,14 @@ uint64_t __weak arch__user_reg_mask(void)
        return 0;
 }
 
-#ifdef HAVE_PERF_REGS_SUPPORT
+static const struct sample_reg sample_reg_masks[] = {
+       SMPL_REG_END
+};
+
+const struct sample_reg * __weak arch__sample_reg_masks(void)
+{
+       return sample_reg_masks;
+}
 
 const char *perf_reg_name(int id, const char *arch)
 {
@@ -125,5 +132,3 @@ uint64_t perf_arch_reg_sp(const char *arch)
        pr_err("Fail to find SP register for arch %s, returns 0\n", arch);
        return 0;
 }
-
-#endif
index ecd2a5362042ae63059c29ce248c5a52eb120c40..f2d0736d65cc64e1effc3c2bafb520a60a8f655f 100644 (file)
@@ -26,9 +26,7 @@ enum {
 int arch_sdt_arg_parse_op(char *old_op, char **new_op);
 uint64_t arch__intr_reg_mask(void);
 uint64_t arch__user_reg_mask(void);
-
-#ifdef HAVE_PERF_REGS_SUPPORT
-extern const struct sample_reg sample_reg_masks[];
+const struct sample_reg *arch__sample_reg_masks(void);
 
 const char *perf_reg_name(int id, const char *arch);
 int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
@@ -67,34 +65,4 @@ static inline uint64_t DWARF_MINIMAL_REGS(const char *arch)
        return (1ULL << perf_arch_reg_ip(arch)) | (1ULL << perf_arch_reg_sp(arch));
 }
 
-#else
-
-static inline uint64_t DWARF_MINIMAL_REGS(const char *arch __maybe_unused)
-{
-       return 0;
-}
-
-static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused)
-{
-       return "unknown";
-}
-
-static inline int perf_reg_value(u64 *valp __maybe_unused,
-                                struct regs_dump *regs __maybe_unused,
-                                int id __maybe_unused)
-{
-       return 0;
-}
-
-static inline uint64_t perf_arch_reg_ip(const char *arch __maybe_unused)
-{
-       return 0;
-}
-
-static inline uint64_t perf_arch_reg_sp(const char *arch __maybe_unused)
-{
-       return 0;
-}
-
-#endif /* HAVE_PERF_REGS_SUPPORT */
 #endif /* __PERF_REGS_H */
index 3c9609944a2f312e7cac681f8a19dd037d6eb01e..f39cbbc1a7ec1dcecd9052f917400a3596939363 100644 (file)
@@ -657,7 +657,7 @@ static int pmu_aliases_parse(struct perf_pmu *pmu)
        return 0;
 }
 
-static int pmu_alias_terms(struct perf_pmu_alias *alias, struct list_head *terms)
+static int pmu_alias_terms(struct perf_pmu_alias *alias, int err_loc, struct list_head *terms)
 {
        struct parse_events_term *term, *cloned;
        struct parse_events_terms clone_terms;
@@ -675,6 +675,7 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, struct list_head *terms
                 * which we don't want for implicit terms in aliases.
                 */
                cloned->weak = true;
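+               /* Record where the alias was used so errors point at the right term. */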
+               cloned->err_term = cloned->err_val = err_loc;
                list_add_tail(&cloned->list, &clone_terms.terms);
        }
        list_splice_init(&clone_terms.terms, terms);
@@ -986,8 +987,10 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu)
 }
 
 void __weak
-perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
+perf_pmu__arch_init(struct perf_pmu *pmu)
 {
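+       /* Weak default: core PMUs use the architecture-generic mem events. */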
+       if (pmu->is_core)
+               pmu->mem_events = perf_mem_events;
 }
 
 struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name)
@@ -1019,10 +1022,9 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
         * type value and format definitions. Load both right
         * now.
         */
-       if (pmu_format(pmu, dirfd, name)) {
-               free(pmu);
-               return NULL;
-       }
+       if (pmu_format(pmu, dirfd, name))
+               goto err;
+
        pmu->is_core = is_pmu_core(name);
        pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core);
 
@@ -1361,8 +1363,8 @@ static int pmu_config_term(const struct perf_pmu *pmu,
 
                        parse_events_error__handle(err, term->err_val,
                                asprintf(&err_str,
-                                   "value too big for format, maximum is %llu",
-                                   (unsigned long long)max_val) < 0
+                                   "value too big for format (%s), maximum is %llu",
+                                   format->name, (unsigned long long)max_val) < 0
                                    ? strdup("value too big for format")
                                    : err_str,
                                    NULL);
@@ -1516,7 +1518,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
                alias = pmu_find_alias(pmu, term);
                if (!alias)
                        continue;
-               ret = pmu_alias_terms(alias, &term->list);
+               ret = pmu_alias_terms(alias, term->err_term, &term->list);
                if (ret) {
                        parse_events_error__handle(err, term->err_term,
                                                strdup("Failure to duplicate terms"),
@@ -1760,6 +1762,12 @@ bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name)
 
 bool perf_pmu__is_software(const struct perf_pmu *pmu)
 {
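+       /* Software PMUs matched by name rather than by a fixed PERF_TYPE_* id. */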
+       const char *known_sw_pmus[] = {
+               "kprobe",
+               "msr",
+               "uprobe",
+       };
+
        if (pmu->is_core || pmu->is_uncore || pmu->auxtrace)
                return false;
        switch (pmu->type) {
@@ -1771,7 +1779,11 @@ bool perf_pmu__is_software(const struct perf_pmu *pmu)
        case PERF_TYPE_BREAKPOINT:      return true;
        default: break;
        }
-       return !strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe");
+       for (size_t i = 0; i < ARRAY_SIZE(known_sw_pmus); i++) {
+               if (!strcmp(pmu->name, known_sw_pmus[i]))
+                       return true;
+       }
+       return false;
 }
 
 FILE *perf_pmu__open_file(const struct perf_pmu *pmu, const char *name)
index 424c3fee09496248d6168ba5361d4fa9f66e28a2..e35d985206db517dc32a4d71a49b67e6b10ce182 100644 (file)
@@ -10,6 +10,8 @@
 #include <stdio.h>
 #include "parse-events.h"
 #include "pmu-events/pmu-events.h"
+#include "map_symbol.h"
+#include "mem-events.h"
 
 struct evsel_config_term;
 struct perf_cpu_map;
@@ -162,6 +164,11 @@ struct perf_pmu {
                 */
                bool exclude_guest;
        } missing_features;
+
+       /**
+        * @mem_events: List of the supported mem events.
+        */
+       struct perf_mem_event *mem_events;
 };
 
 /** @perf_pmu__fake: A special global PMU used for testing. */
index ce4931461741d491de27757091ee096b5f8eb6f3..16505071d362f43e004867e1548fc73de2e84316 100644 (file)
@@ -345,12 +345,6 @@ const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str)
        return NULL;
 }
 
-int __weak perf_pmus__num_mem_pmus(void)
-{
-       /* All core PMUs are for mem events. */
-       return perf_pmus__num_core_pmus();
-}
-
 /** Struct for ordering events as output in perf list. */
 struct sevent {
        /** PMU for event. */
index 4c67153ac257617290bde2d1c2c0412925f70e29..94d2a08d894b7d21411f04a662bd52f93ea65a3d 100644 (file)
@@ -17,7 +17,6 @@ struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu);
 
 const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str);
 
-int perf_pmus__num_mem_pmus(void);
 void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state);
 bool perf_pmus__have_event(const char *pname, const char *name);
 int perf_pmus__num_core_pmus(void);
index 9e47712507cc265d46c7cf5d66033185006ba2f3..7b54e93854428a759edd6442eec1c9e2d7dd0aea 100644 (file)
@@ -28,6 +28,7 @@
 #include "tracepoint.h"
 #include "pfm.h"
 #include "thread_map.h"
+#include "util.h"
 
 #define MAX_NAME_LEN 100
 
@@ -63,6 +64,8 @@ void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unus
 {
        char *events_path = get_tracing_file("events");
        int events_fd = open(events_path, O_PATH);
+       struct dirent **sys_namelist = NULL;
+       int sys_items;
 
        put_tracing_file(events_path);
        if (events_fd < 0) {
@@ -70,10 +73,7 @@ void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unus
                return;
        }
 
-#ifdef HAVE_SCANDIRAT_SUPPORT
-{
-       struct dirent **sys_namelist = NULL;
-       int sys_items = tracing_events__scandir_alphasort(&sys_namelist);
+       sys_items = tracing_events__scandir_alphasort(&sys_namelist);
 
        for (int i = 0; i < sys_items; i++) {
                struct dirent *sys_dirent = sys_namelist[i];
@@ -130,11 +130,6 @@ next_sys:
        }
 
        free(sys_namelist);
-}
-#else
-       printf("\nWARNING: Your libc doesn't have the scandirat function, please ask its maintainers to implement it.\n"
-              "         As a rough fallback, please do 'ls %s' to see the available tracepoint events.\n", events_path);
-#endif
        close(events_fd);
 }
 
@@ -232,7 +227,6 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state)
 bool is_event_supported(u8 type, u64 config)
 {
        bool ret = true;
-       int open_return;
        struct evsel *evsel;
        struct perf_event_attr attr = {
                .type = type,
@@ -246,20 +240,32 @@ bool is_event_supported(u8 type, u64 config)
 
        evsel = evsel__new(&attr);
        if (evsel) {
-               open_return = evsel__open(evsel, NULL, tmap);
-               ret = open_return >= 0;
+               ret = evsel__open(evsel, NULL, tmap) >= 0;
 
-               if (open_return == -EACCES) {
+               if (!ret) {
                        /*
-                        * This happens if the paranoid value
+                        * The event may fail to open if the paranoid value
                         * /proc/sys/kernel/perf_event_paranoid is set to 2
-                        * Re-run with exclude_kernel set; we don't do that
-                        * by default as some ARM machines do not support it.
-                        *
+                        * Re-run with exclude_kernel set; we don't do that by
+                        * default as some ARM machines do not support it.
                         */
                        evsel->core.attr.exclude_kernel = 1;
                        ret = evsel__open(evsel, NULL, tmap) >= 0;
                }
+
+               if (!ret) {
+                       /*
+                        * The event may fail to open if the PMU requires
+                        * exclude_guest to be set (e.g. as the Apple M1 PMU
+                        * requires).
+                        * Re-run with exclude_guest set; we don't do that by
+                        * default as it's equally legitimate for another PMU
+                        * driver to require that exclude_guest is clear.
+                        */
+                       evsel->core.attr.exclude_guest = 1;
+                       ret = evsel__open(evsel, NULL, tmap) >= 0;
+               }
+
                evsel__delete(evsel);
        }
 
diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c
new file mode 100644 (file)
index 0000000..459e0e9
--- /dev/null
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Instruction binary disassembler based on capstone.
+ *
+ * Author(s): Changbin Du <changbin.du@huawei.com>
+ */
+#include <string.h>
+#include <stdbool.h>
+#include "debug.h"
+#include "sample.h"
+#include "symbol.h"
+#include "machine.h"
+#include "thread.h"
+#include "print_insn.h"
+
+size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp)
+{
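+       /* Emit each instruction byte as two hex digits, space separated. */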
+       int printed = 0;
+
+       for (int i = 0; i < sample->insn_len; i++) {
+               printed += fprintf(fp, "%02x", (unsigned char)sample->insn[i]);
+               if (sample->insn_len - i > 1)
+                       printed += fprintf(fp, " ");
+       }
+       return printed;
+}
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+#include <capstone/capstone.h>
+
+static int capstone_init(struct machine *machine, csh *cs_handle)
+{
+       cs_arch arch;
+       cs_mode mode;
+
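+       /* Map the perf machine type onto a capstone arch/mode pair. */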
+       if (machine__is(machine, "x86_64")) {
+               arch = CS_ARCH_X86;
+               mode = CS_MODE_64;
+       } else if (machine__normalized_is(machine, "x86")) {
+               arch = CS_ARCH_X86;
+               mode = CS_MODE_32;
+       } else if (machine__normalized_is(machine, "arm64")) {
+               arch = CS_ARCH_ARM64;
+               mode = CS_MODE_ARM;
+       } else if (machine__normalized_is(machine, "arm")) {
+               arch = CS_ARCH_ARM;
+               mode = CS_MODE_ARM + CS_MODE_V8;
+       } else if (machine__normalized_is(machine, "s390")) {
+               arch = CS_ARCH_SYSZ;
+               mode = CS_MODE_BIG_ENDIAN;
+       } else {
+               return -1;
+       }
+
+       if (cs_open(arch, mode, cs_handle) != CS_ERR_OK) {
+               pr_warning_once("cs_open failed\n");
+               return -1;
+       }
+
+       if (machine__normalized_is(machine, "x86")) {
+               cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
+               /*
+                * Resolving address operands to symbols is implemented
+                * on x86 by investigating instruction details.
+                */
+               cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON);
+       }
+
+       return 0;
+}
+
+static size_t print_insn_x86(struct perf_sample *sample, struct thread *thread,
+                            cs_insn *insn, FILE *fp)
+{
+       struct addr_location al;
+       size_t printed = 0;
+
+       if (insn->detail && insn->detail->x86.op_count == 1) {
+               cs_x86_op *op = &insn->detail->x86.operands[0];
+
+               addr_location__init(&al);
+               if (op->type == X86_OP_IMM &&
+                   thread__find_symbol(thread, sample->cpumode, op->imm, &al)) {
+                       printed += fprintf(fp, "%s ", insn[0].mnemonic);
+                       printed += symbol__fprintf_symname_offs(al.sym, &al, fp);
+                       addr_location__exit(&al);
+                       return printed;
+               }
+               addr_location__exit(&al);
+       }
+
+       printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
+       return printed;
+}
+
+size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
+                               struct machine *machine, FILE *fp)
+{
+       csh cs_handle;
+       cs_insn *insn;
+       size_t count;
+       size_t printed = 0;
+       int ret;
+
+       /* TODO: Initialize capstone only once, but that needs a proper place. */
+       ret = capstone_init(machine, &cs_handle);
+       if (ret < 0) {
+               /* fallback */
+               return sample__fprintf_insn_raw(sample, fp);
+       }
+
+       count = cs_disasm(cs_handle, (uint8_t *)sample->insn, sample->insn_len,
+                         sample->ip, 1, &insn);
+       if (count > 0) {
+               if (machine__normalized_is(machine, "x86"))
+                       printed += print_insn_x86(sample, thread, &insn[0], fp);
+               else
+                       printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
+               cs_free(insn, count);
+       } else {
+               printed += fprintf(fp, "illegal instruction");
+       }
+
+       cs_close(&cs_handle);
+       return printed;
+}
+#else
+size_t sample__fprintf_insn_asm(struct perf_sample *sample __maybe_unused,
+                               struct thread *thread __maybe_unused,
+                               struct machine *machine __maybe_unused,
+                               FILE *fp __maybe_unused)
+{
+       return 0;
+}
+#endif
diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h
new file mode 100644 (file)
index 0000000..465bdcf
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_PRINT_INSN_H
+#define PERF_PRINT_INSN_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+struct perf_sample;
+struct thread;
+struct machine;
+
+size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread,
+                               struct machine *machine, FILE *fp);
+size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp);
+
+#endif /* PERF_PRINT_INSN_H */
index a1a796043691f487fe901e9fafef5888913f4ec7..2a0ad9ecf0a20efc70ddc4eb1d43e6419a4bde80 100644 (file)
@@ -358,6 +358,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
                map = maps__find_by_name(machine__kernel_maps(host_machine), module_name);
                if (map) {
                        dso = map__dso(map);
+                       map__put(map);
                        goto found;
                }
                pr_debug("Failed to find module %s.\n", module);
@@ -2273,9 +2274,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
        ret = pp->function ? 0 : -ENOMEM;
 
 out:
-       if (map && !is_kprobe) {
-               map__put(map);
-       }
+       map__put(map);
 
        return ret;
 }
index 593b660ec75e24e1d2ba28cfe86df446bd508179..1bec945f48383945728d6a56143ddf91c3626725 100644 (file)
@@ -31,6 +31,7 @@ util/counts.c
 util/print_binary.c
 util/strlist.c
 util/trace-event.c
+util/trace-event-parse.c
 ../lib/rbtree.c
 util/string.c
 util/symbol_fprintf.c
index 8761f51b5c7c88b938bd709ae9821e39045265b5..075c0f79b1b92d4c41f218adccd0c5c602556028 100644 (file)
@@ -181,6 +181,7 @@ int perf_bpf_filter__destroy(struct evsel *evsel __maybe_unused)
  * implementing 'verbose' and 'eprintf'.
  */
 int verbose;
+int debug_kmaps;
 int debug_peo_args;
 
 int eprintf(int level, int var, const char *fmt, ...);
index 376e86cb4c3c5facc467a01622d4287b5be9a7ad..d927a0d2505285059eb1e4ea1678153671c022b2 100644 (file)
@@ -143,9 +143,4 @@ struct __name##_sorted *__name = __name##_sorted__new
        DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root,             \
                                  __ilist->rblist.nr_entries)
 
-/* For 'struct machine->threads' */
-#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)    \
- DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
-                          __machine->threads[hash_bucket].nr)
-
 #endif /* _PERF_RESORT_RB_H_ */
index 860e1837ba9693eb437a9ae68d762f5a89cf1d2d..b4f0f60e60a63f34b04464543e36cab71eac3ec5 100644 (file)
@@ -858,6 +858,10 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
        pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(evsel__name(evsel)));
        pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->core.attr, sizeof(evsel->core.attr)));
 
+       pydict_set_item_string_decref(dict_sample, "id",
+                       PyLong_FromUnsignedLongLong(sample->id));
+       pydict_set_item_string_decref(dict_sample, "stream_id",
+                       PyLong_FromUnsignedLongLong(sample->stream_id));
        pydict_set_item_string_decref(dict_sample, "pid",
                        _PyLong_FromLong(sample->pid));
        pydict_set_item_string_decref(dict_sample, "tid",
@@ -1306,7 +1310,7 @@ static void python_export_sample_table(struct db_export *dbe,
        struct tables *tables = container_of(dbe, struct tables, dbe);
        PyObject *t;
 
-       t = tuple_new(25);
+       t = tuple_new(27);
 
        tuple_set_d64(t, 0, es->db_id);
        tuple_set_d64(t, 1, es->evsel->db_id);
@@ -1333,6 +1337,8 @@ static void python_export_sample_table(struct db_export *dbe,
        tuple_set_d64(t, 22, es->sample->insn_cnt);
        tuple_set_d64(t, 23, es->sample->cyc_cnt);
        tuple_set_s32(t, 24, es->sample->flags);
+       tuple_set_d64(t, 25, es->sample->id);
+       tuple_set_d64(t, 26, es->sample->stream_id);
 
        call_object(tables->sample_handler, t, "sample_table");
 
index 199d3e8df31581c02245967d795847783556ee4c..06d0bd7fb45999740bd5c3462b17db2a555916c8 100644 (file)
@@ -2720,6 +2720,17 @@ size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
        return machine__fprintf(&session->machines.host, fp);
 }
 
+void perf_session__dump_kmaps(struct perf_session *session)
+{
+       int save_verbose = verbose;
+
+       fflush(stdout);
+       fprintf(stderr, "Kernel and module maps:\n");
+       verbose = 0; /* Suppress verbose to print a summary only */
+       maps__fprintf(machine__kernel_maps(&session->machines.host), stderr);
+       verbose = save_verbose;
+}
+
 struct evsel *perf_session__find_first_evtype(struct perf_session *session,
                                              unsigned int type)
 {
index ee3715e8563bfc6f86b814778214be052e0a703e..5064c6ec11e7317515fc9ae812ac37fc4e6567b9 100644 (file)
@@ -133,6 +133,8 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp
 size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp,
                                       bool skip_empty);
 
+void perf_session__dump_kmaps(struct perf_session *session);
+
 struct evsel *perf_session__find_first_evtype(struct perf_session *session,
                                            unsigned int type);
 
index 79d5e2955f85d8f6af24b598991f35c67bf0544c..3107f5aa8c9a0cb023f272a7663cf13bb107d51f 100644 (file)
@@ -85,6 +85,7 @@ if '-DHAVE_LIBTRACEEVENT' in cflags:
     extra_libraries += [ 'traceevent' ]
 else:
     ext_sources.remove('util/trace-event.c')
+    ext_sources.remove('util/trace-event-parse.c')
 
 # use full paths with source files
 ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources))
index 30254eb637099b07427d73944a6bb4e7baffc724..92a1bd695e8aa3f8bab45818942f098caea44756 100644 (file)
@@ -3372,7 +3372,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
                                sort_dimension_add_dynamic_header(sd);
                }
 
-               if (sd->entry == &sort_parent) {
+               if (sd->entry == &sort_parent && parent_pattern) {
                        int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
                        if (ret) {
                                char err[BUFSIZ];
index 034b496df29780b23d2efc40c2459376d8a8bfba..7addc34afcf5d694699c084454bfe1e9dec9bfc2 100644 (file)
@@ -399,6 +399,8 @@ static void addr2line_subprocess_cleanup(struct child_process *a2l)
                kill(a2l->pid, SIGKILL);
                finish_command(a2l); /* ignore result, we don't care */
                a2l->pid = -1;
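+               /* Also close the pipe fds so they are not leaked. */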
+               close(a2l->in);
+               close(a2l->out);
        }
 
        free(a2l);
index 8c61f8627ebc9fb37cd645ea87a1d39378009db7..bfc1d705f4371b9d0865f32d6d9aab3ea0d8ac4a 100644 (file)
@@ -201,6 +201,9 @@ static void print_aggr_id_std(struct perf_stat_config *config,
                snprintf(buf, sizeof(buf), "S%d-D%d-L%d-ID%d",
                         id.socket, id.die, id.cache_lvl, id.cache);
                break;
+       case AGGR_CLUSTER:
+               snprintf(buf, sizeof(buf), "S%d-D%d-CLS%d", id.socket, id.die, id.cluster);
+               break;
        case AGGR_DIE:
                snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die);
                break;
@@ -251,6 +254,10 @@ static void print_aggr_id_csv(struct perf_stat_config *config,
                fprintf(config->output, "S%d-D%d-L%d-ID%d%s%d%s",
                        id.socket, id.die, id.cache_lvl, id.cache, sep, aggr_nr, sep);
                break;
+       case AGGR_CLUSTER:
+               fprintf(config->output, "S%d-D%d-CLS%d%s%d%s",
+                       id.socket, id.die, id.cluster, sep, aggr_nr, sep);
+               break;
        case AGGR_DIE:
                fprintf(output, "S%d-D%d%s%d%s",
                        id.socket, id.die, sep, aggr_nr, sep);
@@ -300,6 +307,10 @@ static void print_aggr_id_json(struct perf_stat_config *config,
                fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ",
                        id.socket, id.die, id.cache_lvl, id.cache, aggr_nr);
                break;
+       case AGGR_CLUSTER:
+               fprintf(output, "\"cluster\" : \"S%d-D%d-CLS%d\", \"aggregate-number\" : %d, ",
+                       id.socket, id.die, id.cluster, aggr_nr);
+               break;
        case AGGR_DIE:
                fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
                        id.socket, id.die, aggr_nr);
@@ -560,7 +571,7 @@ static void print_metric_only(struct perf_stat_config *config,
        if (color)
                mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
 
-       color_snprintf(str, sizeof(str), color ?: "", fmt, val);
+       color_snprintf(str, sizeof(str), color ?: "", fmt ?: "", val);
        fprintf(out, "%*s ", mlen, str);
        os->first = false;
 }
@@ -1126,11 +1137,16 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
                        u64 ena, run, val;
                        double uval;
                        struct perf_stat_evsel *ps = counter->stats;
-                       int aggr_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
+                       int aggr_idx = 0;
 
-                       if (aggr_idx < 0)
+                       if (!perf_cpu_map__has(evsel__cpus(counter), cpu))
                                continue;
 
+                       cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) {
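+                               /* Stop at the aggr map entry for this CPU. */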
+                               if (config->aggr_map->map[aggr_idx].cpu.cpu == cpu.cpu)
+                                       break;
+                       }
+
                        os->evsel = counter;
                        os->id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
                        if (first) {
@@ -1248,6 +1264,7 @@ static void print_header_interval_std(struct perf_stat_config *config,
        case AGGR_NODE:
        case AGGR_SOCKET:
        case AGGR_DIE:
+       case AGGR_CLUSTER:
        case AGGR_CACHE:
        case AGGR_CORE:
                fprintf(output, "#%*s %-*s cpus",
@@ -1550,6 +1567,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
        switch (config->aggr_mode) {
        case AGGR_CORE:
        case AGGR_CACHE:
+       case AGGR_CLUSTER:
        case AGGR_DIE:
        case AGGR_SOCKET:
        case AGGR_NODE:
index e31426167852ad0d6fe2d94b5f8b84e2a7e7da8a..3466aa9524421d734f5e4ef4e82979bb1954308d 100644 (file)
@@ -355,11 +355,13 @@ static void print_nsecs(struct perf_stat_config *config,
                print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
 }
 
-static int prepare_metric(struct evsel **metric_events,
-                         struct metric_ref *metric_refs,
+static int prepare_metric(const struct metric_expr *mexp,
+                         const struct evsel *evsel,
                          struct expr_parse_ctx *pctx,
                          int aggr_idx)
 {
+       struct evsel * const *metric_events = mexp->metric_events;
+       struct metric_ref *metric_refs = mexp->metric_refs;
        int i;
 
        for (i = 0; metric_events[i]; i++) {
@@ -398,12 +400,33 @@ static int prepare_metric(struct evsel **metric_events,
                        source_count = 1;
                } else {
                        struct perf_stat_evsel *ps = metric_events[i]->stats;
-                       struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
+                       struct perf_stat_aggr *aggr;
 
+                       /*
+                        * If there are multiple uncore PMUs and we're not
+                        * reading the leader's stats, determine the stats for
+                        * the appropriate uncore PMU.
+                        */
+                       if (evsel && evsel->metric_leader &&
+                           evsel->pmu != evsel->metric_leader->pmu &&
+                           mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
+                               struct evsel *pos;
+
+                               evlist__for_each_entry(evsel->evlist, pos) {
+                                       if (pos->pmu != evsel->pmu)
+                                               continue;
+                                       if (pos->metric_leader != mexp->metric_events[i])
+                                               continue;
+                                       ps = pos->stats;
+                                       source_count = 1;
+                                       break;
+                               }
+                       }
+                       aggr = &ps->aggr[aggr_idx];
                        if (!aggr)
                                break;
 
-                        if (!metric_events[i]->supported) {
+                       if (!metric_events[i]->supported) {
                                /*
                                 * Not supported events will have a count of 0,
                                 * which can be confusing in a
@@ -414,13 +437,9 @@ static int prepare_metric(struct evsel **metric_events,
                                val = NAN;
                                source_count = 0;
                        } else {
-                               /*
-                                * If an event was scaled during stat gathering,
-                                * reverse the scale before computing the
-                                * metric.
-                                */
-                               val = aggr->counts.val * (1.0 / metric_events[i]->scale);
-                               source_count = evsel__source_count(metric_events[i]);
+                               val = aggr->counts.val;
+                               if (!source_count)
+                                       source_count = evsel__source_count(metric_events[i]);
                        }
                }
                n = strdup(evsel__metric_id(metric_events[i]));
@@ -441,18 +460,18 @@ static int prepare_metric(struct evsel **metric_events,
 }
 
 static void generic_metric(struct perf_stat_config *config,
-                          const char *metric_expr,
-                          const char *metric_threshold,
-                          struct evsel **metric_events,
-                          struct metric_ref *metric_refs,
-                          char *name,
-                          const char *metric_name,
-                          const char *metric_unit,
-                          int runtime,
+                          struct metric_expr *mexp,
+                          struct evsel *evsel,
                           int aggr_idx,
                           struct perf_stat_output_ctx *out)
 {
        print_metric_t print_metric = out->print_metric;
+       const char *metric_name = mexp->metric_name;
+       const char *metric_expr = mexp->metric_expr;
+       const char *metric_threshold = mexp->metric_threshold;
+       const char *metric_unit = mexp->metric_unit;
+       struct evsel * const *metric_events = mexp->metric_events;
+       int runtime = mexp->runtime;
        struct expr_parse_ctx *pctx;
        double ratio, scale, threshold;
        int i;
@@ -467,7 +486,7 @@ static void generic_metric(struct perf_stat_config *config,
                pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
        pctx->sctx.runtime = runtime;
        pctx->sctx.system_wide = config->system_wide;
-       i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx);
+       i = prepare_metric(mexp, evsel, pctx, aggr_idx);
        if (i < 0) {
                expr__ctx_free(pctx);
                return;
@@ -502,18 +521,18 @@ static void generic_metric(struct perf_stat_config *config,
                                print_metric(config, ctxp, color, "%8.2f",
                                        metric_name ?
                                        metric_name :
-                                       out->force_header ?  name : "",
+                                       out->force_header ?  evsel->name : "",
                                        ratio);
                        }
                } else {
                        print_metric(config, ctxp, color, /*unit=*/NULL,
                                     out->force_header ?
-                                    (metric_name ? metric_name : name) : "", 0);
+                                    (metric_name ?: evsel->name) : "", 0);
                }
        } else {
                print_metric(config, ctxp, color, /*unit=*/NULL,
                             out->force_header ?
-                            (metric_name ? metric_name : name) : "", 0);
+                            (metric_name ?: evsel->name) : "", 0);
        }
 
        expr__ctx_free(pctx);
@@ -528,7 +547,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
        if (!pctx)
                return NAN;
 
-       if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0)
+       if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
                goto out;
 
        if (expr__parse(&ratio, pctx, mexp->metric_expr))
@@ -630,10 +649,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
 
                if ((*num)++ > 0)
                        out->new_line(config, ctxp);
-               generic_metric(config, mexp->metric_expr, mexp->metric_threshold,
-                              mexp->metric_events, mexp->metric_refs, evsel->name,
-                              mexp->metric_name, mexp->metric_unit, mexp->runtime,
-                              aggr_idx, out);
+               generic_metric(config, mexp, evsel, aggr_idx, out);
        }
 
        return NULL;
index 4357ba1148221bf27364ee14abe1184669635d1a..d6e5c8787ba23a49ac35389794d7d2fa20450f73 100644 (file)
@@ -48,6 +48,7 @@ enum aggr_mode {
        AGGR_GLOBAL,
        AGGR_SOCKET,
        AGGR_DIE,
+       AGGR_CLUSTER,
        AGGR_CACHE,
        AGGR_CORE,
        AGGR_THREAD,
index 4b934ed3bfd13ba2bf1c613e13e743c664b260aa..0b91f813c4facdd6ec92fbf350b80a948fa5702f 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/ctype.h>
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
+#include <linux/string.h>
 #include <symbol/kallsyms.h>
 #include <internal/lib.h>
 
@@ -1329,6 +1330,58 @@ out_close:
        return -1;
 }
 
+static bool is_exe_text(int flags)
+{
+       return (flags & (SHF_ALLOC | SHF_EXECINSTR)) == (SHF_ALLOC | SHF_EXECINSTR);
+}
+
+/*
+ * Some executable module sections like .noinstr.text might be laid out with
+ * .text so they can use the same mapping (memory address to file offset).
+ * Check if that is the case. Refer to the kernel's layout_sections(). Return
+ * the end file offset of the last such contiguous section.
+ */
+static u64 max_text_section(Elf *elf, GElf_Ehdr *ehdr)
+{
+       Elf_Scn *sec = NULL;
+       GElf_Shdr shdr;
+       u64 offs = 0;
+
+       /* Doesn't work on some architectures */
+       if (ehdr->e_machine == EM_PARISC ||
+           ehdr->e_machine == EM_ALPHA)
+               return 0;
+
+       /* The ELF is corrupted/truncated; avoid calling elf_strptr(). */
+       if (!elf_rawdata(elf_getscn(elf, ehdr->e_shstrndx), NULL))
+               return 0;
+
+       while ((sec = elf_nextscn(elf, sec)) != NULL) {
+               char *sec_name;
+
+               if (!gelf_getshdr(sec, &shdr))
+                       break;
+
+               if (!is_exe_text(shdr.sh_flags))
+                       continue;
+
+               /* .init and .exit sections are not placed with .text */
+               sec_name = elf_strptr(elf, ehdr->e_shstrndx, shdr.sh_name);
+               if (!sec_name ||
+                   strstarts(sec_name, ".init") ||
+                   strstarts(sec_name, ".exit"))
+                       break;
+
+               /* Must be contiguous with the previous section; assumes .text comes first */
+               if (offs && PERF_ALIGN(offs, shdr.sh_addralign ?: 1) != shdr.sh_offset)
+                       break;
+
+               offs = shdr.sh_offset + shdr.sh_size;
+       }
+
+       return offs;
+}
+
 /**
  * ref_reloc_sym_not_found - has kernel relocation symbol been found.
  * @kmap: kernel maps and relocation reference symbol
@@ -1368,7 +1421,8 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
                                      struct maps *kmaps, struct kmap *kmap,
                                      struct dso **curr_dsop, struct map **curr_mapp,
                                      const char *section_name,
-                                     bool adjust_kernel_syms, bool kmodule, bool *remap_kernel)
+                                     bool adjust_kernel_syms, bool kmodule, bool *remap_kernel,
+                                     u64 max_text_sh_offset)
 {
        struct dso *curr_dso = *curr_dsop;
        struct map *curr_map;
@@ -1424,6 +1478,17 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
        if (!kmap)
                return 0;
 
+       /*
+        * perf does not record module section addresses except for .text, but
+        * some sections can use the same mapping as .text.
+        */
+       if (kmodule && adjust_kernel_syms && is_exe_text(shdr->sh_flags) &&
+           shdr->sh_offset <= max_text_sh_offset) {
+               *curr_mapp = map;
+               *curr_dsop = dso;
+               return 0;
+       }
+
        snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name);
 
        curr_map = maps__find_by_name(kmaps, dso_name);
@@ -1470,8 +1535,10 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
                dso__set_loaded(curr_dso);
                *curr_mapp = curr_map;
                *curr_dsop = curr_dso;
-       } else
+       } else {
                *curr_dsop = map__dso(curr_map);
+               map__put(curr_map);
+       }
 
        return 0;
 }
@@ -1497,6 +1564,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
        Elf *elf;
        int nr = 0;
        bool remap_kernel = false, adjust_kernel_syms = false;
+       u64 max_text_sh_offset = 0;
 
        if (kmap && !kmaps)
                return -1;
@@ -1584,6 +1652,10 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
                remap_kernel = true;
                adjust_kernel_syms = dso->adjust_symbols;
        }
+
+       if (kmodule && adjust_kernel_syms)
+               max_text_sh_offset = max_text_section(runtime_ss->elf, &runtime_ss->ehdr);
+
        elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
                struct symbol *f;
                const char *elf_name = elf_sym__name(&sym, symstrs);
@@ -1673,7 +1745,8 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 
                if (dso->kernel) {
                        if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
-                                                      section_name, adjust_kernel_syms, kmodule, &remap_kernel))
+                                                      section_name, adjust_kernel_syms, kmodule,
+                                                      &remap_kernel, max_text_sh_offset))
                                goto out_elf_end;
                } else if ((used_opd && runtime_ss->adjust_symbols) ||
                           (!used_opd && syms_ss->adjust_symbols)) {
index be212ba157dc321d96534ba6063febbc1ccd5e04..9ebdb8e13c0b84604b64a475c6be680d9ff211c1 100644 (file)
@@ -63,6 +63,16 @@ struct symbol_conf symbol_conf = {
        .res_sample             = 0,
 };
 
+struct map_list_node {
+       struct list_head node;
+       struct map *map;
+};
+
+static struct map_list_node *map_list_node__new(void)
+{
+       return malloc(sizeof(struct map_list_node));
+}
+
 static enum dso_binary_type binary_type_symtab[] = {
        DSO_BINARY_TYPE__KALLSYMS,
        DSO_BINARY_TYPE__GUEST_KALLSYMS,
@@ -238,14 +248,31 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
                 * segment is very big.  Therefore do not fill this gap and do
                 * not assign it to the kernel dso map (kallsyms).
                 *
+                * Also BPF code can be allocated separately from text segments
+                * and modules.  So the last entry in a module should not fill
+                * the gap too.
+                *
                 * In kallsyms, it determines module symbols using '[' character
                 * like in:
                 *   ffffffffc1937000 T hdmi_driver_init  [snd_hda_codec_hdmi]
                 */
                if (prev->end == prev->start) {
+                       const char *prev_mod;
+                       const char *curr_mod;
+
+                       if (!is_kallsyms) {
+                               prev->end = curr->start;
+                               continue;
+                       }
+
+                       prev_mod = strchr(prev->name, '[');
+                       curr_mod = strchr(curr->name, '[');
+
                        /* Last kernel/module symbol mapped to end of page */
-                       if (is_kallsyms && (!strchr(prev->name, '[') !=
-                                           !strchr(curr->name, '[')))
+                       if (!prev_mod != !curr_mod)
+                               prev->end = roundup(prev->end + 4096, 4096);
+                       /* Last symbol in the previous module */
+                       else if (prev_mod && strcmp(prev_mod, curr_mod))
                                prev->end = roundup(prev->end + 4096, 4096);
                        else
                                prev->end = curr->start;
@@ -757,7 +784,6 @@ static int dso__load_all_kallsyms(struct dso *dso, const char *filename)
 
 static int maps__split_kallsyms_for_kcore(struct maps *kmaps, struct dso *dso)
 {
-       struct map *curr_map;
        struct symbol *pos;
        int count = 0;
        struct rb_root_cached old_root = dso->symbols;
@@ -770,6 +796,7 @@ static int maps__split_kallsyms_for_kcore(struct maps *kmaps, struct dso *dso)
        *root = RB_ROOT_CACHED;
 
        while (next) {
+               struct map *curr_map;
                struct dso *curr_map_dso;
                char *module;
 
@@ -796,6 +823,7 @@ static int maps__split_kallsyms_for_kcore(struct maps *kmaps, struct dso *dso)
                        pos->end -= map__start(curr_map) - map__pgoff(curr_map);
                symbols__insert(&curr_map_dso->symbols, pos);
                ++count;
+               map__put(curr_map);
        }
 
        /* Symbols have been adjusted */
@@ -813,7 +841,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
                                struct map *initial_map)
 {
        struct machine *machine;
-       struct map *curr_map = initial_map;
+       struct map *curr_map = map__get(initial_map);
        struct symbol *pos;
        int count = 0, moved = 0;
        struct rb_root_cached *root = &dso->symbols;
@@ -857,13 +885,14 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
                                        dso__set_loaded(curr_map_dso);
                                }
 
+                               map__zput(curr_map);
                                curr_map = maps__find_by_name(kmaps, module);
                                if (curr_map == NULL) {
                                        pr_debug("%s/proc/{kallsyms,modules} "
                                                 "inconsistency while looking "
                                                 "for \"%s\" module!\n",
                                                 machine->root_dir, module);
-                                       curr_map = initial_map;
+                                       curr_map = map__get(initial_map);
                                        goto discard_symbol;
                                }
                                curr_map_dso = map__dso(curr_map);
@@ -887,7 +916,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
                         * symbols at this point.
                         */
                        goto discard_symbol;
-               } else if (curr_map != initial_map) {
+               } else if (!RC_CHK_EQUAL(curr_map, initial_map)) {
                        char dso_name[PATH_MAX];
                        struct dso *ndso;
 
@@ -898,7 +927,8 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
                        }
 
                        if (count == 0) {
-                               curr_map = initial_map;
+                               map__zput(curr_map);
+                               curr_map = map__get(initial_map);
                                goto add_symbol;
                        }
 
@@ -912,6 +942,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
                                        kernel_range++);
 
                        ndso = dso__new(dso_name);
+                       map__zput(curr_map);
                        if (ndso == NULL)
                                return -1;
 
@@ -925,6 +956,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
 
                        map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY);
                        if (maps__insert(kmaps, curr_map)) {
+                               map__zput(curr_map);
                                dso__put(ndso);
                                return -1;
                        }
@@ -935,7 +967,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
                        pos->end -= delta;
                }
 add_symbol:
-               if (curr_map != initial_map) {
+               if (!RC_CHK_EQUAL(curr_map, initial_map)) {
                        struct dso *curr_map_dso = map__dso(curr_map);
 
                        rb_erase_cached(&pos->rb_node, root);
@@ -950,12 +982,12 @@ discard_symbol:
                symbol__delete(pos);
        }
 
-       if (curr_map != initial_map &&
+       if (!RC_CHK_EQUAL(curr_map, initial_map) &&
            dso->kernel == DSO_SPACE__KERNEL_GUEST &&
            machine__is_default_guest(maps__machine(kmaps))) {
                dso__set_loaded(map__dso(curr_map));
        }
-
+       map__put(curr_map);
        return count + moved;
 }
 
index 89c47a5098e289b14e9807a4ea894e15f9e8ce4a..1aa8962dcf52c9122cb2b36c095ff406a1290944 100644 (file)
@@ -26,7 +26,7 @@ int thread__init_maps(struct thread *thread, struct machine *machine)
        if (pid == thread__tid(thread) || pid == -1) {
                thread__set_maps(thread, maps__new(machine));
        } else {
-               struct thread *leader = __machine__findnew_thread(machine, pid, pid);
+               struct thread *leader = machine__findnew_thread(machine, pid, pid);
 
                if (leader) {
                        thread__set_maps(thread, maps__get(thread__maps(leader)));
@@ -383,7 +383,7 @@ static int thread__clone_maps(struct thread *thread, struct thread *parent, bool
        if (thread__pid(thread) == thread__pid(parent))
                return thread__prepare_access(thread);
 
-       if (RC_CHK_EQUAL(thread__maps(thread), thread__maps(parent))) {
+       if (maps__equal(thread__maps(thread), thread__maps(parent))) {
                pr_debug("broken map groups on thread %d/%d parent %d/%d\n",
                         thread__pid(thread), thread__tid(thread),
                         thread__pid(parent), thread__tid(parent));
index 0df775b5c1105d75d74192d2ae994d99dc5f001b..8b4a3c69bad19c269197d10dc3e9e4991033e478 100644 (file)
@@ -3,7 +3,6 @@
 #define __PERF_THREAD_H
 
 #include <linux/refcount.h>
-#include <linux/rbtree.h>
 #include <linux/list.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -13,7 +12,6 @@
 #include <strlist.h>
 #include <intlist.h>
 #include "rwsem.h"
-#include "event.h"
 #include "callchain.h"
 #include <internal/rc_check.h>
 
@@ -30,11 +28,6 @@ struct lbr_stitch {
        struct callchain_cursor_node    *prev_lbr_cursor;
 };
 
-struct thread_rb_node {
-       struct rb_node rb_node;
-       struct thread *thread;
-};
-
 DECLARE_RC_STRUCT(thread) {
        /** @maps: mmaps associated with this thread. */
        struct maps             *maps;
index e848579e61a86330856a8cb2ae8f7c8564bfd5c5..b5f12390c3558016499b9dded1bf7d409c1738cc 100644 (file)
@@ -109,9 +109,10 @@ static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid)
 
                snprintf(path, sizeof(path), "/proc/%d/task", pid);
                items = scandir(path, &namelist, filter, NULL);
-               if (items <= 0)
-                       goto out_free_closedir;
-
+               if (items <= 0) {
+                       pr_debug("scandir for %d returned empty, skipping\n", pid);
+                       continue;
+               }
                while (threads->nr + items >= max_threads) {
                        max_threads *= 2;
                        grow = true;
@@ -152,8 +153,6 @@ out_free_namelist:
        for (i = 0; i < items; i++)
                zfree(&namelist[i]);
        free(namelist);
-
-out_free_closedir:
        zfree(&threads);
        goto out_closedir;
 }
@@ -280,13 +279,13 @@ struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str)
                threads->nr = ntasks;
        }
 out:
+       strlist__delete(slist);
        if (threads)
                refcount_set(&threads->refcnt, 1);
        return threads;
 
 out_free_threads:
        zfree(&threads);
-       strlist__delete(slist);
        goto out;
 }
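
Moving strlist__delete() under the common out: label frees the string list on the success path as well; previously only the failure path released it, leaking the list whenever parsing succeeded. A minimal sketch of the resulting cleanup idiom (make_map() is a hypothetical stand-in for the real parsing logic):

	static struct perf_thread_map *make_map(struct strlist *slist)
	{
		/* hypothetical: the real code walks slist and builds the map */
		return slist ? thread_map__new_by_tid(1) : NULL;
	}

	static struct perf_thread_map *sketch(const char *tid_str)
	{
		struct strlist *slist = strlist__new(tid_str, NULL);
		struct perf_thread_map *threads = NULL;

		if (!slist)
			return NULL;
		threads = make_map(slist);	/* may fail and return NULL */
		strlist__delete(slist);		/* freed on success and failure alike */
		return threads;
	}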
 
diff --git a/tools/perf/util/threads.c b/tools/perf/util/threads.c
new file mode 100644 (file)
index 0000000..ff2b169
--- /dev/null
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "threads.h"
+#include "machine.h"
+#include "thread.h"
+
+static struct threads_table_entry *threads__table(struct threads *threads, pid_t tid)
+{
+       /* Cast it to handle tid == -1 */
+       return &threads->table[(unsigned int)tid % THREADS__TABLE_SIZE];
+}
+
+static size_t key_hash(long key, void *ctx __maybe_unused)
+{
+       /* The table lookup removes low bit entropy, but this is just ignored here. */
+       return key;
+}
+
+static bool key_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+       return key1 == key2;
+}
+
+void threads__init(struct threads *threads)
+{
+       for (int i = 0; i < THREADS__TABLE_SIZE; i++) {
+               struct threads_table_entry *table = &threads->table[i];
+
+               hashmap__init(&table->shard, key_hash, key_equal, NULL);
+               init_rwsem(&table->lock);
+               table->last_match = NULL;
+       }
+}
+
+void threads__exit(struct threads *threads)
+{
+       threads__remove_all_threads(threads);
+       for (int i = 0; i < THREADS__TABLE_SIZE; i++) {
+               struct threads_table_entry *table = &threads->table[i];
+
+               hashmap__clear(&table->shard);
+               exit_rwsem(&table->lock);
+       }
+}
+
+size_t threads__nr(struct threads *threads)
+{
+       size_t nr = 0;
+
+       for (int i = 0; i < THREADS__TABLE_SIZE; i++) {
+               struct threads_table_entry *table = &threads->table[i];
+
+               down_read(&table->lock);
+               nr += hashmap__size(&table->shard);
+               up_read(&table->lock);
+       }
+       return nr;
+}
+
+/*
+ * Front-end cache - TID lookups come in blocks,
+ * so most of the time we don't have to look up
+ * the full hashmap:
+ */
+static struct thread *__threads_table_entry__get_last_match(struct threads_table_entry *table,
+                                                           pid_t tid)
+{
+       struct thread *th, *res = NULL;
+
+       th = table->last_match;
+       if (th != NULL) {
+               if (thread__tid(th) == tid)
+                       res = thread__get(th);
+       }
+       return res;
+}
+
+static void __threads_table_entry__set_last_match(struct threads_table_entry *table,
+                                                 struct thread *th)
+{
+       thread__put(table->last_match);
+       table->last_match = thread__get(th);
+}
+
+static void threads_table_entry__set_last_match(struct threads_table_entry *table,
+                                               struct thread *th)
+{
+       down_write(&table->lock);
+       __threads_table_entry__set_last_match(table, th);
+       up_write(&table->lock);
+}
+
+struct thread *threads__find(struct threads *threads, pid_t tid)
+{
+       struct threads_table_entry *table = threads__table(threads, tid);
+       struct thread *res;
+
+       down_read(&table->lock);
+       res = __threads_table_entry__get_last_match(table, tid);
+       if (!res) {
+               if (hashmap__find(&table->shard, tid, &res))
+                       res = thread__get(res);
+       }
+       up_read(&table->lock);
+       if (res)
+               threads_table_entry__set_last_match(table, res);
+       return res;
+}
+
+struct thread *threads__findnew(struct threads *threads, pid_t pid, pid_t tid, bool *created)
+{
+       struct threads_table_entry *table  = threads__table(threads, tid);
+       struct thread *res = NULL;
+
+       *created = false;
+       down_write(&table->lock);
+       res = thread__new(pid, tid);
+       if (res) {
+               if (hashmap__add(&table->shard, tid, res)) {
+                       /* Add failed: assume a racing add, so find the existing entry. */
+                       thread__put(res);
+                       res = NULL;
+                       if (hashmap__find(&table->shard, tid, &res))
+                               res = thread__get(res);
+               } else {
+                       res = thread__get(res);
+                       *created = true;
+               }
+               if (res)
+                       __threads_table_entry__set_last_match(table, res);
+       }
+       up_write(&table->lock);
+       return res;
+}
+
+void threads__remove_all_threads(struct threads *threads)
+{
+       for (int i = 0; i < THREADS__TABLE_SIZE; i++) {
+               struct threads_table_entry *table = &threads->table[i];
+               struct hashmap_entry *cur, *tmp;
+               size_t bkt;
+
+               down_write(&table->lock);
+               __threads_table_entry__set_last_match(table, NULL);
+               hashmap__for_each_entry_safe((&table->shard), cur, tmp, bkt) {
+                       struct thread *old_value;
+
+                       hashmap__delete(&table->shard, cur->key, /*old_key=*/NULL, &old_value);
+                       thread__put(old_value);
+               }
+               up_write(&table->lock);
+       }
+}
+
+void threads__remove(struct threads *threads, struct thread *thread)
+{
+       struct threads_table_entry *table = threads__table(threads, thread__tid(thread));
+       struct thread *old_value;
+
+       down_write(&table->lock);
+       if (table->last_match && RC_CHK_EQUAL(table->last_match, thread))
+               __threads_table_entry__set_last_match(table, NULL);
+
+       hashmap__delete(&table->shard, thread__tid(thread), /*old_key=*/NULL, &old_value);
+       thread__put(old_value);
+       up_write(&table->lock);
+}
+
+int threads__for_each_thread(struct threads *threads,
+                            int (*fn)(struct thread *thread, void *data),
+                            void *data)
+{
+       for (int i = 0; i < THREADS__TABLE_SIZE; i++) {
+               struct threads_table_entry *table = &threads->table[i];
+               struct hashmap_entry *cur;
+               size_t bkt;
+
+               down_read(&table->lock);
+               hashmap__for_each_entry((&table->shard), cur, bkt) {
+                       int rc = fn((struct thread *)cur->pvalue, data);
+
+                       if (rc != 0) {
+                               up_read(&table->lock);
+                               return rc;
+                       }
+               }
+               up_read(&table->lock);
+       }
+       return 0;
+}
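
One subtlety in threads__table() above is the (unsigned int) cast: the table is also indexed with tid == -1, and signed modulo would produce a negative shard index. A worked example with THREADS__TABLE_SIZE == 8:

	#include <assert.h>
	#include <sys/types.h>

	int main(void)
	{
		pid_t tid = -1;

		/* (unsigned int)-1 == 4294967295, and 4294967295 % 8 == 7 */
		assert((unsigned int)tid % 8 == 7);
		/* whereas signed modulo would yield -1, an invalid index */
		assert(tid % 8 == -1);
		return 0;
	}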
diff --git a/tools/perf/util/threads.h b/tools/perf/util/threads.h
new file mode 100644 (file)
index 0000000..da68d22
--- /dev/null
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_THREADS_H
+#define __PERF_THREADS_H
+
+#include "hashmap.h"
+#include "rwsem.h"
+
+struct thread;
+
+#define THREADS__TABLE_BITS    3
+#define THREADS__TABLE_SIZE    (1 << THREADS__TABLE_BITS)
+
+struct threads_table_entry {
+       /* Key is tid, value is struct thread. */
+       struct hashmap         shard;
+       struct rw_semaphore    lock;
+       struct thread          *last_match;
+};
+
+struct threads {
+       struct threads_table_entry table[THREADS__TABLE_SIZE];
+};
+
+void threads__init(struct threads *threads);
+void threads__exit(struct threads *threads);
+size_t threads__nr(struct threads *threads);
+struct thread *threads__find(struct threads *threads, pid_t tid);
+struct thread *threads__findnew(struct threads *threads, pid_t pid, pid_t tid, bool *created);
+void threads__remove_all_threads(struct threads *threads);
+void threads__remove(struct threads *threads, struct thread *thread);
+int threads__for_each_thread(struct threads *threads,
+                            int (*fn)(struct thread *thread, void *data),
+                            void *data);
+
+#endif /* __PERF_THREADS_H */
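
Taken together, the new API replaces machine's rbtree of thread_rb_node entries with eight independently locked hashmap shards. A hypothetical caller, to show the reference-counting contract (perf-internal headers assumed):

	#include <stdbool.h>
	#include <stdio.h>
	#include "thread.h"
	#include "threads.h"

	static int print_tid(struct thread *thread, void *data)
	{
		fprintf(data, "%d\n", thread__tid(thread));
		return 0;	/* non-zero would stop the walk early */
	}

	static void example(void)
	{
		struct threads threads;
		struct thread *t;
		bool created;

		threads__init(&threads);
		t = threads__findnew(&threads, /*pid=*/1234, /*tid=*/1234, &created);
		if (t) {
			threads__for_each_thread(&threads, print_tid, stdout);
			thread__put(t);		/* drop the reference findnew returned */
		}
		threads__exit(&threads);	/* drops the table's own references */
	}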
index 2d3c2576bab7fe9b6f3ae065e9beae89cf86cf1b..f0332bd3a501ea4e3881178d62246a576b6ef875 100644 (file)
@@ -122,6 +122,119 @@ void event_format__print(struct tep_event *event,
        return event_format__fprintf(event, cpu, data, size, stdout);
 }
 
+/*
+ * prev_state is of type long, which is 32 bits on 32 bit architectures.
+ * As it needs to have the same bits on both 32 bit and 64 bit architectures,
+ * we can just assume that the flags we care about will all fit within
+ * the low 32 bits.
+ */
+#define MAX_STATE_BITS 32
+
+static const char *convert_sym(struct tep_print_flag_sym *sym)
+{
+       static char save_states[MAX_STATE_BITS + 1];
+
+       memset(save_states, 0, sizeof(save_states));
+
+       /* These are the flags for the prev_state field; now make them into a string */
+       for (; sym; sym = sym->next) {
+               long bitmask = strtoul(sym->value, NULL, 0);
+               int i;
+
+               for (i = 0; !(bitmask & 1); i++)
+                       bitmask >>= 1;
+
+               if (i >= MAX_STATE_BITS)
+                       continue;
+
+               save_states[i] = sym->str[0];
+       }
+
+       return save_states;
+}
+
+static struct tep_print_arg_field *
+find_arg_field(struct tep_format_field *prev_state_field, struct tep_print_arg *arg)
+{
+       struct tep_print_arg_field *field;
+
+       if (!arg)
+               return NULL;
+
+       if (arg->type == TEP_PRINT_FIELD)
+               return &arg->field;
+
+       if (arg->type == TEP_PRINT_OP) {
+               field = find_arg_field(prev_state_field, arg->op.left);
+               if (field && field->field == prev_state_field)
+                       return field;
+               field = find_arg_field(prev_state_field, arg->op.right);
+               if (field && field->field == prev_state_field)
+                       return field;
+       }
+       return NULL;
+}
+
+static struct tep_print_flag_sym *
+test_flags(struct tep_format_field *prev_state_field, struct tep_print_arg *arg)
+{
+       struct tep_print_arg_field *field;
+
+       field = find_arg_field(prev_state_field, arg->flags.field);
+       if (!field)
+               return NULL;
+
+       return arg->flags.flags;
+}
+
+static struct tep_print_flag_sym *
+search_op(struct tep_format_field *prev_state_field, struct tep_print_arg *arg)
+{
+       struct tep_print_flag_sym *sym = NULL;
+
+       if (!arg)
+               return NULL;
+
+       if (arg->type == TEP_PRINT_OP) {
+               sym = search_op(prev_state_field, arg->op.left);
+               if (sym)
+                       return sym;
+
+               sym = search_op(prev_state_field, arg->op.right);
+               if (sym)
+                       return sym;
+       } else if (arg->type == TEP_PRINT_FLAGS) {
+               sym = test_flags(prev_state_field, arg);
+       }
+
+       return sym;
+}
+
+const char *parse_task_states(struct tep_format_field *state_field)
+{
+       struct tep_print_flag_sym *sym;
+       struct tep_print_arg *arg;
+       struct tep_event *event;
+
+       event = state_field->event;
+
+       /*
+        * Look at the event format fields, and search for where
+        * the prev_state is parsed via the format flags.
+        */
+       for (arg = event->print_fmt.args; arg; arg = arg->next) {
+               /*
+                * Currently, the __print_flags() for the prev_state
+                * is embedded in operations, so they too must be
+                * searched.
+                */
+               sym = search_op(state_field, arg);
+               if (sym)
+                       return convert_sym(sym);
+       }
+       return NULL;
+}
+
 void parse_ftrace_printk(struct tep_handle *pevent,
                         char *file, unsigned int size __maybe_unused)
 {
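
convert_sym() builds a positional string: each __print_flags() symbol's first character lands at the index of its flag's lowest set bit, so callers can index the result by bit number. This assumes the sched_switch prev_state flags populate the low bits contiguously and that no zero-valued symbol reaches the loop (that case would never terminate). A worked example of the bit-index computation:

	#include <assert.h>

	int main(void)
	{
		long bitmask = 0x40;	/* e.g. a { 0x40, "P" } flag symbol */
		int i;

		for (i = 0; !(bitmask & 1); i++)
			bitmask >>= 1;
		assert(i == 6);		/* so save_states[6] would become 'P' */
		return 0;
	}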
index a69ee29419f3c889f6e244413bcd5496661ec09e..bbf8b26bc8da2b3c438742036bfc86599b460e61 100644 (file)
@@ -15,6 +15,7 @@ struct perf_tool;
 struct thread;
 struct tep_plugin_list;
 struct evsel;
+struct tep_format_field;
 
 struct trace_event {
        struct tep_handle       *pevent;
@@ -51,6 +52,8 @@ int parse_event_file(struct tep_handle *pevent,
 unsigned long long
 raw_field_value(struct tep_event *event, const char *name, void *data);
 
+const char *parse_task_states(struct tep_format_field *state_field);
+
 void parse_proc_kallsyms(struct tep_handle *pevent, char *file, unsigned int size);
 void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size);
 void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int size);
index 6013335a8daea58a4fe19c0b4a5041e522f6707d..b38d322734b4a7fa0b2981bfb9e90cb26284dfdd 100644 (file)
@@ -263,7 +263,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
        struct unwind_info *ui, ui_buf = {
                .sample         = data,
                .thread         = thread,
-               .machine        = RC_CHK_ACCESS(thread__maps(thread))->machine,
+               .machine        = maps__machine((thread__maps(thread))),
                .cb             = cb,
                .arg            = arg,
                .max_stack      = max_stack,
index dac536e28360a2481956360ec1753d2079488925..6a5ac0faa6f42114d428c715c3812fbbac7d407d 100644 (file)
@@ -706,7 +706,7 @@ static int _unwind__prepare_access(struct maps *maps)
 {
        void *addr_space = unw_create_addr_space(&accessors, 0);
 
-       RC_CHK_ACCESS(maps)->addr_space = addr_space;
+       maps__set_addr_space(maps, addr_space);
        if (!addr_space) {
                pr_err("unwind: Can't create unwind address space.\n");
                return -ENOMEM;
index 76cd63de80a8efef85cf91645b4d77a746f6c32b..2728eb4f13eab7943450db5b6ab01fd257e1f415 100644 (file)
@@ -12,11 +12,6 @@ struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
 struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
 struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops;
 
-static void unwind__register_ops(struct maps *maps, struct unwind_libunwind_ops *ops)
-{
-       RC_CHK_ACCESS(maps)->unwind_libunwind_ops = ops;
-}
-
 int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized)
 {
        const char *arch;
@@ -60,7 +55,7 @@ int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized
                return 0;
        }
 out_register:
-       unwind__register_ops(maps, ops);
+       maps__set_unwind_libunwind_ops(maps, ops);
 
        err = maps__unwind_libunwind_ops(maps)->prepare_access(maps);
        if (initialized)
index c1fd9ba6d697dd4319defb15c219cc4d8fc5b874..4f561e5e41622fbfe11dea9f7ae06e31c259953a 100644 (file)
@@ -552,3 +552,22 @@ int sched_getcpu(void)
        return -1;
 }
 #endif
+
+#ifndef HAVE_SCANDIRAT_SUPPORT
+int scandirat(int dirfd, const char *dirp,
+             struct dirent ***namelist,
+             int (*filter)(const struct dirent *),
+             int (*compar)(const struct dirent **, const struct dirent **))
+{
+       char path[PATH_MAX];
+       int err, fd = openat(dirfd, dirp, O_PATH);
+
+       if (fd < 0)
+               return fd;
+
+       snprintf(path, sizeof(path), "/proc/%d/fd/%d", getpid(), fd);
+       err = scandir(path, namelist, filter, compar);
+       close(fd);
+       return err;
+}
+#endif
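
The fallback relies on /proc/<pid>/fd/<fd> being a magic symlink to whatever the O_PATH descriptor refers to, which lets plain scandir() walk the directory named by (dirfd, dirp). A hypothetical caller, listing a process's tasks relative to an open /proc:

	#include <dirent.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	static void list_tasks(const char *pid_str)
	{
		struct dirent **namelist;
		char relpath[64];
		int proc_fd = open("/proc", O_RDONLY | O_DIRECTORY);
		int i, n;

		if (proc_fd < 0)
			return;
		snprintf(relpath, sizeof(relpath), "%s/task", pid_str);
		n = scandirat(proc_fd, relpath, &namelist, NULL, alphasort);
		for (i = 0; i < n; i++) {
			printf("%s\n", namelist[i]->d_name);
			free(namelist[i]);
		}
		if (n >= 0)
			free(namelist);
		close(proc_fd);
	}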
index 7c8915d92dca20ace7e679d64126d95920584854..9966c21aaf0484797586855c10b6185414a4cb11 100644 (file)
@@ -6,6 +6,7 @@
 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
 #define _DEFAULT_SOURCE 1
 
+#include <dirent.h>
 #include <fcntl.h>
 #include <stdbool.h>
 #include <stddef.h>
@@ -56,6 +57,13 @@ int perf_tip(char **strp, const char *dirpath);
 int sched_getcpu(void);
 #endif
 
+#ifndef HAVE_SCANDIRAT_SUPPORT
+int scandirat(int dirfd, const char *dirp,
+             struct dirent ***namelist,
+             int (*filter)(const struct dirent *),
+             int (*compar)(const struct dirent **, const struct dirent **));
+#endif
+
 extern bool perf_singlethreaded;
 
 void perf_set_singlethreaded(void);
index 6fba29f3222db2dc8e0bb972d01a39f72af58f56..0aa4005017c72f10bb7c5e47bc78ec81718c47ef 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 ifneq ($(O),)
 ifeq ($(origin O), command line)
-       dummy := $(if $(shell cd $(PWD); test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
+        $(if $(shell cd $(PWD); test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
        ABSOLUTE_O := $(shell cd $(PWD); cd $(O) ; pwd)
        OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/)
        COMMAND_O := O=$(ABSOLUTE_O)
index 47fdaa146443001491dc0ec9b56cf5e0677b49e6..f0e488ed90d8959dc5edb83da057741ad6608384 100644 (file)
@@ -52,7 +52,7 @@ static int get_zswap_stored_pages(size_t *value)
 
 static int get_cg_wb_count(const char *cg)
 {
-       return cg_read_key_long(cg, "memory.stat", "zswp_wb");
+       return cg_read_key_long(cg, "memory.stat", "zswpwb");
 }
 
 static long get_zswpout(const char *cgroup)
@@ -60,6 +60,27 @@ static long get_zswpout(const char *cgroup)
        return cg_read_key_long(cgroup, "memory.stat", "zswpout ");
 }
 
+static int allocate_and_read_bytes(const char *cgroup, void *arg)
+{
+       size_t size = (size_t)arg;
+       char *mem = (char *)malloc(size);
+       int ret = 0;
+
+       if (!mem)
+               return -1;
+       for (int i = 0; i < size; i += 4095)
+               mem[i] = 'a';
+
+       /* Go through the allocated memory to (z)swap in and out pages */
+       for (int i = 0; i < size; i += 4095) {
+               if (mem[i] != 'a')
+                       ret = -1;
+       }
+
+       free(mem);
+       return ret;
+}
+
 static int allocate_bytes(const char *cgroup, void *arg)
 {
        size_t size = (size_t)arg;
@@ -100,7 +121,6 @@ static int test_zswap_usage(const char *root)
        int ret = KSFT_FAIL;
        char *test_group;
 
-       /* Set up */
        test_group = cg_name(root, "no_shrink_test");
        if (!test_group)
                goto out;
@@ -133,6 +153,101 @@ out:
        return ret;
 }
 
+/*
+ * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
+ * the cgroup.
+ */
+static int test_swapin_nozswap(const char *root)
+{
+       int ret = KSFT_FAIL;
+       char *test_group;
+       long swap_peak, zswpout;
+
+       test_group = cg_name(root, "no_zswap_test");
+       if (!test_group)
+               goto out;
+       if (cg_create(test_group))
+               goto out;
+       if (cg_write(test_group, "memory.max", "8M"))
+               goto out;
+       if (cg_write(test_group, "memory.zswap.max", "0"))
+               goto out;
+
+       /* Allocate and read more than memory.max to trigger swapin */
+       if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
+               goto out;
+
+       /* Verify that pages are swapped out, but no zswap happened */
+       swap_peak = cg_read_long(test_group, "memory.swap.peak");
+       if (swap_peak < 0) {
+               ksft_print_msg("failed to get cgroup's swap_peak\n");
+               goto out;
+       }
+
+       if (swap_peak < MB(24)) {
+               ksft_print_msg("at least 24MB of memory should be swapped out\n");
+               goto out;
+       }
+
+       zswpout = get_zswpout(test_group);
+       if (zswpout < 0) {
+               ksft_print_msg("failed to get zswpout\n");
+               goto out;
+       }
+
+       if (zswpout > 0) {
+               ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
+               goto out;
+       }
+
+       ret = KSFT_PASS;
+
+out:
+       cg_destroy(test_group);
+       free(test_group);
+       return ret;
+}
+
+/* Simple test to verify the (z)swapin code paths */
+static int test_zswapin(const char *root)
+{
+       int ret = KSFT_FAIL;
+       char *test_group;
+       long zswpin;
+
+       test_group = cg_name(root, "zswapin_test");
+       if (!test_group)
+               goto out;
+       if (cg_create(test_group))
+               goto out;
+       if (cg_write(test_group, "memory.max", "8M"))
+               goto out;
+       if (cg_write(test_group, "memory.zswap.max", "max"))
+               goto out;
+
+       /* Allocate and read more than memory.max to trigger (z)swap in */
+       if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
+               goto out;
+
+       zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
+       if (zswpin < 0) {
+               ksft_print_msg("failed to get zswpin\n");
+               goto out;
+       }
+
+       if (zswpin < MB(24) / PAGE_SIZE) {
+               ksft_print_msg("at least 24MB should be brought back from zswap\n");
+               goto out;
+       }
+
+       ret = KSFT_PASS;
+
+out:
+       cg_destroy(test_group);
+       free(test_group);
+       return ret;
+}
+
 /*
  * When trying to store a memcg page in zswap, if the memcg hits its memory
  * limit in zswap, writeback should affect only the zswapped pages of that
@@ -144,7 +259,6 @@ static int test_no_invasive_cgroup_shrink(const char *root)
        size_t control_allocation_size = MB(10);
        char *control_allocation, *wb_group = NULL, *control_group = NULL;
 
-       /* Set up */
        wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
        if (!wb_group)
                return KSFT_FAIL;
@@ -309,6 +423,8 @@ struct zswap_test {
        const char *name;
 } tests[] = {
        T(test_zswap_usage),
+       T(test_swapin_nozswap),
+       T(test_zswapin),
        T(test_no_kmem_bypass),
        T(test_no_invasive_cgroup_shrink),
 };
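
The MB(24) thresholds in the two new tests follow from the cgroup setup rather than from any zswap internals: memory.max is 8M and the workload touches 32M, so at least 32M - 8M = 24M of it must leave RAM through (z)swap. As a sketch (MB() assumed to match the selftest cgroup helpers):

	#include <assert.h>
	#include <stddef.h>

	#define MB(x) ((size_t)(x) << 20)	/* assumption: same as the helpers */

	int main(void)
	{
		/* memory.max = 8M, allocation = 32M: at least 24M must swap out */
		assert(MB(32) - MB(8) == MB(24));
		return 0;
	}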
index c6c2965a660754e9d0acc8aeb676272a374bc4d0..e65ef9d9cedceb572e20bb390de4b6b1b4a4f54f 100644 (file)
@@ -1,2 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 huge_count_read_write
+debugfs_target_ids_read_before_terminate_race
+debugfs_target_ids_pid_leak
+access_memory
index 8a1cc2bf1864a8949db2b8f74901ca1cf800bd88..789d6949c2471535dfbe77329c6f571f16f79e7b 100644 (file)
@@ -2,6 +2,8 @@
 # Makefile for damon selftests
 
 TEST_GEN_FILES += huge_count_read_write
+TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race
+TEST_GEN_FILES += debugfs_target_ids_pid_leak
 TEST_GEN_FILES += access_memory
 
 TEST_FILES = _chk_dependency.sh _debugfs_common.sh
@@ -9,9 +11,12 @@ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
 TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
 TEST_PROGS += debugfs_duplicate_context_creation.sh
 TEST_PROGS += debugfs_rm_non_contexts.sh
+TEST_PROGS += debugfs_target_ids_read_before_terminate_race.sh
+TEST_PROGS += debugfs_target_ids_pid_leak.sh
 TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh
 TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py
 TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
+TEST_PROGS += damos_quota.py damos_apply_interval.py
 TEST_PROGS += reclaim.sh lru_sort.sh
 
 include ../lib.mk
index 0328ac0b5a5ed08f412837cec45aed553af76eb8..dda3a87dc00a267a365d1116c987423f3510a823 100644 (file)
@@ -4,7 +4,14 @@
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
-DBGFS=/sys/kernel/debug/damon
+DBGFS=$(grep debugfs /proc/mounts --max-count 1 | awk '{print $2}')
+if [ "$DBGFS" = "" ]
+then
+       echo "debugfs not mounted"
+       exit $ksft_skip
+fi
+
+DBGFS+="/damon"
 
 if [ $EUID -ne 0 ];
 then
@@ -18,7 +25,14 @@ then
        exit $ksft_skip
 fi
 
-for f in attrs target_ids monitor_on
+if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
+then
+       monitor_on_file="monitor_on_DEPRECATED"
+else
+       monitor_on_file="monitor_on"
+fi
+
+for f in attrs target_ids "$monitor_on_file"
 do
        if [ ! -f "$DBGFS/$f" ]
        then
@@ -28,7 +42,7 @@ do
 done
 
 permission_error="Operation not permitted"
-for f in attrs target_ids monitor_on
+for f in attrs target_ids "$monitor_on_file"
 do
        status=$( cat "$DBGFS/$f" 2>&1 )
        if [ "${status#*$permission_error}" != "$status" ]; then
index e98cf4b6a4b76fe002747708510f7243c9b44cd3..d23d7398a27a8d46e0d3e9bf8981f5718c310124 100644 (file)
@@ -70,18 +70,65 @@ class DamosAccessPattern:
         if err != None:
             return err
 
+class DamosQuota:
+    sz = None                   # size quota, in bytes
+    ms = None                   # time quota
+    reset_interval_ms = None    # quota reset interval
+    scheme = None               # owner scheme
+
+    def __init__(self, sz=0, ms=0, reset_interval_ms=0):
+        self.sz = sz
+        self.ms = ms
+        self.reset_interval_ms = reset_interval_ms
+
+    def sysfs_dir(self):
+        return os.path.join(self.scheme.sysfs_dir(), 'quotas')
+
+    def stage(self):
+        err = write_file(os.path.join(self.sysfs_dir(), 'bytes'), self.sz)
+        if err != None:
+            return err
+        err = write_file(os.path.join(self.sysfs_dir(), 'ms'), self.ms)
+        if err != None:
+            return err
+        err = write_file(os.path.join(self.sysfs_dir(), 'reset_interval_ms'),
+                         self.reset_interval_ms)
+        if err != None:
+            return err
+
+class DamosStats:
+    nr_tried = None
+    sz_tried = None
+    nr_applied = None
+    sz_applied = None
+    qt_exceeds = None
+
+    def __init__(self, nr_tried, sz_tried, nr_applied, sz_applied, qt_exceeds):
+        self.nr_tried = nr_tried
+        self.sz_tried = sz_tried
+        self.nr_applied = nr_applied
+        self.sz_applied = sz_applied
+        self.qt_exceeds = qt_exceeds
+
 class Damos:
     action = None
     access_pattern = None
-    # todo: Support quotas, watermarks, stats, tried_regions
+    quota = None
+    apply_interval_us = None
+    # todo: Support watermarks, stats, tried_regions
     idx = None
     context = None
     tried_bytes = None
+    stats = None
 
-    def __init__(self, action='stat', access_pattern=DamosAccessPattern()):
+    def __init__(self, action='stat', access_pattern=DamosAccessPattern(),
+                 quota=DamosQuota(), apply_interval_us=0):
         self.action = action
         self.access_pattern = access_pattern
         self.access_pattern.scheme = self
+        self.quota = quota
+        self.quota.scheme = self
+        self.apply_interval_us = apply_interval_us
 
     def sysfs_dir(self):
         return os.path.join(
@@ -94,13 +141,12 @@ class Damos:
         err = self.access_pattern.stage()
         if err != None:
             return err
-
-        # disable quotas
-        err = write_file(os.path.join(self.sysfs_dir(), 'quotas', 'ms'), '0')
+        err = write_file(os.path.join(self.sysfs_dir(), 'apply_interval_us'),
+                         '%d' % self.apply_interval_us)
         if err != None:
             return err
-        err = write_file(
-                os.path.join(self.sysfs_dir(), 'quotas', 'bytes'), '0')
+
+        err = self.quota.stage()
         if err != None:
             return err
 
@@ -298,6 +344,23 @@ class Kdamond:
                     return err
                 scheme.tried_bytes = int(content)
 
+    def update_schemes_stats(self):
+        err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+                'update_schemes_stats')
+        if err != None:
+            return err
+        for context in self.contexts:
+            for scheme in context.schemes:
+                stat_values = []
+                for stat in ['nr_tried', 'sz_tried', 'nr_applied',
+                             'sz_applied', 'qt_exceeds']:
+                    content, err = read_file(
+                            os.path.join(scheme.sysfs_dir(), 'stats', stat))
+                    if err != None:
+                        return err
+                    stat_values.append(int(content))
+                scheme.stats = DamosStats(*stat_values)
+
 class Kdamonds:
     kdamonds = []
 
index 48989d4813ae8b890a1d4d185e0fc77f15e78791..aa995516870bc8c4e0fe70d5dda6d6257c990470 100644 (file)
@@ -45,6 +45,13 @@ test_content() {
 source ./_chk_dependency.sh
 
 damon_onoff="$DBGFS/monitor_on"
+if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
+then
+       damon_onoff="$DBGFS/monitor_on_DEPRECATED"
+else
+       damon_onoff="$DBGFS/monitor_on"
+fi
+
 if [ $(cat "$damon_onoff") = "on" ]
 then
        echo "monitoring is on"
diff --git a/tools/testing/selftests/damon/damos_apply_interval.py b/tools/testing/selftests/damon/damos_apply_interval.py
new file mode 100644 (file)
index 0000000..f04d437
--- /dev/null
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+    # access two 10 MiB memory regions for 2 seconds each
+    sz_region = 10 * 1024 * 1024
+    proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+
+    # Set up two schemes that differ only in their apply interval
+    kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+            contexts=[_damon_sysfs.DamonCtx(
+                ops='vaddr',
+                targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+                schemes=[
+                    _damon_sysfs.Damos(
+                        access_pattern=_damon_sysfs.DamosAccessPattern(
+                            # >= 25% access rate, >= 200ms age
+                            nr_accesses=[5, 20], age=[2, 2**64 - 1]),
+                        # zero means the aggregation interval (100 ms) is used
+                        apply_interval_us=0),
+                    # use 10ms apply interval
+                    _damon_sysfs.Damos(
+                        access_pattern=_damon_sysfs.DamosAccessPattern(
+                            # >= 25% access rate, >= 200ms age
+                            nr_accesses=[5, 20], age=[2, 2**64 - 1]),
+                        # explicitly set 10 ms apply interval
+                        apply_interval_us=10 * 1000)
+                    ] # schemes
+                )] # contexts
+            )]) # kdamonds
+
+    err = kdamonds.start()
+    if err != None:
+        print('kdamond start failed: %s' % err)
+        exit(1)
+
+    while proc.poll() == None:
+        time.sleep(0.1)
+        err = kdamonds.kdamonds[0].update_schemes_stats()
+        if err != None:
+            print('stats update failed: %s' % err)
+            exit(1)
+    schemes = kdamonds.kdamonds[0].contexts[0].schemes
+    nr_tried_stats = [s.stats.nr_tried for s in schemes]
+    if nr_tried_stats[0] == 0 or nr_tried_stats[1] == 0:
+        print('scheme(s) are not tried')
+        exit(1)
+
+    # Because the second scheme's apply interval is ten times shorter than
+    # that of the first scheme, the second scheme should be tried about ten
+    # times more frequently than the first scheme.  To allow for timing
+    # errors, only check that it was tried at least nine times as often.
+    ratio = nr_tried_stats[1] / nr_tried_stats[0]
+    if ratio < 9:
+        print('%d / %d = %f (< 9)' %
+              (nr_tried_stats[1], nr_tried_stats[0], ratio))
+        exit(1)
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py
new file mode 100644 (file)
index 0000000..7d4c6bb
--- /dev/null
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+    # access two 10 MiB memory regions for 2 seconds each
+    sz_region = 10 * 1024 * 1024
+    proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+
+    # Set quota up to 1 MiB per 100 ms
+    sz_quota = 1024 * 1024 # 1 MiB
+    quota_reset_interval = 100 # 100 ms
+    kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+            contexts=[_damon_sysfs.DamonCtx(
+                ops='vaddr',
+                targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+                schemes=[_damon_sysfs.Damos(
+                    access_pattern=_damon_sysfs.DamosAccessPattern(
+                        # >= 25% access rate, >= 200ms age
+                        nr_accesses=[5, 20], age=[2, 2**64 - 1]),
+                    quota=_damon_sysfs.DamosQuota(
+                        sz=sz_quota, reset_interval_ms=quota_reset_interval)
+                    )] # schemes
+                )] # contexts
+            )]) # kdamonds
+
+    err = kdamonds.start()
+    if err != None:
+        print('kdamond start failed: %s' % err)
+        exit(1)
+
+    wss_collected = []
+    nr_quota_exceeds = 0
+    while proc.poll() == None:
+        time.sleep(0.1)
+        err = kdamonds.kdamonds[0].update_schemes_tried_bytes()
+        if err != None:
+            print('tried bytes update failed: %s' % err)
+            exit(1)
+        err = kdamonds.kdamonds[0].update_schemes_stats()
+        if err != None:
+            print('stats update failed: %s' % err)
+            exit(1)
+
+        scheme = kdamonds.kdamonds[0].contexts[0].schemes[0]
+        wss_collected.append(scheme.tried_bytes)
+        nr_quota_exceeds = scheme.stats.qt_exceeds
+
+    wss_collected.sort()
+    for wss in wss_collected:
+        if wss > sz_quota:
+            print('quota is not kept: %s > %s' % (wss, sz_quota))
+            print('collected samples are as below')
+            print('\n'.join(['%d' % wss for wss in wss_collected]))
+            exit(1)
+
+    if nr_quota_exceeds < len(wss_collected):
+        print('quota is not always exceeded: %d > %d' %
+              (len(wss_collected), nr_quota_exceeds))
+        exit(1)
+
+if __name__ == '__main__':
+    main()
index 87aff8083822f6e2701ad65671ac428310b3bfa0..effbea33dc1640752492b66b5e2c042744d277ec 100755 (executable)
@@ -8,6 +8,14 @@ source _debugfs_common.sh
 
 orig_target_ids=$(cat "$DBGFS/target_ids")
 echo "" > "$DBGFS/target_ids"
-orig_monitor_on=$(cat "$DBGFS/monitor_on")
-test_write_fail "$DBGFS/monitor_on" "on" "orig_monitor_on" "empty target ids"
+
+if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
+then
+       monitor_on_file="$DBGFS/monitor_on_DEPRECATED"
+else
+       monitor_on_file="$DBGFS/monitor_on"
+fi
+
+orig_monitor_on=$(cat "$monitor_on_file")
+test_write_fail "$monitor_on_file" "on" "orig_monitor_on" "empty target ids"
 echo "$orig_target_ids" > "$DBGFS/target_ids"
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c
new file mode 100644 (file)
index 0000000..0cc2eef
--- /dev/null
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#define DBGFS_TARGET_IDS "/sys/kernel/debug/damon/target_ids"
+
+static void write_targetid_exit(void)
+{
+       int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
+       char pid_str[128];
+
+       snprintf(pid_str, sizeof(pid_str), "%d", getpid());
+       write(target_ids_fd, pid_str, sizeof(pid_str));
+       close(target_ids_fd);
+       exit(0);
+}
+
+unsigned long msec_timestamp(void)
+{
+       struct timeval tv;
+
+       gettimeofday(&tv, NULL);
+       return tv.tv_sec * 1000UL + tv.tv_usec / 1000;
+}
+
+int main(int argc, char *argv[])
+{
+       unsigned long start_ms;
+       int time_to_run, nr_forks = 0;
+
+       if (argc != 2) {
+               fprintf(stderr, "Usage: %s <msecs to run>\n", argv[0]);
+               exit(1);
+       }
+       time_to_run = atoi(argv[1]);
+
+       start_ms = msec_timestamp();
+       while (true) {
+               int pid = fork();
+
+               if (pid < 0) {
+                       fprintf(stderr, "fork() failed\n");
+                       exit(1);
+               }
+               if (pid == 0)
+                       write_targetid_exit();
+               wait(NULL);
+               nr_forks++;
+
+               if (msec_timestamp() - start_ms > time_to_run)
+                       break;
+       }
+       printf("%d\n", nr_forks);
+       return 0;
+}
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh
new file mode 100644 (file)
index 0000000..31fe33c
--- /dev/null
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+before=$(grep "^pid " /proc/slabinfo | awk '{print $2}')
+
+nr_leaks=$(./debugfs_target_ids_pid_leak 1000)
+expected_after_max=$((before + nr_leaks / 2))
+
+after=$(grep "^pid " /proc/slabinfo | awk '{print $2}')
+
+echo > /sys/kernel/debug/damon/target_ids
+
+echo "tried $nr_leaks pid leak"
+echo "number of active pid slabs: $before -> $after"
+echo "(up to $expected_after_max expected)"
+if [ $after -gt $expected_after_max ]
+then
+       echo "maybe pids are leaking"
+       exit 1
+else
+       exit 0
+fi
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c
new file mode 100644 (file)
index 0000000..b06f52a
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#define DBGFS_MONITOR_ON "/sys/kernel/debug/damon/monitor_on_DEPRECATED"
+#define DBGFS_TARGET_IDS "/sys/kernel/debug/damon/target_ids"
+
+static void turn_damon_on_exit(void)
+{
+       int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
+       int monitor_on_fd = open(DBGFS_MONITOR_ON, O_RDWR);
+       char pid_str[128];
+
+       snprintf(pid_str, sizeof(pid_str), "%d", getpid());
+       write(target_ids_fd, pid_str, sizeof(pid_str));
+       write(monitor_on_fd, "on\n", 3);
+       close(target_ids_fd);
+       close(monitor_on_fd);
+       usleep(1000);
+       exit(0);
+}
+
+static void try_race(void)
+{
+       int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
+       int pid = fork();
+       int buf[256];
+
+       if (pid < 0) {
+               fprintf(stderr, "fork() failed\n");
+               exit(1);
+       }
+       if (pid == 0)
+               turn_damon_on_exit();
+       while (true) {
+               int status;
+
+               read(target_ids_fd, buf, sizeof(buf));
+               if (waitpid(-1, &status, WNOHANG) == pid)
+                       break;
+       }
+       close(target_ids_fd);
+}
+
+static inline uint64_t ts_to_ms(struct timespec *ts)
+{
+       return (uint64_t)ts->tv_sec * 1000 + (uint64_t)ts->tv_nsec / 1000000;
+}
+
+int main(int argc, char *argv[])
+{
+       struct timespec start_time, now;
+       int runtime_ms;
+
+       if (argc != 2) {
+               fprintf(stderr, "Usage: %s <runtime in ms>\n", argv[0]);
+               exit(1);
+       }
+       runtime_ms = atoi(argv[1]);
+       clock_gettime(CLOCK_MONOTONIC, &start_time);
+       while (true) {
+               try_race();
+               clock_gettime(CLOCK_MONOTONIC, &now);
+               if (ts_to_ms(&now) - ts_to_ms(&start_time) > runtime_ms)
+                       break;
+       }
+       return 0;
+}
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh
new file mode 100644 (file)
index 0000000..fc793c4
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+dmesg -C
+
+./debugfs_target_ids_read_before_terminate_race 5000
+
+if dmesg | grep -q dbgfs_target_ids_read
+then
+       dmesg
+       exit 1
+else
+       exit 0
+fi
index 8c690ba1a5730a7395ac74678f749129f25f2a4b..28c887a0108fde91c252c8b1a92fb27e3a0db337 100644 (file)
@@ -20,7 +20,7 @@ def main():
 
     err = kdamonds.start()
     if err != None:
-        print('kdmaond start failed: %s' % err)
+        print('kdamond start failed: %s' % err)
         exit(1)
 
     while proc.poll() == None:
index cdbf19b442c98f1ff6e4ed9577034852853fd60a..90ad7409a7a6bb15293ae60997969987dde7fa84 100644 (file)
@@ -23,7 +23,7 @@ def main():
 
     err = kdamonds.start()
     if err != None:
-        print('kdmaond start failed: %s' % err)
+        print('kdamond start failed: %s' % err)
         exit(1)
 
     wss_collected = []
similarity index 60%
rename from drivers/crypto/vmx/.gitignore
rename to tools/testing/selftests/filesystems/eventfd/.gitignore
index 7aa71d83f739b906d5a094ef263c3bc0edf1c446..483faf59fe4adbf0b3eb34e7db8ae11b40b6d3ee 100644 (file)
@@ -1,3 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-aesp8-ppc.S
-ghashp8-ppc.S
+eventfd_test
diff --git a/tools/testing/selftests/filesystems/eventfd/Makefile b/tools/testing/selftests/filesystems/eventfd/Makefile
new file mode 100644 (file)
index 0000000..0a8e391
--- /dev/null
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lpthread
+TEST_GEN_PROGS := eventfd_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
new file mode 100644 (file)
index 0000000..f142a13
--- /dev/null
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <asm/unistd.h>
+#include <linux/time_types.h>
+#include <unistd.h>
+#include <assert.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include "../../kselftest_harness.h"
+
+struct error {
+       int  code;
+       char msg[512];
+};
+
+static int error_set(struct error *err, int code, const char *fmt, ...)
+{
+       va_list args;
+       int r;
+
+       if (code == 0 || !err || err->code != 0)
+               return code;
+
+       err->code = code;
+       va_start(args, fmt);
+       r = vsnprintf(err->msg, sizeof(err->msg), fmt, args);
+       assert((size_t)r < sizeof(err->msg));
+       va_end(args);
+
+       return code;
+}
+
+static inline int sys_eventfd2(unsigned int count, int flags)
+{
+       return syscall(__NR_eventfd2, count, flags);
+}
+
+TEST(eventfd01)
+{
+       int fd, flags;
+
+       fd = sys_eventfd2(0, 0);
+       ASSERT_GE(fd, 0);
+
+       flags = fcntl(fd, F_GETFL);
+       // The kernel automatically adds O_RDWR to an eventfd's file flags.
+       EXPECT_EQ(flags, O_RDWR);
+
+       close(fd);
+}
+
+TEST(eventfd02)
+{
+       int fd, flags;
+
+       fd = sys_eventfd2(0, EFD_CLOEXEC);
+       ASSERT_GE(fd, 0);
+
+       flags = fcntl(fd, F_GETFD);
+       ASSERT_GT(flags, -1);
+       EXPECT_EQ(flags, FD_CLOEXEC);
+
+       close(fd);
+}
+
+TEST(eventfd03)
+{
+       int fd, flags;
+
+       fd = sys_eventfd2(0, EFD_NONBLOCK);
+       ASSERT_GE(fd, 0);
+
+       flags = fcntl(fd, F_GETFL);
+       ASSERT_GT(flags, -1);
+       EXPECT_EQ(flags & EFD_NONBLOCK, EFD_NONBLOCK);
+       EXPECT_EQ(flags & O_RDWR, O_RDWR);
+
+       close(fd);
+}
+
+TEST(eventfd04)
+{
+       int fd, flags;
+
+       fd = sys_eventfd2(0, EFD_CLOEXEC|EFD_NONBLOCK);
+       ASSERT_GE(fd, 0);
+
+       flags = fcntl(fd, F_GETFL);
+       ASSERT_GT(flags, -1);
+       EXPECT_EQ(flags & EFD_NONBLOCK, EFD_NONBLOCK);
+       EXPECT_EQ(flags & O_RDWR, O_RDWR);
+
+       flags = fcntl(fd, F_GETFD);
+       ASSERT_GT(flags, -1);
+       EXPECT_EQ(flags, FD_CLOEXEC);
+
+       close(fd);
+}
+
+static inline void trim_newline(char *str)
+{
+       char *pos = strrchr(str, '\n');
+
+       if (pos)
+               *pos = '\0';
+}
+
+static int verify_fdinfo(int fd, struct error *err, const char *prefix,
+               size_t prefix_len, const char *expect, ...)
+{
+       char buffer[512] = {0, };
+       char path[512] = {0, };
+       va_list args;
+       FILE *f;
+       char *line = NULL;
+       size_t n = 0;
+       int found = 0;
+       int r;
+
+       va_start(args, expect);
+       r = vsnprintf(buffer, sizeof(buffer), expect, args);
+       assert((size_t)r < sizeof(buffer));
+       va_end(args);
+
+       snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
+       f = fopen(path, "re");
+       if (!f)
+               return error_set(err, -1, "fdinfo open failed for %d", fd);
+
+       while (getline(&line, &n, f) != -1) {
+               char *val;
+
+               if (strncmp(line, prefix, prefix_len))
+                       continue;
+
+               found = 1;
+
+               val = line + prefix_len;
+               r = strcmp(val, buffer);
+               if (r != 0) {
+                       trim_newline(line);
+                       trim_newline(buffer);
+                       error_set(err, -1, "%s '%s' != '%s'",
+                                 prefix, val, buffer);
+               }
+               break;
+       }
+
+       free(line);
+       fclose(f);
+
+       if (found == 0)
+               return error_set(err, -1, "%s not found for fd %d",
+                                prefix, fd);
+
+       return 0;
+}
+
+TEST(eventfd05)
+{
+       struct error err = {0};
+       int fd, ret;
+
+       fd = sys_eventfd2(0, EFD_SEMAPHORE);
+       ASSERT_GE(fd, 0);
+
+       ret = fcntl(fd, F_GETFL);
+       ASSERT_GT(ret, -1);
+       EXPECT_EQ(ret & O_RDWR, O_RDWR);
+
+       // The semaphore flag can only be observed via fdinfo.
+       ret = verify_fdinfo(fd, &err, "eventfd-semaphore: ", 19, "1\n");
+       if (ret != 0)
+               ksft_print_msg("eventfd-semaphore check failed, msg: %s\n",
+                               err.msg);
+       EXPECT_EQ(ret, 0);
+
+       close(fd);
+}
+
+TEST_HARNESS_MAIN
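
EFD_SEMAPHORE is the one eventfd flag fcntl() cannot report, which is why eventfd05 parses the eventfd-semaphore line from fdinfo instead. The flag only changes read() semantics: each read returns 1 and decrements the counter by one, rather than returning and zeroing the whole count. A minimal sketch of that standard eventfd(2) behavior:

	#include <assert.h>
	#include <stdint.h>
	#include <sys/eventfd.h>
	#include <unistd.h>

	int main(void)
	{
		uint64_t val;
		int fd = eventfd(3, EFD_SEMAPHORE);	/* counter starts at 3 */

		(void)read(fd, &val, sizeof(val));	/* val == 1, counter now 2 */
		assert(val == 1);
		(void)read(fd, &val, sizeof(val));	/* val == 1, counter now 1 */
		assert(val == 1);
		close(fd);
		return 0;
	}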
index 492e937fab00648d5dbda4e1c98bdb1840468fde..741c7dc16afc750592706aa8e71d1452096843fb 100644 (file)
@@ -36,7 +36,9 @@ LIBKVM_x86_64 += lib/x86_64/apic.c
 LIBKVM_x86_64 += lib/x86_64/handlers.S
 LIBKVM_x86_64 += lib/x86_64/hyperv.c
 LIBKVM_x86_64 += lib/x86_64/memstress.c
+LIBKVM_x86_64 += lib/x86_64/pmu.c
 LIBKVM_x86_64 += lib/x86_64/processor.c
+LIBKVM_x86_64 += lib/x86_64/sev.c
 LIBKVM_x86_64 += lib/x86_64/svm.c
 LIBKVM_x86_64 += lib/x86_64/ucall.c
 LIBKVM_x86_64 += lib/x86_64/vmx.c
@@ -53,6 +55,7 @@ LIBKVM_s390x += lib/s390x/diag318_test_handler.c
 LIBKVM_s390x += lib/s390x/processor.c
 LIBKVM_s390x += lib/s390x/ucall.c
 
+LIBKVM_riscv += lib/riscv/handlers.S
 LIBKVM_riscv += lib/riscv/processor.c
 LIBKVM_riscv += lib/riscv/ucall.c
 
@@ -80,6 +83,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
 TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
 TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
 TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
 TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
 TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
@@ -117,6 +121,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
+TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test
 TEST_GEN_PROGS_x86_64 += x86_64/amx_test
 TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
 TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
@@ -143,7 +148,6 @@ TEST_GEN_PROGS_x86_64 += system_counter_offset_test
 TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
 
 TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
 TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
@@ -155,6 +159,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
 TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
 TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
+TEST_GEN_PROGS_aarch64 += arch_timer
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
@@ -184,6 +189,7 @@ TEST_GEN_PROGS_s390x += rseq_test
 TEST_GEN_PROGS_s390x += set_memory_region_test
 TEST_GEN_PROGS_s390x += kvm_binary_stats_test
 
+TEST_GEN_PROGS_riscv += arch_timer
 TEST_GEN_PROGS_riscv += demand_paging_test
 TEST_GEN_PROGS_riscv += dirty_log_test
 TEST_GEN_PROGS_riscv += get-reg-list
@@ -194,6 +200,7 @@ TEST_GEN_PROGS_riscv += kvm_page_table_test
 TEST_GEN_PROGS_riscv += set_memory_region_test
 TEST_GEN_PROGS_riscv += steal_time
 
+SPLIT_TESTS += arch_timer
 SPLIT_TESTS += get-reg-list
 
 TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
@@ -217,7 +224,7 @@ else
 LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
 endif
 CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-       -Wno-gnu-variable-sized-type-not-at-end -MD -MP \
+       -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
        -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
        -fno-builtin-strnlen \
        -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
@@ -260,32 +267,36 @@ LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
 LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
 LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
 LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
-SPLIT_TESTS_TARGETS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
-SPLIT_TESTS_OBJS := $(patsubst %, $(ARCH_DIR)/%.o, $(SPLIT_TESTS))
+SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
+SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH_DIR)/%.o, $(SPLIT_TESTS))
 
 TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
 TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
 TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
 TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
-TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TESTS_OBJS))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ))
 -include $(TEST_DEP_FILES)
 
-$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o
+$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
+
+$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \
+$(TEST_GEN_PROGS_EXTENDED): %: %.o
        $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@
 $(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
        $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
-$(SPLIT_TESTS_TARGETS): %: %.o $(SPLIT_TESTS_OBJS)
+$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH_DIR)/%.o
        $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
+$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH_DIR)/%.o: $(ARCH_DIR)/%.c
+       $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
 EXTRA_CLEAN += $(GEN_HDRS) \
               $(LIBKVM_OBJS) \
-              $(SPLIT_TESTS_OBJS) \
+              $(SPLIT_TEST_GEN_OBJ) \
               $(TEST_DEP_FILES) \
               $(TEST_GEN_OBJ) \
               cscope.*
 
-x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
 $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS)
        $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
@@ -298,8 +309,8 @@ $(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS)
 $(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
        $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
 
-x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-$(SPLIT_TESTS_OBJS): $(GEN_HDRS)
+$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
+$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS)
 $(TEST_GEN_PROGS): $(LIBKVM_OBJS)
 $(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
 $(TEST_GEN_OBJ): $(GEN_HDRS)
index 2cb8dd1f8275fb0a83e1a4cb605e7b63c46ef221..ddba2c2fb5deb1b2ec2ab02db352bada7e70feee 100644 (file)
@@ -1,64 +1,19 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * arch_timer.c - Tests the aarch64 timer IRQ functionality
- *
  * The test validates both the virtual and physical timer IRQs using
- * CVAL and TVAL registers. This consitutes the four stages in the test.
- * The guest's main thread configures the timer interrupt for a stage
- * and waits for it to fire, with a timeout equal to the timer period.
- * It asserts that the timeout doesn't exceed the timer period.
- *
- * On the other hand, upon receipt of an interrupt, the guest's interrupt
- * handler validates the interrupt by checking if the architectural state
- * is in compliance with the specifications.
- *
- * The test provides command-line options to configure the timer's
- * period (-p), number of vCPUs (-n), and iterations per stage (-i).
- * To stress-test the timer stack even more, an option to migrate the
- * vCPUs across pCPUs (-m), at a particular rate, is also provided.
+ * CVAL and TVAL registers.
  *
  * Copyright (c) 2021, Google LLC.
  */
 #define _GNU_SOURCE
 
-#include <stdlib.h>
-#include <pthread.h>
-#include <linux/kvm.h>
-#include <linux/sizes.h>
-#include <linux/bitmap.h>
-#include <sys/sysinfo.h>
-
-#include "kvm_util.h"
-#include "processor.h"
-#include "delay.h"
 #include "arch_timer.h"
+#include "delay.h"
 #include "gic.h"
+#include "processor.h"
+#include "timer_test.h"
 #include "vgic.h"
 
-#define NR_VCPUS_DEF                   4
-#define NR_TEST_ITERS_DEF              5
-#define TIMER_TEST_PERIOD_MS_DEF       10
-#define TIMER_TEST_ERR_MARGIN_US       100
-#define TIMER_TEST_MIGRATION_FREQ_MS   2
-
-struct test_args {
-       int nr_vcpus;
-       int nr_iter;
-       int timer_period_ms;
-       int migration_freq_ms;
-       struct kvm_arm_counter_offset offset;
-};
-
-static struct test_args test_args = {
-       .nr_vcpus = NR_VCPUS_DEF,
-       .nr_iter = NR_TEST_ITERS_DEF,
-       .timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
-       .migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
-       .offset = { .reserved = 1 },
-};
-
-#define msecs_to_usecs(msec)           ((msec) * 1000LL)
-
 #define GICD_BASE_GPA                  0x8000000ULL
 #define GICR_BASE_GPA                  0x80A0000ULL
 
@@ -70,22 +25,8 @@ enum guest_stage {
        GUEST_STAGE_MAX,
 };
 
-/* Shared variables between host and guest */
-struct test_vcpu_shared_data {
-       int nr_iter;
-       enum guest_stage guest_stage;
-       uint64_t xcnt;
-};
-
-static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
-static pthread_t pt_vcpu_run[KVM_MAX_VCPUS];
-static struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS];
-
 static int vtimer_irq, ptimer_irq;
 
-static unsigned long *vcpu_done_map;
-static pthread_mutex_t vcpu_done_map_lock;
-
 static void
 guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
 {
@@ -158,9 +99,9 @@ static void guest_validate_irq(unsigned int intid,
 
        /* Basic 'timer condition met' check */
        __GUEST_ASSERT(xcnt >= cval,
-                      "xcnt = 0x%llx, cval = 0x%llx, xcnt_diff_us = 0x%llx",
+                      "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx",
                       xcnt, cval, xcnt_diff_us);
-       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xcnt = 0x%llx", xcnt);
+       __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl);
 
        WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
 }
@@ -190,10 +131,14 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
 
                /* Setup a timeout for the interrupt to arrive */
                udelay(msecs_to_usecs(test_args.timer_period_ms) +
-                       TIMER_TEST_ERR_MARGIN_US);
+                       test_args.timer_err_margin_us);
 
                irq_iter = READ_ONCE(shared_data->nr_iter);
-               GUEST_ASSERT_EQ(config_iter + 1, irq_iter);
+               __GUEST_ASSERT(config_iter + 1 == irq_iter,
+                               "config_iter + 1 = 0x%lx, irq_iter = 0x%lx.\n"
+                               "  Guest timer interrupt was not trigged within the specified\n"
+                               "  interval, try to increase the error margin by [-e] option.\n",
+                               config_iter + 1, irq_iter);
        }
 }
 
@@ -222,137 +167,6 @@ static void guest_code(void)
        GUEST_DONE();
 }
 
-static void *test_vcpu_run(void *arg)
-{
-       unsigned int vcpu_idx = (unsigned long)arg;
-       struct ucall uc;
-       struct kvm_vcpu *vcpu = vcpus[vcpu_idx];
-       struct kvm_vm *vm = vcpu->vm;
-       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpu_idx];
-
-       vcpu_run(vcpu);
-
-       /* Currently, any exit from guest is an indication of completion */
-       pthread_mutex_lock(&vcpu_done_map_lock);
-       __set_bit(vcpu_idx, vcpu_done_map);
-       pthread_mutex_unlock(&vcpu_done_map_lock);
-
-       switch (get_ucall(vcpu, &uc)) {
-       case UCALL_SYNC:
-       case UCALL_DONE:
-               break;
-       case UCALL_ABORT:
-               sync_global_from_guest(vm, *shared_data);
-               fprintf(stderr, "Guest assert failed,  vcpu %u; stage; %u; iter: %u\n",
-                       vcpu_idx, shared_data->guest_stage, shared_data->nr_iter);
-               REPORT_GUEST_ASSERT(uc);
-               break;
-       default:
-               TEST_FAIL("Unexpected guest exit");
-       }
-
-       return NULL;
-}
-
-static uint32_t test_get_pcpu(void)
-{
-       uint32_t pcpu;
-       unsigned int nproc_conf;
-       cpu_set_t online_cpuset;
-
-       nproc_conf = get_nprocs_conf();
-       sched_getaffinity(0, sizeof(cpu_set_t), &online_cpuset);
-
-       /* Randomly find an available pCPU to place a vCPU on */
-       do {
-               pcpu = rand() % nproc_conf;
-       } while (!CPU_ISSET(pcpu, &online_cpuset));
-
-       return pcpu;
-}
-
-static int test_migrate_vcpu(unsigned int vcpu_idx)
-{
-       int ret;
-       cpu_set_t cpuset;
-       uint32_t new_pcpu = test_get_pcpu();
-
-       CPU_ZERO(&cpuset);
-       CPU_SET(new_pcpu, &cpuset);
-
-       pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
-
-       ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
-                                    sizeof(cpuset), &cpuset);
-
-       /* Allow the error where the vCPU thread is already finished */
-       TEST_ASSERT(ret == 0 || ret == ESRCH,
-                   "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d",
-                   vcpu_idx, new_pcpu, ret);
-
-       return ret;
-}
-
-static void *test_vcpu_migration(void *arg)
-{
-       unsigned int i, n_done;
-       bool vcpu_done;
-
-       do {
-               usleep(msecs_to_usecs(test_args.migration_freq_ms));
-
-               for (n_done = 0, i = 0; i < test_args.nr_vcpus; i++) {
-                       pthread_mutex_lock(&vcpu_done_map_lock);
-                       vcpu_done = test_bit(i, vcpu_done_map);
-                       pthread_mutex_unlock(&vcpu_done_map_lock);
-
-                       if (vcpu_done) {
-                               n_done++;
-                               continue;
-                       }
-
-                       test_migrate_vcpu(i);
-               }
-       } while (test_args.nr_vcpus != n_done);
-
-       return NULL;
-}
-
-static void test_run(struct kvm_vm *vm)
-{
-       pthread_t pt_vcpu_migration;
-       unsigned int i;
-       int ret;
-
-       pthread_mutex_init(&vcpu_done_map_lock, NULL);
-       vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus);
-       TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap");
-
-       for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) {
-               ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run,
-                                    (void *)(unsigned long)i);
-               TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread", i);
-       }
-
-       /* Spawn a thread to control the vCPU migrations */
-       if (test_args.migration_freq_ms) {
-               srand(time(NULL));
-
-               ret = pthread_create(&pt_vcpu_migration, NULL,
-                                       test_vcpu_migration, NULL);
-               TEST_ASSERT(!ret, "Failed to create the migration pthread");
-       }
-
-
-       for (i = 0; i < test_args.nr_vcpus; i++)
-               pthread_join(pt_vcpu_run[i], NULL);
-
-       if (test_args.migration_freq_ms)
-               pthread_join(pt_vcpu_migration, NULL);
-
-       bitmap_free(vcpu_done_map);
-}
-
 static void test_init_timer_irq(struct kvm_vm *vm)
 {
        /* Timer INTID should be the same for all the vCPUs, so query only vCPU-0 */
@@ -369,7 +183,7 @@ static void test_init_timer_irq(struct kvm_vm *vm)
 
 static int gic_fd;
 
-static struct kvm_vm *test_vm_create(void)
+struct kvm_vm *test_vm_create(void)
 {
        struct kvm_vm *vm;
        unsigned int i;
@@ -380,10 +194,14 @@ static struct kvm_vm *test_vm_create(void)
        vm_init_descriptor_tables(vm);
        vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
 
-       if (!test_args.offset.reserved) {
-               if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET))
-                       vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset);
-               else
+       if (!test_args.reserved) {
+               if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
+                       struct kvm_arm_counter_offset offset = {
+                               .counter_offset = test_args.counter_offset,
+                               .reserved = 0,
+                       };
+                       vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
+               } else
                        TEST_FAIL("no support for global offset");
        }
 
@@ -400,81 +218,8 @@ static struct kvm_vm *test_vm_create(void)
        return vm;
 }
 
-static void test_vm_cleanup(struct kvm_vm *vm)
+void test_vm_cleanup(struct kvm_vm *vm)
 {
        close(gic_fd);
        kvm_vm_free(vm);
 }
-
-static void test_print_help(char *name)
-{
-       pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n",
-               name);
-       pr_info("\t-n: Number of vCPUs to configure (default: %u; max: %u)\n",
-               NR_VCPUS_DEF, KVM_MAX_VCPUS);
-       pr_info("\t-i: Number of iterations per stage (default: %u)\n",
-               NR_TEST_ITERS_DEF);
-       pr_info("\t-p: Periodicity (in ms) of the guest timer (default: %u)\n",
-               TIMER_TEST_PERIOD_MS_DEF);
-       pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
-               TIMER_TEST_MIGRATION_FREQ_MS);
-       pr_info("\t-o: Counter offset (in counter cycles, default: 0)\n");
-       pr_info("\t-h: print this help screen\n");
-}
-
-static bool parse_args(int argc, char *argv[])
-{
-       int opt;
-
-       while ((opt = getopt(argc, argv, "hn:i:p:m:o:")) != -1) {
-               switch (opt) {
-               case 'n':
-                       test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
-                       if (test_args.nr_vcpus > KVM_MAX_VCPUS) {
-                               pr_info("Max allowed vCPUs: %u\n",
-                                       KVM_MAX_VCPUS);
-                               goto err;
-                       }
-                       break;
-               case 'i':
-                       test_args.nr_iter = atoi_positive("Number of iterations", optarg);
-                       break;
-               case 'p':
-                       test_args.timer_period_ms = atoi_positive("Periodicity", optarg);
-                       break;
-               case 'm':
-                       test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
-                       break;
-               case 'o':
-                       test_args.offset.counter_offset = strtol(optarg, NULL, 0);
-                       test_args.offset.reserved = 0;
-                       break;
-               case 'h':
-               default:
-                       goto err;
-               }
-       }
-
-       return true;
-
-err:
-       test_print_help(argv[0]);
-       return false;
-}
-
-int main(int argc, char *argv[])
-{
-       struct kvm_vm *vm;
-
-       if (!parse_args(argc, argv))
-               exit(KSFT_SKIP);
-
-       __TEST_REQUIRE(!test_args.migration_freq_ms || get_nprocs() >= 2,
-                      "At least two physical CPUs needed for vCPU migration");
-
-       vm = test_vm_create();
-       test_run(vm);
-       test_vm_cleanup(vm);
-
-       return 0;
-}
index 866002917441c6bc10995a6e631ca7cac2405325..2582c49e525adf64ddc0275eec65333bd4587a14 100644 (file)
@@ -365,7 +365,7 @@ static void guest_wp_handler(struct ex_regs *regs)
 
 static void guest_ss_handler(struct ex_regs *regs)
 {
-       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%u'", ss_idx);
+       __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx);
        ss_addr[ss_idx++] = regs->pc;
        regs->pstate |= SPSR_SS;
 }
index 27c10e7a7e0124f01e0e636dfb25944c0d7abc95..9d192ce0078d6989f34d32b7e7e41326507338c2 100644 (file)
@@ -105,12 +105,12 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info)
                case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
                case TEST_STAGE_HVC_IFACE_FALSE_INFO:
                        __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
-                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
                                        res.a0, hc_info->func_id, hc_info->arg1, stage);
                        break;
                case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
                        __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
-                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+                                      "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
                                        res.a0, hc_info->func_id, hc_info->arg1, stage);
                        break;
                default:
index 53fddad57cbbc689231fd81cd1947d27f1c84807..5972905275cfacee914b618348a20a3cef080011 100644 (file)
@@ -292,7 +292,7 @@ static void guest_code(struct test_desc *test)
 
 static void no_dabt_handler(struct ex_regs *regs)
 {
-       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%llx", read_sysreg(far_el1));
+       GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
 }
 
 static void no_iabt_handler(struct ex_regs *regs)
index bac05210b53927970eb91cbac3e985b8ba74c897..16e2338686c172c1e1ece67e405df3ab666f7ded 100644 (file)
@@ -32,6 +32,10 @@ struct reg_ftr_bits {
        enum ftr_type type;
        uint8_t shift;
        uint64_t mask;
+       /*
+        * For FTR_EXACT, safe_val is used as the exact safe value.
+        * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
+        */
        int64_t safe_val;
 };
 
@@ -65,13 +69,13 @@ struct test_feature_reg {
 
 static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
        S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, 0),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
        REG_FTR_END,
 };
 
 static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
-       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, 0),
-       REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, 0),
+       S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3),
+       REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8),
        REG_FTR_END,
 };
 
@@ -224,13 +228,13 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
 {
        uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
 
-       if (ftr_bits->type == FTR_UNSIGNED) {
+       if (ftr_bits->sign == FTR_UNSIGNED) {
                switch (ftr_bits->type) {
                case FTR_EXACT:
                        ftr = ftr_bits->safe_val;
                        break;
                case FTR_LOWER_SAFE:
-                       if (ftr > 0)
+                       if (ftr > ftr_bits->safe_val)
                                ftr--;
                        break;
                case FTR_HIGHER_SAFE:
@@ -252,7 +256,7 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
                        ftr = ftr_bits->safe_val;
                        break;
                case FTR_LOWER_SAFE:
-                       if (ftr > 0)
+                       if (ftr > ftr_bits->safe_val)
                                ftr--;
                        break;
                case FTR_HIGHER_SAFE:
@@ -276,7 +280,7 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
 {
        uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
 
-       if (ftr_bits->type == FTR_UNSIGNED) {
+       if (ftr_bits->sign == FTR_UNSIGNED) {
                switch (ftr_bits->type) {
                case FTR_EXACT:
                        ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
index 5f9713364693b4eacd175bad2b1531e6a91f5504..f2fb0e3f14bca8cb8dc18a8a9d328f0cb1299803 100644 (file)
@@ -93,22 +93,6 @@ static inline void write_sel_evtyper(int sel, unsigned long val)
        isb();
 }
 
-static inline void enable_counter(int idx)
-{
-       uint64_t v = read_sysreg(pmcntenset_el0);
-
-       write_sysreg(BIT(idx) | v, pmcntenset_el0);
-       isb();
-}
-
-static inline void disable_counter(int idx)
-{
-       uint64_t v = read_sysreg(pmcntenset_el0);
-
-       write_sysreg(BIT(idx) | v, pmcntenclr_el0);
-       isb();
-}
-
 static void pmu_disable_reset(void)
 {
        uint64_t pmcr = read_sysreg(pmcr_el0);
@@ -195,11 +179,11 @@ struct pmc_accessor pmc_accessors[] = {
                                                                                 \
        if (set_expected)                                                        \
                __GUEST_ASSERT((_tval & mask),                                   \
-                               "tval: 0x%lx; mask: 0x%lx; set_expected: 0x%lx", \
+                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
                                _tval, mask, set_expected);                      \
        else                                                                     \
                __GUEST_ASSERT(!(_tval & mask),                                  \
-                               "tval: 0x%lx; mask: 0x%lx; set_expected: 0x%lx", \
+                               "tval: 0x%lx; mask: 0x%lx; set_expected: %u",    \
                                _tval, mask, set_expected);                      \
 }
 
@@ -286,7 +270,7 @@ static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
        acc->write_typer(pmc_idx, write_data);
        read_data = acc->read_typer(pmc_idx);
        __GUEST_ASSERT(read_data == write_data,
-                      "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
                       pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
 
        /*
@@ -297,14 +281,14 @@ static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
 
        /* The count value must be 0, as it is disabled and reset */
        __GUEST_ASSERT(read_data == 0,
-                      "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx",
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx",
                       pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
 
        write_data = read_data + pmc_idx + 0x12345;
        acc->write_cntr(pmc_idx, write_data);
        read_data = acc->read_cntr(pmc_idx);
        __GUEST_ASSERT(read_data == write_data,
-                      "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+                      "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
                       pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
 }
 
@@ -379,7 +363,7 @@ static void guest_code(uint64_t expected_pmcr_n)
        int i, pmc;
 
        __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
-                       "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%lx",
+                       "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x",
                        expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
 
        pmcr = read_sysreg(pmcr_el0);
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
new file mode 100644 (file)
index 0000000..ae1f1a6
--- /dev/null
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the arch timer IRQ functionality
+ *
+ * The guest's main thread configures the timer interrupt and waits
+ * for it to fire, with a timeout equal to the timer period plus a
+ * user-configurable error margin (default: 100us), and asserts that
+ * the interrupt arrives within that window.
+ *
+ * Upon receipt of an interrupt, the guest's interrupt handler validates
+ * it by checking that the architectural state complies with the
+ * specification.
+ *
+ * The test provides command-line options to configure the timer's
+ * period (-p), number of vCPUs (-n), iterations per stage (-i) and timer
+ * interrupt arrival error margin (-e). To stress-test the timer stack
+ * even more, an option to migrate the vCPUs across pCPUs (-m), at a
+ * particular rate, is also provided.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <sys/sysinfo.h>
+
+#include "timer_test.h"
+
+struct test_args test_args = {
+       .nr_vcpus = NR_VCPUS_DEF,
+       .nr_iter = NR_TEST_ITERS_DEF,
+       .timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
+       .migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
+       .timer_err_margin_us = TIMER_TEST_ERR_MARGIN_US,
+       .reserved = 1,
+};
+
+struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS];
+
+static pthread_t pt_vcpu_run[KVM_MAX_VCPUS];
+static unsigned long *vcpu_done_map;
+static pthread_mutex_t vcpu_done_map_lock;
+
+static void *test_vcpu_run(void *arg)
+{
+       unsigned int vcpu_idx = (unsigned long)arg;
+       struct ucall uc;
+       struct kvm_vcpu *vcpu = vcpus[vcpu_idx];
+       struct kvm_vm *vm = vcpu->vm;
+       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpu_idx];
+
+       vcpu_run(vcpu);
+
+       /* Currently, any exit from the guest is an indication of completion */
+       pthread_mutex_lock(&vcpu_done_map_lock);
+       __set_bit(vcpu_idx, vcpu_done_map);
+       pthread_mutex_unlock(&vcpu_done_map_lock);
+
+       switch (get_ucall(vcpu, &uc)) {
+       case UCALL_SYNC:
+       case UCALL_DONE:
+               break;
+       case UCALL_ABORT:
+               sync_global_from_guest(vm, *shared_data);
+               fprintf(stderr, "Guest assert failed, vcpu %u; stage: %u; iter: %u\n",
+                       vcpu_idx, shared_data->guest_stage, shared_data->nr_iter);
+               REPORT_GUEST_ASSERT(uc);
+               break;
+       default:
+               TEST_FAIL("Unexpected guest exit");
+       }
+
+       pr_info("PASS(vCPU-%d).\n", vcpu_idx);
+
+       return NULL;
+}
+
+static uint32_t test_get_pcpu(void)
+{
+       uint32_t pcpu;
+       unsigned int nproc_conf;
+       cpu_set_t online_cpuset;
+
+       nproc_conf = get_nprocs_conf();
+       sched_getaffinity(0, sizeof(cpu_set_t), &online_cpuset);
+
+       /* Randomly find an available pCPU to place a vCPU on */
+       do {
+               pcpu = rand() % nproc_conf;
+       } while (!CPU_ISSET(pcpu, &online_cpuset));
+
+       return pcpu;
+}
+
+static int test_migrate_vcpu(unsigned int vcpu_idx)
+{
+       int ret;
+       cpu_set_t cpuset;
+       uint32_t new_pcpu = test_get_pcpu();
+
+       CPU_ZERO(&cpuset);
+       CPU_SET(new_pcpu, &cpuset);
+
+       pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
+
+       ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
+                                    sizeof(cpuset), &cpuset);
+
+       /* Allow the error where the vCPU thread is already finished */
+       TEST_ASSERT(ret == 0 || ret == ESRCH,
+                   "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d",
+                   vcpu_idx, new_pcpu, ret);
+
+       return ret;
+}
+
+static void *test_vcpu_migration(void *arg)
+{
+       unsigned int i, n_done;
+       bool vcpu_done;
+
+       do {
+               usleep(msecs_to_usecs(test_args.migration_freq_ms));
+
+               for (n_done = 0, i = 0; i < test_args.nr_vcpus; i++) {
+                       pthread_mutex_lock(&vcpu_done_map_lock);
+                       vcpu_done = test_bit(i, vcpu_done_map);
+                       pthread_mutex_unlock(&vcpu_done_map_lock);
+
+                       if (vcpu_done) {
+                               n_done++;
+                               continue;
+                       }
+
+                       test_migrate_vcpu(i);
+               }
+       } while (test_args.nr_vcpus != n_done);
+
+       return NULL;
+}
+
+static void test_run(struct kvm_vm *vm)
+{
+       pthread_t pt_vcpu_migration;
+       unsigned int i;
+       int ret;
+
+       pthread_mutex_init(&vcpu_done_map_lock, NULL);
+       vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus);
+       TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap");
+
+       for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) {
+               ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run,
+                                    (void *)(unsigned long)i);
+               TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread", i);
+       }
+
+       /* Spawn a thread to control the vCPU migrations */
+       if (test_args.migration_freq_ms) {
+               srand(time(NULL));
+
+               ret = pthread_create(&pt_vcpu_migration, NULL,
+                                       test_vcpu_migration, NULL);
+               TEST_ASSERT(!ret, "Failed to create the migration pthread");
+       }
+
+
+               pthread_join(pt_vcpu_run[i], NULL);
+
+       if (test_args.migration_freq_ms)
+               pthread_join(pt_vcpu_migration, NULL);
+
+       bitmap_free(vcpu_done_map);
+}
+
+static void test_print_help(char *name)
+{
+       pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n"
+               "\t\t    [-m migration_freq_ms] [-o counter_offset]\n"
+               "\t\t    [-e timer_err_margin_us]\n", name);
+       pr_info("\t-n: Number of vCPUs to configure (default: %u; max: %u)\n",
+               NR_VCPUS_DEF, KVM_MAX_VCPUS);
+       pr_info("\t-i: Number of iterations per stage (default: %u)\n",
+               NR_TEST_ITERS_DEF);
+       pr_info("\t-p: Periodicity (in ms) of the guest timer (default: %u)\n",
+               TIMER_TEST_PERIOD_MS_DEF);
+       pr_info("\t-m: Frequency (in ms) at which vCPUs migrate to a different pCPU. 0 to turn off (default: %u)\n",
+               TIMER_TEST_MIGRATION_FREQ_MS);
+       pr_info("\t-o: Counter offset (in counter cycles, default: 0) [aarch64-only]\n");
+       pr_info("\t-e: Interrupt arrival error margin (in us) of the guest timer (default: %u)\n",
+               TIMER_TEST_ERR_MARGIN_US);
+       pr_info("\t-h: print this help screen\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+       int opt;
+
+       while ((opt = getopt(argc, argv, "hn:i:p:m:o:e:")) != -1) {
+               switch (opt) {
+               case 'n':
+                       test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+                       if (test_args.nr_vcpus > KVM_MAX_VCPUS) {
+                               pr_info("Max allowed vCPUs: %u\n",
+                                       KVM_MAX_VCPUS);
+                               goto err;
+                       }
+                       break;
+               case 'i':
+                       test_args.nr_iter = atoi_positive("Number of iterations", optarg);
+                       break;
+               case 'p':
+                       test_args.timer_period_ms = atoi_positive("Periodicity", optarg);
+                       break;
+               case 'm':
+                       test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
+                       break;
+               case 'e':
+                       test_args.timer_err_margin_us = atoi_non_negative("Error Margin", optarg);
+                       break;
+               case 'o':
+                       test_args.counter_offset = strtol(optarg, NULL, 0);
+                       test_args.reserved = 0;
+                       break;
+               case 'h':
+               default:
+                       goto err;
+               }
+       }
+
+       return true;
+
+err:
+       test_print_help(argv[0]);
+       return false;
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_vm *vm;
+
+       if (!parse_args(argc, argv))
+               exit(KSFT_SKIP);
+
+       __TEST_REQUIRE(!test_args.migration_freq_ms || get_nprocs() >= 2,
+                      "At least two physical CPUs needed for vCPU migration");
+
+       vm = test_vm_create();
+       test_run(vm);
+       test_vm_cleanup(vm);
+
+       return 0;
+}
index c78a98c1a915ce9a95261206ee6b10cb258b2955..92eae206baa62ec410460fd18938b2777da58775 100644 (file)
@@ -167,6 +167,9 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
        TEST_ASSERT(ret != -1, "memfd fstat should succeed");
        TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size");
        TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");
+
+       close(fd2);
+       close(fd1);
 }
 
 int main(int argc, char *argv[])
diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h
new file mode 100644 (file)
index 0000000..e43a57d
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
index cf20e44e86f2f9fb7feeccf88fdc93fecd7fbfd2..9e518b56282736caeaf549e1d504ce53f9fc64e3 100644 (file)
@@ -226,8 +226,4 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
               uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
               uint64_t arg6, struct arm_smccc_res *res);
 
-
-
-uint32_t guest_get_vcpuid(void);
-
 #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_test_harness.h b/tools/testing/selftests/kvm/include/kvm_test_harness.h
new file mode 100644 (file)
index 0000000..8f7c685
--- /dev/null
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Macros for defining a KVM test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_TEST_HARNESS_H
+#define SELFTEST_KVM_TEST_HARNESS_H
+
+#include "kselftest_harness.h"
+
+#define KVM_ONE_VCPU_TEST_SUITE(name)                                  \
+       FIXTURE(name) {                                                 \
+               struct kvm_vcpu *vcpu;                                  \
+       };                                                              \
+                                                                       \
+       FIXTURE_SETUP(name) {                                           \
+               (void)vm_create_with_one_vcpu(&self->vcpu, NULL);       \
+       }                                                               \
+                                                                       \
+       FIXTURE_TEARDOWN(name) {                                        \
+               kvm_vm_free(self->vcpu->vm);                            \
+       }
+
+#define KVM_ONE_VCPU_TEST(suite, test, guestcode)                      \
+static void __suite##_##test(struct kvm_vcpu *vcpu);                   \
+                                                                       \
+TEST_F(suite, test)                                                    \
+{                                                                      \
+       vcpu_arch_set_entry_point(self->vcpu, guestcode);               \
+       __suite##_##test(self->vcpu);                                   \
+}                                                                      \
+static void __suite##_##test(struct kvm_vcpu *vcpu)
+
+#endif /* SELFTEST_KVM_TEST_HARNESS_H */
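
Editor's note: an illustrative use of the harness above (a sketch, not part
of this patch; the suite, test, and guest function names are invented):

	static void guest_code(void)
	{
		GUEST_DONE();
	}

	KVM_ONE_VCPU_TEST_SUITE(demo);

	KVM_ONE_VCPU_TEST(demo, sanity_check, guest_code)
	{
		struct ucall uc;

		vcpu_run(vcpu);
		TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_DONE);
	}

Each KVM_ONE_VCPU_TEST() body receives the fixture's vcpu with guestcode
already installed as its entry point via vcpu_arch_set_entry_point().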
index 9e5afc472c14268bbe629cb9c1baf6049b702457..3e0db283a46ad18dcd9a51c47aa35fa04616e8e0 100644 (file)
 #include <linux/types.h>
 
 #include <asm/atomic.h>
+#include <asm/kvm.h>
 
 #include <sys/ioctl.h>
 
+#include "kvm_util_arch.h"
 #include "sparsebit.h"
 
 /*
@@ -46,6 +48,7 @@ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 struct userspace_mem_region {
        struct kvm_userspace_memory_region2 region;
        struct sparsebit *unused_phy_pages;
+       struct sparsebit *protected_phy_pages;
        int fd;
        off_t offset;
        enum vm_mem_backing_src_type backing_src_type;
@@ -90,6 +93,7 @@ enum kvm_mem_region_type {
 struct kvm_vm {
        int mode;
        unsigned long type;
+       uint8_t subtype;
        int kvm_fd;
        int fd;
        unsigned int pgtable_levels;
@@ -111,6 +115,9 @@ struct kvm_vm {
        vm_vaddr_t idt;
        vm_vaddr_t handlers;
        uint32_t dirty_ring_size;
+       uint64_t gpa_tag_mask;
+
+       struct kvm_vm_arch arch;
 
        /* Cache of information for binary stats interface */
        int stats_fd;
@@ -191,10 +198,14 @@ enum vm_guest_mode {
 };
 
 struct vm_shape {
-       enum vm_guest_mode mode;
-       unsigned int type;
+       uint32_t type;
+       uint8_t  mode;
+       uint8_t  subtype;
+       uint16_t padding;
 };
 
+kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
+
 #define VM_TYPE_DEFAULT                        0
 
 #define VM_SHAPE(__mode)                       \
@@ -259,6 +270,10 @@ bool get_kvm_param_bool(const char *param);
 bool get_kvm_intel_param_bool(const char *param);
 bool get_kvm_amd_param_bool(const char *param);
 
+int get_kvm_param_integer(const char *param);
+int get_kvm_intel_param_integer(const char *param);
+int get_kvm_amd_param_integer(const char *param);
+
 unsigned int kvm_check_cap(long cap);
 
 static inline bool kvm_has_cap(long cap)
@@ -564,6 +579,13 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
                uint64_t guest_paddr, uint32_t slot, uint64_t npages,
                uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
 
+#ifndef vm_arch_has_protected_memory
+static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
+{
+       return false;
+}
+#endif
+
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
@@ -573,6 +595,9 @@ vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_mi
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
 vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
                            enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+                                vm_vaddr_t vaddr_min,
+                                enum kvm_mem_region_type type);
 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
 vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
                                 enum kvm_mem_region_type type);
@@ -585,6 +610,12 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
 void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
 
+
+static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+       return gpa & ~vm->gpa_tag_mask;
+}
+
 void vcpu_run(struct kvm_vcpu *vcpu);
 int _vcpu_run(struct kvm_vcpu *vcpu);
 
@@ -827,10 +858,23 @@ const char *exit_reason_str(unsigned int exit_reason);
 
 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
                             uint32_t memslot);
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
-                             vm_paddr_t paddr_min, uint32_t memslot);
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+                               vm_paddr_t paddr_min, uint32_t memslot,
+                               bool protected);
 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
 
+static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+                                           vm_paddr_t paddr_min, uint32_t memslot)
+{
+       /*
+        * By default, allocate memory as protected for VMs that support
+        * protected memory, as the majority of memory for such VMs is
+        * protected, i.e. using shared memory is effectively opt-in.
+        */
+       return __vm_phy_pages_alloc(vm, num, paddr_min, memslot,
+                                   vm_arch_has_protected_memory(vm));
+}
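
Editor's note: a hypothetical call site for the opt-out path (sketch only;
the helper name and its parameters are invented for illustration):

	/* Allocate one page as shared, even in a VM with protected memory. */
	static vm_paddr_t alloc_shared_page(struct kvm_vm *vm, vm_paddr_t paddr_min,
					    uint32_t memslot)
	{
		return __vm_phy_pages_alloc(vm, 1, paddr_min, memslot, false);
	}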
+
 /*
  * ____vm_create() does KVM_CREATE_VM and little else.  __vm_create() also
  * loads the test binary into guest memory and creates an IRQ chip (x86 only).
@@ -969,15 +1013,18 @@ static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
  * Input Args:
  *   vm - Virtual Machine
  *   vcpu_id - The id of the VCPU to add to the VM.
- *   guest_code - The vCPU's entry point
  */
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 void *guest_code);
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
 
 static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
                                           void *guest_code)
 {
-       return vm_arch_vcpu_add(vm, vcpu_id, guest_code);
+       struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id);
+
+       vcpu_arch_set_entry_point(vcpu, guest_code);
+
+       return vcpu;
 }
 
 /* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */
@@ -1081,4 +1128,8 @@ void kvm_selftest_arch_init(void);
 
 void kvm_arch_vm_post_create(struct kvm_vm *vm);
 
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
+
+uint32_t guest_get_vcpuid(void);
+
 #endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
new file mode 100644 (file)
index 0000000..225d81d
--- /dev/null
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V Arch Timer (sstc) specific interface
+ *
+ * Copyright (c) 2024 Intel Corporation
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include <asm/csr.h>
+#include <asm/vdso/processor.h>
+
+static unsigned long timer_freq;
+
+#define msec_to_cycles(msec)   \
+       ((timer_freq) * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec)   \
+       ((timer_freq) * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+       ((uint64_t)(cycles) * 1000000 / (timer_freq))
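
Editor's worked example for the conversion macros above (assumes
timer_freq = 10000000, i.e. a 10 MHz time base; the real value is
determined at runtime by the test):

	msec_to_cycles(10)   = 10000000 * 10 / 1000       = 100000 cycles
	usec_to_cycles(100)  = 10000000 * 100 / 1000000   = 1000 cycles
	cycles_to_usec(1000) = 1000 * 1000000 / 10000000  = 100 us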
+
+static inline uint64_t timer_get_cycles(void)
+{
+       return csr_read(CSR_TIME);
+}
+
+static inline void timer_set_cmp(uint64_t cval)
+{
+       csr_write(CSR_STIMECMP, cval);
+}
+
+static inline uint64_t timer_get_cmp(void)
+{
+       return csr_read(CSR_STIMECMP);
+}
+
+static inline void timer_irq_enable(void)
+{
+       csr_set(CSR_SIE, IE_TIE);
+}
+
+static inline void timer_irq_disable(void)
+{
+       csr_clear(CSR_SIE, IE_TIE);
+}
+
+static inline void timer_set_next_cmp_ms(uint32_t msec)
+{
+       uint64_t now_ct = timer_get_cycles();
+       uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+       timer_set_cmp(next_ct);
+}
+
+static inline void __delay(uint64_t cycles)
+{
+       uint64_t start = timer_get_cycles();
+
+       while ((timer_get_cycles() - start) < cycles)
+               cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+       __delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
new file mode 100644 (file)
index 0000000..e43a57d
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
index a0f9efe5a2a8de6afda4d4531f1ca6bda22f4b9c..ce473fe251dde487a1775538a85c2b4166704012 100644 (file)
@@ -7,8 +7,9 @@
 #ifndef SELFTEST_KVM_PROCESSOR_H
 #define SELFTEST_KVM_PROCESSOR_H
 
-#include "kvm_util.h"
 #include <linux/stringify.h>
+#include <asm/csr.h>
+#include "kvm_util.h"
 
 static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
                                    uint64_t idx, uint64_t size)
@@ -47,6 +48,58 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
                                                     KVM_REG_RISCV_SBI_SINGLE,          \
                                                     idx, KVM_REG_SIZE_ULONG)
 
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext);
+
+struct ex_regs {
+       unsigned long ra;
+       unsigned long sp;
+       unsigned long gp;
+       unsigned long tp;
+       unsigned long t0;
+       unsigned long t1;
+       unsigned long t2;
+       unsigned long s0;
+       unsigned long s1;
+       unsigned long a0;
+       unsigned long a1;
+       unsigned long a2;
+       unsigned long a3;
+       unsigned long a4;
+       unsigned long a5;
+       unsigned long a6;
+       unsigned long a7;
+       unsigned long s2;
+       unsigned long s3;
+       unsigned long s4;
+       unsigned long s5;
+       unsigned long s6;
+       unsigned long s7;
+       unsigned long s8;
+       unsigned long s9;
+       unsigned long s10;
+       unsigned long s11;
+       unsigned long t3;
+       unsigned long t4;
+       unsigned long t5;
+       unsigned long t6;
+       unsigned long epc;
+       unsigned long status;
+       unsigned long cause;
+};
+
+#define NR_VECTORS  2
+#define NR_EXCEPTIONS  32
+#define EC_MASK  (NR_EXCEPTIONS - 1)
+
+typedef void(*exception_handler_fn)(struct ex_regs *);
+
+void vm_init_vector_tables(struct kvm_vm *vm);
+void vcpu_init_vector_tables(struct kvm_vcpu *vcpu);
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler);
+
+void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler);
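
Editor's sketch of how these hooks compose on the host side (illustrative
only; guest_irq_handler and setup_guest_irqs are invented names):

	static void guest_irq_handler(struct ex_regs *regs);

	static void setup_guest_irqs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
	{
		vm_init_vector_tables(vm);
		vcpu_init_vector_tables(vcpu);
		vm_install_interrupt_handler(vm, guest_irq_handler);
	}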
+
 /* L3 index Bit[47:39] */
 #define PGTBL_L3_INDEX_MASK                    0x0000FF8000000000ULL
 #define PGTBL_L3_INDEX_SHIFT                   39
@@ -101,13 +154,6 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
 #define PGTBL_PAGE_SIZE                                PGTBL_L0_BLOCK_SIZE
 #define PGTBL_PAGE_SIZE_SHIFT                  PGTBL_L0_BLOCK_SHIFT
 
-#define SATP_PPN                               _AC(0x00000FFFFFFFFFFF, UL)
-#define SATP_MODE_39                           _AC(0x8000000000000000, UL)
-#define SATP_MODE_48                           _AC(0x9000000000000000, UL)
-#define SATP_ASID_BITS                         16
-#define SATP_ASID_SHIFT                                44
-#define SATP_ASID_MASK                         _AC(0xFFFF, UL)
-
 /* SBI return error codes */
 #define SBI_SUCCESS                            0
 #define SBI_ERR_FAILURE                                -1
@@ -147,4 +193,14 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
 
 bool guest_sbi_probe_extension(int extid, long *out_val);
 
+static inline void local_irq_enable(void)
+{
+       csr_set(CSR_SSTATUS, SR_SIE);
+}
+
+static inline void local_irq_disable(void)
+{
+       csr_clear(CSR_SSTATUS, SR_SIE);
+}
+
 #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h
new file mode 100644 (file)
index 0000000..e43a57d
--- /dev/null
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
index 12a9a4b9ceadc20d6421b9eafce2dc02e1bb002a..bc760761e1a320fff163a83e620874ed6be2daed 100644 (file)
@@ -30,26 +30,26 @@ typedef uint64_t sparsebit_num_t;
 
 struct sparsebit *sparsebit_alloc(void);
 void sparsebit_free(struct sparsebit **sbitp);
-void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+void sparsebit_copy(struct sparsebit *dstp, const struct sparsebit *src);
 
-bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
-bool sparsebit_is_set_num(struct sparsebit *sbit,
+bool sparsebit_is_set(const struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(const struct sparsebit *sbit,
                          sparsebit_idx_t idx, sparsebit_num_t num);
-bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
-bool sparsebit_is_clear_num(struct sparsebit *sbit,
+bool sparsebit_is_clear(const struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(const struct sparsebit *sbit,
                            sparsebit_idx_t idx, sparsebit_num_t num);
-sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
-bool sparsebit_any_set(struct sparsebit *sbit);
-bool sparsebit_any_clear(struct sparsebit *sbit);
-bool sparsebit_all_set(struct sparsebit *sbit);
-bool sparsebit_all_clear(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
-sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
-sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+sparsebit_num_t sparsebit_num_set(const struct sparsebit *sbit);
+bool sparsebit_any_set(const struct sparsebit *sbit);
+bool sparsebit_any_clear(const struct sparsebit *sbit);
+bool sparsebit_all_set(const struct sparsebit *sbit);
+bool sparsebit_all_clear(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(const struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *sbit,
                                       sparsebit_idx_t start, sparsebit_num_t num);
-sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *sbit,
                                         sparsebit_idx_t start, sparsebit_num_t num);
 
 void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
@@ -62,9 +62,29 @@ void sparsebit_clear_num(struct sparsebit *sbitp,
                         sparsebit_idx_t start, sparsebit_num_t num);
 void sparsebit_clear_all(struct sparsebit *sbitp);
 
-void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+void sparsebit_dump(FILE *stream, const struct sparsebit *sbit,
                    unsigned int indent);
-void sparsebit_validate_internal(struct sparsebit *sbit);
+void sparsebit_validate_internal(const struct sparsebit *sbit);
+
+/*
+ * Iterate over the inclusive, set ranges within sparsebit @s. In each
+ * iteration, @range_begin and @range_end take the beginning and end of the
+ * set range, both of type sparsebit_idx_t.
+ *
+ * For example, if the range [3, 7] (inclusive) is set, then within the
+ * iteration @range_begin will take the value 3 and @range_end will take
+ * the value 7.
+ *
+ * Ensure that there is at least one bit set before using this macro with
+ * sparsebit_any_set(), because sparsebit_first_set() will abort if none
+ * are set.
+ */
+#define sparsebit_for_each_set_range(s, range_begin, range_end)         \
+       for (range_begin = sparsebit_first_set(s),                      \
+            range_end = sparsebit_next_clear(s, range_begin) - 1;      \
+            range_begin && range_end;                                  \
+            range_begin = sparsebit_next_set(s, range_end),            \
+            range_end = sparsebit_next_clear(s, range_begin) - 1)
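
Editor's usage sketch for the iterator above (dump_set_ranges is an
invented helper; note the guard, per the comment, before the first
iteration):

	static void dump_set_ranges(const struct sparsebit *s)
	{
		sparsebit_idx_t begin, end;

		if (!sparsebit_any_set(s))
			return;

		sparsebit_for_each_set_range(s, begin, end)
			printf("set range: [0x%lx, 0x%lx]\n",
			       (unsigned long)begin, (unsigned long)end);
	}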
 
 #ifdef __cplusplus
 }
index 50a5e31ba8da1bc904d7cb04be2571bf786b847e..8a6e30612c86269575bdf7fee6449a52098d374f 100644 (file)
@@ -20,6 +20,8 @@
 #include <sys/mman.h>
 #include "kselftest.h"
 
+#define msecs_to_usecs(msec)    ((msec) * 1000ULL)
+
 static inline int _no_printf(const char *format, ...) { return 0; }
 
 #ifdef DEBUG
diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h
new file mode 100644 (file)
index 0000000..9b6edaa
--- /dev/null
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * timer test specific header
+ *
+ * Copyright (C) 2018, Google LLC
+ */
+
+#ifndef SELFTEST_KVM_TIMER_TEST_H
+#define SELFTEST_KVM_TIMER_TEST_H
+
+#include "kvm_util.h"
+
+#define NR_VCPUS_DEF            4
+#define NR_TEST_ITERS_DEF       5
+#define TIMER_TEST_PERIOD_MS_DEF    10
+#define TIMER_TEST_ERR_MARGIN_US    100
+#define TIMER_TEST_MIGRATION_FREQ_MS    2
+
+/* Timer test cmdline parameters */
+struct test_args {
+       uint32_t nr_vcpus;
+       uint32_t nr_iter;
+       uint32_t timer_period_ms;
+       uint32_t migration_freq_ms;
+       uint32_t timer_err_margin_us;
+       /* Members of struct kvm_arm_counter_offset */
+       uint64_t counter_offset;
+       uint64_t reserved;
+};
+
+/* Shared variables between host and guest */
+struct test_vcpu_shared_data {
+       uint32_t nr_iter;
+       int guest_stage;
+       uint64_t xcnt;
+};
+
+extern struct test_args test_args;
+extern struct kvm_vcpu *vcpus[];
+extern struct test_vcpu_shared_data vcpu_shared_data[];
+
+struct kvm_vm *test_vm_create(void);
+void test_vm_cleanup(struct kvm_vm *vm);
+
+#endif /* SELFTEST_KVM_TIMER_TEST_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
new file mode 100644 (file)
index 0000000..9f17251
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct kvm_vm_arch {
+       uint64_t c_bit;
+       uint64_t s_bit;
+       int sev_fd;
+       bool is_pt_protected;
+};
+
+static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
+{
+       return arch->c_bit || arch->s_bit;
+}
+
+#define vm_arch_has_protected_memory(vm) \
+       __vm_arch_has_protected_memory(&(vm)->arch)
+
+#endif  // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86_64/pmu.h
new file mode 100644 (file)
index 0000000..3c10c4d
--- /dev/null
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#ifndef SELFTEST_KVM_PMU_H
+#define SELFTEST_KVM_PMU_H
+
+#include <stdint.h>
+
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS                        300
+
+/*
+ * Encode an eventsel+umask pair into event-select MSR format.  Note, this is
+ * technically AMD's format, as Intel's format only supports 8 bits for the
+ * event selector, i.e. doesn't use bits 24:16 for the selector.  But, OR-ing
+ * in '0' is a nop and won't clobber the CMASK.
+ */
+#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) |     \
+                                   ((eventsel) & 0xff) |               \
+                                   ((umask) & 0xff) << 8)
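
Editor's worked expansion (the event select 0x1c0 and umask 0x2 are
arbitrary illustrative values):

	RAW_EVENT(0x1c0, 0x02)
	  = ((0x1c0 & 0xf00) << 24) | (0x1c0 & 0xff) | ((0x02 & 0xff) << 8)
	  = 0x100000000 | 0xc0 | 0x200
	  = 0x1000002c0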
+
+/*
+ * These are technically Intel's definitions, but except for CMASK (see above),
+ * AMD's layout is compatible with Intel's.
+ */
+#define ARCH_PERFMON_EVENTSEL_EVENT            GENMASK_ULL(7, 0)
+#define ARCH_PERFMON_EVENTSEL_UMASK            GENMASK_ULL(15, 8)
+#define ARCH_PERFMON_EVENTSEL_USR              BIT_ULL(16)
+#define ARCH_PERFMON_EVENTSEL_OS               BIT_ULL(17)
+#define ARCH_PERFMON_EVENTSEL_EDGE             BIT_ULL(18)
+#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL      BIT_ULL(19)
+#define ARCH_PERFMON_EVENTSEL_INT              BIT_ULL(20)
+#define ARCH_PERFMON_EVENTSEL_ANY              BIT_ULL(21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE           BIT_ULL(22)
+#define ARCH_PERFMON_EVENTSEL_INV              BIT_ULL(23)
+#define ARCH_PERFMON_EVENTSEL_CMASK            GENMASK_ULL(31, 24)
+
+/* RDPMC control flags, Intel only. */
+#define INTEL_RDPMC_METRICS                    BIT_ULL(29)
+#define INTEL_RDPMC_FIXED                      BIT_ULL(30)
+#define INTEL_RDPMC_FAST                       BIT_ULL(31)
+
+/* Fixed PMC controls, Intel only. */
+#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx)     BIT_ULL((32 + (_idx)))
+
+#define FIXED_PMC_KERNEL                       BIT_ULL(0)
+#define FIXED_PMC_USER                         BIT_ULL(1)
+#define FIXED_PMC_ANYTHREAD                    BIT_ULL(2)
+#define FIXED_PMC_ENABLE_PMI                   BIT_ULL(3)
+#define FIXED_PMC_NR_BITS                      4
+#define FIXED_PMC_CTRL(_idx, _val)             ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
+
+#define PMU_CAP_FW_WRITES                      BIT_ULL(13)
+#define PMU_CAP_LBR_FMT                                0x3f
+
+#define        INTEL_ARCH_CPU_CYCLES                   RAW_EVENT(0x3c, 0x00)
+#define        INTEL_ARCH_INSTRUCTIONS_RETIRED         RAW_EVENT(0xc0, 0x00)
+#define        INTEL_ARCH_REFERENCE_CYCLES             RAW_EVENT(0x3c, 0x01)
+#define        INTEL_ARCH_LLC_REFERENCES               RAW_EVENT(0x2e, 0x4f)
+#define        INTEL_ARCH_LLC_MISSES                   RAW_EVENT(0x2e, 0x41)
+#define        INTEL_ARCH_BRANCHES_RETIRED             RAW_EVENT(0xc4, 0x00)
+#define        INTEL_ARCH_BRANCHES_MISPREDICTED        RAW_EVENT(0xc5, 0x00)
+#define        INTEL_ARCH_TOPDOWN_SLOTS                RAW_EVENT(0xa4, 0x01)
+
+#define        AMD_ZEN_CORE_CYCLES                     RAW_EVENT(0x76, 0x00)
+#define        AMD_ZEN_INSTRUCTIONS_RETIRED            RAW_EVENT(0xc0, 0x00)
+#define        AMD_ZEN_BRANCHES_RETIRED                RAW_EVENT(0xc2, 0x00)
+#define        AMD_ZEN_BRANCHES_MISPREDICTED           RAW_EVENT(0xc3, 0x00)
+
+/*
+ * Note!  The order and thus the index of the architectural events matters as
+ * support for each event is enumerated via CPUID using the index of the event.
+ */
+enum intel_pmu_architectural_events {
+       INTEL_ARCH_CPU_CYCLES_INDEX,
+       INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
+       INTEL_ARCH_REFERENCE_CYCLES_INDEX,
+       INTEL_ARCH_LLC_REFERENCES_INDEX,
+       INTEL_ARCH_LLC_MISSES_INDEX,
+       INTEL_ARCH_BRANCHES_RETIRED_INDEX,
+       INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
+       INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
+       NR_INTEL_ARCH_EVENTS,
+};
+
+enum amd_pmu_zen_events {
+       AMD_ZEN_CORE_CYCLES_INDEX,
+       AMD_ZEN_INSTRUCTIONS_INDEX,
+       AMD_ZEN_BRANCHES_INDEX,
+       AMD_ZEN_BRANCH_MISSES_INDEX,
+       NR_AMD_ZEN_EVENTS,
+};
+
+extern const uint64_t intel_pmu_arch_events[];
+extern const uint64_t amd_pmu_zen_events[];
+
+#endif /* SELFTEST_KVM_PMU_H */
index 5bca8c947c8253819dd07ed6266a03194a0c1857..3bd03b088dda605348c7f85fc8d190ef63cf9e5e 100644 (file)
 extern bool host_cpu_is_intel;
 extern bool host_cpu_is_amd;
 
+enum vm_guest_x86_subtype {
+       VM_SUBTYPE_NONE = 0,
+       VM_SUBTYPE_SEV,
+       VM_SUBTYPE_SEV_ES,
+};
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
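
Editor's note (hedged): with KVM's force_emulation_prefix module parameter
enabled on the host, prefixing an instruction with this string should force
it through KVM's instruction emulator, e.g.:

	asm volatile(KVM_FEP "nop");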
+
 #define NMI_VECTOR             0x02
 
 #define X86_EFLAGS_FIXED        (1u << 1)
@@ -273,6 +282,7 @@ struct kvm_x86_cpu_property {
 #define X86_PROPERTY_MAX_EXT_LEAF              KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
 #define X86_PROPERTY_MAX_PHY_ADDR              KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
 #define X86_PROPERTY_MAX_VIRT_ADDR             KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
+#define X86_PROPERTY_SEV_C_BIT                 KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
 #define X86_PROPERTY_PHYS_ADDR_REDUCTION       KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
 
 #define X86_PROPERTY_MAX_CENTAUR_LEAF          KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
@@ -282,24 +292,41 @@ struct kvm_x86_cpu_property {
  * that indicates the feature is _not_ supported, and a property that states
  * the length of the bit mask of unsupported features.  A feature is supported
  * if the size of the bit mask is larger than the "unavailable" bit, and said
- * bit is not set.
+ * bit is not set.  Fixed counters also have bizarre enumeration, but inverted
+ * from the arch events for general purpose counters.  A fixed counter is
+ * supported if its feature flag is set **OR** the total number of fixed
+ * counters is greater than the index of the counter.
  *
- * Wrap the "unavailable" feature to simplify checking whether or not a given
- * architectural event is supported.
+ * Wrap the events for general purpose and fixed counters to simplify checking
+ * whether or not a given architectural event is supported.
  */
 struct kvm_x86_pmu_feature {
-       struct kvm_x86_cpu_feature anti_feature;
+       struct kvm_x86_cpu_feature f;
 };
-#define        KVM_X86_PMU_FEATURE(name, __bit)                                        \
-({                                                                             \
-       struct kvm_x86_pmu_feature feature = {                                  \
-               .anti_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit),        \
-       };                                                                      \
-                                                                               \
-       feature;                                                                \
+#define        KVM_X86_PMU_FEATURE(__reg, __bit)                               \
+({                                                                     \
+       struct kvm_x86_pmu_feature feature = {                          \
+               .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit),         \
+       };                                                              \
+                                                                       \
+       kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX ||         \
+                         KVM_CPUID_##__reg == KVM_CPUID_ECX);          \
+       feature;                                                        \
 })
 
-#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED   KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5)
+#define X86_PMU_FEATURE_CPU_CYCLES                     KVM_X86_PMU_FEATURE(EBX, 0)
+#define X86_PMU_FEATURE_INSNS_RETIRED                  KVM_X86_PMU_FEATURE(EBX, 1)
+#define X86_PMU_FEATURE_REFERENCE_CYCLES               KVM_X86_PMU_FEATURE(EBX, 2)
+#define X86_PMU_FEATURE_LLC_REFERENCES                 KVM_X86_PMU_FEATURE(EBX, 3)
+#define X86_PMU_FEATURE_LLC_MISSES                     KVM_X86_PMU_FEATURE(EBX, 4)
+#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED           KVM_X86_PMU_FEATURE(EBX, 5)
+#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED          KVM_X86_PMU_FEATURE(EBX, 6)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS                  KVM_X86_PMU_FEATURE(EBX, 7)
+
+#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED            KVM_X86_PMU_FEATURE(ECX, 0)
+#define X86_PMU_FEATURE_CPU_CYCLES_FIXED               KVM_X86_PMU_FEATURE(ECX, 1)
+#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED     KVM_X86_PMU_FEATURE(ECX, 2)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED            KVM_X86_PMU_FEATURE(ECX, 3)
 
 static inline unsigned int x86_family(unsigned int eax)
 {
@@ -698,10 +725,16 @@ static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
 
 static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
 {
-       uint32_t nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+       uint32_t nr_bits;
 
-       return nr_bits > feature.anti_feature.bit &&
-              !this_cpu_has(feature.anti_feature);
+       if (feature.f.reg == KVM_CPUID_EBX) {
+               nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+               return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
+       }
+
+       GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
+       nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       return nr_bits > feature.f.bit || this_cpu_has(feature.f);
 }
 
 static __always_inline uint64_t this_cpu_supported_xcr0(void)
@@ -917,10 +950,16 @@ static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
 
 static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
 {
-       uint32_t nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+       uint32_t nr_bits;
 
-       return nr_bits > feature.anti_feature.bit &&
-              !kvm_cpu_has(feature.anti_feature);
+       if (feature.f.reg == KVM_CPUID_EBX) {
+               nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+               return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
+       }
+
+       TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
+       nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
 }
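
For illustration, a hedged usage sketch of the reworked helpers (not part of the patch; it assumes the X86_PMU_FEATURE_* defines above plus the selftests' stock TEST_REQUIRE() and wrmsr() helpers):

	/* Host side: skip the test unless KVM advertises the fixed counter. */
	TEST_REQUIRE(kvm_pmu_has(X86_PMU_FEATURE_INSNS_RETIRED_FIXED));

	/* Guest side: usable if the counter index is below the number of
	 * fixed counters, OR CPUID.0xA:ECX enumerates the counter.
	 */
	if (this_pmu_has(X86_PMU_FEATURE_INSNS_RETIRED_FIXED))
		wrmsr(MSR_CORE_PERF_FIXED_CTR0, 0);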
 
 static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
@@ -995,7 +1034,9 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
        vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
 }
 
-void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+                            struct kvm_x86_cpu_property property,
+                            uint32_t value);
 
 void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
 void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
@@ -1059,6 +1100,7 @@ do {                                                                                      \
 } while (0)
 
 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+void kvm_init_vm_address_properties(struct kvm_vm *vm);
 bool vm_is_unrestricted_guest(struct kvm_vm *vm);
 
 struct ex_regs {
@@ -1120,16 +1162,19 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
  * r9  = exception vector (non-zero)
  * r10 = error code
  */
-#define KVM_ASM_SAFE(insn)                                     \
+#define __KVM_ASM_SAFE(insn, fep)                              \
        "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t"   \
        "lea 1f(%%rip), %%r10\n\t"                              \
        "lea 2f(%%rip), %%r11\n\t"                              \
-       "1: " insn "\n\t"                                       \
+       fep "1: " insn "\n\t"                                   \
        "xor %%r9, %%r9\n\t"                                    \
        "2:\n\t"                                                \
        "mov  %%r9b, %[vector]\n\t"                             \
        "mov  %%r10, %[error_code]\n\t"
 
+#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
+#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
+
 #define KVM_ASM_SAFE_OUTPUTS(v, ec)    [vector] "=qm"(v), [error_code] "=rm"(ec)
 #define KVM_ASM_SAFE_CLOBBERS  "r9", "r10", "r11"
 
@@ -1156,21 +1201,58 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
        vector;                                                         \
 })
 
-static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val)
-{
-       uint64_t error_code;
-       uint8_t vector;
-       uint32_t a, d;
+#define kvm_asm_safe_fep(insn, inputs...)                              \
+({                                                                     \
+       uint64_t ign_error_code;                                        \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE_FEP(insn)                             \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)     \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
 
-       asm volatile(KVM_ASM_SAFE("rdmsr")
-                    : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector, error_code)
-                    : "c"(msr)
-                    : KVM_ASM_SAFE_CLOBBERS);
+#define kvm_asm_safe_ec_fep(insn, error_code, inputs...)               \
+({                                                                     \
+       uint8_t vector;                                                 \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE_FEP(insn)                             \
+                    : KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
+                    : inputs                                           \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+       vector;                                                         \
+})
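
For illustration, a minimal sketch of the forced-emulation variant (assuming KVM_FEP expands to the forced-emulation prefix and the test has already verified kvm_is_forced_emulation_enabled()):

	uint8_t vector;

	/* Run a benign instruction through KVM's emulator; expect no fault. */
	vector = kvm_asm_safe_fep("nop");
	GUEST_ASSERT(!vector);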
 
-       *val = (uint64_t)a | ((uint64_t)d << 32);
-       return vector;
+#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)                   \
+static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val)  \
+{                                                                      \
+       uint64_t error_code;                                            \
+       uint8_t vector;                                                 \
+       uint32_t a, d;                                                  \
+                                                                       \
+       asm volatile(KVM_ASM_SAFE##_FEP(#insn)                          \
+                    : "=a"(a), "=d"(d),                                \
+                      KVM_ASM_SAFE_OUTPUTS(vector, error_code)         \
+                    : "c"(idx)                                         \
+                    : KVM_ASM_SAFE_CLOBBERS);                          \
+                                                                       \
+       *val = (uint64_t)a | ((uint64_t)d << 32);                       \
+       return vector;                                                  \
 }
 
+/*
+ * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
+ * use ECX as an input index, and EDX:EAX as a 64-bit output.
+ */
+#define BUILD_READ_U64_SAFE_HELPERS(insn)                              \
+       BUILD_READ_U64_SAFE_HELPER(insn, , )                            \
+       BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)
+
+BUILD_READ_U64_SAFE_HELPERS(rdmsr)
+BUILD_READ_U64_SAFE_HELPERS(rdpmc)
+BUILD_READ_U64_SAFE_HELPERS(xgetbv)
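
The three invocations above generate rdmsr_safe()/rdmsr_safe_fep(), rdpmc_safe()/rdpmc_safe_fep(), and xgetbv_safe()/xgetbv_safe_fep(). A hedged example of the rdmsr pair (0xdeadbeef stands in for any unsupported MSR index):

	uint64_t val;
	uint8_t vector;

	/* Reading a non-existent MSR should fault with #GP. */
	vector = rdmsr_safe(0xdeadbeef, &val);
	__GUEST_ASSERT(vector == GP_VECTOR, "Wanted #GP, got vector %u", vector);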
+
 static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
 {
        return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
@@ -1186,6 +1268,16 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
 
 bool kvm_is_tdp_enabled(void);
 
+static inline bool kvm_is_pmu_enabled(void)
+{
+       return get_kvm_param_bool("enable_pmu");
+}
+
+static inline bool kvm_is_forced_emulation_enabled(void)
+{
+       return !!get_kvm_param_integer("force_emulation_prefix");
+}
+
 uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
                                    int *level);
 uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h
new file mode 100644 (file)
index 0000000..8a1bf88
--- /dev/null
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers used for SEV guests
+ */
+#ifndef SELFTEST_KVM_SEV_H
+#define SELFTEST_KVM_SEV_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "linux/psp-sev.h"
+
+#include "kvm_util.h"
+#include "svm_util.h"
+#include "processor.h"
+
+enum sev_guest_state {
+       SEV_GUEST_STATE_UNINITIALIZED = 0,
+       SEV_GUEST_STATE_LAUNCH_UPDATE,
+       SEV_GUEST_STATE_LAUNCH_SECRET,
+       SEV_GUEST_STATE_RUNNING,
+};
+
+#define SEV_POLICY_NO_DBG      (1UL << 0)
+#define SEV_POLICY_ES          (1UL << 2)
+
+#define GHCB_MSR_TERM_REQ      0x100
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
+void sev_vm_launch_finish(struct kvm_vm *vm);
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+                                          struct kvm_vcpu **cpu);
+
+kvm_static_assert(SEV_RET_SUCCESS == 0);
+
+/*
+ * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
+ * instead of a proper struct.  The size of the parameter is embedded in the
+ * ioctl number, i.e. is ABI and thus immutable.  Hack around the mess by
+ * creating an overlay to pass in an "unsigned long" without a cast (casting
+ * will make the compiler unhappy due to dereferencing an aliased pointer).
+ */
+#define __vm_sev_ioctl(vm, cmd, arg)                                   \
+({                                                                     \
+       int r;                                                          \
+                                                                       \
+       union {                                                         \
+               struct kvm_sev_cmd c;                                   \
+               unsigned long raw;                                      \
+       } sev_cmd = { .c = {                                            \
+               .id = (cmd),                                            \
+               .data = (uint64_t)(arg),                                \
+               .sev_fd = (vm)->arch.sev_fd,                            \
+       } };                                                            \
+                                                                       \
+       r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw);        \
+       r ?: sev_cmd.c.error;                                           \
+})
+
+#define vm_sev_ioctl(vm, cmd, arg)                                     \
+({                                                                     \
+       int ret = __vm_sev_ioctl(vm, cmd, arg);                         \
+                                                                       \
+       __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm);               \
+})
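
Usage is then direct; a sketch (the real launch flow lands in sev.c below):

	struct kvm_sev_launch_start start = { .policy = SEV_POLICY_NO_DBG };

	/* Asserts on ioctl failure and on a non-zero SEV error code. */
	vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &start);

The union overlay also avoids the strict-aliasing warning that casting &sev_cmd.c to an unsigned long pointer would provoke.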
+
+static inline void sev_vm_init(struct kvm_vm *vm)
+{
+       vm->arch.sev_fd = open_sev_dev_path_or_exit();
+
+       vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
+}
+
+static inline void sev_es_vm_init(struct kvm_vm *vm)
+{
+       vm->arch.sev_fd = open_sev_dev_path_or_exit();
+
+       vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
+}
+
+static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
+                                                struct userspace_mem_region *region)
+{
+       struct kvm_enc_region range = {
+               .addr = region->region.userspace_addr,
+               .size = region->region.memory_size,
+       };
+
+       vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
+}
+
+static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+                                         uint64_t size)
+{
+       struct kvm_sev_launch_update_data update_data = {
+               .uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
+               .len = size,
+       };
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
+}
+
+#endif /* SELFTEST_KVM_SEV_H */
index 43b9a72833602e072ad7ba5e7e69c341fb8469a6..a9eb17295be42f36b564113d4a0b01a3e4782d12 100644 (file)
@@ -365,8 +365,13 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
                indent, "", pstate, pc);
 }
 
-struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 struct kvm_vcpu_init *init, void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+                                          struct kvm_vcpu_init *init)
 {
        size_t stack_size;
        uint64_t stack_vaddr;
@@ -381,15 +386,22 @@ struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
        aarch64_vcpu_setup(vcpu, init);
 
        vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
-       vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+       return vcpu;
+}
+
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+                                 struct kvm_vcpu_init *init, void *guest_code)
+{
+       struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
+
+       vcpu_arch_set_entry_point(vcpu, guest_code);
 
        return vcpu;
 }
 
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
 {
-       return aarch64_vcpu_add(vm, vcpu_id, NULL, guest_code);
+       return __aarch64_vcpu_add(vm, vcpu_id, NULL);
 }
 
 void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
index 1b197426f29fcd1e6faf4abe3069044dfe64b9ca..b2262b5fad9e79a509f9c686e675baf2b95b00a0 100644 (file)
@@ -52,13 +52,13 @@ int open_kvm_dev_path_or_exit(void)
        return _open_kvm_dev_path_or_exit(O_RDONLY);
 }
 
-static bool get_module_param_bool(const char *module_name, const char *param)
+static ssize_t get_module_param(const char *module_name, const char *param,
+                               void *buffer, size_t buffer_size)
 {
        const int path_size = 128;
        char path[path_size];
-       char value;
-       ssize_t r;
-       int fd;
+       ssize_t bytes_read;
+       int fd, r;
 
        r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
                     module_name, param);
@@ -67,11 +67,46 @@ static bool get_module_param_bool(const char *module_name, const char *param)
 
        fd = open_path_or_exit(path, O_RDONLY);
 
-       r = read(fd, &value, 1);
-       TEST_ASSERT(r == 1, "read(%s) failed", path);
+       bytes_read = read(fd, buffer, buffer_size);
+       TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes",
+                   path, bytes_read, buffer_size);
 
        r = close(fd);
        TEST_ASSERT(!r, "close(%s) failed", path);
+       return bytes_read;
+}
+
+static int get_module_param_integer(const char *module_name, const char *param)
+{
+       /*
+        * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
+        * NUL char, and 1 byte because the kernel sucks and inserts a newline
+        * at the end.
+        */
+       char value[16 + 1 + 1];
+       ssize_t r;
+
+       memset(value, '\0', sizeof(value));
+
+       r = get_module_param(module_name, param, value, sizeof(value));
+       TEST_ASSERT(value[r - 1] == '\n',
+                   "Expected trailing newline, got char '%c'", value[r - 1]);
+
+       /*
+        * Squash the newline, otherwise atoi_paranoid() will complain about
+        * trailing non-NUL characters in the string.
+        */
+       value[r - 1] = '\0';
+       return atoi_paranoid(value);
+}
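
Concretely (a hedged walk-through): if the sysfs file holds "1\n", get_module_param() returns 2, value[1] (the '\n') is squashed to '\0', and atoi_paranoid("1") returns 1, e.g.:

	int fep = get_module_param_integer("kvm", "force_emulation_prefix");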
+
+static bool get_module_param_bool(const char *module_name, const char *param)
+{
+       char value;
+       ssize_t r;
+
+       r = get_module_param(module_name, param, &value, sizeof(value));
+       TEST_ASSERT_EQ(r, 1);
 
        if (value == 'Y')
                return true;
@@ -96,6 +131,21 @@ bool get_kvm_amd_param_bool(const char *param)
        return get_module_param_bool("kvm_amd", param);
 }
 
+int get_kvm_param_integer(const char *param)
+{
+       return get_module_param_integer("kvm", param);
+}
+
+int get_kvm_intel_param_integer(const char *param)
+{
+       return get_module_param_integer("kvm_intel", param);
+}
+
+int get_kvm_amd_param_integer(const char *param)
+{
+       return get_module_param_integer("kvm_amd", param);
+}
+
 /*
  * Capability
  *
@@ -226,6 +276,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
 
        vm->mode = shape.mode;
        vm->type = shape.type;
+       vm->subtype = shape.subtype;
 
        vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits;
        vm->va_bits = vm_guest_mode_params[vm->mode].va_bits;
@@ -266,6 +317,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
        case VM_MODE_PXXV48_4K:
 #ifdef __x86_64__
                kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
+               kvm_init_vm_address_properties(vm);
                /*
                 * Ignore KVM support for 5-level paging (vm->va_bits == 57),
                 * it doesn't take effect unless a CR4.LA57 is set, which it
@@ -666,6 +718,7 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
        vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
 
        sparsebit_free(&region->unused_phy_pages);
+       sparsebit_free(&region->protected_phy_pages);
        ret = munmap(region->mmap_start, region->mmap_size);
        TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
        if (region->fd >= 0) {
@@ -1047,6 +1100,8 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
        }
 
        region->unused_phy_pages = sparsebit_alloc();
+       if (vm_arch_has_protected_memory(vm))
+               region->protected_phy_pages = sparsebit_alloc();
        sparsebit_set_num(region->unused_phy_pages,
                guest_paddr >> vm->page_shift, npages);
        region->region.slot = slot;
@@ -1377,15 +1432,17 @@ va_found:
        return pgidx_start * vm->page_size;
 }
 
-vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-                           enum kvm_mem_region_type type)
+static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
+                                    vm_vaddr_t vaddr_min,
+                                    enum kvm_mem_region_type type,
+                                    bool protected)
 {
        uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
 
        virt_pgd_alloc(vm);
-       vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
-                                             KVM_UTIL_MIN_PFN * vm->page_size,
-                                             vm->memslots[type]);
+       vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages,
+                                               KVM_UTIL_MIN_PFN * vm->page_size,
+                                               vm->memslots[type], protected);
 
        /*
         * Find an unused range of virtual page addresses of at least
@@ -1405,6 +1462,20 @@ vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
        return vaddr_start;
 }
 
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+                           enum kvm_mem_region_type type)
+{
+       return ____vm_vaddr_alloc(vm, sz, vaddr_min, type,
+                                 vm_arch_has_protected_memory(vm));
+}
+
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+                                vm_vaddr_t vaddr_min,
+                                enum kvm_mem_region_type type)
+{
+       return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false);
+}
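
A sketch of the intended split; the ucall_init() hunk later in this patch is the first user of the shared variant, since the host must be able to read the ucall header. Here "size" is a placeholder for the caller's allocation size:

	/* Guest-private by default on VMs with protected memory. */
	vm_vaddr_t priv = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);

	/* Explicitly shared with (i.e. readable by) the host. */
	vm_vaddr_t shared = vm_vaddr_alloc_shared(vm, size, KVM_UTIL_MIN_VADDR,
						  MEM_REGION_DATA);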
+
 /*
  * VM Virtual Address Allocate
  *
@@ -1527,6 +1598,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
 {
        struct userspace_mem_region *region;
 
+       gpa = vm_untag_gpa(vm, gpa);
+
        region = userspace_mem_region_find(vm, gpa, gpa);
        if (!region) {
                TEST_FAIL("No vm physical memory at 0x%lx", gpa);
@@ -1873,6 +1946,10 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
                        region->host_mem);
                fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
                sparsebit_dump(stream, region->unused_phy_pages, 0);
+               if (region->protected_phy_pages) {
+                       fprintf(stream, "%*sprotected_phy_pages: ", indent + 2, "");
+                       sparsebit_dump(stream, region->protected_phy_pages, 0);
+               }
        }
        fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
        sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
@@ -1974,6 +2051,7 @@ const char *exit_reason_str(unsigned int exit_reason)
  *   num - number of pages
  *   paddr_min - Physical address minimum
  *   memslot - Memory region to allocate page from
+ *   protected - True if the pages will be used as protected/private memory
  *
  * Output Args: None
  *
@@ -1985,8 +2063,9 @@ const char *exit_reason_str(unsigned int exit_reason)
  * and their base address is returned. A TEST_ASSERT failure occurs if
  * not enough pages are available at or above paddr_min.
  */
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
-                             vm_paddr_t paddr_min, uint32_t memslot)
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+                               vm_paddr_t paddr_min, uint32_t memslot,
+                               bool protected)
 {
        struct userspace_mem_region *region;
        sparsebit_idx_t pg, base;
@@ -1999,8 +2078,10 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
                paddr_min, vm->page_size);
 
        region = memslot2region(vm, memslot);
-       base = pg = paddr_min >> vm->page_shift;
+       TEST_ASSERT(!protected || region->protected_phy_pages,
+                   "Region doesn't support protected memory");
 
+       base = pg = paddr_min >> vm->page_shift;
        do {
                for (; pg < base + num; ++pg) {
                        if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
@@ -2019,8 +2100,11 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
                abort();
        }
 
-       for (pg = base; pg < base + num; ++pg)
+       for (pg = base; pg < base + num; ++pg) {
                sparsebit_clear(region->unused_phy_pages, pg);
+               if (protected)
+                       sparsebit_set(region->protected_phy_pages, pg);
+       }
 
        return base * vm->page_size;
 }
@@ -2224,3 +2308,18 @@ void __attribute((constructor)) kvm_selftest_init(void)
 
        kvm_selftest_arch_init();
 }
+
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
+{
+       sparsebit_idx_t pg = 0;
+       struct userspace_mem_region *region;
+
+       if (!vm_arch_has_protected_memory(vm))
+               return false;
+
+       region = userspace_mem_region_find(vm, paddr, paddr);
+       TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr);
+
+       pg = paddr >> vm->page_shift;
+       return sparsebit_is_set(region->protected_phy_pages, pg);
+}
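
vm_is_gpa_protected() is the query side of the GPA tagging scheme; the untag helper used by addr_gpa2hva() above is declared elsewhere, but is conceptually just a mask, along the lines of:

	static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa)
	{
		return gpa & ~vm->gpa_tag_mask;
	}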
diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S
new file mode 100644 (file)
index 0000000..aa0abd3
--- /dev/null
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Intel Corporation
+ */
+
+#ifndef __ASSEMBLY__
+#define __ASSEMBLY__
+#endif
+
+#include <asm/csr.h>
+
+.macro save_context
+       addi  sp, sp, (-8*34)
+       sd    x1, 0(sp)
+       sd    x2, 8(sp)
+       sd    x3, 16(sp)
+       sd    x4, 24(sp)
+       sd    x5, 32(sp)
+       sd    x6, 40(sp)
+       sd    x7, 48(sp)
+       sd    x8, 56(sp)
+       sd    x9, 64(sp)
+       sd    x10, 72(sp)
+       sd    x11, 80(sp)
+       sd    x12, 88(sp)
+       sd    x13, 96(sp)
+       sd    x14, 104(sp)
+       sd    x15, 112(sp)
+       sd    x16, 120(sp)
+       sd    x17, 128(sp)
+       sd    x18, 136(sp)
+       sd    x19, 144(sp)
+       sd    x20, 152(sp)
+       sd    x21, 160(sp)
+       sd    x22, 168(sp)
+       sd    x23, 176(sp)
+       sd    x24, 184(sp)
+       sd    x25, 192(sp)
+       sd    x26, 200(sp)
+       sd    x27, 208(sp)
+       sd    x28, 216(sp)
+       sd    x29, 224(sp)
+       sd    x30, 232(sp)
+       sd    x31, 240(sp)
+       csrr  s0, CSR_SEPC
+       csrr  s1, CSR_SSTATUS
+       csrr  s2, CSR_SCAUSE
+       sd    s0, 248(sp)
+       sd    s1, 256(sp)
+       sd    s2, 264(sp)
+.endm
+
+.macro restore_context
+       ld    s2, 264(sp)
+       ld    s1, 256(sp)
+       ld    s0, 248(sp)
+       csrw  CSR_SCAUSE, s2
+       csrw  CSR_SSTATUS, s1
+       csrw  CSR_SEPC, s0
+       ld    x31, 240(sp)
+       ld    x30, 232(sp)
+       ld    x29, 224(sp)
+       ld    x28, 216(sp)
+       ld    x27, 208(sp)
+       ld    x26, 200(sp)
+       ld    x25, 192(sp)
+       ld    x24, 184(sp)
+       ld    x23, 176(sp)
+       ld    x22, 168(sp)
+       ld    x21, 160(sp)
+       ld    x20, 152(sp)
+       ld    x19, 144(sp)
+       ld    x18, 136(sp)
+       ld    x17, 128(sp)
+       ld    x16, 120(sp)
+       ld    x15, 112(sp)
+       ld    x14, 104(sp)
+       ld    x13, 96(sp)
+       ld    x12, 88(sp)
+       ld    x11, 80(sp)
+       ld    x10, 72(sp)
+       ld    x9, 64(sp)
+       ld    x8, 56(sp)
+       ld    x7, 48(sp)
+       ld    x6, 40(sp)
+       ld    x5, 32(sp)
+       ld    x4, 24(sp)
+       ld    x3, 16(sp)
+       ld    x2, 8(sp)
+       ld    x1, 0(sp)
+       addi  sp, sp, (8*34)
+.endm
+
+.balign 4
+.global exception_vectors
+exception_vectors:
+       save_context
+       move  a0, sp
+       call  route_exception
+       restore_context
+       sret
index 2bb33a8ac03c25f622ec6dc21430529b8b128a9d..e8211f5d68637ea7d0b9916e7931a23c0573dbe0 100644 (file)
 
 #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN    0xac0000
 
+static vm_vaddr_t exception_handlers;
+
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
+{
+       unsigned long value = 0;
+       int ret;
+
+       ret = __vcpu_get_reg(vcpu, ext, &value);
+
+       return !ret && !!value;
+}
+
 static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
 {
        return (v + vm->page_size) & ~(vm->page_size - 1);
@@ -277,8 +289,12 @@ static void __aligned(16) guest_unexp_trap(void)
                  0, 0, 0, 0, 0, 0);
 }
 
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
 {
        int r;
        size_t stack_size;
@@ -312,7 +328,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
 
        /* Setup stack pointer and program counter of guest */
        vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size);
-       vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
+
+       /* Setup sscratch for guest_get_vcpuid() */
+       vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id);
 
        /* Setup default exception vector of guest */
        vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap);
@@ -364,8 +382,80 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
        va_end(ap);
 }
 
+void kvm_exit_unexpected_exception(int vector, int ec)
+{
+       ucall(UCALL_UNHANDLED, 2, vector, ec);
+}
+
 void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
 {
+       struct ucall uc;
+
+       if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
+               TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+                       uc.args[0], uc.args[1]);
+       }
+}
+
+struct handlers {
+       exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
+};
+
+void route_exception(struct ex_regs *regs)
+{
+       struct handlers *handlers = (struct handlers *)exception_handlers;
+       int vector = 0, ec;
+
+       ec = regs->cause & ~CAUSE_IRQ_FLAG;
+       if (ec >= NR_EXCEPTIONS)
+               goto unexpected_exception;
+
+       /* Use the same handler for all the interrupts */
+       if (regs->cause & CAUSE_IRQ_FLAG) {
+               vector = 1;
+               ec = 0;
+       }
+
+       if (handlers && handlers->exception_handlers[vector][ec])
+               return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+       return kvm_exit_unexpected_exception(vector, ec);
+}
+
+void vcpu_init_vector_tables(struct kvm_vcpu *vcpu)
+{
+       extern char exception_vectors;
+
+       vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)&exception_vectors);
+}
+
+void vm_init_vector_tables(struct kvm_vm *vm)
+{
+       vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+                                  vm->page_size, MEM_REGION_DATA);
+
+       *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler)
+{
+       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+       assert(vector < NR_EXCEPTIONS);
+       handlers->exception_handlers[0][vector] = handler;
+}
+
+void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler)
+{
+       struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+       handlers->exception_handlers[1][0] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+       return csr_read(CSR_SSCRATCH);
 }
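
Taken together, a test wires the new exception plumbing up roughly as follows (the riscv arch_timer test added below does exactly this):

	vm_init_vector_tables(vm);                      /* allocate the handler table */
	vm_install_interrupt_handler(vm, irq_handler);  /* one handler for all IRQs   */
	vcpu_init_vector_tables(vcpu);                  /* point stvec at the vectors */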
 
 struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
index f6d227892cbcfc88cc97abcc9a0d78b2aa38f459..4ad4492eea1d96f88a544ab510300ed43e2934f1 100644 (file)
@@ -155,15 +155,18 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
        virt_dump_region(stream, vm, indent, vm->pgd);
 }
 
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       vcpu->run->psw_addr = (uintptr_t)guest_code;
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
 {
        size_t stack_size =  DEFAULT_STACK_PGS * getpagesize();
        uint64_t stack_vaddr;
        struct kvm_regs regs;
        struct kvm_sregs sregs;
        struct kvm_vcpu *vcpu;
-       struct kvm_run *run;
 
        TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
                    vm->page_size);
@@ -184,9 +187,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
        sregs.crs[1] = vm->pgd | 0xf;           /* Primary region table */
        vcpu_sregs_set(vcpu, &sregs);
 
-       run = vcpu->run;
-       run->psw_mask = 0x0400000180000000ULL;  /* DAT enabled + 64 bit mode */
-       run->psw_addr = (uintptr_t)guest_code;
+       vcpu->run->psw_mask = 0x0400000180000000ULL;  /* DAT enabled + 64 bit mode */
 
        return vcpu;
 }
index 88cb6b84e6f31009e8fc61e2ae0b69b9728e0ef1..cfed9d26cc71b06a9c4e52c89a52b079944d3a49 100644 (file)
@@ -202,7 +202,7 @@ static sparsebit_num_t node_num_set(struct node *nodep)
 /* Returns a pointer to the node that describes the
  * lowest bit index.
  */
-static struct node *node_first(struct sparsebit *s)
+static struct node *node_first(const struct sparsebit *s)
 {
        struct node *nodep;
 
@@ -216,7 +216,7 @@ static struct node *node_first(struct sparsebit *s)
  * lowest bit index > the index of the node pointed to by np.
  * Returns NULL if no node with a higher index exists.
  */
-static struct node *node_next(struct sparsebit *s, struct node *np)
+static struct node *node_next(const struct sparsebit *s, struct node *np)
 {
        struct node *nodep = np;
 
@@ -244,7 +244,7 @@ static struct node *node_next(struct sparsebit *s, struct node *np)
  * highest index < the index of the node pointed to by np.
  * Returns NULL if no node with a lower index exists.
  */
-static struct node *node_prev(struct sparsebit *s, struct node *np)
+static struct node *node_prev(const struct sparsebit *s, struct node *np)
 {
        struct node *nodep = np;
 
@@ -273,7 +273,7 @@ static struct node *node_prev(struct sparsebit *s, struct node *np)
  * subtree and duplicates the bit settings to the newly allocated nodes.
  * Returns the newly allocated copy of subtree.
  */
-static struct node *node_copy_subtree(struct node *subtree)
+static struct node *node_copy_subtree(const struct node *subtree)
 {
        struct node *root;
 
@@ -307,7 +307,7 @@ static struct node *node_copy_subtree(struct node *subtree)
  * index is within the bits described by the mask bits or the number of
  * contiguous bits set after the mask.  Returns NULL if there is no such node.
  */
-static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+static struct node *node_find(const struct sparsebit *s, sparsebit_idx_t idx)
 {
        struct node *nodep;
 
@@ -393,7 +393,7 @@ static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
 }
 
 /* Returns whether all the bits in the sparsebit array are set.  */
-bool sparsebit_all_set(struct sparsebit *s)
+bool sparsebit_all_set(const struct sparsebit *s)
 {
        /*
         * If any nodes there must be at least one bit set.  Only case
@@ -775,7 +775,7 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
 /* Returns whether the bit at the index given by idx, within the
  * sparsebit array is set or not.
  */
-bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+bool sparsebit_is_set(const struct sparsebit *s, sparsebit_idx_t idx)
 {
        struct node *nodep;
 
@@ -921,7 +921,7 @@ static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
  * used by test cases after they detect an unexpected condition, as a means
  * to capture diagnostic information.
  */
-static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+static void sparsebit_dump_internal(FILE *stream, const struct sparsebit *s,
        unsigned int indent)
 {
        /* Dump the contents of s */
@@ -969,7 +969,7 @@ void sparsebit_free(struct sparsebit **sbitp)
  * sparsebit_alloc().  It can though already have bits set, which
  * if different from src will be cleared.
  */
-void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+void sparsebit_copy(struct sparsebit *d, const struct sparsebit *s)
 {
        /* First clear any bits already set in the destination */
        sparsebit_clear_all(d);
@@ -981,7 +981,7 @@ void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
 }
 
 /* Returns whether num consecutive bits starting at idx are all set.  */
-bool sparsebit_is_set_num(struct sparsebit *s,
+bool sparsebit_is_set_num(const struct sparsebit *s,
        sparsebit_idx_t idx, sparsebit_num_t num)
 {
        sparsebit_idx_t next_cleared;
@@ -1005,14 +1005,14 @@ bool sparsebit_is_set_num(struct sparsebit *s,
 }
 
 /* Returns whether the bit at the index given by idx is cleared.  */
-bool sparsebit_is_clear(struct sparsebit *s,
+bool sparsebit_is_clear(const struct sparsebit *s,
        sparsebit_idx_t idx)
 {
        return !sparsebit_is_set(s, idx);
 }
 
 /* Returns whether num consecutive bits starting at idx are all cleared.  */
-bool sparsebit_is_clear_num(struct sparsebit *s,
+bool sparsebit_is_clear_num(const struct sparsebit *s,
        sparsebit_idx_t idx, sparsebit_num_t num)
 {
        sparsebit_idx_t next_set;
@@ -1041,13 +1041,13 @@ bool sparsebit_is_clear_num(struct sparsebit *s,
  * value.  Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
  * to determine if the sparsebit array has any bits set.
  */
-sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+sparsebit_num_t sparsebit_num_set(const struct sparsebit *s)
 {
        return s->num_set;
 }
 
 /* Returns whether any bit is set in the sparsebit array.  */
-bool sparsebit_any_set(struct sparsebit *s)
+bool sparsebit_any_set(const struct sparsebit *s)
 {
        /*
         * Nodes only describe set bits.  If any nodes then there
@@ -1070,20 +1070,20 @@ bool sparsebit_any_set(struct sparsebit *s)
 }
 
 /* Returns whether all the bits in the sparsebit array are cleared.  */
-bool sparsebit_all_clear(struct sparsebit *s)
+bool sparsebit_all_clear(const struct sparsebit *s)
 {
        return !sparsebit_any_set(s);
 }
 
 /* Returns whether any bit in the sparsebit array is cleared.  */
-bool sparsebit_any_clear(struct sparsebit *s)
+bool sparsebit_any_clear(const struct sparsebit *s)
 {
        return !sparsebit_all_set(s);
 }
 
 /* Returns the index of the first set bit.  Abort if no bits are set.
  */
-sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+sparsebit_idx_t sparsebit_first_set(const struct sparsebit *s)
 {
        struct node *nodep;
 
@@ -1097,7 +1097,7 @@ sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
 /* Returns the index of the first cleared bit.  Abort if
  * no bits are cleared.
  */
-sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *s)
 {
        struct node *nodep1, *nodep2;
 
@@ -1151,7 +1151,7 @@ sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
 /* Returns index of next bit set within s after the index given by prev.
  * Returns 0 if there are no bits after prev that are set.
  */
-sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_set(const struct sparsebit *s,
        sparsebit_idx_t prev)
 {
        sparsebit_idx_t lowest_possible = prev + 1;
@@ -1244,7 +1244,7 @@ sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
 /* Returns index of next bit cleared within s after the index given by prev.
  * Returns 0 if there are no bits after prev that are cleared.
  */
-sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *s,
        sparsebit_idx_t prev)
 {
        sparsebit_idx_t lowest_possible = prev + 1;
@@ -1300,7 +1300,7 @@ sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
  * and returns the index of the first sequence of num consecutively set
  * bits.  Returns a value of 0 if no such sequence exists.
  */
-sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *s,
        sparsebit_idx_t start, sparsebit_num_t num)
 {
        sparsebit_idx_t idx;
@@ -1335,7 +1335,7 @@ sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
  * and returns the index of the first sequence of num consecutively cleared
  * bits.  Returns a value of 0 if no such sequence exists.
  */
-sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *s,
        sparsebit_idx_t start, sparsebit_num_t num)
 {
        sparsebit_idx_t idx;
@@ -1583,7 +1583,7 @@ static size_t display_range(FILE *stream, sparsebit_idx_t low,
  * contiguous bits.  This is done because '-' is used to specify command-line
  * options, and sometimes ranges are specified as command-line arguments.
  */
-void sparsebit_dump(FILE *stream, struct sparsebit *s,
+void sparsebit_dump(FILE *stream, const struct sparsebit *s,
        unsigned int indent)
 {
        size_t current_line_len = 0;
@@ -1681,7 +1681,7 @@ void sparsebit_dump(FILE *stream, struct sparsebit *s,
  * s.  On error, diagnostic information is printed to stderr and
  * abort is called.
  */
-void sparsebit_validate_internal(struct sparsebit *s)
+void sparsebit_validate_internal(const struct sparsebit *s)
 {
        bool error_detected = false;
        struct node *nodep, *prev = NULL;
index 816a3fa109bfb6b608eadeda9f0827687ebbe1b5..f5af65a41c296e77f1502f32a8813fcf77dbdf20 100644 (file)
@@ -29,7 +29,8 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
        vm_vaddr_t vaddr;
        int i;
 
-       vaddr = __vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, MEM_REGION_DATA);
+       vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR,
+                                     MEM_REGION_DATA);
        hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr);
        memset(hdr, 0, sizeof(*hdr));
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86_64/pmu.c
new file mode 100644 (file)
index 0000000..f31f042
--- /dev/null
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#include <stdint.h>
+
+#include <linux/kernel.h>
+
+#include "kvm_util.h"
+#include "pmu.h"
+
+const uint64_t intel_pmu_arch_events[] = {
+       INTEL_ARCH_CPU_CYCLES,
+       INTEL_ARCH_INSTRUCTIONS_RETIRED,
+       INTEL_ARCH_REFERENCE_CYCLES,
+       INTEL_ARCH_LLC_REFERENCES,
+       INTEL_ARCH_LLC_MISSES,
+       INTEL_ARCH_BRANCHES_RETIRED,
+       INTEL_ARCH_BRANCHES_MISPREDICTED,
+       INTEL_ARCH_TOPDOWN_SLOTS,
+};
+kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
+
+const uint64_t amd_pmu_zen_events[] = {
+       AMD_ZEN_CORE_CYCLES,
+       AMD_ZEN_INSTRUCTIONS_RETIRED,
+       AMD_ZEN_BRANCHES_RETIRED,
+       AMD_ZEN_BRANCHES_MISPREDICTED,
+};
+kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
index f639b3e062e3a328165bfb6ed32f606cf1f48e76..74a4c736c9ae1e9849c22199af984dc75872d9ef 100644 (file)
@@ -9,6 +9,7 @@
 #include "test_util.h"
 #include "kvm_util.h"
 #include "processor.h"
+#include "sev.h"
 
 #ifndef NUM_INTERRUPTS
 #define NUM_INTERRUPTS 256
@@ -157,6 +158,8 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
 {
        uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
 
+       paddr = vm_untag_gpa(vm, paddr);
+
        if (!(*pte & PTE_PRESENT_MASK)) {
                *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
                if (current_level == target_level)
@@ -200,6 +203,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
                    "Physical address beyond maximum supported,\n"
                    "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
                    paddr, vm->max_gfn, vm->page_size);
+       TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
+                   "Unexpected bits in paddr: %lx", paddr);
 
        /*
         * Allocate upper level page tables, if not already present.  Return
@@ -222,6 +227,15 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
        TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
                    "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
        *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+
+       /*
+        * Neither SEV nor TDX supports shared page tables, so only the final
+        * leaf PTE needs the C/S-bit set manually.
+        */
+       if (vm_is_gpa_protected(vm, paddr))
+               *pte |= vm->arch.c_bit;
+       else
+               *pte |= vm->arch.s_bit;
 }
 
 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -265,6 +279,9 @@ uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
 {
        uint64_t *pml4e, *pdpe, *pde;
 
+       TEST_ASSERT(!vm->arch.is_pt_protected,
+                   "Walking page tables of protected guests is impossible");
+
        TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
                    "Invalid PG_LEVEL_* '%d'", *level);
 
@@ -496,7 +513,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
         * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
         * address bits to be zero.
         */
-       return PTE_GET_PA(*pte) | (gva & ~HUGEPAGE_MASK(level));
+       return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
 }
 
 static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
@@ -560,10 +577,23 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
        vm_create_irqchip(vm);
        sync_global_to_guest(vm, host_cpu_is_intel);
        sync_global_to_guest(vm, host_cpu_is_amd);
+
+       if (vm->subtype == VM_SUBTYPE_SEV)
+               sev_vm_init(vm);
+       else if (vm->subtype == VM_SUBTYPE_SEV_ES)
+               sev_es_vm_init(vm);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+       struct kvm_regs regs;
+
+       vcpu_regs_get(vcpu, &regs);
+       regs.rip = (unsigned long) guest_code;
+       vcpu_regs_set(vcpu, &regs);
 }
 
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
-                                 void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
 {
        struct kvm_mp_state mp_state;
        struct kvm_regs regs;
@@ -597,7 +627,6 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
        vcpu_regs_get(vcpu, &regs);
        regs.rflags = regs.rflags | 0x2;
        regs.rsp = stack_vaddr;
-       regs.rip = (unsigned long) guest_code;
        vcpu_regs_set(vcpu, &regs);
 
        /* Setup the MP state */
@@ -752,12 +781,21 @@ void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
        vcpu_set_cpuid(vcpu);
 }
 
-void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr)
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+                            struct kvm_x86_cpu_property property,
+                            uint32_t value)
 {
-       struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0x80000008);
+       struct kvm_cpuid_entry2 *entry;
+
+       entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
+
+       (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
+       (&entry->eax)[property.reg] |= value << property.lo_bit;
 
-       entry->eax = (entry->eax & ~0xff) | maxphyaddr;
        vcpu_set_cpuid(vcpu);
+
+       /* Sanity check that @value doesn't exceed the bounds in any way. */
+       TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
 }
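
Call sites that previously used vcpu_set_cpuid_maxphyaddr() now express the same operation generically, e.g. (assuming the pre-existing X86_PROPERTY_MAX_PHY_ADDR definition):

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, maxphyaddr);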
 
 void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
@@ -1041,6 +1079,14 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
        }
 }
 
+void kvm_init_vm_address_properties(struct kvm_vm *vm)
+{
+       if (vm->subtype == VM_SUBTYPE_SEV || vm->subtype == VM_SUBTYPE_SEV_ES) {
+               vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
+               vm->gpa_tag_mask = vm->arch.c_bit;
+       }
+}
+
 static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
                          int dpl, unsigned short selector)
 {
diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c
new file mode 100644 (file)
index 0000000..e248d33
--- /dev/null
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "sev.h"
+
+/*
+ * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
+ * -1 would then cause an underflow back to 2**64 - 1. This is expected and
+ * correct.
+ *
+ * If the last range in the sparsebit is [x, y] and we try to iterate,
+ * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
+ * and find the first range, but that's correct because the condition
+ * expression would cause us to quit the loop.
+ */
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+{
+       const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
+       const vm_paddr_t gpa_base = region->region.guest_phys_addr;
+       const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
+       sparsebit_idx_t i, j;
+
+       if (!sparsebit_any_set(protected_phy_pages))
+               return;
+
+       sev_register_encrypted_memory(vm, region);
+
+       sparsebit_for_each_set_range(protected_phy_pages, i, j) {
+               const uint64_t size = (j - i + 1) * vm->page_size;
+               const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
+
+               sev_launch_update_data(vm, gpa_base + offset, size);
+       }
+}
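
A worked instance of the range arithmetic (illustrative numbers only): with page_shift == 12 and gpa_base == 0x100000, lowest_page_in_region is 0x100, so a set range i = 0x100, j = 0x103 encrypts 16 KiB at the region base:

	size   = (0x103 - 0x100 + 1) * 4096;	/* 0x4000 */
	offset = (0x100 - 0x100) * 4096;	/* 0x0    */
	sev_launch_update_data(vm, 0x100000 + 0x0, 0x4000);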
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
+{
+       struct kvm_sev_launch_start launch_start = {
+               .policy = policy,
+       };
+       struct userspace_mem_region *region;
+       struct kvm_sev_guest_status status;
+       int ctr;
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+
+       TEST_ASSERT_EQ(status.policy, policy);
+       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
+
+       hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+               encrypt_region(vm, region);
+
+       if (policy & SEV_POLICY_ES)
+               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+       vm->arch.is_pt_protected = true;
+}
+
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
+{
+       struct kvm_sev_launch_measure launch_measure;
+       struct kvm_sev_guest_status guest_status;
+
+       launch_measure.len = 256;
+       launch_measure.uaddr = (__u64)measurement;
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
+       TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
+}
+
+void sev_vm_launch_finish(struct kvm_vm *vm)
+{
+       struct kvm_sev_guest_status status;
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+       TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
+                   status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
+                   "Unexpected guest state: %d", status.state);
+
+       vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
+
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+       TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
+}
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+                                          struct kvm_vcpu **cpu)
+{
+       struct vm_shape shape = {
+               .type = VM_TYPE_DEFAULT,
+               .mode = VM_MODE_DEFAULT,
+               .subtype = policy & SEV_POLICY_ES ? VM_SUBTYPE_SEV_ES :
+                                                   VM_SUBTYPE_SEV,
+       };
+       struct kvm_vm *vm;
+       struct kvm_vcpu *cpus[1];
+       uint8_t measurement[512];
+
+       vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
+       *cpu = cpus[0];
+
+       sev_vm_launch(vm, policy);
+
+       /* TODO: Validate the measurement is as expected. */
+       sev_vm_launch_measure(vm, measurement);
+
+       sev_vm_launch_finish(vm);
+
+       return vm;
+}
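
Typical test usage, as a sketch (the policy and guest_code are test-specific):

	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	vm = vm_sev_create_with_one_vcpu(SEV_POLICY_NO_DBG, guest_code, &vcpu);
	vcpu_run(vcpu);
	kvm_vm_free(vm);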
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
new file mode 100644 (file)
index 0000000..e22848f
--- /dev/null
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the riscv64 sstc timer IRQ functionality
+ *
+ * The test validates the sstc timer IRQs using vstimecmp registers.
+ * It's ported from the aarch64 arch_timer test.
+ *
+ * Copyright (c) 2024, Intel Corporation.
+ */
+
+#define _GNU_SOURCE
+
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+
+static int timer_irq = IRQ_S_TIMER;
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+       uint64_t xcnt, xcnt_diff_us, cmp;
+       unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
+       uint32_t cpu = guest_get_vcpuid();
+       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+       timer_irq_disable();
+
+       xcnt = timer_get_cycles();
+       cmp = timer_get_cmp();
+       xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+       /* Make sure we are dealing with the correct timer IRQ */
+       GUEST_ASSERT_EQ(intid, timer_irq);
+
+       __GUEST_ASSERT(xcnt >= cmp,
+                       "xcnt = 0x%"PRIx64", cmp = 0x%"PRIx64", xcnt_diff_us = 0x%" PRIx64,
+                       xcnt, cmp, xcnt_diff_us);
+
+       WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_run(struct test_vcpu_shared_data *shared_data)
+{
+       uint32_t irq_iter, config_iter;
+
+       shared_data->nr_iter = 0;
+       shared_data->guest_stage = 0;
+
+       for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+               /* Setup the next interrupt */
+               timer_set_next_cmp_ms(test_args.timer_period_ms);
+               shared_data->xcnt = timer_get_cycles();
+               timer_irq_enable();
+
+               /* Setup a timeout for the interrupt to arrive */
+               udelay(msecs_to_usecs(test_args.timer_period_ms) +
+                       test_args.timer_err_margin_us);
+
+               irq_iter = READ_ONCE(shared_data->nr_iter);
+               __GUEST_ASSERT(config_iter + 1 == irq_iter,
+                               "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+                               "  Guest timer interrupt was not trigged within the specified\n"
+                               "  interval, try to increase the error margin by [-e] option.\n",
+                               config_iter + 1, irq_iter);
+       }
+}
+
+static void guest_code(void)
+{
+       uint32_t cpu = guest_get_vcpuid();
+       struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+       timer_irq_disable();
+       local_irq_enable();
+
+       guest_run(shared_data);
+
+       GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+       struct kvm_vm *vm;
+       int nr_vcpus = test_args.nr_vcpus;
+
+       vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+       __TEST_REQUIRE(__vcpu_has_ext(vcpus[0], RISCV_ISA_EXT_REG(KVM_RISCV_ISA_EXT_SSTC)),
+                                  "SSTC not available, skipping test\n");
+
+       vm_init_vector_tables(vm);
+       vm_install_interrupt_handler(vm, guest_irq_handler);
+
+       for (int i = 0; i < nr_vcpus; i++)
+               vcpu_init_vector_tables(vcpus[i]);
+
+       /* Initialize guest timer frequency. */
+       vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq);
+       sync_global_to_guest(vm, timer_freq);
+       pr_debug("timer_freq: %lu\n", timer_freq);
+
+       /* Make all the test's cmdline args visible to the guest */
+       sync_global_to_guest(vm, test_args);
+
+       return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+       kvm_vm_free(vm);
+}
index 6435e7a6564252fae5e1cbd2275a6fc0c3a7f12f..b882b7b9b78506b04d60a2e6870e036272584f87 100644 (file)
@@ -47,6 +47,7 @@ bool filter_reg(__u64 reg)
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC:
@@ -73,6 +74,7 @@ bool filter_reg(__u64 reg)
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSED:
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSH:
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKT:
+       case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO:
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB:
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC:
        case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH:
@@ -123,15 +125,6 @@ bool check_reject_set(int err)
        return err == EINVAL;
 }
 
-static bool vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext_id)
-{
-       int ret;
-       unsigned long value;
-
-       ret = __vcpu_get_reg(vcpu, ext_id, &value);
-       return (ret) ? false : !!value;
-}
-
 void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
 {
        unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
@@ -176,7 +169,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
                __vcpu_set_reg(vcpu, feature, 1);
 
                /* Double check whether the desired extension was enabled */
-               __TEST_REQUIRE(vcpu_has_ext(vcpu, feature),
+               __TEST_REQUIRE(__vcpu_has_ext(vcpu, feature),
                               "%s not available, skipping tests", s->name);
        }
 }
@@ -419,6 +412,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
                KVM_ISA_EXT_ARR(SVINVAL),
                KVM_ISA_EXT_ARR(SVNAPOT),
                KVM_ISA_EXT_ARR(SVPBMT),
+               KVM_ISA_EXT_ARR(ZACAS),
                KVM_ISA_EXT_ARR(ZBA),
                KVM_ISA_EXT_ARR(ZBB),
                KVM_ISA_EXT_ARR(ZBC),
@@ -445,6 +439,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
                KVM_ISA_EXT_ARR(ZKSED),
                KVM_ISA_EXT_ARR(ZKSH),
                KVM_ISA_EXT_ARR(ZKT),
+               KVM_ISA_EXT_ARR(ZTSO),
                KVM_ISA_EXT_ARR(ZVBB),
                KVM_ISA_EXT_ARR(ZVBC),
                KVM_ISA_EXT_ARR(ZVFH),
@@ -940,6 +935,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC);
 KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
 KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
 KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
+KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
 KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
 KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
 KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC);
@@ -966,6 +962,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zkr, ZKR);
 KVM_ISA_EXT_SIMPLE_CONFIG(zksed, ZKSED);
 KVM_ISA_EXT_SIMPLE_CONFIG(zksh, ZKSH);
 KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT);
+KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO);
 KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB);
 KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC);
 KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH);
@@ -993,6 +990,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
        &config_svinval,
        &config_svnapot,
        &config_svpbmt,
+       &config_zacas,
        &config_zba,
        &config_zbb,
        &config_zbc,
@@ -1019,6 +1017,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
        &config_zksed,
        &config_zksh,
        &config_zkt,
+       &config_ztso,
        &config_zvbb,
        &config_zvbc,
        &config_zvfh,
index b6da8f71ea191e82a3cc68632bf61b6938f60c73..48cb910e660de08810cbf1324ebce571eb2e7d6a 100644 (file)
@@ -515,6 +515,8 @@ static __uint128_t rotate(int size, __uint128_t val, int amount)
 
        amount = (amount + bits) % bits;
        val = cut_to_size(size, val);
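+       /* Rotating by 0 would shift by the full width, which is undefined. */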
+       if (!amount)
+               return val;
        return (val << (bits - amount)) | (val >> amount);
 }
 
index 0f728f05ea82f7dc8051ed812cfde79639ae2385..f3c2239228b10e3ba7cbfe9f8406cfbbe5f58825 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/stringify.h>
 #include <stdint.h>
 
+#include "kvm_test_harness.h"
 #include "apic.h"
 #include "test_util.h"
 #include "kvm_util.h"
@@ -83,6 +84,8 @@ static void guest_main(void)
        GUEST_DONE();
 }
 
+KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
+
 static void enter_guest(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *run = vcpu->run;
@@ -103,14 +106,11 @@ static void enter_guest(struct kvm_vcpu *vcpu)
        }
 }
 
-static void test_fix_hypercall(bool disable_quirk)
+static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
 {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+       struct kvm_vm *vm = vcpu->vm;
 
-       vm_init_descriptor_tables(vcpu->vm);
+       vm_init_descriptor_tables(vm);
        vcpu_init_descriptor_tables(vcpu);
        vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
 
@@ -126,10 +126,19 @@ static void test_fix_hypercall(bool disable_quirk)
        enter_guest(vcpu);
 }
 
-int main(void)
+KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
+{
+       test_fix_hypercall(vcpu, false);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
+{
+       test_fix_hypercall(vcpu, true);
+}
+
+int main(int argc, char *argv[])
 {
        TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
 
-       test_fix_hypercall(false);
-       test_fix_hypercall(true);
+       return test_harness_run(argc, argv);
 }
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
new file mode 100644 (file)
index 0000000..29609b5
--- /dev/null
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <x86intrin.h>
+
+#include "pmu.h"
+#include "processor.h"
+
+/* Number of LOOP instructions for the guest measurement payload. */
+#define NUM_BRANCHES           10
+/*
+ * Number of "extra" instructions that will be counted, i.e. the number of
+ * instructions that are needed to set up the loop and then disable the
+ * counter.  1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
+ */
+#define NUM_EXTRA_INSNS                7
+#define NUM_INSNS_RETIRED      (NUM_BRANCHES + NUM_EXTRA_INSNS)
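+/*
+ * Note, "loop ." branches back to itself, so with ECX primed to NUM_BRANCHES
+ * the LOOP instruction alone retires NUM_BRANCHES instructions, all of which
+ * are branches.
+ */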
+
+static uint8_t kvm_pmu_version;
+static bool kvm_has_perf_caps;
+static bool is_forced_emulation_enabled;
+
+static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+                                                 void *guest_code,
+                                                 uint8_t pmu_version,
+                                                 uint64_t perf_capabilities)
+{
+       struct kvm_vm *vm;
+
+       vm = vm_create_with_one_vcpu(vcpu, guest_code);
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(*vcpu);
+
+       sync_global_to_guest(vm, kvm_pmu_version);
+       sync_global_to_guest(vm, is_forced_emulation_enabled);
+
+       /*
+        * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
+        * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
+        */
+       if (kvm_has_perf_caps)
+               vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+       vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
+       return vm;
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+       struct ucall uc;
+
+       do {
+               vcpu_run(vcpu);
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       break;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+                       break;
+               case UCALL_PRINTF:
+                       pr_info("%s", uc.buffer);
+                       break;
+               case UCALL_DONE:
+                       break;
+               default:
+                       TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+               }
+       } while (uc.cmd != UCALL_DONE);
+}
+
+static uint8_t guest_get_pmu_version(void)
+{
+       /*
+        * Return the effective PMU version, i.e. the minimum between what KVM
+        * supports and what is enumerated to the guest.  The host deliberately
+        * advertises a PMU version to the guest beyond what is actually
+        * supported by KVM to verify KVM doesn't freak out and do something
+        * bizarre with an architecturally valid, but unsupported, version.
+        */
+       return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
+}
+
+/*
+ * If an architectural event is supported and guaranteed to generate at least
+ * one "hit, assert that its count is non-zero.  If an event isn't supported or
+ * the test can't guarantee the associated action will occur, then all bets are
+ * off regarding the count, i.e. no checks can be done.
+ *
+ * Sanity check that in all cases, the event doesn't count when it's disabled,
+ * and that KVM correctly emulates the write of an arbitrary value.
+ */
+static void guest_assert_event_count(uint8_t idx,
+                                    struct kvm_x86_pmu_feature event,
+                                    uint32_t pmc, uint32_t pmc_msr)
+{
+       uint64_t count;
+
+       count = _rdpmc(pmc);
+       if (!this_pmu_has(event))
+               goto sanity_checks;
+
+       switch (idx) {
+       case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
+               GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+               break;
+       case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
+               GUEST_ASSERT_EQ(count, NUM_BRANCHES);
+               break;
+       case INTEL_ARCH_LLC_REFERENCES_INDEX:
+       case INTEL_ARCH_LLC_MISSES_INDEX:
+               if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+                   !this_cpu_has(X86_FEATURE_CLFLUSH))
+                       break;
+               fallthrough;
+       case INTEL_ARCH_CPU_CYCLES_INDEX:
+       case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+               GUEST_ASSERT_NE(count, 0);
+               break;
+       case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
+               GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+               break;
+       default:
+               break;
+       }
+
+sanity_checks:
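+       /*
+        * The measured sequence disabled the counter, so executing more
+        * instructions must not change the count, and a write of an arbitrary
+        * value must be read back verbatim.
+        */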
+       __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+       GUEST_ASSERT_EQ(_rdpmc(pmc), count);
+
+       wrmsr(pmc_msr, 0xdead);
+       GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
+}
+
+/*
+ * Enable and disable the PMC in a monolithic asm blob to ensure that the
+ * compiler can't insert _any_ code into the measured sequence.  Note, ECX
+ * doesn't need to be clobbered as the input value, @pmc_msr, is restored
+ * before the end of the sequence.
+ *
+ * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
+ * start of the loop to force LLC references and misses, i.e. to allow testing
+ * that those events actually count.
+ *
+ * If forced emulation is enabled (and specified), force emulation on a subset
+ * of the measured code to verify that KVM correctly emulates instructions and
+ * branches retired events in conjunction with hardware also counting said
+ * events.
+ */
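+/*
+ * Note, EDI carries a second copy of @_msr so that the trailing WRMSR, with
+ * EAX/EDX zeroed, can stop the counter after the measured sequence.
+ */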
+#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)                                \
+do {                                                                           \
+       __asm__ __volatile__("wrmsr\n\t"                                        \
+                            clflush "\n\t"                                     \
+                            "mfence\n\t"                                       \
+                            "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \
+                            FEP "loop .\n\t"                                   \
+                            FEP "mov %%edi, %%ecx\n\t"                         \
+                            FEP "xor %%eax, %%eax\n\t"                         \
+                            FEP "xor %%edx, %%edx\n\t"                         \
+                            "wrmsr\n\t"                                        \
+                            :: "a"((uint32_t)_value), "d"(_value >> 32),       \
+                               "c"(_msr), "D"(_msr)                            \
+       );                                                                      \
+} while (0)
+
+#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+do {                                                                           \
+       wrmsr(_pmc_msr, 0);                                                     \
+                                                                               \
+       if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))                               \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP);   \
+       else if (this_cpu_has(X86_FEATURE_CLFLUSH))                             \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP);      \
+       else                                                                    \
+               GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);             \
+                                                                               \
+       guest_assert_event_count(_idx, _event, _pmc, _pmc_msr);                 \
+} while (0)
+
+static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
+                                   uint32_t pmc, uint32_t pmc_msr,
+                                   uint32_t ctrl_msr, uint64_t ctrl_msr_value)
+{
+       GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+
+       if (is_forced_emulation_enabled)
+               GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
+}
+
+#define X86_PMU_FEATURE_NULL                                           \
+({                                                                     \
+       struct kvm_x86_pmu_feature feature = {};                        \
+                                                                       \
+       feature;                                                        \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
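+       /* An all-zeros feature is the "null" sentinel; type-pun to check. */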
+       return !(*(u64 *)&event);
+}
+
+static void guest_test_arch_event(uint8_t idx)
+{
+       const struct {
+               struct kvm_x86_pmu_feature gp_event;
+               struct kvm_x86_pmu_feature fixed_event;
+       } intel_event_to_feature[] = {
+               [INTEL_ARCH_CPU_CYCLES_INDEX]            = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+               [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]  = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+               /*
+                * Note, the fixed counter for reference cycles is NOT the same
+                * as the general purpose architectural event.  The fixed counter
+                * explicitly counts at the same frequency as the TSC, whereas
+                * the GP event counts at a fixed, but uarch specific, frequency.
+                * Bundle them here for simplicity.
+                */
+               [INTEL_ARCH_REFERENCE_CYCLES_INDEX]      = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+               [INTEL_ARCH_LLC_REFERENCES_INDEX]        = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_LLC_MISSES_INDEX]            = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_BRANCHES_RETIRED_INDEX]      = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+               [INTEL_ARCH_TOPDOWN_SLOTS_INDEX]         = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+       };
+
+       uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+       uint32_t pmu_version = guest_get_pmu_version();
+       /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
+       bool guest_has_perf_global_ctrl = pmu_version >= 2;
+       struct kvm_x86_pmu_feature gp_event, fixed_event;
+       uint32_t base_pmc_msr;
+       unsigned int i;
+
+       /* The host side shouldn't invoke this without a guest PMU. */
+       GUEST_ASSERT(pmu_version);
+
+       if (this_cpu_has(X86_FEATURE_PDCM) &&
+           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+               base_pmc_msr = MSR_IA32_PMC0;
+       else
+               base_pmc_msr = MSR_IA32_PERFCTR0;
+
+       gp_event = intel_event_to_feature[idx].gp_event;
+       GUEST_ASSERT_EQ(idx, gp_event.f.bit);
+
+       GUEST_ASSERT(nr_gp_counters);
+
+       for (i = 0; i < nr_gp_counters; i++) {
+               uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+                                   ARCH_PERFMON_EVENTSEL_ENABLE |
+                                   intel_pmu_arch_events[idx];
+
+               wrmsr(MSR_P6_EVNTSEL0 + i, 0);
+               if (guest_has_perf_global_ctrl)
+                       wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
+
+               __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+                                       MSR_P6_EVNTSEL0 + i, eventsel);
+       }
+
+       if (!guest_has_perf_global_ctrl)
+               return;
+
+       fixed_event = intel_event_to_feature[idx].fixed_event;
+       if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
+               return;
+
+       i = fixed_event.f.bit;
+
+       wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+
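+       /* RDPMC addresses fixed counters via the INTEL_RDPMC_FIXED flag. */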
+       __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+                               MSR_CORE_PERF_FIXED_CTR0 + i,
+                               MSR_CORE_PERF_GLOBAL_CTRL,
+                               FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+}
+
+static void guest_test_arch_events(void)
+{
+       uint8_t i;
+
+       for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
+               guest_test_arch_event(i);
+
+       GUEST_DONE();
+}
+
+static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
+                            uint8_t length, uint8_t unavailable_mask)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       /* Testing arch events requires a vPMU (there are no negative tests). */
+       if (!pmu_version)
+               return;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
+                               length);
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
+                               unavailable_mask);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+/*
+ * Limit testing to MSRs that are actually defined by Intel (in the SDM).  MSRs
+ * that aren't defined counter MSRs *probably* don't exist, but there's no
+ * guarantee that currently undefined MSR indices won't be used for something
+ * other than PMCs in the future.
+ */
+#define MAX_NR_GP_COUNTERS     8
+#define MAX_NR_FIXED_COUNTERS  3
+
+#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)              \
+__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,                      \
+              "Expected %s on " #insn "(0x%x), got vector %u",                 \
+              expect_gp ? "#GP" : "no fault", msr, vector)
+
+#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected_val)                   \
+       __GUEST_ASSERT(val == expected_val,                                     \
+                      "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",    \
+                      msr, expected_val, val);
+
+static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
+                            uint64_t expected_val)
+{
+       uint8_t vector;
+       uint64_t val;
+
+       vector = rdpmc_safe(rdpmc_idx, &val);
+       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+       if (expect_success)
+               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+
+       if (!is_forced_emulation_enabled)
+               return;
+
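+       /* Repeat via forced emulation to verify KVM's emulated RDPMC matches. */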
+       vector = rdpmc_safe_fep(rdpmc_idx, &val);
+       GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+       if (expect_success)
+               GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+}
+
+static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
+                                uint8_t nr_counters, uint32_t or_mask)
+{
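+       /* "Fast" RDPMC is valid only on non-architectural PMUs (version 0). */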
+       const bool pmu_has_fast_mode = !guest_get_pmu_version();
+       uint8_t i;
+
+       for (i = 0; i < nr_possible_counters; i++) {
+               /*
+                * TODO: Test a value that validates full-width writes and the
+                * width of the counters.
+                */
+               const uint64_t test_val = 0xffff;
+               const uint32_t msr = base_msr + i;
+
+               /*
+                * Fixed counters are supported if the counter is less than the
+                * number of enumerated contiguous counters *or* the counter is
+                * explicitly enumerated in the supported counters mask.
+                */
+               const bool expect_success = i < nr_counters || (or_mask & BIT(i));
+
+               /*
+                * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
+                * unsupported, i.e. doesn't #GP and reads back '0'.
+                */
+               const uint64_t expected_val = expect_success ? test_val : 0;
+               const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
+                                      msr != MSR_P6_PERFCTR1;
+               uint32_t rdpmc_idx;
+               uint8_t vector;
+               uint64_t val;
+
+               vector = wrmsr_safe(msr, test_val);
+               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+
+               vector = rdmsr_safe(msr, &val);
+               GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
+
+               /* On #GP, the result of RDMSR is undefined. */
+               if (!expect_gp)
+                       GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
+
+               /*
+                * Redo the read tests with RDPMC, which has different indexing
+                * semantics and additional capabilities.
+                */
+               rdpmc_idx = i;
+               if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
+                       rdpmc_idx |= INTEL_RDPMC_FIXED;
+
+               guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
+
+               /*
+                * KVM doesn't support non-architectural PMUs, i.e. it should
+                * be impossible to have fast mode RDPMC.  Verify that attempting
+                * to use fast RDPMC always #GPs.
+                */
+               GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
+               rdpmc_idx |= INTEL_RDPMC_FAST;
+               guest_test_rdpmc(rdpmc_idx, false, -1ull);
+
+               vector = wrmsr_safe(msr, 0);
+               GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+       }
+}
+
+static void guest_test_gp_counters(void)
+{
+       uint8_t nr_gp_counters = 0;
+       uint32_t base_msr;
+
+       if (guest_get_pmu_version())
+               nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+
+       if (this_cpu_has(X86_FEATURE_PDCM) &&
+           rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+               base_msr = MSR_IA32_PMC0;
+       else
+               base_msr = MSR_IA32_PERFCTR0;
+
+       guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
+       GUEST_DONE();
+}
+
+static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+                            uint8_t nr_gp_counters)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
+                               nr_gp_counters);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void guest_test_fixed_counters(void)
+{
+       uint64_t supported_bitmask = 0;
+       uint8_t nr_fixed_counters = 0;
+       uint8_t i;
+
+       /* Fixed counters require Architectural vPMU Version 2+. */
+       if (guest_get_pmu_version() >= 2)
+               nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+
+       /*
+        * The supported bitmask for fixed counters was introduced in PMU
+        * version 5.
+        */
+       if (guest_get_pmu_version() >= 5)
+               supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
+
+       guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
+                            nr_fixed_counters, supported_bitmask);
+
+       for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
+               uint8_t vector;
+               uint64_t val;
+
+               if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
+                       vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
+                                           FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+                       __GUEST_ASSERT(vector == GP_VECTOR,
+                                      "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
+
+                       vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
+                                           FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+                       __GUEST_ASSERT(vector == GP_VECTOR,
+                                      "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
+                       continue;
+               }
+
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+               __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+               val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
+
+               GUEST_ASSERT_NE(val, 0);
+       }
+       GUEST_DONE();
+}
+
+static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+                               uint8_t nr_fixed_counters,
+                               uint32_t supported_bitmask)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+
+       vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
+                                        pmu_version, perf_capabilities);
+
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
+                               supported_bitmask);
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
+                               nr_fixed_counters);
+
+       run_vcpu(vcpu);
+
+       kvm_vm_free(vm);
+}
+
+static void test_intel_counters(void)
+{
+       uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+       uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+       uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+       uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+       unsigned int i;
+       uint8_t v, j;
+       uint32_t k;
+
+       const uint64_t perf_caps[] = {
+               0,
+               PMU_CAP_FW_WRITES,
+       };
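+       /*
+        * Run all testcases both without and with full-width writes
+        * (PMU_CAP_FW_WRITES) advertised via PERF_CAPABILITIES.
+        */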
+
+       /*
+        * Test up to PMU v5, which is the current maximum version defined by
+        * Intel, i.e. is the last version that is guaranteed to be backwards
+        * compatible with KVM's existing behavior.
+        */
+       uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
+
+       /*
+        * Detect the existence of events that aren't supported by selftests.
+        * This will (obviously) fail any time the kernel adds support for a
+        * new event, but it's worth paying that price to keep the test fresh.
+        */
+       TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+                   "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
+                   nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+
+       /*
+        * Force iterating over known arch events regardless of whether or not
+        * KVM/hardware supports a given event.
+        */
+       nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+
+       for (v = 0; v <= max_pmu_version; v++) {
+               for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
+                       if (!kvm_has_perf_caps && perf_caps[i])
+                               continue;
+
+                       pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       /*
+                        * To keep the total runtime reasonable, test every
+                        * possible non-zero, non-reserved bitmap combination
+                        * only with the native PMU version and the full bit
+                        * vector length.
+                        */
+                       if (v == pmu_version) {
+                               for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
+                                       test_arch_events(v, perf_caps[i], nr_arch_events, k);
+                       }
+                       /*
+                        * Test single bits for all PMU versions and lengths up
+                        * to the number of events + 1 (to verify KVM doesn't do
+                        * weird things if the guest length is greater than the
+                        * host length).  Explicitly test a mask of '0' and all
+                        * ones, i.e. all events being available and all events
+                        * being unavailable.
+                        */
+                       for (j = 0; j <= nr_arch_events + 1; j++) {
+                               test_arch_events(v, perf_caps[i], j, 0);
+                               test_arch_events(v, perf_caps[i], j, 0xff);
+
+                               for (k = 0; k < nr_arch_events; k++)
+                                       test_arch_events(v, perf_caps[i], j, BIT(k));
+                       }
+
+                       pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       for (j = 0; j <= nr_gp_counters; j++)
+                               test_gp_counters(v, perf_caps[i], j);
+
+                       pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
+                               v, perf_caps[i]);
+                       for (j = 0; j <= nr_fixed_counters; j++) {
+                               for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
+                                       test_fixed_counters(v, perf_caps[i], j, k);
+                       }
+               }
+       }
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_is_pmu_enabled());
+
+       TEST_REQUIRE(host_cpu_is_intel);
+       TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+       TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+       kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+       kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
+       is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+
+       test_intel_counters();
+
+       return 0;
+}
index a3bd54b925abaf10331886385f983001c47ebb06..3c85d1ae989366fd7983c41cf7b02d0a0ba4f8f2 100644 (file)
  */
 
 #define _GNU_SOURCE /* for program_invocation_short_name */
-#include "test_util.h"
+
 #include "kvm_util.h"
+#include "pmu.h"
 #include "processor.h"
-
-/*
- * In lieu of copying perf_event.h into tools...
- */
-#define ARCH_PERFMON_EVENTSEL_OS                       (1ULL << 17)
-#define ARCH_PERFMON_EVENTSEL_ENABLE                   (1ULL << 22)
-
-/* End of stuff taken from perf_event.h. */
-
-/* Oddly, this isn't in perf_event.h. */
-#define ARCH_PERFMON_BRANCHES_RETIRED          5
+#include "test_util.h"
 
 #define NUM_BRANCHES 42
-#define INTEL_PMC_IDX_FIXED            32
-
-/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
-#define MAX_FILTER_EVENTS              300
 #define MAX_TEST_EVENTS                10
 
 #define PMU_EVENT_FILTER_INVALID_ACTION                (KVM_PMU_EVENT_DENY + 1)
 #define PMU_EVENT_FILTER_INVALID_FLAGS                 (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
-#define PMU_EVENT_FILTER_INVALID_NEVENTS               (MAX_FILTER_EVENTS + 1)
-
-/*
- * This is how the event selector and unit mask are stored in an AMD
- * core performance event-select register. Intel's format is similar,
- * but the event selector is only 8 bits.
- */
-#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \
-                             (umask & 0xff) << 8)
-
-/*
- * "Branch instructions retired", from the Intel SDM, volume 3,
- * "Pre-defined Architectural Performance Events."
- */
-
-#define INTEL_BR_RETIRED EVENT(0xc4, 0)
-
-/*
- * "Retired branch instructions", from Processor Programming Reference
- * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
- * Preliminary Processor Programming Reference (PPR) for AMD Family
- * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
- * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
- * B1 Processors Volume 1 of 2.
- */
-
-#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
-
-
-/*
- * "Retired instructions", from Processor Programming Reference
- * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
- * Preliminary Processor Programming Reference (PPR) for AMD Family
- * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
- * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
- * B1 Processors Volume 1 of 2.
- *                      --- and ---
- * "Instructions retired", from the Intel SDM, volume 3,
- * "Pre-defined Architectural Performance Events."
- */
-
-#define INST_RETIRED EVENT(0xc0, 0)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS               (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
 
 struct __kvm_pmu_event_filter {
        __u32 action;
@@ -84,26 +30,28 @@ struct __kvm_pmu_event_filter {
        __u32 fixed_counter_bitmap;
        __u32 flags;
        __u32 pad[4];
-       __u64 events[MAX_FILTER_EVENTS];
+       __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
 };
 
 /*
- * This event list comprises Intel's eight architectural events plus
- * AMD's "retired branch instructions" for Zen[123] (and possibly
- * other AMD CPUs).
+ * This event list comprises Intel's known architectural events, plus AMD's
+ * "retired branch instructions" for Zen1-Zen3 (and* possibly other AMD CPUs).
+ * Note, AMD and Intel use the same encoding for instructions retired.
  */
+kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
+
 static const struct __kvm_pmu_event_filter base_event_filter = {
        .nevents = ARRAY_SIZE(base_event_filter.events),
        .events = {
-               EVENT(0x3c, 0),
-               INST_RETIRED,
-               EVENT(0x3c, 1),
-               EVENT(0x2e, 0x4f),
-               EVENT(0x2e, 0x41),
-               EVENT(0xc4, 0),
-               EVENT(0xc5, 0),
-               EVENT(0xa4, 1),
-               AMD_ZEN_BR_RETIRED,
+               INTEL_ARCH_CPU_CYCLES,
+               INTEL_ARCH_INSTRUCTIONS_RETIRED,
+               INTEL_ARCH_REFERENCE_CYCLES,
+               INTEL_ARCH_LLC_REFERENCES,
+               INTEL_ARCH_LLC_MISSES,
+               INTEL_ARCH_BRANCHES_RETIRED,
+               INTEL_ARCH_BRANCHES_MISPREDICTED,
+               INTEL_ARCH_TOPDOWN_SLOTS,
+               AMD_ZEN_BRANCHES_RETIRED,
        },
 };
 
@@ -165,9 +113,9 @@ static void intel_guest_code(void)
        for (;;) {
                wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
                wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
+                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
                wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
+                     ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
                wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
 
                run_and_measure_loop(MSR_IA32_PMC0);
@@ -189,9 +137,9 @@ static void amd_guest_code(void)
        for (;;) {
                wrmsr(MSR_K7_EVNTSEL0, 0);
                wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
+                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
                wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
-                     ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
+                     ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
 
                run_and_measure_loop(MSR_K7_PERFCTR0);
                GUEST_SYNC(0);
@@ -312,7 +260,7 @@ static void test_amd_deny_list(struct kvm_vcpu *vcpu)
                .action = KVM_PMU_EVENT_DENY,
                .nevents = 1,
                .events = {
-                       EVENT(0x1C2, 0),
+                       RAW_EVENT(0x1C2, 0),
                },
        };
 
@@ -347,9 +295,9 @@ static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
 
        f.action = KVM_PMU_EVENT_DENY;
 
-       remove_event(&f, INST_RETIRED);
-       remove_event(&f, INTEL_BR_RETIRED);
-       remove_event(&f, AMD_ZEN_BR_RETIRED);
+       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
        test_with_filter(vcpu, &f);
 
        ASSERT_PMC_COUNTING_INSTRUCTIONS();
@@ -361,9 +309,9 @@ static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
 
        f.action = KVM_PMU_EVENT_ALLOW;
 
-       remove_event(&f, INST_RETIRED);
-       remove_event(&f, INTEL_BR_RETIRED);
-       remove_event(&f, AMD_ZEN_BR_RETIRED);
+       remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+       remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+       remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
        test_with_filter(vcpu, &f);
 
        ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
@@ -452,9 +400,9 @@ static bool use_amd_pmu(void)
  *  - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
  */
 #define MEM_INST_RETIRED               0xD0
-#define MEM_INST_RETIRED_LOAD          EVENT(MEM_INST_RETIRED, 0x81)
-#define MEM_INST_RETIRED_STORE         EVENT(MEM_INST_RETIRED, 0x82)
-#define MEM_INST_RETIRED_LOAD_STORE    EVENT(MEM_INST_RETIRED, 0x83)
+#define MEM_INST_RETIRED_LOAD          RAW_EVENT(MEM_INST_RETIRED, 0x81)
+#define MEM_INST_RETIRED_STORE         RAW_EVENT(MEM_INST_RETIRED, 0x82)
+#define MEM_INST_RETIRED_LOAD_STORE    RAW_EVENT(MEM_INST_RETIRED, 0x83)
 
 static bool supports_event_mem_inst_retired(void)
 {
@@ -486,9 +434,9 @@ static bool supports_event_mem_inst_retired(void)
  * B1 Processors Volume 1 of 2.
  */
 #define LS_DISPATCH            0x29
-#define LS_DISPATCH_LOAD       EVENT(LS_DISPATCH, BIT(0))
-#define LS_DISPATCH_STORE      EVENT(LS_DISPATCH, BIT(1))
-#define LS_DISPATCH_LOAD_STORE EVENT(LS_DISPATCH, BIT(2))
+#define LS_DISPATCH_LOAD       RAW_EVENT(LS_DISPATCH, BIT(0))
+#define LS_DISPATCH_STORE      RAW_EVENT(LS_DISPATCH, BIT(1))
+#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
 
 #define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
        KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
@@ -729,14 +677,14 @@ static void add_dummy_events(uint64_t *events, int nevents)
 
 static void test_masked_events(struct kvm_vcpu *vcpu)
 {
-       int nevents = MAX_FILTER_EVENTS - MAX_TEST_EVENTS;
-       uint64_t events[MAX_FILTER_EVENTS];
+       int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
+       uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
 
        /* Run the test cases against a sparse PMU event filter. */
        run_masked_events_tests(vcpu, events, 0);
 
        /* Run the test cases against a dense PMU event filter. */
-       add_dummy_events(events, MAX_FILTER_EVENTS);
+       add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
        run_masked_events_tests(vcpu, events, nevents);
 }
 
@@ -809,20 +757,19 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu)
        TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
 }
 
-static void intel_run_fixed_counter_guest_code(uint8_t fixed_ctr_idx)
+static void intel_run_fixed_counter_guest_code(uint8_t idx)
 {
        for (;;) {
                wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx, 0);
+               wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
 
                /* Only OS_EN bit is enabled for fixed counter[idx]. */
-               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * fixed_ctr_idx));
-               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL,
-                     BIT_ULL(INTEL_PMC_IDX_FIXED + fixed_ctr_idx));
+               wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
+               wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
                __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
                wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 
-               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx));
+               GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
        }
 }
 
@@ -920,7 +867,7 @@ int main(int argc, char *argv[])
        struct kvm_vcpu *vcpu, *vcpu2 = NULL;
        struct kvm_vm *vm;
 
-       TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
+       TEST_REQUIRE(kvm_is_pmu_enabled());
        TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
        TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
 
index 65ad38b6be1f1ad45afa085f5dbfb6402d581bb4..e0f642d2a3c4b562d186d4551583b6f5699d661b 100644 (file)
@@ -434,6 +434,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
 
        r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
        TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+       close(memfd);
 }
 
 static void usage(const char *cmd)
index a49828adf2949464b668f530a977e9efd9d75888..0a6dfba3905b68c03cebaf1e821c7019c4a68cf1 100644 (file)
 #include "test_util.h"
 #include "kvm_util.h"
 #include "processor.h"
-#include "svm_util.h"
+#include "sev.h"
 #include "kselftest.h"
 
-#define SEV_POLICY_ES 0b100
-
 #define NR_MIGRATE_TEST_VCPUS 4
 #define NR_MIGRATE_TEST_VMS 3
 #define NR_LOCK_TESTING_THREADS 3
 
 bool have_sev_es;
 
-static int __sev_ioctl(int vm_fd, int cmd_id, void *data, __u32 *fw_error)
-{
-       struct kvm_sev_cmd cmd = {
-               .id = cmd_id,
-               .data = (uint64_t)data,
-               .sev_fd = open_sev_dev_path_or_exit(),
-       };
-       int ret;
-
-       ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
-       *fw_error = cmd.error;
-       return ret;
-}
-
-static void sev_ioctl(int vm_fd, int cmd_id, void *data)
-{
-       int ret;
-       __u32 fw_error;
-
-       ret = __sev_ioctl(vm_fd, cmd_id, data, &fw_error);
-       TEST_ASSERT(ret == 0 && fw_error == SEV_RET_SUCCESS,
-                   "%d failed: return code: %d, errno: %d, fw error: %d",
-                   cmd_id, ret, errno, fw_error);
-}
-
 static struct kvm_vm *sev_vm_create(bool es)
 {
        struct kvm_vm *vm;
-       struct kvm_sev_launch_start start = { 0 };
        int i;
 
        vm = vm_create_barebones();
-       sev_ioctl(vm->fd, es ? KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL);
+       if (!es)
+               sev_vm_init(vm);
+       else
+               sev_es_vm_init(vm);
+
        for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
                __vm_vcpu_add(vm, i);
+
+       sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
+
        if (es)
-               start.policy |= SEV_POLICY_ES;
-       sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start);
-       if (es)
-               sev_ioctl(vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+               vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
        return vm;
 }
 
@@ -181,7 +157,7 @@ static void test_sev_migrate_parameters(void)
        sev_vm = sev_vm_create(/* es= */ false);
        sev_es_vm = sev_vm_create(/* es= */ true);
        sev_es_vm_no_vmsa = vm_create_barebones();
-       sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
+       sev_es_vm_init(sev_es_vm_no_vmsa);
        __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
 
        ret = __sev_migrate_from(sev_vm, sev_es_vm);
@@ -230,13 +206,13 @@ static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
        TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
 }
 
-static void verify_mirror_allowed_cmds(int vm_fd)
+static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
 {
        struct kvm_sev_guest_status status;
+       int cmd_id;
 
-       for (int cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+       for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
                int ret;
-               __u32 fw_error;
 
                /*
                 * These commands are allowed for mirror VMs, all others are
@@ -256,14 +232,14 @@ static void verify_mirror_allowed_cmds(int vm_fd)
                 * These commands should be disallowed before the data
                 * parameter is examined so NULL is OK here.
                 */
-               ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error);
+               ret = __vm_sev_ioctl(vm, cmd_id, NULL);
                TEST_ASSERT(
                        ret == -1 && errno == EINVAL,
                        "Should not be able call command: %d. ret: %d, errno: %d",
                        cmd_id, ret, errno);
        }
 
-       sev_ioctl(vm_fd, KVM_SEV_GUEST_STATUS, &status);
+       vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
 }
 
 static void test_sev_mirror(bool es)
@@ -281,9 +257,9 @@ static void test_sev_mirror(bool es)
                __vm_vcpu_add(dst_vm, i);
 
        if (es)
-               sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+               vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
 
-       verify_mirror_allowed_cmds(dst_vm->fd);
+       verify_mirror_allowed_cmds(dst_vm);
 
        kvm_vm_free(src_vm);
        kvm_vm_free(dst_vm);
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
new file mode 100644 (file)
index 0000000..026779f
--- /dev/null
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "linux/psp-sev.h"
+#include "sev.h"
+
+
+static void guest_sev_es_code(void)
+{
+       /* TODO: Check CPUID after GHCB-based hypercall support is added. */
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+
+       /*
+        * TODO: Add GHCB and ucall support for SEV-ES guests.  For now, simply
+        * force "termination" to signal "done" via the GHCB MSR protocol.
+        */
+       wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+       __asm__ __volatile__("rep; vmmcall");
+}
+
+static void guest_sev_code(void)
+{
+       GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
+       GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+
+       GUEST_DONE();
+}
+
+static void test_sev(void *guest_code, uint64_t policy)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct ucall uc;
+
+       vm = vm_sev_create_with_one_vcpu(policy, guest_code, &vcpu);
+
+       for (;;) {
+               vcpu_run(vcpu);
+
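+               /*
+                * SEV-ES guests can't yet use ucalls; completion is signaled
+                * via the GHCB MSR termination protocol instead (see
+                * guest_sev_es_code()).
+                */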
+               if (policy & SEV_POLICY_ES) {
+                       TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+                                   "Wanted SYSTEM_EVENT, got %s",
+                                   exit_reason_str(vcpu->run->exit_reason));
+                       TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+                       TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+                       TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+                       break;
+               }
+
+               switch (get_ucall(vcpu, &uc)) {
+               case UCALL_SYNC:
+                       continue;
+               case UCALL_DONE:
+                       goto done;
+               case UCALL_ABORT:
+                       REPORT_GUEST_ASSERT(uc);
+               default:
+                       TEST_FAIL("Unexpected exit: %s",
+                                 exit_reason_str(vcpu->run->exit_reason));
+               }
+       }
+
+done:
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+       test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
+       test_sev(guest_sev_code, 0);
+
+       if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
+               test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
+               test_sev(guest_sev_es_code, SEV_POLICY_ES);
+       }
+
+       return 0;
+}
index 1a46dd7bb39136e51b1e021be08667b84c2fe001..416207c38a17ea416506a3f0babea8682b9862f5 100644 (file)
@@ -63,7 +63,7 @@ int main(int argc, char *argv[])
        vm_init_descriptor_tables(vm);
        vcpu_init_descriptor_tables(vcpu);
 
-       vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR);
+       vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
 
        rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
        TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
index a91b5b145fa35ac81dfaa225ff350e4186a229d0..adb5593daf483ec6b84d18dda8f5ebf8823a6827 100644 (file)
@@ -17,6 +17,7 @@
 #include <sys/ioctl.h>
 #include <pthread.h>
 
+#include "kvm_test_harness.h"
 #include "test_util.h"
 #include "kvm_util.h"
 #include "processor.h"
@@ -41,6 +42,8 @@ void guest_code(void)
                     : "rax", "rbx");
 }
 
+KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
+
 static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
 {
 #define REG_COMPARE(reg) \
@@ -152,18 +155,15 @@ static noinline void *race_sregs_cr4(void *arg)
        return NULL;
 }
 
-static void race_sync_regs(void *racer)
+static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
 {
        const time_t TIMEOUT = 2; /* seconds, roughly */
        struct kvm_x86_state *state;
        struct kvm_translation tr;
-       struct kvm_vcpu *vcpu;
        struct kvm_run *run;
-       struct kvm_vm *vm;
        pthread_t thread;
        time_t t;
 
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
        run = vcpu->run;
 
        run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
@@ -205,26 +205,12 @@ static void race_sync_regs(void *racer)
        TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
 
        kvm_x86_state_cleanup(state);
-       kvm_vm_free(vm);
 }
 
-int main(int argc, char *argv[])
+KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
 {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_run *run;
-       struct kvm_regs regs;
-       struct kvm_sregs sregs;
-       struct kvm_vcpu_events events;
-       int rv, cap;
-
-       cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
-       TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
-       TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
-
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
-       run = vcpu->run;
+       struct kvm_run *run = vcpu->run;
+       int rv;
 
        /* Request reading invalid register set from VCPU. */
        run->kvm_valid_regs = INVALID_SYNC_FIELD;
@@ -240,6 +226,12 @@ int main(int argc, char *argv[])
                    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
                    rv);
        run->kvm_valid_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       int rv;
 
        /* Request setting invalid register set into VCPU. */
        run->kvm_dirty_regs = INVALID_SYNC_FIELD;
@@ -255,11 +247,19 @@ int main(int argc, char *argv[])
                    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
                    rv);
        run->kvm_dirty_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_vcpu_events events;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
 
        /* Request and verify all valid register sets. */
        /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
        run->kvm_valid_regs = TEST_SYNC_FIELDS;
-       rv = _vcpu_run(vcpu);
+       vcpu_run(vcpu);
        TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
        vcpu_regs_get(vcpu, &regs);
@@ -270,6 +270,19 @@ int main(int argc, char *argv[])
 
        vcpu_events_get(vcpu, &events);
        compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_vcpu_events events;
+       struct kvm_sregs sregs;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
        /* Set and verify various register values. */
        run->s.regs.regs.rbx = 0xBAD1DEA;
@@ -278,7 +291,7 @@ int main(int argc, char *argv[])
 
        run->kvm_valid_regs = TEST_SYNC_FIELDS;
        run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
-       rv = _vcpu_run(vcpu);
+       vcpu_run(vcpu);
        TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
        TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
                    "rbx sync regs value incorrect 0x%llx.",
@@ -295,6 +308,11 @@ int main(int argc, char *argv[])
 
        vcpu_events_get(vcpu, &events);
        compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
 
        /* Clear kvm_dirty_regs bits, verify new s.regs values are
         * overwritten with existing guest values.
@@ -302,11 +320,22 @@ int main(int argc, char *argv[])
        run->kvm_valid_regs = TEST_SYNC_FIELDS;
        run->kvm_dirty_regs = 0;
        run->s.regs.regs.rbx = 0xDEADBEEF;
-       rv = _vcpu_run(vcpu);
+       vcpu_run(vcpu);
        TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
        TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
                    "rbx sync regs value incorrect 0x%llx.",
                    run->s.regs.regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
        /* Clear kvm_valid_regs bits and kvm_dirty_bits.
         * Verify s.regs values are not overwritten with existing guest values
@@ -315,9 +344,10 @@ int main(int argc, char *argv[])
        run->kvm_valid_regs = 0;
        run->kvm_dirty_regs = 0;
        run->s.regs.regs.rbx = 0xAAAA;
+       vcpu_regs_get(vcpu, &regs);
        regs.rbx = 0xBAC0;
        vcpu_regs_set(vcpu, &regs);
-       rv = _vcpu_run(vcpu);
+       vcpu_run(vcpu);
        TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
        TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
                    "rbx sync regs value incorrect 0x%llx.",
@@ -326,6 +356,17 @@ int main(int argc, char *argv[])
        TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
                    "rbx guest value incorrect 0x%llx.",
                    regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
+{
+       struct kvm_run *run = vcpu->run;
+       struct kvm_regs regs;
+
+       /* Run once to get register set */
+       run->kvm_valid_regs = TEST_SYNC_FIELDS;
+       vcpu_run(vcpu);
+       TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
        /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
         * with existing guest values but that guest values are overwritten
@@ -334,7 +375,7 @@ int main(int argc, char *argv[])
        run->kvm_valid_regs = 0;
        run->kvm_dirty_regs = TEST_SYNC_FIELDS;
        run->s.regs.regs.rbx = 0xBBBB;
-       rv = _vcpu_run(vcpu);
+       vcpu_run(vcpu);
        TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
        TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
                    "rbx sync regs value incorrect 0x%llx.",
@@ -343,12 +384,30 @@ int main(int argc, char *argv[])
        TEST_ASSERT(regs.rbx == 0xBBBB + 1,
                    "rbx guest value incorrect 0x%llx.",
                    regs.rbx);
+}
 
-       kvm_vm_free(vm);
+KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
+{
+       race_sync_regs(vcpu, race_sregs_cr4);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
+{
+       race_sync_regs(vcpu, race_events_exc);
+}
 
-       race_sync_regs(race_sregs_cr4);
-       race_sync_regs(race_events_exc);
-       race_sync_regs(race_events_inj_pen);
+KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
+{
+       race_sync_regs(vcpu, race_events_inj_pen);
+}
+
+int main(int argc, char *argv[])
+{
+       int cap;
+
+       cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+       TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
+       TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
 
-       return 0;
+       return test_harness_run(argc, argv);
 }
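
The conversion above follows the new per-vCPU selftest harness: a suite is
declared once, each test body receives a ready-made vcpu, and the harness
creates and destroys the VM around it. A minimal sketch of the shape,
assuming the KVM_ONE_VCPU_TEST_SUITE()/KVM_ONE_VCPU_TEST() macros from
kvm_test_harness.h behave exactly as they are used in this patch
(my_suite/my_case are placeholder names):

    KVM_ONE_VCPU_TEST_SUITE(my_suite);

    KVM_ONE_VCPU_TEST(my_suite, my_case, guest_code)
    {
            /* "vcpu" is provided by the harness; no setup/teardown here. */
            vcpu_run(vcpu);
            TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
    }

    int main(int argc, char *argv[])
    {
            return test_harness_run(argc, argv);
    }
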
index 3533dc2fbfeeb136b217eb79e819e2e374e2b0cd..f4f61a2d2464c1911efcf2dd9b02b1ff90af9e41 100644 (file)
@@ -8,14 +8,12 @@
 #define _GNU_SOURCE /* for program_invocation_short_name */
 #include <sys/ioctl.h>
 
+#include "kvm_test_harness.h"
 #include "test_util.h"
 #include "kvm_util.h"
 #include "vmx.h"
 
-/* Forced emulation prefix, used to invoke the emulator unconditionally. */
-#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
-#define KVM_FEP_LENGTH 5
-static int fep_available = 1;
+static bool fep_available;
 
 #define MSR_NON_EXISTENT 0x474f4f00
 
@@ -260,13 +258,6 @@ static void guest_code_filter_allow(void)
        GUEST_ASSERT(data == 2);
        GUEST_ASSERT(guest_exception_count == 0);
 
-       /*
-        * Test to see if the instruction emulator is available (ie: the module
-        * parameter 'kvm.force_emulation_prefix=1' is set).  This instruction
-        * will #UD if it isn't available.
-        */
-       __asm__ __volatile__(KVM_FEP "nop");
-
        if (fep_available) {
                /* Let userspace know we aren't done. */
                GUEST_SYNC(0);
@@ -388,12 +379,6 @@ static void guest_fep_gp_handler(struct ex_regs *regs)
                           &em_wrmsr_start, &em_wrmsr_end);
 }
 
-static void guest_ud_handler(struct ex_regs *regs)
-{
-       fep_available = 0;
-       regs->rip += KVM_FEP_LENGTH;
-}
-
 static void check_for_guest_assert(struct kvm_vcpu *vcpu)
 {
        struct ucall uc;
@@ -527,13 +512,15 @@ static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
        process_ucall_done(vcpu);
 }
 
-static void test_msr_filter_allow(void)
+KVM_ONE_VCPU_TEST_SUITE(user_msr);
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
 {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
+       struct kvm_vm *vm = vcpu->vm;
+       uint64_t cmd;
        int rc;
 
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_allow);
+       sync_global_to_guest(vm, fep_available);
 
        rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
        TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
@@ -561,11 +548,11 @@ static void test_msr_filter_allow(void)
        run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
        run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
 
-       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
        vcpu_run(vcpu);
-       vm_install_exception_handler(vm, UD_VECTOR, NULL);
+       cmd = process_ucall(vcpu);
 
-       if (process_ucall(vcpu) != UCALL_DONE) {
+       if (fep_available) {
+               TEST_ASSERT_EQ(cmd, UCALL_SYNC);
                vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
 
                /* Process emulated rdmsr and wrmsr instructions. */
@@ -583,10 +570,9 @@ static void test_msr_filter_allow(void)
                /* Confirm the guest completed without issues. */
                run_guest_then_process_ucall_done(vcpu);
        } else {
+               TEST_ASSERT_EQ(cmd, UCALL_DONE);
                printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n");
        }
-
-       kvm_vm_free(vm);
 }
 
 static int handle_ucall(struct kvm_vcpu *vcpu)
@@ -646,16 +632,12 @@ static void handle_wrmsr(struct kvm_run *run)
        }
 }
 
-static void test_msr_filter_deny(void)
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
 {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-       struct kvm_run *run;
+       struct kvm_vm *vm = vcpu->vm;
+       struct kvm_run *run = vcpu->run;
        int rc;
 
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_deny);
-       run = vcpu->run;
-
        rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
        TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
        vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
@@ -689,18 +671,13 @@ static void test_msr_filter_deny(void)
 done:
        TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
        TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
-
-       kvm_vm_free(vm);
 }
 
-static void test_msr_permission_bitmap(void)
+KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
 {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
+       struct kvm_vm *vm = vcpu->vm;
        int rc;
 
-       vm = vm_create_with_one_vcpu(&vcpu, guest_code_permission_bitmap);
-
        rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
        TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
        vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
@@ -715,8 +692,6 @@ static void test_msr_permission_bitmap(void)
        vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
        run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
        run_guest_then_process_ucall_done(vcpu);
-
-       kvm_vm_free(vm);
 }
 
 #define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask)       \
@@ -786,31 +761,20 @@ static void run_msr_filter_flag_test(struct kvm_vm *vm)
 }
 
 /* Test that attempts to write to the unused bits in a flag fails. */
-static void test_user_exit_msr_flags(void)
+KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
 {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
-
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
+       struct kvm_vm *vm = vcpu->vm;
 
        /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
        run_user_space_msr_flag_test(vm);
 
        /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
        run_msr_filter_flag_test(vm);
-
-       kvm_vm_free(vm);
 }
 
 int main(int argc, char *argv[])
 {
-       test_msr_filter_allow();
-
-       test_msr_filter_deny();
-
-       test_msr_permission_bitmap();
+       fep_available = kvm_is_forced_emulation_enabled();
 
-       test_user_exit_msr_flags();
-
-       return 0;
+       return test_harness_run(argc, argv);
 }
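
Rather than probing for forced emulation from inside the guest (executing
the KVM_FEP prefix and catching the resulting #UD), the test now asks the
host once and pushes the answer into guest memory. A sketch of the
resulting flow, assuming kvm_is_forced_emulation_enabled() reports the
kvm.force_emulation_prefix module parameter as its use here suggests
(probe_sketch is a placeholder test name):

    static bool fep_available;

    KVM_ONE_VCPU_TEST(user_msr, probe_sketch, NULL)
    {
            /* Make the host-side answer visible to guest code. */
            sync_global_to_guest(vcpu->vm, fep_available);
    }

    int main(int argc, char *argv[])
    {
            fep_available = kvm_is_forced_emulation_enabled();
            return test_harness_run(argc, argv);
    }
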
index 2a8d4ac2f0204780498dd42b6343deb5fbb04c9b..ea0cb3cae0f759be3072b3facb92291edb8c2d55 100644 (file)
 
 #include <linux/bitmap.h>
 
+#include "kvm_test_harness.h"
 #include "kvm_util.h"
 #include "vmx.h"
 
-union perf_capabilities {
+static union perf_capabilities {
        struct {
                u64     lbr_format:6;
                u64     pebs_trap:1;
@@ -32,7 +33,7 @@ union perf_capabilities {
                u64     anythread_deprecated:1;
        };
        u64     capabilities;
-};
+} host_cap;
 
 /*
  * The LBR format and most PEBS features are immutable, all other features are
@@ -73,19 +74,19 @@ static void guest_code(uint64_t current_val)
        GUEST_DONE();
 }
 
+KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
+
 /*
  * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
  * written, that the guest always sees the userspace controlled value, and that
  * PERF_CAPABILITIES is immutable after KVM_RUN.
  */
-static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
 {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);
        struct ucall uc;
        int r, i;
 
-       vm_init_descriptor_tables(vm);
+       vm_init_descriptor_tables(vcpu->vm);
        vcpu_init_descriptor_tables(vcpu);
 
        vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
@@ -117,31 +118,21 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
                TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail",
                            host_cap.capabilities ^ BIT_ULL(i));
        }
-
-       kvm_vm_free(vm);
 }
 
 /*
  * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
  * enabled, as well as '0' (to disable all features).
  */
-static void test_basic_perf_capabilities(union perf_capabilities host_cap)
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
 {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
        vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
        vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-
-       kvm_vm_free(vm);
 }
 
-static void test_fungible_perf_capabilities(union perf_capabilities host_cap)
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
 {
        const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
        int bit;
 
        for_each_set_bit(bit, &fungible_caps, 64) {
@@ -150,8 +141,6 @@ static void test_fungible_perf_capabilities(union perf_capabilities host_cap)
                             host_cap.capabilities & ~BIT_ULL(bit));
        }
        vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-
-       kvm_vm_free(vm);
 }
 
 /*
@@ -160,14 +149,11 @@ static void test_fungible_perf_capabilities(union perf_capabilities host_cap)
  * separately as they are multi-bit values, e.g. toggling or setting a single
  * bit can generate a false positive without dedicated safeguards.
  */
-static void test_immutable_perf_capabilities(union perf_capabilities host_cap)
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
 {
        const uint64_t reserved_caps = (~host_cap.capabilities |
                                        immutable_caps.capabilities) &
                                       ~format_caps.capabilities;
-
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
        union perf_capabilities val = host_cap;
        int r, bit;
 
@@ -201,8 +187,6 @@ static void test_immutable_perf_capabilities(union perf_capabilities host_cap)
                TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
                            val.pebs_format, host_cap.pebs_format);
        }
-
-       kvm_vm_free(vm);
 }
 
 /*
@@ -211,17 +195,13 @@ static void test_immutable_perf_capabilities(union perf_capabilities host_cap)
  * LBR_TOS as those bits are writable across all uarch implementations (arch
  * LBRs will need to poke a different MSR).
  */
-static void test_lbr_perf_capabilities(union perf_capabilities host_cap)
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
 {
-       struct kvm_vcpu *vcpu;
-       struct kvm_vm *vm;
        int r;
 
        if (!host_cap.lbr_format)
                return;
 
-       vm = vm_create_with_one_vcpu(&vcpu, NULL);
-
        vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
        vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
 
@@ -229,15 +209,11 @@ static void test_lbr_perf_capabilities(union perf_capabilities host_cap)
 
        r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
        TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
-
-       kvm_vm_free(vm);
 }
 
 int main(int argc, char *argv[])
 {
-       union perf_capabilities host_cap;
-
-       TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
+       TEST_REQUIRE(kvm_is_pmu_enabled());
        TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
 
        TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
@@ -248,9 +224,5 @@ int main(int argc, char *argv[])
        TEST_ASSERT(host_cap.full_width_write,
                    "Full-width writes should always be supported");
 
-       test_basic_perf_capabilities(host_cap);
-       test_fungible_perf_capabilities(host_cap);
-       test_immutable_perf_capabilities(host_cap);
-       test_guest_wrmsr_perf_capabilities(host_cap);
-       test_lbr_perf_capabilities(host_cap);
+       return test_harness_run(argc, argv);
 }
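
Since the harness test bodies take no parameters, state that main() used to
pass around (here, the host's PERF_CAPABILITIES value) has to move to file
scope. A minimal sketch of that pattern, assuming kvm_get_feature_msr() is
the selftest helper that reads a KVM-supported feature MSR (the exact call
in this file is not shown in the hunks above):

    static union perf_capabilities host_cap;    /* filled in by main() */

    int main(int argc, char *argv[])
    {
            TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
            host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
            return test_harness_run(argc, argv);
    }
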
index 9ec9ab60b63ee27d2deb6f90d9b38a71e2deb142..d2ea0435f4f76332fb702d3e84d19b536a8ea4ee 100644 (file)
@@ -62,6 +62,7 @@ enum {
        TEST_POLL_TIMEOUT,
        TEST_POLL_MASKED,
        TEST_POLL_WAKE,
+       SET_VCPU_INFO,
        TEST_TIMER_PAST,
        TEST_LOCKING_SEND_RACE,
        TEST_LOCKING_POLL_RACE,
@@ -321,6 +322,10 @@ static void guest_code(void)
 
        GUEST_SYNC(TEST_POLL_WAKE);
 
+       /* Set the vcpu_info to point at exactly the place it already is to
+        * make sure the attribute is functional. */
+       GUEST_SYNC(SET_VCPU_INFO);
+
        /* A timer wakes an *unmasked* port, which should wake us with an
         * actual interrupt, while we're polling on a different port. */
        ports[0]++;
@@ -389,6 +394,7 @@ static int cmp_timespec(struct timespec *a, struct timespec *b)
                return 0;
 }
 
+static struct shared_info *shinfo;
 static struct vcpu_info *vinfo;
 static struct kvm_vcpu *vcpu;
 
@@ -404,20 +410,38 @@ static void *juggle_shinfo_state(void *arg)
 {
        struct kvm_vm *vm = (struct kvm_vm *)arg;
 
-       struct kvm_xen_hvm_attr cache_activate = {
+       struct kvm_xen_hvm_attr cache_activate_gfn = {
                .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
                .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
        };
 
-       struct kvm_xen_hvm_attr cache_deactivate = {
+       struct kvm_xen_hvm_attr cache_deactivate_gfn = {
                .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
                .u.shared_info.gfn = KVM_XEN_INVALID_GFN
        };
 
+       struct kvm_xen_hvm_attr cache_activate_hva = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
+               .u.shared_info.hva = (unsigned long)shinfo
+       };
+
+       struct kvm_xen_hvm_attr cache_deactivate_hva = {
+               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
+               .u.shared_info.hva = 0
+       };
+
+       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+
        for (;;) {
-               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate);
-               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate);
+               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
                pthread_testcancel();
+               __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
+
+               if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
+                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
+                       pthread_testcancel();
+                       __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
+               }
        }
 
        return NULL;
@@ -442,6 +466,7 @@ int main(int argc, char *argv[])
        bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
        bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
        bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
+       bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
 
        clock_gettime(CLOCK_REALTIME, &min_ts);
 
@@ -452,7 +477,7 @@ int main(int argc, char *argv[])
                                    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
        virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
 
-       struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+       shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
 
        int zero_fd = open("/dev/zero", O_RDONLY);
        TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
@@ -488,10 +513,16 @@ int main(int argc, char *argv[])
                            "Failed to read back RUNSTATE_UPDATE_FLAG attr");
        }
 
-       struct kvm_xen_hvm_attr ha = {
-               .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
-               .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
-       };
+       struct kvm_xen_hvm_attr ha = {};
+
+       if (has_shinfo_hva) {
+               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
+               ha.u.shared_info.hva = (unsigned long)shinfo;
+       } else {
+               ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
+               ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
+       }
+
        vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
 
        /*
@@ -862,6 +893,16 @@ int main(int argc, char *argv[])
                                alarm(1);
                                break;
 
+                       case SET_VCPU_INFO:
+                               if (has_shinfo_hva) {
+                                       struct kvm_xen_vcpu_attr vih = {
+                                               .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
+                                               .u.hva = (unsigned long)vinfo
+                                       };
+                                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
+                               }
+                               break;
+
                        case TEST_TIMER_PAST:
                                TEST_ASSERT(!evtchn_irq_expected,
                                            "Expected event channel IRQ but it didn't happen");
index 3df00867723910162835de67c464a05017ba32c4..18f585684e20253b19e4e7eee60cc5505f99f05f 100644 (file)
@@ -44,8 +44,6 @@
  */
 static size_t mfd_def_size = MFD_DEF_SIZE;
 static const char *memfd_str = MEMFD_STR;
-static int newpid_thread_fn2(void *arg);
-static void join_newpid_thread(pid_t pid);
 
 static ssize_t fd2name(int fd, char *buf, size_t bufsize)
 {
@@ -194,7 +192,6 @@ static unsigned int mfd_assert_get_seals(int fd)
 static void mfd_assert_has_seals(int fd, unsigned int seals)
 {
        char buf[PATH_MAX];
-       int nbytes;
        unsigned int s;
        fd2name(fd, buf, PATH_MAX);
 
@@ -696,7 +693,6 @@ static void mfd_assert_mode(int fd, int mode)
 {
        struct stat st;
        char buf[PATH_MAX];
-       int nbytes;
 
        fd2name(fd, buf, PATH_MAX);
 
@@ -715,7 +711,6 @@ static void mfd_assert_mode(int fd, int mode)
 static void mfd_assert_chmod(int fd, int mode)
 {
        char buf[PATH_MAX];
-       int nbytes;
 
        fd2name(fd, buf, PATH_MAX);
 
@@ -731,7 +726,6 @@ static void mfd_fail_chmod(int fd, int mode)
 {
        struct stat st;
        char buf[PATH_MAX];
-       int nbytes;
 
        fd2name(fd, buf, PATH_MAX);
 
@@ -1254,9 +1248,6 @@ static void test_sysctl_set_sysctl2(void)
 
 static int sysctl_simple_child(void *arg)
 {
-       int fd;
-       int pid;
-
        printf("%s sysctl 0\n", memfd_str);
        test_sysctl_set_sysctl0();
 
@@ -1321,7 +1312,6 @@ static void test_sysctl_sysctl2_failset(void)
 
 static int sysctl_nested_child(void *arg)
 {
-       int fd;
        int pid;
 
        printf("%s nested sysctl 0\n", memfd_str);
index 4ff10ea61461796385159ae8b6fd88ae183bbbcd..d26e962f2ac490300498ae6bcfdbd3ea637a152e 100644 (file)
@@ -46,3 +46,4 @@ gup_longterm
 mkdirty
 va_high_addr_switch
 hugetlb_fault_after_madv
+hugetlb_madv_vs_map
index 2453add65d12f839fe454e374c7d691122e8cad7..eb5f39a2668b44cfdb4520ee3d4afab790b5042c 100644 (file)
@@ -70,6 +70,7 @@ TEST_GEN_FILES += ksm_tests
 TEST_GEN_FILES += ksm_functional_tests
 TEST_GEN_FILES += mdwe_test
 TEST_GEN_FILES += hugetlb_fault_after_madv
+TEST_GEN_FILES += hugetlb_madv_vs_map
 
 ifneq ($(ARCH),arm64)
 TEST_GEN_FILES += soft-dirty
@@ -114,6 +115,11 @@ TEST_PROGS := run_vmtests.sh
 TEST_FILES := test_vmalloc.sh
 TEST_FILES += test_hmm.sh
 TEST_FILES += va_high_addr_switch.sh
+TEST_FILES += charge_reserved_hugetlb.sh
+TEST_FILES += hugetlb_reparenting_test.sh
+
+# required by charge_reserved_hugetlb.sh
+TEST_FILES += write_hugetlb_memory.sh
 
 include ../lib.mk
 
index e14bdd4455f2d2798077b8a701790bcee0732e90..d680c00d2853acf12cd7291ec356ed5c95917a8e 100755 (executable)
@@ -11,6 +11,8 @@ if [[ $(id -u) -ne 0 ]]; then
   exit $ksft_skip
 fi
 
+nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
+
 fault_limit_file=limit_in_bytes
 reservation_limit_file=rsvd.limit_in_bytes
 fault_usage_file=usage_in_bytes
@@ -582,3 +584,5 @@ if [[ $do_umount ]]; then
   umount $cgroup_path
   rmdir $cgroup_path
 fi
+
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
index 656afba02dbcc099a29ae3d9c8ccb64b0d16fdc6..533999b6c284448360948bf4471f6e8c5b1c5aa5 100644 (file)
@@ -95,21 +95,22 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
 
        fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
        if (fd < 0) {
-               ksft_test_result_fail("Failed to open /proc/sys/vm/nr_hugepages: %s\n",
-                                     strerror(errno));
-               return -1;
+               ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n",
+                              strerror(errno));
+               ret = -1;
+               goto out;
        }
 
        if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
-               ksft_test_result_fail("Failed to read from /proc/sys/vm/nr_hugepages: %s\n",
-                                     strerror(errno));
+               ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n",
+                              strerror(errno));
                goto close_fd;
        }
 
        /* Start with the initial condition of 0 huge pages */
        if (write(fd, "0", sizeof(char)) != sizeof(char)) {
-               ksft_test_result_fail("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n",
-                                     strerror(errno));
+               ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n",
+                              strerror(errno));
                goto close_fd;
        }
 
@@ -118,16 +119,16 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
        /* Request a large number of huge pages. The Kernel will allocate
           as much as it can */
        if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
-               ksft_test_result_fail("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
-                                     strerror(errno));
+               ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
+                              strerror(errno));
                goto close_fd;
        }
 
        lseek(fd, 0, SEEK_SET);
 
        if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
-               ksft_test_result_fail("Failed to re-read from /proc/sys/vm/nr_hugepages: %s\n",
-                                     strerror(errno));
+               ksft_print_msg("Failed to re-read from /proc/sys/vm/nr_hugepages: %s\n",
+                              strerror(errno));
                goto close_fd;
        }
 
@@ -139,24 +140,26 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
 
        if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
            != strlen(initial_nr_hugepages)) {
-               ksft_test_result_fail("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n",
-                                     strerror(errno));
+               ksft_print_msg("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n",
+                              strerror(errno));
                goto close_fd;
        }
 
+       ksft_print_msg("Number of huge pages allocated = %d\n",
+                      atoi(nr_hugepages));
+
        if (compaction_index > 3) {
                ksft_print_msg("ERROR: Less that 1/%d of memory is available\n"
                               "as huge pages\n", compaction_index);
-               ksft_test_result_fail("No of huge pages allocated = %d\n", (atoi(nr_hugepages)));
                goto close_fd;
        }
 
-       ksft_test_result_pass("Memory compaction succeeded. No of huge pages allocated = %d\n",
-                             (atoi(nr_hugepages)));
        ret = 0;
 
  close_fd:
        close(fd);
+ out:
+       ksft_test_result(ret == 0, "check_compaction\n");
        return ret;
 }
 
@@ -174,7 +177,7 @@ int main(int argc, char **argv)
        ksft_print_header();
 
        if (prereq() || geteuid())
-               return ksft_exit_pass();
+               return ksft_exit_skip("Prerequisites unsatisfied\n");
 
        ksft_set_plan(1);
 
index f32d99565c5eaa96f6a16aafc45a10f118652026..e74107185324fb8dfd768cba25218ab469612fec 100644 (file)
@@ -19,6 +19,7 @@
 #include <sys/mman.h>
 #include <fcntl.h>
 #include "vm_util.h"
+#include "../kselftest.h"
 
 #define MIN_FREE_PAGES 20
 #define NR_HUGE_PAGES  10      /* common number of pages to map/allocate */
@@ -78,7 +79,7 @@ int main(int argc, char **argv)
        free_hugepages = get_free_hugepages();
        if (free_hugepages < MIN_FREE_PAGES) {
                printf("Not enough free huge pages to test, exiting!\n");
-               exit(1);
+               exit(KSFT_SKIP);
        }
 
        fd = memfd_create(argv[0], MFD_HUGETLB);
diff --git a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
new file mode 100644 (file)
index 0000000..d01e8d4
--- /dev/null
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test case that must run on a system with one and only one huge page available.
+ *     # echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+ *
+ * During setup, the test allocates the only available page, and starts three threads:
+ *  - thread 1:
+ *     * madvise(MADV_DONTNEED) on the allocated huge page
+ *  - thread 2:
+ *     * Write to the allocated huge page
+ *  - thread 3:
+ *     * Try to allocate an extra huge page (which must not be available)
+ *
+ *  The test fails if thread3 is able to allocate a page.
+ *
+ *  Touching the first page after thread3's allocation will raise a SIGBUS
+ *
+ *  Author: Breno Leitao <leitao@debian.org>
+ */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vm_util.h"
+#include "../kselftest.h"
+
+#define MMAP_SIZE (1 << 21)
+#define INLOOP_ITER 100
+
+char *huge_ptr;
+
+/* Touch the memory while madvise(MADV_DONTNEED) runs in parallel */
+void *touch(void *unused)
+{
+       for (int i = 0; i < INLOOP_ITER; i++)
+               huge_ptr[0] = '.';
+
+       return NULL;
+}
+
+void *madv(void *unused)
+{
+       for (int i = 0; i < INLOOP_ITER; i++)
+               madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED);
+
+       return NULL;
+}
+
+/*
+ * By the time this runs, no huge page should be available for mapping:
+ * the page backing huge_ptr keeps flipping between used and reserved
+ * because of the concurrent madvise(MADV_DONTNEED).
+ */
+void *map_extra(void *unused)
+{
+       void *ptr;
+
+       for (int i = 0; i < INLOOP_ITER; i++) {
+               ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+                          -1, 0);
+
+               if ((long)ptr != -1) {
+                       /* Touching the other page now would cause a SIGBUS:
+                        * huge_ptr[0] = '1';
+                        */
+                       return ptr;
+               }
+       }
+
+       return NULL;
+}
+
+int main(void)
+{
+       pthread_t thread1, thread2, thread3;
+       unsigned long free_hugepages;
+       void *ret;
+
+       /*
+        * On kernel 6.7, we were able to reproduce the problem with ~10
+        * iterations.
+        */
+       int max = 10;
+
+       free_hugepages = get_free_hugepages();
+
+       if (free_hugepages != 1) {
+               ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n",
+                              free_hugepages);
+       }
+
+       while (max--) {
+               huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+                               MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+                               -1, 0);
+
+               if ((unsigned long)huge_ptr == -1) {
+                       ksft_exit_skip("Failed to allocated huge page\n");
+                       return KSFT_SKIP;
+               }
+
+               pthread_create(&thread1, NULL, madv, NULL);
+               pthread_create(&thread2, NULL, touch, NULL);
+               pthread_create(&thread3, NULL, map_extra, NULL);
+
+               pthread_join(thread1, NULL);
+               pthread_join(thread2, NULL);
+               pthread_join(thread3, &ret);
+
+               if (ret) {
+                       ksft_test_result_fail("Unexpected huge page allocation\n");
+                       return KSFT_FAIL;
+               }
+
+               /* Unmap and restart */
+               munmap(huge_ptr, MMAP_SIZE);
+       }
+
+       return KSFT_PASS;
+}
index 14d26075c8635f67cc5885092de359d5c322f38b..11f9bbe7dc222be7672204518f13406437be8af4 100755 (executable)
@@ -11,6 +11,7 @@ if [[ $(id -u) -ne 0 ]]; then
   exit $ksft_skip
 fi
 
+nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
 usage_file=usage_in_bytes
 
 if [[ "$1" == "-cgroup-v2" ]]; then
@@ -248,5 +249,9 @@ cleanup
 
 echo ALL PASS
 
-umount $CGROUP_ROOT
-rm -rf $CGROUP_ROOT
+if [[ $do_umount ]]; then
+  umount $CGROUP_ROOT
+  rm -rf $CGROUP_ROOT
+fi
+
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
index fbff0dd09191f15fcfa67de0917c67325935583f..d615767e396bec5f6f61fe9625c1f01008ee8939 100644 (file)
@@ -155,12 +155,12 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot,
        /* Stabilize accounting by disabling KSM completely. */
        if (ksm_unmerge()) {
                ksft_test_result_fail("Disabling (unmerging) KSM failed\n");
-               goto unmap;
+               return MAP_FAILED;
        }
 
        if (get_my_merging_pages() > 0) {
                ksft_test_result_fail("Still pages merged\n");
-               goto unmap;
+               return MAP_FAILED;
        }
 
        map = mmap(NULL, size, PROT_READ|PROT_WRITE,
index 598159f3df1f246a088b7c145ebee7648d62bf62..b74813fdc95143ec21d92d8c3192ee90f3dc2f9e 100644 (file)
@@ -12,6 +12,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
+#include "../kselftest.h"
 
 static void dump_maps(void)
 {
@@ -28,15 +29,12 @@ static unsigned long find_base_addr(unsigned long size)
 
        flags = MAP_PRIVATE | MAP_ANONYMOUS;
        addr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
-       if (addr == MAP_FAILED) {
-               printf("Error: couldn't map the space we need for the test\n");
-               return 0;
-       }
+       if (addr == MAP_FAILED)
+               ksft_exit_fail_msg("Error: couldn't map the space we need for the test\n");
+
+       if (munmap(addr, size) != 0)
+               ksft_exit_fail_msg("Error: munmap failed\n");
 
-       if (munmap(addr, size) != 0) {
-               printf("Error: couldn't map the space we need for the test\n");
-               return 0;
-       }
        return (unsigned long)addr;
 }
 
@@ -46,51 +44,39 @@ int main(void)
        unsigned long flags, addr, size, page_size;
        char *p;
 
+       ksft_print_header();
+       ksft_set_plan(9);
+
        page_size = sysconf(_SC_PAGE_SIZE);
 
-       //let's find a base addr that is free before we start the tests
+       /* let's find a base addr that is free before we start the tests */
        size = 5 * page_size;
        base_addr = find_base_addr(size);
-       if (!base_addr) {
-               printf("Error: couldn't map the space we need for the test\n");
-               return 1;
-       }
 
        flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
 
-       // Check we can map all the areas we need below
-       errno = 0;
+       /* Check we can map all the areas we need below */
        addr = base_addr;
        size = 5 * page_size;
        p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
-
-       printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
        if (p == MAP_FAILED) {
                dump_maps();
-               printf("Error: couldn't map the space we need for the test\n");
-               return 1;
+               ksft_exit_fail_msg("Error: couldn't map the space we need for the test\n");
        }
-
-       errno = 0;
        if (munmap((void *)addr, 5 * page_size) != 0) {
                dump_maps();
-               printf("Error: munmap failed!?\n");
-               return 1;
+               ksft_exit_fail_msg("Error: munmap failed!?\n");
        }
-       printf("unmap() successful\n");
+       ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
 
-       errno = 0;
        addr = base_addr + page_size;
        size = 3 * page_size;
        p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
-       printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
        if (p == MAP_FAILED) {
                dump_maps();
-               printf("Error: first mmap() failed unexpectedly\n");
-               return 1;
+               ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n");
        }
+       ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
 
        /*
         * Exact same mapping again:
@@ -100,17 +86,14 @@ int main(void)
         *     +3 | mapped | new
         *     +4 |  free  | new
         */
-       errno = 0;
        addr = base_addr;
        size = 5 * page_size;
        p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
-       printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
        if (p != MAP_FAILED) {
                dump_maps();
-               printf("Error:1: mmap() succeeded when it shouldn't have\n");
-               return 1;
+               ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n");
        }
+       ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
 
        /*
         * Second mapping contained within first:
@@ -121,17 +104,14 @@ int main(void)
         *     +3 | mapped |
         *     +4 |  free  |
         */
-       errno = 0;
        addr = base_addr + (2 * page_size);
        size = page_size;
        p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
-       printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
        if (p != MAP_FAILED) {
                dump_maps();
-               printf("Error:2: mmap() succeeded when it shouldn't have\n");
-               return 1;
+               ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n");
        }
+       ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
 
        /*
         * Overlap end of existing mapping:
@@ -141,17 +121,14 @@ int main(void)
         *     +3 | mapped | new
         *     +4 |  free  | new
         */
-       errno = 0;
        addr = base_addr + (3 * page_size);
        size = 2 * page_size;
        p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
-       printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
        if (p != MAP_FAILED) {
                dump_maps();
-               printf("Error:3: mmap() succeeded when it shouldn't have\n");
-               return 1;
+               ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n");
        }
+       ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
 
        /*
         * Overlap start of existing mapping:
@@ -161,17 +138,14 @@ int main(void)
         *     +3 | mapped |
         *     +4 |  free  |
         */
-       errno = 0;
        addr = base_addr;
        size = 2 * page_size;
        p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
-       printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
        if (p != MAP_FAILED) {
                dump_maps();
-               printf("Error:4: mmap() succeeded when it shouldn't have\n");
-               return 1;
+               ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n");
        }
+       ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
 
        /*
         * Adjacent to start of existing mapping:
@@ -181,17 +155,14 @@ int main(void)
         *     +3 | mapped |
         *     +4 |  free  |
         */
-       errno = 0;
        addr = base_addr;
        size = page_size;
        p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
-       printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
        if (p == MAP_FAILED) {
                dump_maps();
-               printf("Error:5: mmap() failed when it shouldn't have\n");
-               return 1;
+               ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n");
        }
+       ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
 
        /*
         * Adjacent to end of existing mapping:
@@ -201,27 +172,22 @@ int main(void)
         *     +3 | mapped |
         *     +4 |  free  |  new
         */
-       errno = 0;
        addr = base_addr + (4 * page_size);
        size = page_size;
        p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
-       printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
        if (p == MAP_FAILED) {
                dump_maps();
-               printf("Error:6: mmap() failed when it shouldn't have\n");
-               return 1;
+               ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n");
        }
+       ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
 
        addr = base_addr;
        size = 5 * page_size;
        if (munmap((void *)addr, size) != 0) {
                dump_maps();
-               printf("Error: munmap failed!?\n");
-               return 1;
+               ksft_exit_fail_msg("Error: munmap failed!?\n");
        }
-       printf("unmap() successful\n");
+       ksft_test_result_pass("Base Address unmap() successful\n");
 
-       printf("OK\n");
-       return 0;
+       ksft_finished();
 }
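
For context on what this test exercises: MAP_FIXED_NOREPLACE places a
mapping at the requested address like MAP_FIXED, but refuses (MAP_FAILED
with errno == EEXIST) if the range overlaps any existing mapping, instead
of silently clobbering it. A standalone illustration:

    #include <errno.h>
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            size_t len = 4096;
            void *a = mmap(NULL, len, PROT_NONE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            /* Same range again: must fail with EEXIST rather than
             * replacing "a" the way plain MAP_FIXED would. */
            void *b = mmap(a, len, PROT_NONE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                           -1, 0);

            if (b == MAP_FAILED && errno == EEXIST)
                    printf("overlap correctly refused\n");
            return 0;
    }
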
index 86e8f2048a409028b28ece3f755d06f535726c47..a1f005a90a4f0deb5633702d389d16b2a7c0d3bb 100644 (file)
@@ -16,6 +16,7 @@
 #include <sys/mman.h>
 #include <fcntl.h>
 #include "vm_util.h"
+#include "../kselftest.h"
 
 #define LENGTH (256UL*1024*1024)
 #define PROTECTION (PROT_READ | PROT_WRITE)
@@ -31,7 +32,7 @@
 
 static void check_bytes(char *addr)
 {
-       printf("First hex is %x\n", *((unsigned int *)addr));
+       ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
 }
 
 static void write_bytes(char *addr, size_t length)
@@ -42,23 +43,21 @@ static void write_bytes(char *addr, size_t length)
                *(addr + i) = (char)i;
 }
 
-static int read_bytes(char *addr, size_t length)
+static void read_bytes(char *addr, size_t length)
 {
        unsigned long i;
 
        check_bytes(addr);
        for (i = 0; i < length; i++)
-               if (*(addr + i) != (char)i) {
-                       printf("Mismatch at %lu\n", i);
-                       return 1;
-               }
-       return 0;
+               if (*(addr + i) != (char)i)
+                       ksft_exit_fail_msg("Mismatch at %lu\n", i);
+
+       ksft_test_result_pass("Read correct data\n");
 }
 
 int main(int argc, char **argv)
 {
        void *addr;
-       int ret;
        size_t hugepage_size;
        size_t length = LENGTH;
        int flags = FLAGS;
@@ -69,6 +68,9 @@ int main(int argc, char **argv)
        if (hugepage_size > length)
                length = hugepage_size;
 
+       ksft_print_header();
+       ksft_set_plan(1);
+
        if (argc > 1)
                length = atol(argv[1]) << 20;
        if (argc > 2) {
@@ -78,27 +80,23 @@ int main(int argc, char **argv)
        }
 
        if (shift)
-               printf("%u kB hugepages\n", 1 << (shift - 10));
+               ksft_print_msg("%u kB hugepages\n", 1 << (shift - 10));
        else
-               printf("Default size hugepages\n");
-       printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
+               ksft_print_msg("Default size hugepages\n");
+       ksft_print_msg("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
 
        addr = mmap(ADDR, length, PROTECTION, flags, -1, 0);
-       if (addr == MAP_FAILED) {
-               perror("mmap");
-               exit(1);
-       }
+       if (addr == MAP_FAILED)
+               ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
 
-       printf("Returned address is %p\n", addr);
+       ksft_print_msg("Returned address is %p\n", addr);
        check_bytes(addr);
        write_bytes(addr, length);
-       ret = read_bytes(addr, length);
+       read_bytes(addr, length);
 
        /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
-       if (munmap(addr, length)) {
-               perror("munmap");
-               exit(1);
-       }
+       if (munmap(addr, length))
+               ksft_exit_fail_msg("munmap: %s\n", strerror(errno));
 
-       return ret;
+       ksft_finished();
 }
index 7945d07548751b00baf7a11d692e41cdcbf575f4..5c8a53869b1bd287b09a250edf628a66c25c2439 100644 (file)
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include "../kselftest.h"
 
 #define MMAP_SZ                4096
 
-#define BUG_ON(condition, description)                                 \
-       do {                                                            \
-               if (condition) {                                        \
-                       fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \
-                               __LINE__, (description), strerror(errno)); \
-                       exit(1);                                        \
-               }                                                       \
+#define BUG_ON(condition, description)                                         \
+       do {                                                                    \
+               if (condition)                                                  \
+                       ksft_exit_fail_msg("[FAIL]\t%s:%d\t%s:%s\n",            \
+                                          __func__, __LINE__, (description),   \
+                                          strerror(errno));                    \
        } while (0)
 
-static int parent_f(int sock, unsigned long *smap, int child)
+#define TESTS_IN_CHILD 2
+
+static void parent_f(int sock, unsigned long *smap, int child)
 {
        int status, ret;
 
@@ -43,9 +45,10 @@ static int parent_f(int sock, unsigned long *smap, int child)
        BUG_ON(ret <= 0, "write(sock)");
 
        waitpid(child, &status, 0);
-       BUG_ON(!WIFEXITED(status), "child in unexpected state");
 
-       return WEXITSTATUS(status);
+       /* The ksft macros don't keep counters between processes */
+       ksft_cnt.ksft_pass = WEXITSTATUS(status);
+       ksft_cnt.ksft_fail = TESTS_IN_CHILD - WEXITSTATUS(status);
 }
 
 static int child_f(int sock, unsigned long *smap, int fd)
@@ -64,10 +67,11 @@ static int child_f(int sock, unsigned long *smap, int fd)
        ret = read(sock, &buf, sizeof(int));
        BUG_ON(ret <= 0, "read(sock)");
 
-       BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page");
-       BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted");
+       ksft_test_result(*smap != 0x22222BAD, "MAP_POPULATE COW private page\n");
+       ksft_test_result(*smap == 0xdeadbabe, "The mapping state\n");
 
-       return 0;
+       /* The ksft macros don't keep counters between processes */
+       return ksft_cnt.ksft_pass;
 }
 
 int main(int argc, char **argv)
@@ -76,6 +80,9 @@ int main(int argc, char **argv)
        FILE *ftmp;
        unsigned long *smap;
 
+       ksft_print_header();
+       ksft_set_plan(TESTS_IN_CHILD);
+
        ftmp = tmpfile();
        BUG_ON(!ftmp, "tmpfile()");
 
@@ -101,7 +108,9 @@ int main(int argc, char **argv)
                ret = close(sock[0]);
                BUG_ON(ret, "close()");
 
-               return parent_f(sock[1], smap, child);
+               parent_f(sock[1], smap, child);
+
+               ksft_finished();
        }
 
        ret = close(sock[1]);
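
The interesting wrinkle in this conversion is that the kselftest counters
are plain per-process globals, so results recorded in the forked child are
invisible to the parent; the patch carries the child's pass count back
through the exit status. A self-contained sketch of the pattern, under the
assumption that ksft_cnt and ksft_finished() behave as in
tools/testing/selftests/kselftest.h:

    #include <sys/wait.h>
    #include <unistd.h>
    #include "../kselftest.h"

    #define TESTS_IN_CHILD 2

    int main(void)
    {
            int status;
            pid_t child;

            ksft_print_header();
            ksft_set_plan(TESTS_IN_CHILD);

            child = fork();
            if (child == 0) {
                    ksft_test_result(1 + 1 == 2, "first check\n");
                    ksft_test_result(2 + 2 == 4, "second check\n");
                    /* The exit status carries the pass count back. */
                    _exit(ksft_cnt.ksft_pass);
            }

            waitpid(child, &status, 0);
            /* Rebuild the per-process counters from the exit status. */
            ksft_cnt.ksft_pass = WEXITSTATUS(status);
            ksft_cnt.ksft_fail = TESTS_IN_CHILD - WEXITSTATUS(status);
            ksft_finished();
    }
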
index 1fba77df7f628eee87d7aa3b82cb928d0d084077..1cd80b0f76c33f04ef01f4dee6602f28b6a7c991 100644 (file)
@@ -13,6 +13,7 @@
 #include <sys/ipc.h>
 #include <sys/shm.h>
 #include <time.h>
+#include "../kselftest.h"
 #include "mlock2.h"
 
 #define CHUNK_UNIT (128 * 1024)
@@ -31,14 +32,14 @@ int set_cap_limits(rlim_t max)
        new.rlim_cur = max;
        new.rlim_max = max;
        if (setrlimit(RLIMIT_MEMLOCK, &new)) {
-               perror("setrlimit() returns error\n");
+               ksft_perror("setrlimit() returns error\n");
                return -1;
        }
 
        /* drop capabilities including CAP_IPC_LOCK */
        if (cap_set_proc(cap)) {
-               perror("cap_set_proc() returns error\n");
-               return -2;
+               ksft_perror("cap_set_proc() returns error\n");
+               return -1;
        }
 
        return 0;
@@ -52,27 +53,24 @@ int get_proc_locked_vm_size(void)
        unsigned long lock_size = 0;
 
        f = fopen("/proc/self/status", "r");
-       if (!f) {
-               perror("fopen");
-               return -1;
-       }
+       if (!f)
+               ksft_exit_fail_msg("fopen: %s\n", strerror(errno));
 
        while (fgets(line, 1024, f)) {
                if (strstr(line, "VmLck")) {
                        ret = sscanf(line, "VmLck:\t%8lu kB", &lock_size);
                        if (ret <= 0) {
-                               printf("sscanf() on VmLck error: %s: %d\n",
-                                               line, ret);
                                fclose(f);
-                               return -1;
+                               ksft_exit_fail_msg("sscanf() on VmLck error: %s: %d\n",
+                                                  line, ret);
                        }
                        fclose(f);
                        return (int)(lock_size << 10);
                }
        }
 
-       perror("cannot parse VmLck in /proc/self/status\n");
        fclose(f);
+       ksft_exit_fail_msg("cannot parse VmLck in /proc/self/status: %s\n", strerror(errno));
        return -1;
 }
 
@@ -91,10 +89,8 @@ int get_proc_page_size(unsigned long addr)
        size_t size;
 
        smaps = seek_to_smaps_entry(addr);
-       if (!smaps) {
-               printf("Unable to parse /proc/self/smaps\n");
-               return 0;
-       }
+       if (!smaps)
+               ksft_exit_fail_msg("Unable to parse /proc/self/smaps\n");
 
        while (getline(&line, &size, smaps) > 0) {
                if (!strstr(line, "MMUPageSize")) {
@@ -105,12 +101,9 @@ int get_proc_page_size(unsigned long addr)
                }
 
                /* found the MMUPageSize of this section */
-               if (sscanf(line, "MMUPageSize:    %8lu kB",
-                                       &mmupage_size) < 1) {
-                       printf("Unable to parse smaps entry for Size:%s\n",
-                                       line);
-                       break;
-               }
+               if (sscanf(line, "MMUPageSize:    %8lu kB", &mmupage_size) < 1)
+                       ksft_exit_fail_msg("Unable to parse smaps entry for Size:%s\n",
+                                          line);
 
        }
        free(line);
@@ -136,7 +129,7 @@ int get_proc_page_size(unsigned long addr)
  *    return value: 0 - success
  *    else: failure
  */
-int test_mlock_within_limit(char *p, int alloc_size)
+static void test_mlock_within_limit(char *p, int alloc_size)
 {
        int i;
        int ret = 0;
@@ -145,11 +138,9 @@ int test_mlock_within_limit(char *p, int alloc_size)
        int page_size = 0;
 
        getrlimit(RLIMIT_MEMLOCK, &cur);
-       if (cur.rlim_cur < alloc_size) {
-               printf("alloc_size[%d] < %u rlimit,lead to mlock failure\n",
-                               alloc_size, (unsigned int)cur.rlim_cur);
-               return -1;
-       }
+       if (cur.rlim_cur < alloc_size)
+               ksft_exit_fail_msg("alloc_size[%d] < %u rlimit,lead to mlock failure\n",
+                                  alloc_size, (unsigned int)cur.rlim_cur);
 
        srand(time(NULL));
        for (i = 0; i < TEST_LOOP; i++) {
@@ -169,13 +160,11 @@ int test_mlock_within_limit(char *p, int alloc_size)
                        ret = mlock2_(p + start_offset, lock_size,
                                       MLOCK_ONFAULT);
 
-               if (ret) {
-                       printf("%s() failure at |%p(%d)| mlock:|%p(%d)|\n",
-                                       is_mlock ? "mlock" : "mlock2",
-                                       p, alloc_size,
-                                       p + start_offset, lock_size);
-                       return ret;
-               }
+               if (ret)
+                       ksft_exit_fail_msg("%s() failure at |%p(%d)| mlock:|%p(%d)|\n",
+                                          is_mlock ? "mlock" : "mlock2",
+                                          p, alloc_size,
+                                          p + start_offset, lock_size);
        }
 
        /*
@@ -183,18 +172,12 @@ int test_mlock_within_limit(char *p, int alloc_size)
         */
        locked_vm_size = get_proc_locked_vm_size();
        page_size = get_proc_page_size((unsigned long)p);
-       if (page_size == 0) {
-               printf("cannot get proc MMUPageSize\n");
-               return -1;
-       }
 
-       if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size) {
-               printf("test_mlock_within_limit() left VmLck:%d on %d chunk\n",
-                               locked_vm_size, alloc_size);
-               return -1;
-       }
+       if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size)
+               ksft_exit_fail_msg("%s left VmLck:%d on %d chunk\n",
+                                  __func__, locked_vm_size, alloc_size);
 
-       return 0;
+       ksft_test_result_pass("%s\n", __func__);
 }
 
 
@@ -213,7 +196,7 @@ int test_mlock_within_limit(char *p, int alloc_size)
  *    return value: 0 - success
  *    else: failure
  */
-int test_mlock_outof_limit(char *p, int alloc_size)
+static void test_mlock_outof_limit(char *p, int alloc_size)
 {
        int i;
        int ret = 0;
@@ -221,11 +204,9 @@ int test_mlock_outof_limit(char *p, int alloc_size)
        struct rlimit cur;
 
        getrlimit(RLIMIT_MEMLOCK, &cur);
-       if (cur.rlim_cur >= alloc_size) {
-               printf("alloc_size[%d] >%u rlimit, violates test condition\n",
-                               alloc_size, (unsigned int)cur.rlim_cur);
-               return -1;
-       }
+       if (cur.rlim_cur >= alloc_size)
+               ksft_exit_fail_msg("alloc_size[%d] >%u rlimit, violates test condition\n",
+                                  alloc_size, (unsigned int)cur.rlim_cur);
 
        old_locked_vm_size = get_proc_locked_vm_size();
        srand(time(NULL));
@@ -240,56 +221,47 @@ int test_mlock_outof_limit(char *p, int alloc_size)
                else
                        ret = mlock2_(p + start_offset, lock_size,
                                        MLOCK_ONFAULT);
-               if (ret == 0) {
-                       printf("%s() succeeds? on %p(%d) mlock%p(%d)\n",
-                                       is_mlock ? "mlock" : "mlock2",
-                                       p, alloc_size,
-                                       p + start_offset, lock_size);
-                       return -1;
-               }
+               if (ret == 0)
+                       ksft_exit_fail_msg("%s() succeeds? on %p(%d) mlock%p(%d)\n",
+                                          is_mlock ? "mlock" : "mlock2",
+                                          p, alloc_size, p + start_offset, lock_size);
        }
 
        locked_vm_size = get_proc_locked_vm_size();
-       if (locked_vm_size != old_locked_vm_size) {
-               printf("tests leads to new mlocked page: old[%d], new[%d]\n",
-                               old_locked_vm_size,
-                               locked_vm_size);
-               return -1;
-       }
+       if (locked_vm_size != old_locked_vm_size)
+               ksft_exit_fail_msg("tests leads to new mlocked page: old[%d], new[%d]\n",
+                                  old_locked_vm_size,
+                                  locked_vm_size);
 
-       return 0;
+       ksft_test_result_pass("%s\n", __func__);
 }
 
 int main(int argc, char **argv)
 {
        char *p = NULL;
-       int ret = 0;
+
+       ksft_print_header();
 
        if (set_cap_limits(MLOCK_RLIMIT_SIZE))
-               return -1;
+               ksft_finished();
+
+       ksft_set_plan(2);
 
        p = malloc(MLOCK_WITHIN_LIMIT_SIZE);
-       if (p == NULL) {
-               perror("malloc() failure\n");
-               return -1;
-       }
-       ret = test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE);
-       if (ret)
-               return ret;
+       if (p == NULL)
+               ksft_exit_fail_msg("malloc() failure: %s\n", strerror(errno));
+
+       test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE);
        munlock(p, MLOCK_WITHIN_LIMIT_SIZE);
        free(p);
 
-
        p = malloc(MLOCK_OUTOF_LIMIT_SIZE);
-       if (p == NULL) {
-               perror("malloc() failure\n");
-               return -1;
-       }
-       ret = test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE);
-       if (ret)
-               return ret;
+       if (p == NULL)
+               ksft_exit_fail_msg("malloc() failure: %s\n", strerror(errno));
+
+       test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE);
        munlock(p, MLOCK_OUTOF_LIMIT_SIZE);
        free(p);
 
-       return 0;
+       ksft_finished();
 }
index 80cddc0de2061024db2138e43206f75721d9524a..26f744188ad0c8c0535ec8ecc4e903f0bd1b86c5 100644 (file)
@@ -7,9 +7,8 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <stdbool.h>
-#include "mlock2.h"
-
 #include "../kselftest.h"
+#include "mlock2.h"
 
 struct vm_boundaries {
        unsigned long start;
@@ -40,14 +39,14 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
        while(fgets(line, 1024, file)) {
                end_addr = strchr(line, '-');
                if (!end_addr) {
-                       printf("cannot parse /proc/self/maps\n");
+                       ksft_print_msg("cannot parse /proc/self/maps\n");
                        goto out;
                }
                *end_addr = '\0';
                end_addr++;
                stop = strchr(end_addr, ' ');
                if (!stop) {
-                       printf("cannot parse /proc/self/maps\n");
+                       ksft_print_msg("cannot parse /proc/self/maps\n");
                        goto out;
                }
 
@@ -78,7 +77,7 @@ static bool is_vmflag_set(unsigned long addr, const char *vmflag)
 
        smaps = seek_to_smaps_entry(addr);
        if (!smaps) {
-               printf("Unable to parse /proc/self/smaps\n");
+               ksft_print_msg("Unable to parse /proc/self/smaps\n");
                goto out;
        }
 
@@ -115,7 +114,7 @@ static unsigned long get_value_for_name(unsigned long addr, const char *name)
 
        smaps = seek_to_smaps_entry(addr);
        if (!smaps) {
-               printf("Unable to parse /proc/self/smaps\n");
+               ksft_print_msg("Unable to parse /proc/self/smaps\n");
                goto out;
        }
 
@@ -129,7 +128,7 @@ static unsigned long get_value_for_name(unsigned long addr, const char *name)
 
                value_ptr = line + strlen(name);
                if (sscanf(value_ptr, "%lu kB", &value) < 1) {
-                       printf("Unable to parse smaps entry for Size\n");
+                       ksft_print_msg("Unable to parse smaps entry for Size\n");
                        goto out;
                }
                break;
@@ -180,57 +179,45 @@ static int lock_check(unsigned long addr)
 static int unlock_lock_check(char *map)
 {
        if (is_vmflag_set((unsigned long)map, LOCKED)) {
-               printf("VMA flag %s is present on page 1 after unlock\n", LOCKED);
+               ksft_print_msg("VMA flag %s is present on page 1 after unlock\n", LOCKED);
                return 1;
        }
 
        return 0;
 }
 
-static int test_mlock_lock()
+static void test_mlock_lock(void)
 {
        char *map;
-       int ret = 1;
        unsigned long page_size = getpagesize();
 
        map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
                   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-       if (map == MAP_FAILED) {
-               perror("test_mlock_locked mmap");
-               goto out;
-       }
+       if (map == MAP_FAILED)
+               ksft_exit_fail_msg("mmap error: %s\n", strerror(errno));
 
        if (mlock2_(map, 2 * page_size, 0)) {
-               if (errno == ENOSYS) {
-                       printf("Cannot call new mlock family, skipping test\n");
-                       _exit(KSFT_SKIP);
-               }
-               perror("mlock2(0)");
-               goto unmap;
+               munmap(map, 2 * page_size);
+               ksft_exit_fail_msg("mlock2(0): %s\n", strerror(errno));
        }
 
-       if (!lock_check((unsigned long)map))
-               goto unmap;
+       ksft_test_result(lock_check((unsigned long)map), "%s: Locked\n", __func__);
 
        /* Now unlock and recheck attributes */
        if (munlock(map, 2 * page_size)) {
-               perror("munlock()");
-               goto unmap;
+               munmap(map, 2 * page_size);
+               ksft_exit_fail_msg("munlock(): %s\n", strerror(errno));
        }
 
-       ret = unlock_lock_check(map);
-
-unmap:
+       ksft_test_result(!unlock_lock_check(map), "%s: Unlocked\n", __func__);
        munmap(map, 2 * page_size);
-out:
-       return ret;
 }
 
 static int onfault_check(char *map)
 {
        *map = 'a';
        if (!is_vma_lock_on_fault((unsigned long)map)) {
-               printf("VMA is not marked for lock on fault\n");
+               ksft_print_msg("VMA is not marked for lock on fault\n");
                return 1;
        }
 
@@ -243,172 +230,131 @@ static int unlock_onfault_check(char *map)
 
        if (is_vma_lock_on_fault((unsigned long)map) ||
            is_vma_lock_on_fault((unsigned long)map + page_size)) {
-               printf("VMA is still lock on fault after unlock\n");
+               ksft_print_msg("VMA is still lock on fault after unlock\n");
                return 1;
        }
 
        return 0;
 }
 
-static int test_mlock_onfault()
+static void test_mlock_onfault(void)
 {
        char *map;
-       int ret = 1;
        unsigned long page_size = getpagesize();
 
        map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
                   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-       if (map == MAP_FAILED) {
-               perror("test_mlock_locked mmap");
-               goto out;
-       }
+       if (map == MAP_FAILED)
+               ksft_exit_fail_msg("mmap error: %s\n", strerror(errno));
 
        if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
-               if (errno == ENOSYS) {
-                       printf("Cannot call new mlock family, skipping test\n");
-                       _exit(KSFT_SKIP);
-               }
-               perror("mlock2(MLOCK_ONFAULT)");
-               goto unmap;
+               munmap(map, 2 * page_size);
+               ksft_exit_fail_msg("mlock2(MLOCK_ONFAULT): %s\n", strerror(errno));
        }
 
-       if (onfault_check(map))
-               goto unmap;
+       ksft_test_result(!onfault_check(map), "%s: VMA marked for lock on fault\n", __func__);
 
        /* Now unlock and recheck attributes */
        if (munlock(map, 2 * page_size)) {
-               if (errno == ENOSYS) {
-                       printf("Cannot call new mlock family, skipping test\n");
-                       _exit(KSFT_SKIP);
-               }
-               perror("munlock()");
-               goto unmap;
+               munmap(map, 2 * page_size);
+               ksft_exit_fail_msg("munlock(): %s\n", strerror(errno));
        }
 
-       ret = unlock_onfault_check(map);
-unmap:
+       ksft_test_result(!unlock_onfault_check(map), "%s: Unlocked\n", __func__);
        munmap(map, 2 * page_size);
-out:
-       return ret;
 }
 
-static int test_lock_onfault_of_present()
+static void test_lock_onfault_of_present(void)
 {
        char *map;
-       int ret = 1;
        unsigned long page_size = getpagesize();
 
        map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
                   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-       if (map == MAP_FAILED) {
-               perror("test_mlock_locked mmap");
-               goto out;
-       }
+       if (map == MAP_FAILED)
+               ksft_exit_fail_msg("mmap error: %s\n", strerror(errno));
 
        *map = 'a';
 
        if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
-               if (errno == ENOSYS) {
-                       printf("Cannot call new mlock family, skipping test\n");
-                       _exit(KSFT_SKIP);
-               }
-               perror("mlock2(MLOCK_ONFAULT)");
-               goto unmap;
+               munmap(map, 2 * page_size);
+               ksft_test_result_fail("mlock2(MLOCK_ONFAULT) error: %s\n", strerror(errno));
+               return;
        }
 
-       if (!is_vma_lock_on_fault((unsigned long)map) ||
-           !is_vma_lock_on_fault((unsigned long)map + page_size)) {
-               printf("VMA with present pages is not marked lock on fault\n");
-               goto unmap;
-       }
-       ret = 0;
-unmap:
+       ksft_test_result(is_vma_lock_on_fault((unsigned long)map) &&
+                        is_vma_lock_on_fault((unsigned long)map + page_size),
+                        "VMA with present pages is marked lock on fault\n");
        munmap(map, 2 * page_size);
-out:
-       return ret;
 }
 
-static int test_munlockall()
+static void test_munlockall0(void)
 {
        char *map;
-       int ret = 1;
        unsigned long page_size = getpagesize();
 
        map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
                   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-
-       if (map == MAP_FAILED) {
-               perror("test_munlockall mmap");
-               goto out;
-       }
+       if (map == MAP_FAILED)
+               ksft_exit_fail_msg("mmap error: %s\n", strerror(errno));
 
        if (mlockall(MCL_CURRENT)) {
-               perror("mlockall(MCL_CURRENT)");
-               goto out;
+               munmap(map, 2 * page_size);
+               ksft_exit_fail_msg("mlockall(MCL_CURRENT): %s\n", strerror(errno));
        }
 
-       if (!lock_check((unsigned long)map))
-               goto unmap;
+       ksft_test_result(lock_check((unsigned long)map), "%s: Locked memory area\n", __func__);
 
        if (munlockall()) {
-               perror("munlockall()");
-               goto unmap;
+               munmap(map, 2 * page_size);
+               ksft_exit_fail_msg("munlockall(): %s\n", strerror(errno));
        }
 
-       if (unlock_lock_check(map))
-               goto unmap;
-
+       ksft_test_result(!unlock_lock_check(map), "%s: No locked memory\n", __func__);
        munmap(map, 2 * page_size);
+}
+
+static void test_munlockall1(void)
+{
+       char *map;
+       unsigned long page_size = getpagesize();
 
        map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
                   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-
-       if (map == MAP_FAILED) {
-               perror("test_munlockall second mmap");
-               goto out;
-       }
+       if (map == MAP_FAILED)
+               ksft_exit_fail_msg("mmap error: %s\n", strerror(errno));
 
        if (mlockall(MCL_CURRENT | MCL_ONFAULT)) {
-               perror("mlockall(MCL_CURRENT | MCL_ONFAULT)");
-               goto unmap;
+               munmap(map, 2 * page_size);
+               ksft_exit_fail_msg("mlockall(MCL_CURRENT | MCL_ONFAULT): %s\n", strerror(errno));
        }
 
-       if (onfault_check(map))
-               goto unmap;
+       ksft_test_result(!onfault_check(map), "%s: VMA marked for lock on fault\n", __func__);
 
        if (munlockall()) {
-               perror("munlockall()");
-               goto unmap;
+               munmap(map, 2 * page_size);
+               ksft_exit_fail_msg("munlockall(): %s\n", strerror(errno));
        }
 
-       if (unlock_onfault_check(map))
-               goto unmap;
+       ksft_test_result(!unlock_onfault_check(map), "%s: Unlocked\n", __func__);
 
        if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
-               perror("mlockall(MCL_CURRENT | MCL_FUTURE)");
-               goto out;
+               munmap(map, 2 * page_size);
+               ksft_exit_fail_msg("mlockall(MCL_CURRENT | MCL_FUTURE): %s\n", strerror(errno));
        }
 
-       if (!lock_check((unsigned long)map))
-               goto unmap;
+       ksft_test_result(lock_check((unsigned long)map), "%s: Locked\n", __func__);
 
        if (munlockall()) {
-               perror("munlockall()");
-               goto unmap;
+               munmap(map, 2 * page_size);
+               ksft_exit_fail_msg("munlockall(): %s\n", strerror(errno));
        }
 
-       ret = unlock_lock_check(map);
-
-unmap:
+       ksft_test_result(!unlock_lock_check(map), "%s: No locked memory\n", __func__);
        munmap(map, 2 * page_size);
-out:
-       munlockall();
-       return ret;
 }
 
-static int test_vma_management(bool call_mlock)
+static void test_vma_management(bool call_mlock)
 {
-       int ret = 1;
        void *map;
        unsigned long page_size = getpagesize();
        struct vm_boundaries page1;
@@ -417,25 +363,19 @@ static int test_vma_management(bool call_mlock)
 
        map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
                   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-       if (map == MAP_FAILED) {
-               perror("mmap()");
-               return ret;
-       }
+       if (map == MAP_FAILED)
+               ksft_exit_fail_msg("mmap error: %s\n", strerror(errno));
 
        if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
-               if (errno == ENOSYS) {
-                       printf("Cannot call new mlock family, skipping test\n");
-                       _exit(KSFT_SKIP);
-               }
-               perror("mlock(ONFAULT)\n");
-               goto out;
+               munmap(map, 3 * page_size);
+               ksft_test_result_fail("mlock error: %s\n", strerror(errno));
+               return;
        }
 
        if (get_vm_area((unsigned long)map, &page1) ||
            get_vm_area((unsigned long)map + page_size, &page2) ||
            get_vm_area((unsigned long)map + page_size * 2, &page3)) {
-               printf("couldn't find mapping in /proc/self/maps\n");
-               goto out;
+               munmap(map, 3 * page_size);
+               ksft_test_result_fail("couldn't find mapping in /proc/self/maps\n");
+               return;
        }
 
        /*
@@ -444,76 +384,86 @@ static int test_vma_management(bool call_mlock)
         * not a failure)
         */
        if (page1.start != page2.start || page2.start != page3.start) {
-               printf("VMAs are not merged to start, aborting test\n");
-               ret = 0;
-               goto out;
+               munmap(map, 3 * page_size);
+               ksft_test_result_skip("VMAs are not merged to start, aborting test\n");
+               return;
        }
 
        if (munlock(map + page_size, page_size)) {
-               perror("munlock()");
-               goto out;
+               munmap(map, 3 * page_size);
+               ksft_test_result_fail("munlock(): %s\n", strerror(errno));
+               return;
        }
 
        if (get_vm_area((unsigned long)map, &page1) ||
            get_vm_area((unsigned long)map + page_size, &page2) ||
            get_vm_area((unsigned long)map + page_size * 2, &page3)) {
-               printf("couldn't find mapping in /proc/self/maps\n");
-               goto out;
+               munmap(map, 3 * page_size);
+               ksft_test_result_fail("couldn't find mapping in /proc/self/maps\n");
+               return;
        }
 
        /* All three VMAs should be different */
        if (page1.start == page2.start || page2.start == page3.start) {
-               printf("failed to split VMA for munlock\n");
-               goto out;
+               munmap(map, 3 * page_size);
+               ksft_test_result_fail("failed to split VMA for munlock\n");
+               return;
        }
 
        /* Now unlock the first and third page and check the VMAs again */
        if (munlock(map, page_size * 3)) {
-               perror("munlock()");
-               goto out;
+               munmap(map, 3 * page_size);
+               ksft_test_result_fail("munlock(): %s\n", strerror(errno));
+               return;
        }
 
        if (get_vm_area((unsigned long)map, &page1) ||
            get_vm_area((unsigned long)map + page_size, &page2) ||
            get_vm_area((unsigned long)map + page_size * 2, &page3)) {
-               printf("couldn't find mapping in /proc/self/maps\n");
-               goto out;
+               munmap(map, 3 * page_size);
+               ksft_test_result_fail("couldn't find mapping in /proc/self/maps\n");
+               return;
        }
 
        /* Now all three VMAs should be the same */
        if (page1.start != page2.start || page2.start != page3.start) {
-               printf("failed to merge VMAs after munlock\n");
-               goto out;
+               munmap(map, 3 * page_size);
+               ksft_test_result_fail("failed to merge VMAs after munlock\n");
+               return;
        }
 
-       ret = 0;
-out:
+       ksft_test_result_pass("%s: call_mlock %d\n", __func__, call_mlock);
        munmap(map, 3 * page_size);
-       return ret;
 }
 
-static int test_mlockall(int (test_function)(bool call_mlock))
+static void test_mlockall(void)
 {
-       int ret = 1;
+       if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE))
+               ksft_exit_fail_msg("mlockall failed: %s\n", strerror(errno));
 
-       if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) {
-               perror("mlockall");
-               return ret;
-       }
-
-       ret = test_function(false);
+       test_vma_management(false);
        munlockall();
-       return ret;
 }
 
 int main(int argc, char **argv)
 {
-       int ret = 0;
-       ret += test_mlock_lock();
-       ret += test_mlock_onfault();
-       ret += test_munlockall();
-       ret += test_lock_onfault_of_present();
-       ret += test_vma_management(true);
-       ret += test_mlockall(test_vma_management);
-       return ret;
+       int ret, size = 3 * getpagesize();
+       void *map;
+
+       ksft_print_header();
+
+       map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+       if (map == MAP_FAILED)
+               ksft_exit_fail_msg("mmap error: %s\n", strerror(errno));
+
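+       /*
+        * Probe for mlock2() first: if the syscall is missing (ENOSYS),
+        * finish before any tests are planned so the suite exits cleanly
+        * instead of failing on every test.
+        */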
+       ret = mlock2_(map, size, MLOCK_ONFAULT);
+       if (ret && errno == ENOSYS)
+               ksft_finished();
+
+       munmap(map, size);
+
+       ksft_set_plan(13);
+
+       test_mlock_lock();
+       test_mlock_onfault();
+       test_munlockall0();
+       test_munlockall1();
+       test_lock_onfault_of_present();
+       test_vma_management(true);
+       test_mlockall();
+
+       ksft_finished();
 }
index 8e02991b313c8e574a38dce9f12de9b616779a34..4417eaa5cfb78ba2bb0f51d3418c9b768ff0fe90 100644 (file)
@@ -6,12 +6,7 @@
 
 static int mlock2_(void *start, size_t len, int flags)
 {
-#ifdef __NR_mlock2
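+       /* Issue the raw syscall directly; on kernels without mlock2() it
+        * fails with errno == ENOSYS, which callers check for. */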
        return syscall(__NR_mlock2, start, len, flags);
-#else
-       errno = ENOSYS;
-       return -1;
-#endif
 }
 
 static FILE *seek_to_smaps_entry(unsigned long addr)
@@ -27,10 +22,8 @@ static FILE *seek_to_smaps_entry(unsigned long addr)
        char path[BUFSIZ];
 
        file = fopen("/proc/self/smaps", "r");
-       if (!file) {
-               perror("fopen smaps");
-               _exit(1);
-       }
+       if (!file)
+               ksft_exit_fail_msg("fopen smaps: %s\n", strerror(errno));
 
        while (getline(&line, &size, file) > 0) {
                if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n",
index d822004a374e9d5d5223d334172129140a21511a..100370a7111df58eae8c188170f64f12ad6e7bb6 100644 (file)
@@ -26,19 +26,15 @@ static int alloc_noexit(unsigned long nr_pages, int pipefd)
 
        buf = (char *)mmap(NULL, nr_pages * psize(), PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANON, 0, 0);
-       if (buf == MAP_FAILED) {
-               perror("mmap failed, halting the test");
-               return KSFT_FAIL;
-       }
+       if (buf == MAP_FAILED)
+               ksft_exit_fail_msg("mmap failed, halting the test: %s\n", strerror(errno));
 
        for (i = 0; i < nr_pages; i++)
                *((unsigned long *)(buf + (i * psize()))) = i;
 
        /* Signal the parent that the child is ready */
-       if (write(pipefd, "", 1) < 0) {
-               perror("write");
-               return KSFT_FAIL;
-       }
+       if (write(pipefd, "", 1) < 0)
+               ksft_exit_fail_msg("write: %s\n", strerror(errno));
 
        /* Wait to be killed (when reparenting happens) */
        while (getppid() == ppid && timeout > 0) {
@@ -54,23 +50,17 @@ static int alloc_noexit(unsigned long nr_pages, int pipefd)
 /* The process_mrelease calls in this test are expected to fail */
 static void run_negative_tests(int pidfd)
 {
-       int res;
        /* Test invalid flags. Expect to fail with EINVAL error code. */
        if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) ||
                        errno != EINVAL) {
-               res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
-               perror("process_mrelease with wrong flags");
-               exit(res);
+               ksft_exit_fail_msg("process_mrelease with wrong flags: %s\n", strerror(errno));
        }
        /*
         * Test reaping while process is alive with no pending SIGKILL.
         * Expect to fail with EINVAL error code.
         */
-       if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL) {
-               res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
-               perror("process_mrelease on a live process");
-               exit(res);
-       }
+       if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL)
+               ksft_exit_fail_msg("process_mrelease on a live process: %s\n", strerror(errno));
 }
 
 static int child_main(int pipefd[], size_t size)
@@ -93,11 +83,18 @@ int main(void)
        char byte;
        int res;
 
+       ksft_print_header();
+       ksft_set_plan(1);
+
        /* Test a wrong pidfd */
        if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) {
-               res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
-               perror("process_mrelease with wrong pidfd");
-               exit(res);
+               if (errno == ENOSYS) {
+                       ksft_test_result_skip("process_mrelease not implemented\n");
+                       ksft_finished();
+               } else {
+                       ksft_exit_fail_msg("process_mrelease with wrong pidfd: %s\n",
+                                          strerror(errno));
+               }
        }
 
        /* Start the test with 1MB child memory allocation */
@@ -107,16 +104,14 @@ retry:
         * Pipe for the child to signal when it's done allocating
         * memory
         */
-       if (pipe(pipefd)) {
-               perror("pipe");
-               exit(KSFT_FAIL);
-       }
+       if (pipe(pipefd))
+               ksft_exit_fail_msg("pipe: %s\n", strerror(errno));
+
        pid = fork();
        if (pid < 0) {
-               perror("fork");
                close(pipefd[0]);
                close(pipefd[1]);
-               exit(KSFT_FAIL);
+               ksft_exit_fail_msg("fork: %s\n", strerror(errno));
        }
 
        if (pid == 0) {
@@ -134,28 +129,23 @@ retry:
        res = read(pipefd[0], &byte, 1);
        close(pipefd[0]);
        if (res < 0) {
-               perror("read");
                if (!kill(pid, SIGKILL))
                        waitpid(pid, NULL, 0);
-               exit(KSFT_FAIL);
+               ksft_exit_fail_msg("read: %s\n", strerror(errno));
        }
 
        pidfd = syscall(__NR_pidfd_open, pid, 0);
        if (pidfd < 0) {
-               perror("pidfd_open");
                if (!kill(pid, SIGKILL))
                        waitpid(pid, NULL, 0);
-               exit(KSFT_FAIL);
+               ksft_exit_fail_msg("pidfd_open: %s\n", strerror(errno));
        }
 
        /* Run negative tests which require a live child */
        run_negative_tests(pidfd);
 
-       if (kill(pid, SIGKILL)) {
-               res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
-               perror("kill");
-               exit(res);
-       }
+       if (kill(pid, SIGKILL))
+               ksft_exit_fail_msg("kill: %s\n", strerror(errno));
 
        success = (syscall(__NR_process_mrelease, pidfd, 0) == 0);
        if (!success) {
@@ -169,18 +159,15 @@ retry:
                if (errno == ESRCH) {
                        retry = (size <= MAX_SIZE_MB);
                } else {
-                       res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL);
-                       perror("process_mrelease");
                        waitpid(pid, NULL, 0);
-                       exit(res);
+                       ksft_exit_fail_msg("process_mrelease: %s\n", strerror(errno));
                }
        }
 
        /* Cleanup to prevent zombies */
-       if (waitpid(pid, NULL, 0) < 0) {
-               perror("waitpid");
-               exit(KSFT_FAIL);
-       }
+       if (waitpid(pid, NULL, 0) < 0)
+               ksft_exit_fail_msg("waitpid: %s\n", strerror(errno));
+
        close(pidfd);
 
        if (!success) {
@@ -188,11 +175,10 @@ retry:
                        size *= 2;
                        goto retry;
                }
-               printf("All process_mrelease attempts failed!\n");
-               exit(KSFT_FAIL);
+               ksft_exit_fail_msg("All process_mrelease attempts failed!\n");
        }
 
-       printf("Success reaping a child with %zuMB of memory allocations\n",
-              size);
-       return KSFT_PASS;
+       ksft_test_result_pass("Success reaping a child with %zuMB of memory allocations\n",
+                             size);
+       ksft_finished();
 }
index a06e73ec856823a9899f7a9234db1aae52d1375f..1d75084b9ca56ba0b3101b295a3f7338eca48c6f 100644 (file)
@@ -27,14 +27,14 @@ static void dump_maps(void)
        system(cmd);
 }
 
-#define BUG_ON(condition, description)                                       \
-       do {                                                                  \
-               if (condition) {                                              \
-                       fprintf(stderr, "[FAIL]\t%s():%d\t%s:%s\n", __func__, \
-                               __LINE__, (description), strerror(errno));    \
-                       dump_maps();                                      \
-                       exit(1);                                              \
-               }                                                             \
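+/* dump_maps() must run first: ksft_exit_fail_msg() does not return. */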
+#define BUG_ON(condition, description)                                         \
+       do {                                                                    \
+               if (condition) {                                                \
+                       dump_maps();                                            \
+                       ksft_exit_fail_msg("[FAIL]\t%s:%d\t%s:%s\n",            \
+                                          __func__, __LINE__, (description),   \
+                                          strerror(errno));                    \
+               }                                                               \
        } while (0)
 
 // Try a simple operation to "test" for kernel support; this prevents
@@ -122,6 +122,7 @@ static void mremap_dontunmap_simple()
               "unable to unmap destination mapping");
        BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
               "unable to unmap source mapping");
+       ksft_test_result_pass("%s\n", __func__);
 }
 
 // This test validates that MREMAP_DONTUNMAP on a shared mapping works as expected.
@@ -173,6 +174,7 @@ static void mremap_dontunmap_simple_shmem()
               "unable to unmap destination mapping");
        BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
               "unable to unmap source mapping");
+       ksft_test_result_pass("%s\n", __func__);
 }
 
 // This test validates MREMAP_DONTUNMAP will move page tables to a specific
@@ -219,6 +221,7 @@ static void mremap_dontunmap_simple_fixed()
               "unable to unmap destination mapping");
        BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
               "unable to unmap source mapping");
+       ksft_test_result_pass("%s\n", __func__);
 }
 
 // This test validates that we can MREMAP_DONTUNMAP for a portion of an
@@ -269,6 +272,7 @@ static void mremap_dontunmap_partial_mapping()
               "unable to unmap destination mapping");
        BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
               "unable to unmap source mapping");
+       ksft_test_result_pass("%s\n", __func__);
 }
 
 // This test validates that we can remap over only a portion of a mapping.
@@ -328,19 +332,24 @@ static void mremap_dontunmap_partial_mapping_overwrite(void)
               "unable to unmap destination mapping");
        BUG_ON(munmap(source_mapping, 5 * page_size) == -1,
               "unable to unmap source mapping");
+       ksft_test_result_pass("%s\n", __func__);
 }
 
 int main(void)
 {
+       ksft_print_header();
+
        page_size = sysconf(_SC_PAGE_SIZE);
 
        // test for kernel support for MREMAP_DONTUNMAP skipping the test if
        // not.
        if (kernel_support_for_mremap_dontunmap() != 0) {
-               printf("No kernel support for MREMAP_DONTUNMAP\n");
-               return KSFT_SKIP;
+               ksft_print_msg("No kernel support for MREMAP_DONTUNMAP\n");
+               ksft_finished();
        }
 
+       ksft_set_plan(5);
+
        // Keep a page sized buffer around for when we need it.
        page_buffer =
            mmap(NULL, page_size, PROT_READ | PROT_WRITE,
@@ -356,6 +365,5 @@ int main(void)
        BUG_ON(munmap(page_buffer, page_size) == -1,
               "unable to unmap page buffer");
 
-       printf("OK\n");
-       return 0;
+       ksft_finished();
 }
index b5888d613f34ebe7f98017016f7a9d753452cf45..431c1277d83a1dcd6907c9a4ccaaa9b6227d4c76 100644 (file)
@@ -5,40 +5,38 @@
 #include <string.h>
 #include <sys/time.h>
 #include <sys/resource.h>
+#include "../kselftest.h"
 
-static int test_limit(void)
+static void test_limit(void)
 {
-       int ret = 1;
        struct rlimit lims;
        void *map;
 
-       if (getrlimit(RLIMIT_MEMLOCK, &lims)) {
-               perror("getrlimit");
-               return ret;
-       }
+       if (getrlimit(RLIMIT_MEMLOCK, &lims))
+               ksft_exit_fail_msg("getrlimit: %s\n", strerror(errno));
 
-       if (mlockall(MCL_ONFAULT | MCL_FUTURE)) {
-               perror("mlockall");
-               return ret;
-       }
+       if (mlockall(MCL_ONFAULT | MCL_FUTURE))
+               ksft_exit_fail_msg("mlockall: %s\n", strerror(errno));
 
        map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
+
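+       /* With MCL_ONFAULT | MCL_FUTURE in force, populating a mapping of
+        * twice RLIMIT_MEMLOCK must fail for an unprivileged process. */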
+       ksft_test_result(map == MAP_FAILED, "mmap failed as expected, respecting the mlock limit\n");
+
        if (map != MAP_FAILED)
-               printf("mmap should have failed, but didn't\n");
-       else {
-               ret = 0;
                munmap(map, 2 * lims.rlim_max);
-       }
-
        munlockall();
-       return ret;
 }
 
 int main(int argc, char **argv)
 {
-       int ret = 0;
+       ksft_print_header();
+       ksft_set_plan(1);
+
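+       /* root is not bound by RLIMIT_MEMLOCK, so the check is meaningful
+        * only for an unprivileged user. */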
+       if (!getuid())
+               ksft_test_result_skip("The test must be run as a normal user\n");
+       else
+               test_limit();
 
-       ret += test_limit();
-       return ret;
+       ksft_finished();
 }
index 48dc151f8fca8ab8a1ef300860cb59e0f491a2a8..f822ae31af22e20103900084f71e280e182a310a 100644 (file)
@@ -54,6 +54,7 @@ int test_nr;
 u64 shadow_pkey_reg;
 int dprint_in_signal;
 char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+char buf[256];
 
 void cat_into_file(char *str, char *file)
 {
@@ -1744,6 +1745,38 @@ void pkey_setup_shadow(void)
        shadow_pkey_reg = __read_pkey_reg();
 }
 
+void restore_settings_atexit(void)
+{
+       cat_into_file(buf, "/proc/sys/vm/nr_hugepages");
+}
+
+void save_settings(void)
+{
+       int fd;
+       int err;
+
+       if (geteuid())
+               return;
+
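+       /* Only root can write nr_hugepages back at exit, so only root
+        * bothers saving it. */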
+       fd = open("/proc/sys/vm/nr_hugepages", O_RDONLY);
+       if (fd < 0) {
+               fprintf(stderr, "error opening\n");
+               perror("error: ");
+               exit(__LINE__);
+       }
+
+       /* -1 to guarantee leaving the trailing \0 */
+       err = read(fd, buf, sizeof(buf)-1);
+       if (err < 0) {
+               fprintf(stderr, "error reading\n");
+               perror("error: ");
+               exit(__LINE__);
+       }
+
+       atexit(restore_settings_atexit);
+       close(fd);
+}
+
 int main(void)
 {
        int nr_iterations = 22;
@@ -1751,6 +1784,7 @@ int main(void)
 
        srand((unsigned int)time(NULL));
 
+       save_settings();
        setup_handlers();
 
        printf("has pkeys: %d\n", pkeys_supported);
index 246d53a5d7f287756795ca6fc73103018402ddfa..c2c542fe7b17bb6a8b8b59b4a4de79ba26a07043 100755 (executable)
@@ -15,10 +15,11 @@ usage() {
        cat <<EOF
 usage: ${BASH_SOURCE[0]:-$0} [ options ]
 
-  -a: run all tests, including extra ones
+  -a: run all tests, including extra ones (other than destructive ones)
   -t: specify specific categories to tests to run
   -h: display this message
   -n: disable TAP output
+  -d: run destructive tests
 
 The default behavior is to run required tests only.  If -a is specified,
 will run all tests.
@@ -64,6 +65,8 @@ separated by spaces:
        test copy-on-write semantics
 - thp
        test transparent huge pages
+- hugetlb
+       test hugetlbfs huge pages
 - migration
        invoke move_pages(2) to exercise the migration entry code
        paths in the kernel
@@ -79,6 +82,7 @@ EOF
 }
 
 RUN_ALL=false
+RUN_DESTRUCTIVE=false
 TAP_PREFIX="# "
 
-while getopts "aht:n" OPT; do
+while getopts "adht:n" OPT; do
@@ -87,6 +91,7 @@ while getopts "aht:n" OPT; do
                "h") usage ;;
                "t") VM_SELFTEST_ITEMS=${OPTARG} ;;
                "n") TAP_PREFIX= ;;
+               "d") RUN_DESTRUCTIVE=true ;;
        esac
 done
 shift $((OPTIND -1))
@@ -173,7 +178,6 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
        if [ "$freepgs" -lt "$needpgs" ]; then
                printf "Not enough huge pages available (%d < %d)\n" \
                       "$freepgs" "$needpgs"
-               exit 1
        fi
 else
        echo "no hugetlbfs support in kernel?"
@@ -206,6 +210,15 @@ pretty_name() {
 # Usage: run_test [test binary] [arbitrary test arguments...]
 run_test() {
        if test_selected ${CATEGORY}; then
+               # On memory constrained systems some tests can fail to allocate hugepages.
+               # Perform some cleanup before the test for a higher success rate.
+               if [ "${CATEGORY}" == "thp" ] || [ "${CATEGORY}" == "hugetlb" ]; then
+                       echo 3 > /proc/sys/vm/drop_caches
+                       sleep 2
+                       echo 1 > /proc/sys/vm/compact_memory
+                       sleep 2
+               fi
+
                local test=$(pretty_name "$*")
                local title="running $*"
                local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
@@ -253,6 +266,7 @@ nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
 # For this test, we need one and just one huge page
 echo 1 > /proc/sys/vm/nr_hugepages
 CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map
 # Restore the previous number of huge pages, since further tests rely on it
 echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
 
@@ -291,7 +305,12 @@ echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
 
 CATEGORY="compaction" run_test ./compaction_test
 
-CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit
+if command -v sudo &> /dev/null;
+then
+       CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit
+else
+       echo "# SKIP ./on-fault-limit"
+fi
 
 CATEGORY="mmap" run_test ./map_populate
 
@@ -304,6 +323,11 @@ CATEGORY="process_mrelease" run_test ./mrelease_test
 CATEGORY="mremap" run_test ./mremap_test
 
 CATEGORY="hugetlb" run_test ./thuge-gen
+CATEGORY="hugetlb" run_test ./charge_reserved_hugetlb.sh -cgroup-v2
+CATEGORY="hugetlb" run_test ./hugetlb_reparenting_test.sh -cgroup-v2
+if $RUN_DESTRUCTIVE; then
+CATEGORY="hugetlb" run_test ./hugetlb-read-hwpoison
+fi
 
 if [ $VADDR64 -ne 0 ]; then
 
@@ -387,7 +411,27 @@ CATEGORY="thp" run_test ./khugepaged -s 2
 
 CATEGORY="thp" run_test ./transhuge-stress -d 20
 
-CATEGORY="thp" run_test ./split_huge_page_test
+# Try to create XFS if not provided
+if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then
+    if test_selected "thp"; then
+        if grep xfs /proc/filesystems &>/dev/null; then
+            XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX)
+            SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX)
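+            # Back the mount with a 300MB sparse image and loop-mount it;
+            # XFS supports the large pagecache folios the split test needs.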
+            truncate -s 314572800 ${XFS_IMG}
+            mkfs.xfs -q ${XFS_IMG}
+            mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+            MOUNTED_XFS=1
+        fi
+    fi
+fi
+
+CATEGORY="thp" run_test ./split_huge_page_test ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+
+if [ -n "${MOUNTED_XFS}" ]; then
+    umount ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+    rmdir ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+    rm -f ${XFS_IMG}
+fi
 
 CATEGORY="migration" run_test ./migration
 
index 0e74635c8c3d97fca497e73ac1e41a17f1677a0e..856662d2f87a1b0db004c3e4297d3e5697424d62 100644 (file)
 #include <sys/mount.h>
 #include <malloc.h>
 #include <stdbool.h>
+#include <time.h>
 #include "vm_util.h"
+#include "../kselftest.h"
 
 uint64_t pagesize;
 unsigned int pageshift;
 uint64_t pmd_pagesize;
 
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
+#define SMAP_PATH "/proc/self/smaps"
 #define INPUT_MAX 80
 
-#define PID_FMT "%d,0x%lx,0x%lx"
-#define PATH_FMT "%s,0x%lx,0x%lx"
+#define PID_FMT "%d,0x%lx,0x%lx,%d"
+#define PATH_FMT "%s,0x%lx,0x%lx,%d"
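+/* The trailing %d is the order to split the folio to, passed through to
+ * the split_huge_pages debugfs interface. */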
 
 #define PFN_MASK     ((1UL<<55)-1)
 #define KPF_THP      (1UL<<22)
@@ -50,21 +53,19 @@ int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
        return 0;
 }
 
-static int write_file(const char *path, const char *buf, size_t buflen)
+static void write_file(const char *path, const char *buf, size_t buflen)
 {
        int fd;
        ssize_t numwritten;
 
        fd = open(path, O_WRONLY);
        if (fd == -1)
-               return 0;
+               ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno));
 
        numwritten = write(fd, buf, buflen - 1);
        close(fd);
        if (numwritten < 1)
-               return 0;
-
-       return (unsigned int) numwritten;
+               ksft_exit_fail_msg("Write failed\n");
 }
 
 static void write_debugfs(const char *fmt, ...)
@@ -77,15 +78,10 @@ static void write_debugfs(const char *fmt, ...)
        ret = vsnprintf(input, INPUT_MAX, fmt, argp);
        va_end(argp);
 
-       if (ret >= INPUT_MAX) {
-               printf("%s: Debugfs input is too long\n", __func__);
-               exit(EXIT_FAILURE);
-       }
+       if (ret >= INPUT_MAX)
+               ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__);
 
-       if (!write_file(SPLIT_DEBUGFS, input, ret + 1)) {
-               perror(SPLIT_DEBUGFS);
-               exit(EXIT_FAILURE);
-       }
+       write_file(SPLIT_DEBUGFS, input, ret + 1);
 }
 
 void split_pmd_thp(void)
@@ -95,39 +91,30 @@ void split_pmd_thp(void)
        size_t i;
 
        one_page = memalign(pmd_pagesize, len);
-
-       if (!one_page) {
-               printf("Fail to allocate memory\n");
-               exit(EXIT_FAILURE);
-       }
+       if (!one_page)
+               ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
 
        madvise(one_page, len, MADV_HUGEPAGE);
 
        for (i = 0; i < len; i++)
                one_page[i] = (char)i;
 
-       if (!check_huge_anon(one_page, 4, pmd_pagesize)) {
-               printf("No THP is allocated\n");
-               exit(EXIT_FAILURE);
-       }
+       if (!check_huge_anon(one_page, 4, pmd_pagesize))
+               ksft_exit_fail_msg("No THP is allocated\n");
 
        /* split all THPs */
        write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
-               (uint64_t)one_page + len);
+               (uint64_t)one_page + len, 0);
 
        for (i = 0; i < len; i++)
-               if (one_page[i] != (char)i) {
-                       printf("%ld byte corrupted\n", i);
-                       exit(EXIT_FAILURE);
-               }
+               if (one_page[i] != (char)i)
+                       ksft_exit_fail_msg("byte %zu corrupted\n", i);
 
 
-       if (!check_huge_anon(one_page, 0, pmd_pagesize)) {
-               printf("Still AnonHugePages not split\n");
-               exit(EXIT_FAILURE);
-       }
+       if (!check_huge_anon(one_page, 0, pmd_pagesize))
+               ksft_exit_fail_msg("Still AnonHugePages not split\n");
 
-       printf("Split huge pages successful\n");
+       ksft_test_result_pass("Split huge pages successful\n");
        free(one_page);
 }
 
@@ -143,36 +130,29 @@ void split_pte_mapped_thp(void)
        int pagemap_fd;
        int kpageflags_fd;
 
-       if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) {
-               perror("get pagemap proc error");
-               exit(EXIT_FAILURE);
-       }
-       pagemap_fd = open(pagemap_proc, O_RDONLY);
+       if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0)
+               ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno));
 
-       if (pagemap_fd == -1) {
-               perror("read pagemap:");
-               exit(EXIT_FAILURE);
-       }
+       pagemap_fd = open(pagemap_proc, O_RDONLY);
+       if (pagemap_fd == -1)
+               ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno));
 
        kpageflags_fd = open(kpageflags_proc, O_RDONLY);
-
-       if (kpageflags_fd == -1) {
-               perror("read kpageflags:");
-               exit(EXIT_FAILURE);
-       }
+       if (kpageflags_fd == -1)
+               ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno));
 
        one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE,
                        MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+       if (one_page == MAP_FAILED)
+               ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
 
        madvise(one_page, len, MADV_HUGEPAGE);
 
        for (i = 0; i < len; i++)
                one_page[i] = (char)i;
 
-       if (!check_huge_anon(one_page, 4, pmd_pagesize)) {
-               printf("No THP is allocated\n");
-               exit(EXIT_FAILURE);
-       }
+       if (!check_huge_anon(one_page, 4, pmd_pagesize))
+               ksft_exit_fail_msg("No THP is allocated\n");
 
        /* remap the first pagesize of first THP */
        pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE);
@@ -183,10 +163,8 @@ void split_pte_mapped_thp(void)
                                     pagesize, pagesize,
                                     MREMAP_MAYMOVE|MREMAP_FIXED,
                                     pte_mapped + pagesize * i);
-               if (pte_mapped2 == (char *)-1) {
-                       perror("mremap failed");
-                       exit(EXIT_FAILURE);
-               }
+               if (pte_mapped2 == MAP_FAILED)
+                       ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno));
        }
 
        /* smap does not show THPs after mremap, use kpageflags instead */
@@ -196,33 +174,28 @@ void split_pte_mapped_thp(void)
                    is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd))
                        thp_size++;
 
-       if (thp_size != 4) {
-               printf("Some THPs are missing during mremap\n");
-               exit(EXIT_FAILURE);
-       }
+       if (thp_size != 4)
+               ksft_exit_fail_msg("Some THPs are missing during mremap\n");
 
        /* split all remapped THPs */
        write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
-                     (uint64_t)pte_mapped + pagesize * 4);
+                     (uint64_t)pte_mapped + pagesize * 4, 0);
 
        /* smap does not show THPs after mremap, use kpageflags instead */
        thp_size = 0;
        for (i = 0; i < pagesize * 4; i++) {
-               if (pte_mapped[i] != (char)i) {
-                       printf("%ld byte corrupted\n", i);
-                       exit(EXIT_FAILURE);
-               }
+               if (pte_mapped[i] != (char)i)
+                       ksft_exit_fail_msg("byte %zu corrupted\n", i);
+
                if (i % pagesize == 0 &&
                    is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd))
                        thp_size++;
        }
 
-       if (thp_size) {
-               printf("Still %ld THPs not split\n", thp_size);
-               exit(EXIT_FAILURE);
-       }
+       if (thp_size)
+               ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size);
 
-       printf("Split PTE-mapped huge pages successful\n");
+       ksft_test_result_pass("Split PTE-mapped huge pages successful\n");
        munmap(one_page, len);
        close(pagemap_fd);
        close(kpageflags_fd);
@@ -238,24 +211,21 @@ void split_file_backed_thp(void)
        char testfile[INPUT_MAX];
        uint64_t pgoff_start = 0, pgoff_end = 1024;
 
-       printf("Please enable pr_debug in split_huge_pages_in_file() if you need more info.\n");
+       ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n");
 
        status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");
 
-       if (status) {
-               printf("Unable to create a tmpfs for testing\n");
-               exit(EXIT_FAILURE);
-       }
+       if (status)
+               ksft_exit_fail_msg("Unable to create a tmpfs for testing\n");
 
        status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);
        if (status >= INPUT_MAX) {
-               printf("Fail to create file-backed THP split testing file\n");
-               goto cleanup;
+               ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n");
        }
 
        fd = open(testfile, O_CREAT|O_WRONLY);
        if (fd == -1) {
-               perror("Cannot open testing file\n");
+               ksft_perror("Cannot open testing file");
                goto cleanup;
        }
 
@@ -264,50 +234,213 @@ void split_file_backed_thp(void)
        close(fd);
 
        if (num_written < 1) {
-               printf("Fail to write data to testing file\n");
+               ksft_perror("Fail to write data to testing file");
                goto cleanup;
        }
 
        /* split the file-backed THP */
-       write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end);
+       write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0);
 
        status = unlink(testfile);
-       if (status)
-               perror("Cannot remove testing file\n");
+       if (status) {
+               ksft_perror("Cannot remove testing file");
+               goto cleanup;
+       }
 
-cleanup:
        status = umount(tmpfs_loc);
        if (status) {
-               printf("Unable to umount %s\n", tmpfs_loc);
-               exit(EXIT_FAILURE);
+               rmdir(tmpfs_loc);
+               ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc);
        }
+
        status = rmdir(tmpfs_loc);
-       if (status) {
-               perror("cannot remove tmp dir");
-               exit(EXIT_FAILURE);
+       if (status)
+               ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno));
+
+       ksft_print_msg("Please check dmesg for more information\n");
+       ksft_test_result_pass("File-backed THP split test done\n");
+       return;
+
+cleanup:
+       umount(tmpfs_loc);
+       rmdir(tmpfs_loc);
+       ksft_exit_fail_msg("Error occurred\n");
+}
+
+bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
+               const char **thp_fs_loc)
+{
+       if (xfs_path) {
+               *thp_fs_loc = xfs_path;
+               return false;
+       }
+
+       *thp_fs_loc = mkdtemp(thp_fs_template);
+
+       if (!*thp_fs_loc)
+               ksft_exit_fail_msg("cannot create temp folder\n");
+
+       return true;
+}
+
+void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
+{
+       int status;
+
+       if (!created_tmp)
+               return;
+
+       status = rmdir(thp_fs_loc);
+       if (status)
+               ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
+                                  strerror(errno));
+}
+
+int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
+               char **addr)
+{
+       size_t i;
+       int dummy = 0;
+
+       srand(time(NULL));
+
+       *fd = open(testfile, O_CREAT | O_RDWR, 0664);
+       if (*fd == -1)
+               ksft_exit_fail_msg("Failed to create a file at %s\n", testfile);
+
+       for (i = 0; i < fd_size; i++) {
+               unsigned char byte = (unsigned char)i;
+
+               write(*fd, &byte, sizeof(byte));
+       }
+       close(*fd);
+       sync();
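+       /* Drop clean caches so the data must be read back in, giving the
+        * filesystem a chance to allocate large pagecache folios. */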
+       *fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+       if (*fd == -1) {
+               ksft_perror("open drop_caches");
+               goto err_out_unlink;
+       }
+       if (write(*fd, "3", 1) != 1) {
+               ksft_perror("write to drop_caches");
+               goto err_out_unlink;
+       }
+       close(*fd);
+
+       *fd = open(testfile, O_RDWR);
+       if (*fd == -1) {
+               ksft_perror("Failed to open testfile");
+               goto err_out_unlink;
+       }
+
+       *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0);
+       if (*addr == MAP_FAILED) {
+               ksft_perror("cannot mmap");
+               goto err_out_close;
+       }
+       madvise(*addr, fd_size, MADV_HUGEPAGE);
+
+       for (size_t i = 0; i < fd_size; i++)
+               dummy += *(*addr + i);
+
+       if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
+               ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
+               munmap(*addr, fd_size);
+               close(*fd);
+               unlink(testfile);
+               ksft_test_result_skip("Pagecache folio split skipped\n");
+               return -2;
+       }
+       return 0;
+err_out_close:
+       close(*fd);
+err_out_unlink:
+       unlink(testfile);
+       ksft_exit_fail_msg("Failed to create large pagecache folios\n");
+       return -1;
+}
+
+void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc)
+{
+       int fd;
+       char *addr;
+       size_t i;
+       char testfile[INPUT_MAX];
+       int err = 0;
+
+       err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc);
+
+       if (err < 0 || err >= INPUT_MAX)
+               ksft_exit_fail_msg("cannot generate test file name\n");
+
+       err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+       if (err)
+               return;
+       err = 0;
+
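+       /* Ask the kernel to split the PMD-mapped pagecache folio down to
+        * the requested order. */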
+       write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
+
+       for (i = 0; i < fd_size; i++)
+               if (*(addr + i) != (char)i) {
+                       ksft_print_msg("%lu byte corrupted in the file\n", i);
+                       err = EXIT_FAILURE;
+                       goto out;
+               }
+
+       if (!check_huge_file(addr, 0, pmd_pagesize)) {
+               ksft_print_msg("Still FilePmdMapped not split\n");
+               err = EXIT_FAILURE;
+               goto out;
        }
 
-       printf("file-backed THP split test done, please check dmesg for more information\n");
+out:
+       munmap(addr, fd_size);
+       close(fd);
+       unlink(testfile);
+       if (err)
+               ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order);
+       ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order);
 }
 
 int main(int argc, char **argv)
 {
+       int i;
+       size_t fd_size;
+       char *optional_xfs_path = NULL;
+       char fs_loc_template[] = "/tmp/thp_fs_XXXXXX";
+       const char *fs_loc;
+       bool created_tmp;
+
+       ksft_print_header();
+
        if (geteuid() != 0) {
-               printf("Please run the benchmark as root\n");
-               exit(EXIT_FAILURE);
+               ksft_print_msg("Please run the benchmark as root\n");
+               ksft_finished();
        }
 
+       if (argc > 1)
+               optional_xfs_path = argv[1];
+
+       ksft_set_plan(3+9);
+
        pagesize = getpagesize();
        pageshift = ffs(pagesize) - 1;
        pmd_pagesize = read_pmd_pagesize();
-       if (!pmd_pagesize) {
-               printf("Reading PMD pagesize failed\n");
-               exit(EXIT_FAILURE);
-       }
+       if (!pmd_pagesize)
+               ksft_exit_fail_msg("Reading PMD pagesize failed\n");
+
+       fd_size = 2 * pmd_pagesize;
 
        split_pmd_thp();
        split_pte_mapped_thp();
        split_file_backed_thp();
 
+       created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template,
+                       &fs_loc);
+       for (i = 8; i >= 0; i--)
+               split_thp_in_pagecache_to_order(fd_size, i, fs_loc);
+       cleanup_thp_fs(fs_loc, created_tmp);
+
+       ksft_finished();
+
        return 0;
 }
index 622987f12c89a341ea7c85421ff80b3c0c52fac2..ea7fd8fe287630928ab449d93a8ef2a0266eb819 100644 (file)
@@ -4,7 +4,7 @@
    Before running this huge pages for each huge page size must have been
    reserved.
    For large pages beyond MAX_PAGE_ORDER (like 1GB on x86) boot options must
-   be used.
+   be used. The 1GB size is not tested if such pages are unavailable.
    Also shmmax must be increased.
    And you need to run as root to work around some weird permissions in shm.
    And nothing using huge pages should run in parallel.
@@ -26,8 +26,7 @@
 #include <stdarg.h>
 #include <string.h>
 #include "vm_util.h"
-
-#define err(x) perror(x), exit(1)
+#include "../kselftest.h"
 
 #define MAP_HUGE_2MB    (21 << MAP_HUGE_SHIFT)
 #define MAP_HUGE_1GB    (30 << MAP_HUGE_SHIFT)
 #define SHM_HUGE_1GB    (30 << SHM_HUGE_SHIFT)
 
 #define NUM_PAGESIZES   5
-
 #define NUM_PAGES 4
 
-#define Dprintf(fmt...) // printf(fmt)
-
 unsigned long page_sizes[NUM_PAGESIZES];
 int num_page_sizes;
 
@@ -60,28 +56,15 @@ int ilog2(unsigned long v)
        return l;
 }
 
-void find_pagesizes(void)
-{
-       glob_t g;
-       int i;
-       glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
-       assert(g.gl_pathc <= NUM_PAGESIZES);
-       for (i = 0; i < g.gl_pathc; i++) {
-               sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
-                               &page_sizes[i]);
-               page_sizes[i] <<= 10;
-               printf("Found %luMB\n", page_sizes[i] >> 20);
-       }
-       num_page_sizes = g.gl_pathc;
-       globfree(&g);
-}
-
 void show(unsigned long ps)
 {
        char buf[100];
+
        if (ps == getpagesize())
                return;
-       printf("%luMB: ", ps >> 20);
+
+       ksft_print_msg("%luMB: ", ps >> 20);
+
        fflush(stdout);
        snprintf(buf, sizeof buf,
                "cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
@@ -105,7 +88,7 @@ unsigned long read_sysfs(int warn, char *fmt, ...)
        f = fopen(buf, "r");
        if (!f) {
                if (warn)
-                       printf("missing %s\n", buf);
+                       ksft_print_msg("missing %s\n", buf);
                return 0;
        }
        if (getline(&line, &linelen, f) > 0) {
@@ -119,123 +102,143 @@ unsigned long read_sysfs(int warn, char *fmt, ...)
 unsigned long read_free(unsigned long ps)
 {
        return read_sysfs(ps != getpagesize(),
-                       "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
-                       ps >> 10);
+                         "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+                         ps >> 10);
 }
 
 void test_mmap(unsigned long size, unsigned flags)
 {
        char *map;
        unsigned long before, after;
-       int err;
 
        before = read_free(size);
        map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE,
                        MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, -1, 0);
+       if (map == MAP_FAILED)
+               ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
 
-       if (map == (char *)-1) err("mmap");
        memset(map, 0xff, size*NUM_PAGES);
        after = read_free(size);
-       Dprintf("before %lu after %lu diff %ld size %lu\n",
-               before, after, before - after, size);
-       assert(size == getpagesize() || (before - after) == NUM_PAGES);
+
        show(size);
-       err = munmap(map, size * NUM_PAGES);
-       assert(!err);
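+       /* Touching every page must pull exactly NUM_PAGES pages from the
+        * per-size free pool; base-size mappings are exempt. */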
+       ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
+                        "%s mmap\n", __func__);
+
+       if (munmap(map, size * NUM_PAGES))
+               ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno));
 }
 
 void test_shmget(unsigned long size, unsigned flags)
 {
        int id;
        unsigned long before, after;
-       int err;
+       struct shm_info i;
+       char *map;
 
        before = read_free(size);
        id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags);
-       if (id < 0) err("shmget");
-
-       struct shm_info i;
-       if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl");
-       Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss);
+       if (id < 0) {
+               if (errno == EPERM) {
+                       ksft_test_result_skip("shmget requires root privileges: %s\n",
+                                             strerror(errno));
+                       return;
+               }
+               ksft_exit_fail_msg("shmget: %s\n", strerror(errno));
+       }
 
+       if (shmctl(id, SHM_INFO, (void *)&i) < 0)
+               ksft_exit_fail_msg("shmctl: %s\n", strerror(errno));
 
-       Dprintf("id %d\n", id);
-       char *map = shmat(id, NULL, 0600);
-       if (map == (char*)-1) err("shmat");
+       map = shmat(id, NULL, 0600);
+       if (map == MAP_FAILED)
+               ksft_exit_fail_msg("shmat: %s\n", strerror(errno));
 
        shmctl(id, IPC_RMID, NULL);
 
        memset(map, 0xff, size*NUM_PAGES);
        after = read_free(size);
 
-       Dprintf("before %lu after %lu diff %ld size %lu\n",
-               before, after, before - after, size);
-       assert(size == getpagesize() || (before - after) == NUM_PAGES);
        show(size);
-       err = shmdt(map);
-       assert(!err);
+       ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
+                        "%s: shmget\n", __func__);
+       if (shmdt(map))
+               ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno));
 }
 
-void sanity_checks(void)
+void find_pagesizes(void)
 {
-       int i;
        unsigned long largest = getpagesize();
+       int i;
+       glob_t g;
 
-       for (i = 0; i < num_page_sizes; i++) {
-               if (page_sizes[i] > largest)
+       glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
+       assert(g.gl_pathc <= NUM_PAGESIZES);
+       for (i = 0; (i < g.gl_pathc) && (num_page_sizes < NUM_PAGESIZES); i++) {
+               sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
+                               &page_sizes[num_page_sizes]);
+               page_sizes[num_page_sizes] <<= 10;
+               ksft_print_msg("Found %luMB\n", page_sizes[num_page_sizes] >> 20);
+
+               if (page_sizes[num_page_sizes] > largest)
-                       largest = page_sizes[i];
+                       largest = page_sizes[num_page_sizes];
 
-               if (read_free(page_sizes[i]) < NUM_PAGES) {
-                       printf("Not enough huge pages for page size %lu MB, need %u\n",
-                               page_sizes[i] >> 20,
-                               NUM_PAGES);
-                       exit(0);
-               }
+               if (read_free(page_sizes[num_page_sizes]) >= NUM_PAGES)
+                       num_page_sizes++;
+               else
+                       ksft_print_msg("SKIP for size %lu MB: not enough huge pages, need %u\n",
+                                      page_sizes[num_page_sizes] >> 20, NUM_PAGES);
        }
+       globfree(&g);
 
-       if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) {
-               printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES);
-               exit(0);
-       }
+       if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest)
+               ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax\n",
+                                  largest * NUM_PAGES);
 
 #if defined(__x86_64__)
        if (largest != 1U<<30) {
-               printf("No GB pages available on x86-64\n"
-                      "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
-               exit(0);
+               ksft_exit_fail_msg("No GB pages available on x86-64\n"
+                                  "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
        }
 #endif
 }
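
The sysfs discovery above can be exercised standalone; a minimal sketch using the same glob pattern and sscanf format (the paths match the test, everything else is illustrative):

#include <glob.h>
#include <stdio.h>

int main(void)
{
	glob_t g;
	unsigned long kb;
	size_t i;

	/* Each configured huge page size has a hugepages-<size>kB directory. */
	if (glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g))
		return 1;

	for (i = 0; i < g.gl_pathc; i++)
		if (sscanf(g.gl_pathv[i],
			   "/sys/kernel/mm/hugepages/hugepages-%lukB", &kb) == 1)
			printf("huge page size: %lu kB\n", kb);

	globfree(&g);
	return 0;
}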
 
 int main(void)
 {
-       int i;
        unsigned default_hps = default_huge_page_size();
+       int i;
+
+       ksft_print_header();
 
        find_pagesizes();
 
-       sanity_checks();
+       if (!num_page_sizes)
+               ksft_finished();
+
+       ksft_set_plan(2 * num_page_sizes + 3);
 
        for (i = 0; i < num_page_sizes; i++) {
                unsigned long ps = page_sizes[i];
                int arg = ilog2(ps) << MAP_HUGE_SHIFT;
-               printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg);
+
+               ksft_print_msg("Testing %luMB mmap with shift %x\n", ps >> 20, arg);
                test_mmap(ps, MAP_HUGETLB | arg);
        }
-       printf("Testing default huge mmap\n");
+
+       ksft_print_msg("Testing default huge mmap\n");
        test_mmap(default_hps, MAP_HUGETLB);
 
-       puts("Testing non-huge shmget");
+       ksft_print_msg("Testing non-huge shmget\n");
        test_shmget(getpagesize(), 0);
 
        for (i = 0; i < num_page_sizes; i++) {
                unsigned long ps = page_sizes[i];
                int arg = ilog2(ps) << SHM_HUGE_SHIFT;
-               printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg);
+               ksft_print_msg("Testing %luMB shmget with shift %x\n", ps >> 20, arg);
                test_shmget(ps, SHM_HUGETLB | arg);
        }
-       puts("default huge shmget");
+
+       ksft_print_msg("default huge shmget\n");
        test_shmget(default_hps, SHM_HUGETLB);
 
-       return 0;
+       ksft_finished();
 }
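
Both loops in main() rely on the same flag encoding: the log2 of the requested huge page size is packed into bits [31:26] of the mmap()/shmget() flags (MAP_HUGE_SHIFT and SHM_HUGE_SHIFT are both 26). A hedged sketch of that encoding in isolation, assuming a 2MB hugepage pool has been reserved:

#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_HUGE_SHIFT
#define MAP_HUGE_SHIFT 26
#endif

int main(void)
{
	size_t len = 2UL << 20;			/* one 2MB huge page */
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
		    (21 << MAP_HUGE_SHIFT);	/* log2(2MB) == 21 */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE, flags, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");			/* likely no 2MB pages reserved */
		return 1;
	}
	puts("2MB hugetlb mapping succeeded");
	munmap(p, len);
	return 0;
}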
index c61fb9350b8c21439bbf7f158210f42c8e59cefe..68201192e37c8da889f9f9ed701402c22be9b59f 100644
@@ -16,6 +16,7 @@
 #include <string.h>
 #include <sys/mman.h>
 #include "vm_util.h"
+#include "../kselftest.h"
 
 int backing_fd = -1;
 int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;
@@ -34,6 +35,8 @@ int main(int argc, char **argv)
        int pagemap_fd;
        int duration = 0;
 
+       ksft_print_header();
+
        ram = sysconf(_SC_PHYS_PAGES);
        if (ram > SIZE_MAX / psize() / 4)
                ram = SIZE_MAX / 4;
@@ -43,7 +46,8 @@ int main(int argc, char **argv)
 
        while (++i < argc) {
                if (!strcmp(argv[i], "-h"))
-                       errx(1, "usage: %s [-f <filename>] [-d <duration>] [size in MiB]", argv[0]);
+                       ksft_exit_fail_msg("usage: %s [-f <filename>] [-d <duration>] [size in MiB]\n",
+                                          argv[0]);
                else if (!strcmp(argv[i], "-f"))
                        name = argv[++i];
                else if (!strcmp(argv[i], "-d"))
@@ -52,10 +56,12 @@ int main(int argc, char **argv)
                        len = atoll(argv[i]) << 20;
        }
 
+       ksft_set_plan(1);
+
        if (name) {
                backing_fd = open(name, O_RDWR);
                if (backing_fd == -1)
-                       errx(2, "open %s", name);
+                       ksft_exit_fail_msg("open %s\n", name);
                mmap_flags = MAP_SHARED;
        }
 
@@ -65,21 +71,21 @@ int main(int argc, char **argv)
 
        pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
        if (pagemap_fd < 0)
-               err(2, "open pagemap");
+               ksft_exit_fail_msg("open pagemap\n");
 
        len -= len % HPAGE_SIZE;
        ptr = mmap(NULL, len + HPAGE_SIZE, PROT_RW, mmap_flags, backing_fd, 0);
        if (ptr == MAP_FAILED)
-               err(2, "initial mmap");
+               ksft_exit_fail_msg("initial mmap");
        ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE;
 
        if (madvise(ptr, len, MADV_HUGEPAGE))
-               err(2, "MADV_HUGEPAGE");
+               ksft_exit_fail_msg("MADV_HUGEPAGE");
 
        map_len = ram >> (HPAGE_SHIFT - 1);
        map = malloc(map_len);
        if (!map)
-               errx(2, "map malloc");
+               ksft_exit_fail_msg("map malloc\n");
 
        clock_gettime(CLOCK_MONOTONIC, &start);
 
@@ -103,7 +109,7 @@ int main(int argc, char **argv)
                                if (idx >= map_len) {
                                        map = realloc(map, idx + 1);
                                        if (!map)
-                                               errx(2, "map realloc");
+                                               ksft_exit_fail_msg("map realloc\n");
                                        memset(map + map_len, 0, idx + 1 - map_len);
                                        map_len = idx + 1;
                                }
@@ -114,17 +120,19 @@ int main(int argc, char **argv)
 
                        /* split transhuge page, keep last page */
                        if (madvise(p, HPAGE_SIZE - psize(), MADV_DONTNEED))
-                               err(2, "MADV_DONTNEED");
+                               ksft_exit_fail_msg("MADV_DONTNEED");
                }
                clock_gettime(CLOCK_MONOTONIC, &b);
                s = b.tv_sec - a.tv_sec + (b.tv_nsec - a.tv_nsec) / 1000000000.;
 
-               warnx("%.3f s/loop, %.3f ms/page, %10.3f MiB/s\t"
-                     "%4d succeed, %4d failed, %4d different pages",
-                     s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20),
-                     nr_succeed, nr_failed, nr_pages);
+               ksft_print_msg("%.3f s/loop, %.3f ms/page, %10.3f MiB/s\t"
+                              "%4d succeed, %4d failed, %4d different pages\n",
+                              s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20),
+                              nr_succeed, nr_failed, nr_pages);
 
-               if (duration > 0 && b.tv_sec - start.tv_sec >= duration)
-                       return 0;
+               if (duration > 0 && b.tv_sec - start.tv_sec >= duration) {
+                       ksft_test_result_pass("Completed\n");
+                       ksft_finished();
+               }
        }
 }
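
The loop above judges THP allocation by reading /proc/self/pagemap. A standalone sketch of that lookup, assuming the documented pagemap layout (one 64-bit entry per virtual page; bit 63 is "present", bits 0-54 hold the PFN, which reads back as zero without CAP_SYS_ADMIN):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t ent;
	int fd = open("/proc/self/pagemap", O_RDONLY);
	uintptr_t vaddr = (uintptr_t)&ent;	/* any mapped address works */
	off_t off = (off_t)(vaddr / getpagesize()) * sizeof(ent);

	if (fd < 0 || pread(fd, &ent, sizeof(ent), off) != sizeof(ent))
		return 1;

	printf("present=%d pfn=0x%llx\n", (int)(ent >> 63),
	       (unsigned long long)(ent & ((1ULL << 55) - 1)));
	close(fd);
	return 0;
}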
index 7e83829bbb335b98022c354a6871e8f6340e7328..f78bab0f3d458feb6a7a92e078738cb09c825ea6 100644
@@ -441,6 +441,12 @@ int main(int argc, char **argv)
        parse_test_type_arg(argv[1]);
        bytes = atol(argv[2]) * 1024 * 1024;
 
+       if (test_type == TEST_HUGETLB &&
+          get_free_hugepages() < bytes / page_size) {
+               printf("skip: Skipping userfaultfd... not enough hugepages\n");
+               return KSFT_SKIP;
+       }
+
        nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 
        nr_pages_per_cpu = bytes / page_size / nr_cpus;
index bae0ceaf95b13baeb997c720d6b4244948b98d51..7bcf8d48256a66b4e23165145e873cef9d6ec99c 100644
@@ -12,6 +12,7 @@
 #include <errno.h>
 #include <sys/mman.h>
 #include <sys/time.h>
+#include "../kselftest.h"
 
 /*
  * Maximum address range mapped with a single mmap()
@@ -68,23 +69,15 @@ static char *hind_addr(void)
        return (char *) (1UL << bits);
 }
 
-static int validate_addr(char *ptr, int high_addr)
+static void validate_addr(char *ptr, int high_addr)
 {
        unsigned long addr = (unsigned long) ptr;
 
-       if (high_addr) {
-               if (addr < HIGH_ADDR_MARK) {
-                       printf("Bad address %lx\n", addr);
-                       return 1;
-               }
-               return 0;
-       }
+       if (high_addr && addr < HIGH_ADDR_MARK)
+               ksft_exit_fail_msg("Bad address %lx\n", addr);
 
-       if (addr > HIGH_ADDR_MARK) {
-               printf("Bad address %lx\n", addr);
-               return 1;
-       }
-       return 0;
+       if (addr > HIGH_ADDR_MARK)
+               ksft_exit_fail_msg("Bad address %lx\n", addr);
 }
 
 static int validate_lower_address_hint(void)
@@ -107,23 +100,29 @@ int main(int argc, char *argv[])
        char *hint;
        unsigned long i, lchunks, hchunks;
 
+       ksft_print_header();
+       ksft_set_plan(1);
+
        for (i = 0; i < NR_CHUNKS_LOW; i++) {
                ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
                                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 
                if (ptr[i] == MAP_FAILED) {
-                       if (validate_lower_address_hint())
-                               return 1;
+                       if (validate_lower_address_hint()) {
+                               ksft_test_result_skip("Memory constraint not fulfilled\n");
+                               ksft_finished();
+                       }
                        break;
                }
 
-               if (validate_addr(ptr[i], 0))
-                       return 1;
+               validate_addr(ptr[i], 0);
        }
        lchunks = i;
        hptr = (char **) calloc(NR_CHUNKS_HIGH, sizeof(char *));
-       if (hptr == NULL)
-               return 1;
+       if (hptr == NULL) {
+               ksft_test_result_skip("Memory constraint not fulfilled\n");
+               ksft_finished();
+       }
 
        for (i = 0; i < NR_CHUNKS_HIGH; i++) {
                hint = hind_addr();
@@ -133,8 +132,7 @@ int main(int argc, char *argv[])
                if (hptr[i] == MAP_FAILED)
                        break;
 
-               if (validate_addr(hptr[i], 1))
-                       return 1;
+               validate_addr(hptr[i], 1);
        }
        hchunks = i;
 
@@ -145,5 +143,7 @@ int main(int argc, char *argv[])
                munmap(hptr[i], MAP_CHUNK_SIZE);
 
        free(hptr);
-       return 0;
+
+       ksft_test_result_pass("Test\n");
+       ksft_finished();
 }
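
The high-address half of this test depends on mmap() honoring a hint above the default 47-bit userspace limit, which the kernel only does when the caller explicitly passes such a hint. A sketch with a hypothetical fixed 48-bit hint (the test derives its hints from hind_addr(), and actual placement still depends on the architecture and kernel configuration):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	void *hint = (void *)(1UL << 48);	/* above the 47-bit default */
	size_t len = 1UL << 20;
	void *p = mmap(hint, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("mapped at %p (hint %p)\n", p, hint);
	munmap(p, len);
	return 0;
}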
index 05736c615734fe9043c8378c1c06fc20e83c48c6..5a62530da3b563b38164fb9491bcc996cd323e33 100644
@@ -232,17 +232,17 @@ int64_t allocate_transhuge(void *ptr, int pagemap_fd)
        if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
                 MAP_FIXED | MAP_ANONYMOUS |
                 MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
-               errx(2, "mmap transhuge");
+               ksft_exit_fail_msg("mmap transhuge\n");
 
        if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
-               err(2, "MADV_HUGEPAGE");
+               ksft_exit_fail_msg("MADV_HUGEPAGE\n");
 
        /* allocate transparent huge page */
        *(volatile void **)ptr = ptr;
 
        if (pread(pagemap_fd, ent, sizeof(ent),
                  (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent))
-               err(2, "read pagemap");
+               ksft_exit_fail_msg("read pagemap\n");
 
        if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
            PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
index a89f1fbf86ecbfb42a1dd949fcb360a9b8df4539..1d293ab7718507d4f4e2dd3fdd8fa4f113c78ae6 100644
 /* Default to taking the first of any alternative feature sections */
 test_feature = 1
 
+#define DCBT_SETUP_STREAMS(from, from_parms, to, to_parms, scratch)    \
+       lis     scratch,0x8000; /* GO=1 */                              \
+       clrldi  scratch,scratch,32;                                     \
+       /* setup read stream 0 */                                       \
+       dcbt    0,from,0b01000;         /* addr from */                 \
+       dcbt    0,from_parms,0b01010;   /* length and depth from */     \
+       /* setup write stream 1 */                                      \
+       dcbtst  0,to,0b01000;           /* addr to */                   \
+       dcbtst  0,to_parms,0b01010;     /* length and depth to */       \
+       eieio;                                                          \
+       dcbt    0,scratch,0b01010;      /* all streams GO */
+
 #endif /* __SELFTESTS_POWERPC_PPC_ASM_H */
index 184dab4ee871c6d2f54e1b095fab43c5f9f97e9a..29b73eedfe741a43b231a8ca91fc5ef58add7d0b 100644
@@ -1,9 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # KVM common configuration items and defaults
 
-config HAVE_KVM
-       bool
-
 config KVM_COMMON
        bool
        select EVENTFD
@@ -55,6 +52,9 @@ config KVM_ASYNC_PF_SYNC
 config HAVE_KVM_MSI
        bool
 
+config HAVE_KVM_READONLY_MEM
+       bool
+
 config HAVE_KVM_CPU_RELAX_INTERCEPT
        bool
 
@@ -73,6 +73,7 @@ config KVM_COMPAT
 
 config HAVE_KVM_IRQ_BYPASS
        bool
+       select IRQ_BYPASS_MANAGER
 
 config HAVE_KVM_VCPU_ASYNC_IOCTL
        bool
index e033c79d528e0040e88fdd02f6eec3c6d6c00213..99a63bad0306c5699c8282b175f203bdc7bfba0a 100644
@@ -46,8 +46,8 @@ static void async_pf_execute(struct work_struct *work)
 {
        struct kvm_async_pf *apf =
                container_of(work, struct kvm_async_pf, work);
-       struct mm_struct *mm = apf->mm;
        struct kvm_vcpu *vcpu = apf->vcpu;
+       struct mm_struct *mm = vcpu->kvm->mm;
        unsigned long addr = apf->addr;
        gpa_t cr2_or_gpa = apf->cr2_or_gpa;
        int locked = 1;
@@ -56,15 +56,24 @@ static void async_pf_execute(struct work_struct *work)
        might_sleep();
 
        /*
-        * This work is run asynchronously to the task which owns
-        * mm and might be done in another context, so we must
-        * access remotely.
+        * Attempt to pin the VM's host address space, and simply skip gup() if
+        * acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
+        * holds a reference to its associated mm_struct until the very end of
+        * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
+        * work item is fully processed.
         */
-       mmap_read_lock(mm);
-       get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
-       if (locked)
-               mmap_read_unlock(mm);
+       if (mmget_not_zero(mm)) {
+               mmap_read_lock(mm);
+               get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
+               if (locked)
+                       mmap_read_unlock(mm);
+               mmput(mm);
+       }
 
+       /*
+        * Notify and kick the vCPU even if faulting in the page failed, e.g.
+        * so that the vCPU can retry the fault synchronously.
+        */
        if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
                kvm_arch_async_page_present(vcpu, apf);
 
@@ -74,20 +83,39 @@ static void async_pf_execute(struct work_struct *work)
        apf->vcpu = NULL;
        spin_unlock(&vcpu->async_pf.lock);
 
-       if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
-               kvm_arch_async_page_present_queued(vcpu);
-
        /*
-        * apf may be freed by kvm_check_async_pf_completion() after
-        * this point
+        * The apf struct may be freed by kvm_check_async_pf_completion() as
+        * soon as the lock is dropped.  Nullify it to prevent improper usage.
         */
+       apf = NULL;
+
+       if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
+               kvm_arch_async_page_present_queued(vcpu);
 
        trace_kvm_async_pf_completed(addr, cr2_or_gpa);
 
        __kvm_vcpu_wake_up(vcpu);
+}
 
-       mmput(mm);
-       kvm_put_kvm(vcpu->kvm);
+static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
+{
+       /*
+        * The async #PF is "done", but KVM must wait for the work item itself,
+        * i.e. async_pf_execute(), to run to completion.  If KVM is a module,
+        * KVM must ensure *no* code owned by KVM (the module) can be run
+        * after the last call to module_put().  Note, flushing the work item
+        * is always required when the item is taken off the completion queue.
+        * E.g. even if the vCPU handles the item in the "normal" path, the VM
+        * could be terminated before async_pf_execute() completes.
+        *
+        * Wake-all events skip the queue and go straight to done, i.e. don't
+        * need to be flushed (but sanity check that the work wasn't queued).
+        */
+       if (work->wakeup_all)
+               WARN_ON_ONCE(work->work.func);
+       else
+               flush_work(&work->work);
+       kmem_cache_free(async_pf_cache, work);
 }
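
The flush-before-free rule enforced here is the generic workqueue discipline rather than anything KVM-specific: once a work item may have been queued, neither the item nor the module text of its callback may be freed until the callback has run to completion. A kernel-style sketch under hypothetical my_work/my_cache names (flush_work() and kmem_cache_free() are the standard APIs):

#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_work {
	struct work_struct work;
	/* payload ... */
};

static struct kmem_cache *my_cache;

static void my_release(struct my_work *w)
{
	/*
	 * Wait for a possibly in-flight callback; once flush_work()
	 * returns, the callback can no longer touch *w or execute
	 * module code, so freeing the item is safe.
	 */
	flush_work(&w->work);
	kmem_cache_free(my_cache, w);
}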
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
@@ -112,11 +140,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_KVM_ASYNC_PF_SYNC
                flush_work(&work->work);
 #else
-               if (cancel_work_sync(&work->work)) {
-                       mmput(work->mm);
-                       kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
+               if (cancel_work_sync(&work->work))
                        kmem_cache_free(async_pf_cache, work);
-               }
 #endif
                spin_lock(&vcpu->async_pf.lock);
        }
@@ -126,7 +151,10 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                        list_first_entry(&vcpu->async_pf.done,
                                         typeof(*work), link);
                list_del(&work->link);
-               kmem_cache_free(async_pf_cache, work);
+
+               spin_unlock(&vcpu->async_pf.lock);
+               kvm_flush_and_free_async_pf_work(work);
+               spin_lock(&vcpu->async_pf.lock);
        }
        spin_unlock(&vcpu->async_pf.lock);
 
@@ -151,7 +179,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 
                list_del(&work->queue);
                vcpu->async_pf.queued--;
-               kmem_cache_free(async_pf_cache, work);
+               kvm_flush_and_free_async_pf_work(work);
        }
 }
 
@@ -184,9 +212,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
        work->cr2_or_gpa = cr2_or_gpa;
        work->addr = hva;
        work->arch = *arch;
-       work->mm = current->mm;
-       mmget(work->mm);
-       kvm_get_kvm(work->vcpu->kvm);
 
        INIT_WORK(&work->work, async_pf_execute);
 
index 0f50960b0e3a89215757163ad3b458c92670f4de..fb49c2a602002ed30a5f426203fa0e30be2436b0 100644
@@ -421,7 +421,7 @@ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity,
                if (WARN_ON_ONCE(!capacity))
                        return -EIO;
 
-               mc->objects = kvmalloc_array(sizeof(void *), capacity, gfp);
+               mc->objects = kvmalloc_array(capacity, sizeof(void *), gfp);
                if (!mc->objects)
                        return -ENOMEM;
 
@@ -890,7 +890,9 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
        /* Pairs with the increment in range_start(). */
        spin_lock(&kvm->mn_invalidate_lock);
-       wake = (--kvm->mn_active_invalidate_count == 0);
+       if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count))
+               --kvm->mn_active_invalidate_count;
+       wake = !kvm->mn_active_invalidate_count;
        spin_unlock(&kvm->mn_invalidate_lock);
 
        /*
@@ -1150,10 +1152,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
                                    &stat_fops_per_vm);
        }
 
-       ret = kvm_arch_create_vm_debugfs(kvm);
-       if (ret)
-               goto out_err;
-
+       kvm_arch_create_vm_debugfs(kvm);
        return 0;
 out_err:
        kvm_destroy_vm_debugfs(kvm);
@@ -1183,9 +1182,8 @@ void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
 * Cleanup should be automatically done in kvm_destroy_vm_debugfs() recursively, so
  * a per-arch destroy interface is not needed.
  */
-int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
+void __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
 {
-       return 0;
 }
 
 static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
@@ -1614,7 +1612,7 @@ static int check_memory_region_flags(struct kvm *kvm,
        if (mem->flags & KVM_MEM_GUEST_MEMFD)
                valid_flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
 
-#ifdef __KVM_HAVE_READONLY_MEM
+#ifdef CONFIG_HAVE_KVM_READONLY_MEM
        /*
         * GUEST_MEMFD is incompatible with read-only memslots, as writes to
         * read-only memslots have emulated MMIO, not page fault, semantics,
@@ -4048,6 +4046,18 @@ static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
        return false;
 }
 
+/*
+ * By default, simply query the target vCPU's current mode when checking if a
+ * vCPU was preempted in kernel mode.  All architectures except x86 (or more
+ * specifical, except VMX) allow querying whether or not a vCPU is in kernel
+ * mode even if the vCPU is NOT loaded, i.e. using kvm_arch_vcpu_in_kernel()
+ * directly for cross-vCPU checks is functionally correct and accurate.
+ */
+bool __weak kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
+{
+       return kvm_arch_vcpu_in_kernel(vcpu);
+}
+
 bool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
 {
        return false;
@@ -4084,9 +4094,16 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
                                continue;
                        if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu))
                                continue;
+
+                       /*
+                        * Treat the target vCPU as being in-kernel if it has a
+                        * pending interrupt, as the vCPU trying to yield may
+                        * be spinning waiting on IPI delivery, i.e. the target
+                        * vCPU is in-kernel for the purposes of directed yield.
+                        */
                        if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
                            !kvm_arch_dy_has_pending_interrupt(vcpu) &&
-                           !kvm_arch_vcpu_in_kernel(vcpu))
+                           !kvm_arch_vcpu_preempted_in_kernel(vcpu))
                                continue;
                        if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
                                continue;
index 2d6aba67783078180bb89e77cfa6ef61239d497d..4e07112a24c2f6d02f67ee22a7f7eaa15c5f098b 100644
 void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
                                       unsigned long end, bool may_block)
 {
-       DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
        struct gfn_to_pfn_cache *gpc;
-       bool evict_vcpus = false;
 
        spin_lock(&kvm->gpc_lock);
        list_for_each_entry(gpc, &kvm->gpc_list, list) {
-               write_lock_irq(&gpc->lock);
+               read_lock_irq(&gpc->lock);
 
                /* Only a single page so no need to care about length */
                if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
                    gpc->uhva >= start && gpc->uhva < end) {
-                       gpc->valid = false;
+                       read_unlock_irq(&gpc->lock);
 
                        /*
-                        * If a guest vCPU could be using the physical address,
-                        * it needs to be forced out of guest mode.
+                        * There is a small window here where the cache could
+                        * be modified, and invalidation would no longer be
+                        * necessary. Hence check again whether invalidation
+                        * is still necessary once the write lock has been
+                        * acquired.
                         */
-                       if (gpc->usage & KVM_GUEST_USES_PFN) {
-                               if (!evict_vcpus) {
-                                       evict_vcpus = true;
-                                       bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
-                               }
-                               __set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
-                       }
-               }
-               write_unlock_irq(&gpc->lock);
-       }
-       spin_unlock(&kvm->gpc_lock);
-
-       if (evict_vcpus) {
-               /*
-                * KVM needs to ensure the vCPU is fully out of guest context
-                * before allowing the invalidation to continue.
-                */
-               unsigned int req = KVM_REQ_OUTSIDE_GUEST_MODE;
-               bool called;
 
-               /*
-                * If the OOM reaper is active, then all vCPUs should have
-                * been stopped already, so perform the request without
-                * KVM_REQUEST_WAIT and be sad if any needed to be IPI'd.
-                */
-               if (!may_block)
-                       req &= ~KVM_REQUEST_WAIT;
-
-               called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap);
+                       write_lock_irq(&gpc->lock);
+                       if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
+                           gpc->uhva >= start && gpc->uhva < end)
+                               gpc->valid = false;
+                       write_unlock_irq(&gpc->lock);
+                       continue;
+               }
 
-               WARN_ON_ONCE(called && !may_block);
+               read_unlock_irq(&gpc->lock);
        }
+       spin_unlock(&kvm->gpc_lock);
 }
 
 bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
@@ -83,10 +64,17 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
        if (!gpc->active)
                return false;
 
-       if ((gpc->gpa & ~PAGE_MASK) + len > PAGE_SIZE)
+       /*
+        * If the page was cached from a memslot, make sure the memslots have
+        * not been re-configured.
+        */
+       if (!kvm_is_error_gpa(gpc->gpa) && gpc->generation != slots->generation)
+               return false;
+
+       if (kvm_is_error_hva(gpc->uhva))
                return false;
 
-       if (gpc->generation != slots->generation || kvm_is_error_hva(gpc->uhva))
+       if (offset_in_page(gpc->uhva) + len > PAGE_SIZE)
                return false;
 
        if (!gpc->valid)
@@ -94,19 +82,33 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
 
        return true;
 }
-EXPORT_SYMBOL_GPL(kvm_gpc_check);
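
For reference, consumers pair kvm_gpc_check() with kvm_gpc_refresh() in a check/refresh/retry loop, holding gpc->lock for reading across the actual access. A sketch modeled on KVM's existing users (the u32 payload read is illustrative):

static int read_cached_u32(struct gfn_to_pfn_cache *gpc, u32 *out)
{
	unsigned long flags;

	read_lock_irqsave(&gpc->lock, flags);
	while (!kvm_gpc_check(gpc, sizeof(*out))) {
		read_unlock_irqrestore(&gpc->lock, flags);

		/* Stale memslots, a changed HVA, or an invalidated cache. */
		if (kvm_gpc_refresh(gpc, sizeof(*out)))
			return -EFAULT;

		read_lock_irqsave(&gpc->lock, flags);
	}

	*out = *(u32 *)gpc->khva;	/* khva is the mapped kernel address */
	read_unlock_irqrestore(&gpc->lock, flags);
	return 0;
}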
 
-static void gpc_unmap_khva(kvm_pfn_t pfn, void *khva)
+static void *gpc_map(kvm_pfn_t pfn)
 {
-       /* Unmap the old pfn/page if it was mapped before. */
-       if (!is_error_noslot_pfn(pfn) && khva) {
-               if (pfn_valid(pfn))
-                       kunmap(pfn_to_page(pfn));
+       if (pfn_valid(pfn))
+               return kmap(pfn_to_page(pfn));
+
 #ifdef CONFIG_HAS_IOMEM
-               else
-                       memunmap(khva);
+       return memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
+#else
+       return NULL;
 #endif
+}
+
+static void gpc_unmap(kvm_pfn_t pfn, void *khva)
+{
+       /* Unmap the old pfn/page if it was mapped before. */
+       if (is_error_noslot_pfn(pfn) || !khva)
+               return;
+
+       if (pfn_valid(pfn)) {
+               kunmap(pfn_to_page(pfn));
+               return;
        }
+
+#ifdef CONFIG_HAS_IOMEM
+       memunmap(khva);
+#endif
 }
 
 static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
@@ -140,7 +142,7 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s
 static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 {
        /* Note, the new page offset may be different than the old! */
-       void *old_khva = gpc->khva - offset_in_page(gpc->khva);
+       void *old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
        kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT;
        void *new_khva = NULL;
        unsigned long mmu_seq;
@@ -175,7 +177,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
                         * the existing mapping and didn't create a new one.
                         */
                        if (new_khva != old_khva)
-                               gpc_unmap_khva(new_pfn, new_khva);
+                               gpc_unmap(new_pfn, new_khva);
 
                        kvm_release_pfn_clean(new_pfn);
 
@@ -192,20 +194,14 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
                 * pfn.  Note, kmap() and memremap() can both sleep, so this
                 * too must be done outside of gpc->lock!
                 */
-               if (gpc->usage & KVM_HOST_USES_PFN) {
-                       if (new_pfn == gpc->pfn) {
-                               new_khva = old_khva;
-                       } else if (pfn_valid(new_pfn)) {
-                               new_khva = kmap(pfn_to_page(new_pfn));
-#ifdef CONFIG_HAS_IOMEM
-                       } else {
-                               new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
-#endif
-                       }
-                       if (!new_khva) {
-                               kvm_release_pfn_clean(new_pfn);
-                               goto out_error;
-                       }
+               if (new_pfn == gpc->pfn)
+                       new_khva = old_khva;
+               else
+                       new_khva = gpc_map(new_pfn);
+
+               if (!new_khva) {
+                       kvm_release_pfn_clean(new_pfn);
+                       goto out_error;
                }
 
                write_lock_irq(&gpc->lock);
@@ -219,7 +215,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 
        gpc->valid = true;
        gpc->pfn = new_pfn;
-       gpc->khva = new_khva + (gpc->gpa & ~PAGE_MASK);
+       gpc->khva = new_khva + offset_in_page(gpc->uhva);
 
        /*
         * Put the reference to the _new_ pfn.  The pfn is now tracked by the
@@ -236,30 +232,31 @@ out_error:
        return -EFAULT;
 }
 
-static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
+static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
                             unsigned long len)
 {
-       struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
-       unsigned long page_offset = gpa & ~PAGE_MASK;
+       unsigned long page_offset;
        bool unmap_old = false;
        unsigned long old_uhva;
        kvm_pfn_t old_pfn;
+       bool hva_change = false;
        void *old_khva;
        int ret;
 
+       /* Either gpa or uhva must be valid, but not both */
+       if (WARN_ON_ONCE(kvm_is_error_gpa(gpa) == kvm_is_error_hva(uhva)))
+               return -EINVAL;
+
        /*
-        * If must fit within a single page. The 'len' argument is
-        * only to enforce that.
+        * The cached access must fit within a single page. The 'len' argument
+        * exists only to enforce that.
         */
+       page_offset = kvm_is_error_gpa(gpa) ? offset_in_page(uhva) :
+                                             offset_in_page(gpa);
        if (page_offset + len > PAGE_SIZE)
                return -EINVAL;
 
-       /*
-        * If another task is refreshing the cache, wait for it to complete.
-        * There is no guarantee that concurrent refreshes will see the same
-        * gpa, memslots generation, etc..., so they must be fully serialized.
-        */
-       mutex_lock(&gpc->refresh_lock);
+       lockdep_assert_held(&gpc->refresh_lock);
 
        write_lock_irq(&gpc->lock);
 
@@ -269,30 +266,52 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
        }
 
        old_pfn = gpc->pfn;
-       old_khva = gpc->khva - offset_in_page(gpc->khva);
-       old_uhva = gpc->uhva;
-
-       /* If the userspace HVA is invalid, refresh that first */
-       if (gpc->gpa != gpa || gpc->generation != slots->generation ||
-           kvm_is_error_hva(gpc->uhva)) {
-               gfn_t gfn = gpa_to_gfn(gpa);
-
-               gpc->gpa = gpa;
-               gpc->generation = slots->generation;
-               gpc->memslot = __gfn_to_memslot(slots, gfn);
-               gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);
-
-               if (kvm_is_error_hva(gpc->uhva)) {
-                       ret = -EFAULT;
-                       goto out;
+       old_khva = (void *)PAGE_ALIGN_DOWN((uintptr_t)gpc->khva);
+       old_uhva = PAGE_ALIGN_DOWN(gpc->uhva);
+
+       if (kvm_is_error_gpa(gpa)) {
+               gpc->gpa = INVALID_GPA;
+               gpc->memslot = NULL;
+               gpc->uhva = PAGE_ALIGN_DOWN(uhva);
+
+               if (gpc->uhva != old_uhva)
+                       hva_change = true;
+       } else {
+               struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
+
+               if (gpc->gpa != gpa || gpc->generation != slots->generation ||
+                   kvm_is_error_hva(gpc->uhva)) {
+                       gfn_t gfn = gpa_to_gfn(gpa);
+
+                       gpc->gpa = gpa;
+                       gpc->generation = slots->generation;
+                       gpc->memslot = __gfn_to_memslot(slots, gfn);
+                       gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);
+
+                       if (kvm_is_error_hva(gpc->uhva)) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
+
+                       /*
+                        * Even if the GPA and/or the memslot generation changed, the
+                        * HVA may still be the same.
+                        */
+                       if (gpc->uhva != old_uhva)
+                               hva_change = true;
+               } else {
+                       gpc->uhva = old_uhva;
                }
        }
 
+       /* Note: the offset must be correct before calling hva_to_pfn_retry() */
+       gpc->uhva += page_offset;
+
        /*
         * If the userspace HVA changed or the PFN was already invalid,
         * drop the lock and do the HVA to PFN lookup again.
         */
-       if (!gpc->valid || old_uhva != gpc->uhva) {
+       if (!gpc->valid || hva_change) {
                ret = hva_to_pfn_retry(gpc);
        } else {
                /*
@@ -323,41 +342,47 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
 out_unlock:
        write_unlock_irq(&gpc->lock);
 
-       mutex_unlock(&gpc->refresh_lock);
-
        if (unmap_old)
-               gpc_unmap_khva(old_pfn, old_khva);
+               gpc_unmap(old_pfn, old_khva);
 
        return ret;
 }
 
 int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
 {
-       return __kvm_gpc_refresh(gpc, gpc->gpa, len);
+       unsigned long uhva;
+
+       guard(mutex)(&gpc->refresh_lock);
+
+       /*
+        * If the GPA is valid then ignore the HVA, as a cache can be GPA-based
+        * or HVA-based, not both.  For GPA-based caches, the HVA will be
+        * recomputed during refresh if necessary.
+        */
+       uhva = kvm_is_error_gpa(gpc->gpa) ? gpc->uhva : KVM_HVA_ERR_BAD;
+
+       return __kvm_gpc_refresh(gpc, gpc->gpa, uhva, len);
 }
-EXPORT_SYMBOL_GPL(kvm_gpc_refresh);
 
-void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
-                 struct kvm_vcpu *vcpu, enum pfn_cache_usage usage)
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm)
 {
-       WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
-       WARN_ON_ONCE((usage & KVM_GUEST_USES_PFN) && !vcpu);
-
        rwlock_init(&gpc->lock);
        mutex_init(&gpc->refresh_lock);
 
        gpc->kvm = kvm;
-       gpc->vcpu = vcpu;
-       gpc->usage = usage;
        gpc->pfn = KVM_PFN_ERR_FAULT;
+       gpc->gpa = INVALID_GPA;
        gpc->uhva = KVM_HVA_ERR_BAD;
+       gpc->active = gpc->valid = false;
 }
-EXPORT_SYMBOL_GPL(kvm_gpc_init);
 
-int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
+static int __kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long uhva,
+                             unsigned long len)
 {
        struct kvm *kvm = gpc->kvm;
 
+       guard(mutex)(&gpc->refresh_lock);
+
        if (!gpc->active) {
                if (KVM_BUG_ON(gpc->valid, kvm))
                        return -EIO;
@@ -375,9 +400,18 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
                gpc->active = true;
                write_unlock_irq(&gpc->lock);
        }
-       return __kvm_gpc_refresh(gpc, gpa, len);
+       return __kvm_gpc_refresh(gpc, gpa, uhva, len);
+}
+
+int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
+{
+       return __kvm_gpc_activate(gpc, gpa, KVM_HVA_ERR_BAD, len);
+}
+
+int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long uhva, unsigned long len)
+{
+       return __kvm_gpc_activate(gpc, INVALID_GPA, uhva, len);
 }
-EXPORT_SYMBOL_GPL(kvm_gpc_activate);
 
 void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
 {
@@ -385,6 +419,8 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
        kvm_pfn_t old_pfn;
        void *old_khva;
 
+       guard(mutex)(&gpc->refresh_lock);
+
        if (gpc->active) {
                /*
                 * Deactivate the cache before removing it from the list, KVM
@@ -412,7 +448,6 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
                list_del(&gpc->list);
                spin_unlock(&kvm->gpc_lock);
 
-               gpc_unmap_khva(old_pfn, old_khva);
+               gpc_unmap(old_pfn, old_khva);
        }
 }
-EXPORT_SYMBOL_GPL(kvm_gpc_deactivate);
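
Taken together, the rework reduces a pfn cache's lifecycle to init/activate/deactivate, with activation now possible by GPA or, new in this series, by host virtual address. A sketch with illustrative wrapper names:

static int cache_setup_gpa(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
			   gpa_t gpa)
{
	kvm_gpc_init(gpc, kvm);		/* vcpu/usage arguments are gone */
	return kvm_gpc_activate(gpc, gpa, sizeof(u64));
}

static int cache_setup_hva(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
			   unsigned long uhva)
{
	kvm_gpc_init(gpc, kvm);
	return kvm_gpc_activate_hva(gpc, uhva, sizeof(u64));
}

static void cache_teardown(struct gfn_to_pfn_cache *gpc)
{
	kvm_gpc_deactivate(gpc);	/* unmaps and releases the pfn */
}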